#!/usr/local/bin/perl 
# Program to calculate the property distance (PD) between two peptide sequences of equal length
# Surendra S Negi   
# March 10th 2014
# Print the usage line and the list of papers to cite; this runs unconditionally
# on every invocation, before any computation.
# The whole banner is ONE double-quoted string that spans several source lines,
# so the "#" characters below are part of the printed text, not Perl comments,
# and the physical line breaks inside the string are printed as well.
print "\n Help: perl PD_Distance.pl \n
# Please cite following papers in your publications:\n 
# Ivanciuc, O., Schein, C.H. and Braun, W.  SDAP:  database and computational tools for allergenic proteins.  # Nucleic Acids Res.  31(1):359-362, 2003. 
# Ivanciuc, O.I., Schein, C.H. and Braun, W.  Data mining of sequences and 3D structures of allergenic proteins.  # Bioinformatics  18(10):1358-1364, 2002.
# Ivanciuc, O., Midoro-Horiuti, T., Schein, C.H., Xie, L., Hillman, G.R., Goldblum, R.M. and Braun, W.  The property distance index PD predicts peptides that cross-react with IgE antibodies. # Mol. Immunol., 46(5), 873-883, 2009.
# Negi, S.S. and Braun, W.  Automated Detection of Conformational Epitopes using Phage Display Peptide Sequences.  Bioinform. Biol. Insights,  3, 71-81, 2009.
# Venkatarajan, M.S. and Braun, W.  New quantitative descriptors of amino acids based on multidimensional scaling of a large number of physical-chemical properties.  # J. Mol. Model.  7(12):445-453, 2001. \n\n\n ";
#

use strict;
use warnings;
# User-editable inputs: change the two peptides and the gap penalty here.
# Both peptides are compared position by position, so they must have the
# same number of residues.
my ($Sequence1, $Sequence2) = ('VIPAARLFK', 'VIPAARLWD');

# Penalty applied at a position where either peptide has a gap or an unknown
# residue. The default value is 20.
my $gap = 20.0;

# File-level working variables, predeclared because the script runs under
# "use strict". $k is not referenced anywhere in the visible code.
my ($k, $m);

# Scratch scalars, five per peptide, available to the comparison loop.
my ($value1,  $value2,  $value3,  $value4,  $value5);
my ($xvalue1, $xvalue2, $xvalue3, $xvalue4, $xvalue5);

# Peptide lengths and the residue currently examined in each peptide.
my ($seq_lgth1, $seq_lgth2);
my ($seq1, $seq2);

# Per-position distance, running total and final averaged result; all three
# start at zero.
my ($DifferencePD, $PD_Distance, $finalpd_distance) = (0, 0, 0);
# Five numeric descriptor tables, one per descriptor axis. Every table is keyed
# by the same 21 symbols: the twenty upper-case one-letter amino-acid codes
# plus "-" (all zeros).
# NOTE(review): S and T carry the same value in %vector3, as do D and Y in
# %vector2 -- confirm against the published descriptor table.
my %vector1 = (
    'A' =>   0.354, 'R' =>   7.573, 'N' =>  11.294, 'D' =>  13.420, 'C' =>  -5.846,
    'Q' =>   6.599, 'E' =>   9.788, 'G' =>   9.655, 'H' =>   1.019, 'I' => -15.634,
    'L' => -11.825, 'K' =>  10.762, 'M' => -10.585, 'F' => -14.571, 'P' =>   7.662,
    'S' =>   8.813, 'T' =>   3.012, 'W' => -13.110, 'Y' =>  -6.245, 'V' => -12.135,
    '-' =>   0.000,
);

my %vector2 = (
    'A' =>   3.762, 'R' => -10.135, 'N' =>   1.067, 'D' =>  -1.600, 'C' =>   4.885,
    'Q' =>  -5.166, 'E' =>  -7.861, 'G' =>  15.778, 'H' =>  -4.969, 'I' =>   1.993,
    'L' =>   0.505, 'K' =>  -9.517, 'M' =>  -3.959, 'F' =>  -0.646, 'P' =>   8.029,
    'S' =>   6.682, 'T' =>   4.127, 'W' =>  -5.222, 'Y' =>  -1.600, 'V' =>   3.818,
    '-' =>   0.000,
);

my %vector3 = (
    'A' => -11.036, 'R' =>   2.486, 'N' =>   2.718, 'D' =>  -0.325, 'C' =>   1.626,
    'Q' =>  -0.697, 'E' =>  -7.318, 'G' =>  -0.558, 'H' =>   0.953, 'I' =>  -2.045,
    'L' =>  -6.157, 'K' =>  -1.022, 'M' =>  -3.601, 'F' =>   1.673, 'P' =>   9.456,
    'S' =>  -0.348, 'T' =>  -0.348, 'W' =>   9.038, 'Y' =>   9.874, 'V' =>  -4.345,
    '-' =>   0.000,
);

my %vector4 = (
    'A' =>  -0.649, 'R' =>  -4.291, 'N' =>   1.963, 'D' =>   3.742, 'C' =>   9.397,
    'Q' =>   0.582, 'E' =>   2.611, 'G' =>   0.299, 'H' =>   4.657, 'I' =>  -3.243,
    'L' =>  -4.557, 'K' =>  -5.405, 'M' =>   5.339, 'F' =>  -0.033, 'P' =>  -3.576,
    'S' =>  -1.131, 'T' =>  -2.195, 'W' =>   1.380, 'Y' =>  -1.597, 'V' =>  -3.260,
    '-' =>   0.000,
);

my %vector5 = (
    'A' =>   2.828, 'R' =>  -5.687, 'N' =>  -0.859, 'D' =>   2.437, 'C' =>  -5.843,
    'Q' =>  -1.750, 'E' =>   4.734, 'G' =>   1.656, 'H' =>  -0.328, 'I' =>  -1.672,
    'L' =>   3.219, 'K' =>  -0.422, 'M' =>   1.203, 'F' =>   3.250, 'P' =>   6.000,
    'S' =>  -3.062, 'T' =>  -4.281, 'W' =>   4.640, 'Y' =>  -1.422, 'V' =>  -4.672,
    '-' =>   0.000,
);

# Measure both peptides. The comparison below pairs residues by position, so
# peptides of different length cannot be compared.
$seq_lgth1 = length $Sequence1;
$seq_lgth2 = length $Sequence2;

if ($seq_lgth1 != $seq_lgth2) {
    # The message stays on STDOUT with its original wording so that anything
    # reading the script's output keeps seeing it in the same place.
    print " ERROR: Peptides are of different length. Length of Peptide1: $seq_lgth1 and Peptide2 : $seq_lgth2 \n";

    # A bare "exit" reports status 0 (success). Exit with a non-zero status so
    # that shells and calling scripts can tell the run failed.
    exit 1;
}

# Compare the two peptides position by position and print the average of the
# per-position distances.
#
# At each position the distance is the Euclidean distance between the two
# residues' five descriptor values. A position where either peptide has a gap
# ("-") or a symbol with no descriptor entry contributes a fixed penalty
# instead: $gap on each of the five axes.
my @descriptor_tables = (\%vector1, \%vector2, \%vector3, \%vector4, \%vector5);

# An empty peptide has no positions to average over. Stop with an error here
# rather than letting the final division die with "Illegal division by zero".
if ($seq_lgth1 == 0) {
    print " ERROR: Peptide sequences are empty, PD distance is undefined. \n";
    exit 1;
}

for my $position (0 .. $seq_lgth1 - 1) {
    # The descriptor tables are keyed by upper-case one-letter codes, so fold
    # case before looking a residue up.
    my $residue1 = uc substr($Sequence1, $position, 1);
    my $residue2 = uc substr($Sequence2, $position, 1);

    # "-" has an (all-zero) table entry, so it needs an explicit test. Any
    # symbol without an entry -- "X" as before, but also e.g. "B", "Z" or "*" --
    # is treated as unknown; looking it up would yield undef and silently
    # distort the distance. All five tables share one key set, so checking the
    # first table is sufficient.
    my $gap_or_unknown = grep { $_ eq "-" || !exists $vector1{$_} }
                              ($residue1, $residue2);

    if ($gap_or_unknown) {
        print " Gap or unknown residue found in peptide sequence \n";
        $DifferencePD = sqrt(scalar(@descriptor_tables) * $gap * $gap);
    }
    else {
        # Sum of squared differences over the five axes, in table order.
        my $squared_sum = 0;
        for my $table (@descriptor_tables) {
            my $delta = $table->{$residue1} - $table->{$residue2};
            $squared_sum += $delta * $delta;
        }
        $DifferencePD = sqrt($squared_sum);
    }

    $PD_Distance += $DifferencePD;
}

# Normalise by the peptide length so that peptides of different sizes yield
# comparable values.
$finalpd_distance = $PD_Distance / $seq_lgth1;
print "  Sequence 1:: $Sequence1  :: Sequence 2 :: $Sequence2 :PD_DISTANCE::$finalpd_distance\n";
exit;
