blob: ecb2f161be816fe9839e672b3b46033e96115d08 [file] [log] [blame]
#include "mltaln.h"
#define DEBUG 0
int seqlen( char *seq )
{
int val = 0;
while( *seq )
if( *seq++ != '-' ) val++;
return( val );
}
int intlen( int *num )
{
int value = 0;
while( *num++ != -1 ) value++;
return( value );
}
char seqcheck( char **seq )
{
int i, len;
char **seqbk = seq;
while( *seq )
{
len = strlen( *seq );
for( i=0; i<len; i++ )
{
if( amino_n[(int)(*seq)[i]] == -1 )
{
fprintf( stderr, "========================================================================= \n" );
fprintf( stderr, "========================================================================= \n" );
fprintf( stderr, "========================================================================= \n" );
fprintf( stderr, "=== \n" );
fprintf( stderr, "=== Alphabet '%c' is unknown.\n", (*seq)[i] );
fprintf( stderr, "=== Please check site %d in sequence %d.\n", i+1, (int)(seq-seqbk+1) );
fprintf( stderr, "=== \n" );
fprintf( stderr, "========================================================================= \n" );
fprintf( stderr, "========================================================================= \n" );
fprintf( stderr, "========================================================================= \n" );
return( (int)(*seq)[i] );
}
}
seq++;
}
return( 0 );
}
void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx )
{
int i, j, lgth;
lgth = strlen( aseq[0] );
for( j=0; j<lgth; j++ )
{
for( i=0; i<26; i++ )
{
scmx[i][j] = 0;
}
}
for( i=0; i<icyc+1; i++ )
{
int id;
id = amino_n[(int)aseq[i][0]];
scmx[id][0] += (float)effarr[i];
}
for( j=1; j<lgth-1; j++ )
{
for( i=0; i<icyc+1; i++ )
{
int id;
id = amino_n[(int)aseq[i][j]];
scmx[id][j] += (float)effarr[i];
}
}
for( i=0; i<icyc+1; i++ )
{
int id;
id = amino_n[(int)aseq[i][lgth-1]];
scmx[id][lgth-1] += (float)effarr[i];
}
}
void exitall( char arr[] )
{
fprintf( stderr, "%s\n", arr );
exit( 1 );
}
void display( char **seq, int nseq )
{
int i, imax;
char b[121];
if( !disp ) return;
if( nseq > DISPSEQF ) imax = DISPSEQF;
else imax = nseq;
fprintf( stderr, " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" );
for( i=0; i<+imax; i++ )
{
strncpy( b, seq[i]+DISPSITEI, 120 );
b[120] = 0;
fprintf( stderr, "%3d %s\n", i+1, b );
}
}
#if 0
double intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len )
{
int i, j, k;
double score;
double tmpscore;
char *mseq1, *mseq2;
double efficient;
char xxx[100];
// totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
// totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
score = 0.0;
for( i=0; i<clus1; i++ ) for( j=0; j<clus2; j++ )
{
efficient = eff1[i] * eff2[j];
mseq1 = seq1[i];
mseq2 = seq2[j];
tmpscore = 0.0;
for( k=0; k<len; k++ )
{
if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
if( mseq1[k] == '-' )
{
tmpscore += penalty;
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
while( mseq1[++k] == '-' )
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
k--;
if( k >len-2 ) break;
continue;
}
if( mseq2[k] == '-' )
{
tmpscore += penalty;
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
while( mseq2[++k] == '-' )
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
k--;
if( k > len-2 ) break;
continue;
}
}
score += (double)tmpscore * efficient;
#if 1
sprintf( xxx, "%f", score );
// fprintf( stderr, "## score in intergroup_score = %f\n", score );
#endif
}
#if 0
fprintf( stderr, "###score = %f\n", score );
#endif
#if 0
fprintf( stderr, "## score in intergroup_score = %f\n", score );
#endif
return( score );
}
#endif
void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
{
int i, j, k;
int len2 = len - 2;
int ms1, ms2;
double tmpscore;
char *mseq1, *mseq2;
double efficient;
// totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
// totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
*value = 0.0;
for( i=0; i<clus1; i++ )
{
for( j=0; j<clus2; j++ )
{
efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */
mseq1 = seq1[i];
mseq2 = seq2[j];
tmpscore = 0.0;
for( k=0; k<len; k++ )
{
ms1 = (int)mseq1[k];
ms2 = (int)mseq2[k];
if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
if( ms1 == (int)'-' )
{
tmpscore += (double)penalty;
tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
while( (ms1=(int)mseq1[++k]) == (int)'-' )
;
// tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
k--;
if( k >len2 ) break;
continue;
}
if( ms2 == (int)'-' )
{
tmpscore += (double)penalty;
tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
while( (ms2=(int)mseq2[++k]) == (int)'-' )
;
// tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
k--;
if( k > len2 ) break;
continue;
}
}
*value += (double)tmpscore * (double)efficient;
// fprintf( stderr, "val in _gapnomi = %f\n", *value );
}
}
#if 0
fprintf( stdout, "###score = %f\n", score );
#endif
#if DEBUG
fprintf( stderr, "score in intergroup_score = %f\n", score );
#endif
// return( score );
}
void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
{
int i, j, k;
int len2 = len - 2;
int ms1, ms2;
double tmpscore;
char *mseq1, *mseq2;
double efficient;
// totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
// totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
*value = 0.0;
for( i=0; i<clus1; i++ )
{
for( j=0; j<clus2; j++ )
{
efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */
mseq1 = seq1[i];
mseq2 = seq2[j];
tmpscore = 0.0;
for( k=0; k<len; k++ )
{
ms1 = (int)mseq1[k];
ms2 = (int)mseq2[k];
if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
// tmpscore += (double)amino_dis[ms1][ms2];
if( ms1 == (int)'-' )
{
tmpscore += (double)penalty;
// tmpscore += (double)amino_dis[ms1][ms2];
while( (ms1=(int)mseq1[++k]) == (int)'-' )
;
// tmpscore += (double)amino_dis[ms1][ms2];
k--;
if( k >len2 ) break;
continue;
}
if( ms2 == (int)'-' )
{
tmpscore += (double)penalty;
// tmpscore += (double)amino_dis[ms1][ms2];
while( (ms2=(int)mseq2[++k]) == (int)'-' )
;
// tmpscore += (double)amino_dis[ms1][ms2];
k--;
if( k > len2 ) break;
continue;
}
}
*value += (double)tmpscore * (double)efficient;
// fprintf( stderr, "val in _gapnomi = %f\n", *value );
}
}
#if 0
fprintf( stdout, "###score = %f\n", score );
#endif
#if DEBUG
fprintf( stderr, "score in intergroup_score = %f\n", score );
#endif
// return( score );
}
void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
{
int i, j, k;
int len2 = len - 2;
int ms1, ms2;
double tmpscore;
char *mseq1, *mseq2;
double efficient;
double gaptmpscore;
double gapscore = 0.0;
// fprintf( stderr, "#### in intergroup_score\n" );
// totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
// totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
*value = 0.0;
for( i=0; i<clus1; i++ )
{
for( j=0; j<clus2; j++ )
{
efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */
mseq1 = seq1[i];
mseq2 = seq2[j];
tmpscore = 0.0;
gaptmpscore = 0.0;
for( k=0; k<len; k++ )
{
ms1 = (int)mseq1[k];
ms2 = (int)mseq2[k];
if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
// tmpscore += (double)amino_dis[ms1][ms2];
tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
if( ms1 == (int)'-' )
{
tmpscore += (double)penalty;
gaptmpscore += (double)penalty;
// tmpscore += (double)amino_dis[ms1][ms2];
tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
while( (ms1=(int)mseq1[++k]) == (int)'-' )
// tmpscore += (double)amino_dis[ms1][ms2];
tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
k--;
if( k >len2 ) break;
continue;
}
if( ms2 == (int)'-' )
{
tmpscore += (double)penalty;
gaptmpscore += (double)penalty;
// tmpscore += (double)amino_dis[ms1][ms2];
tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
while( (ms2=(int)mseq2[++k]) == (int)'-' )
// tmpscore += (double)amino_dis[ms1][ms2];
tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
k--;
if( k > len2 ) break;
continue;
}
}
*value += (double)tmpscore * (double)efficient;
gapscore += (double)gaptmpscore * (double)efficient;
}
}
#if 0
fprintf( stderr, "###gapscore = %f\n", gapscore );
#endif
#if DEBUG
fprintf( stderr, "score in intergroup_score = %f\n", score );
#endif
// return( score );
}
void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
{
int i, j, k;
int len2 = len - 2;
int ms1, ms2;
double tmpscore;
char *mseq1, *mseq2;
static double efficient[1];
// totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
// totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
*value = 0.0;
for( i=0; i<clus1; i++ )
{
for( j=0; j<clus2; j++ )
{
*efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */
mseq1 = seq1[i];
mseq2 = seq2[j];
tmpscore = 0.0;
for( k=0; k<len; k++ )
{
ms1 = (int)mseq1[k];
ms2 = (int)mseq2[k];
if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
tmpscore += (double)amino_dis[ms1][ms2];
if( ms1 == (int)'-' )
{
tmpscore += (double)penalty;
tmpscore += (double)amino_dis[ms1][ms2];
while( (ms1=(int)mseq1[++k]) == (int)'-' )
tmpscore += (double)amino_dis[ms1][ms2];
k--;
if( k >len2 ) break;
continue;
}
if( ms2 == (int)'-' )
{
tmpscore += (double)penalty;
tmpscore += (double)amino_dis[ms1][ms2];
while( (ms2=(int)mseq2[++k]) == (int)'-' )
tmpscore += (double)amino_dis[ms1][ms2];
k--;
if( k > len2 ) break;
continue;
}
}
*value += (double)tmpscore * (double)*efficient;
}
}
#if 0
fprintf( stdout, "###score = %f\n", score );
#endif
#if DEBUG
fprintf( stderr, "score in intergroup_score = %f\n", score );
#endif
// return( score );
}
double score_calc3( char **seq, int s, double **eff, int ex ) /* method 3 */
{
int i, j, k, c;
int len = strlen( seq[0] );
double score;
long tmpscore;
static char mseq1[N*2], mseq2[N*2];
double totaleff;
switch( weight )
{
case 0:
totaleff = ( (double)s * ((double)s-1.0) ) / 2.0;
break;
case 2:
totaleff = s / 2;
break;
case 3:
totaleff = 0.0; for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ ) totaleff += eff[i][j];
break;
default:
fprintf( stderr, "error\n" );
exit( 1 );
}
score = 0.0;
for( i=0; i<s-1; i++ )
{
for( j=i+1; j<s; j++ )
{
strcpy( mseq1, seq[i] );
strcpy( mseq2, seq[j] );
tmpscore = 0;
c = 0;
for( k=0; k<len; k++ )
{
if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]] + 400 * !scoremtx;
c++;
if( mseq1[k] == '-' )
{
tmpscore += penalty - n_dis[0][24];
while( mseq1[++k] == '-' )
;
k--;
if( k > len-2 ) break;
continue;
}
if( mseq2[k] == '-' )
{
tmpscore += penalty - n_dis[0][24];
while( mseq2[++k] == '-' )
;
k--;
if( k > len-2 ) break;
continue;
}
}
/*
if( mseq1[0] == '-' || mseq2[0] == '-' )
{
for( k=0; k<len; k++ )
{
if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
if( !( mseq1[k] != '-' && mseq2[k] != '-' ) )
{
c--;
tmpscore -= SGAPP;
break;
}
else break;
}
}
if( mseq1[len-1] == '-' || mseq2[len-1] == '-' )
{
for( k=len-1; k>=0; k-- )
{
if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
if( !( mseq1[k] != '-' && mseq2[k] != '-' ) )
{
c--;
tmpscore -= SGAPP;
break;
}
else break;
}
}
*/
/*
if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len )
;
*/
score += (double)tmpscore / (double)c * eff[i][j];
}
}
if( weight == 0 )
score /= totaleff;
return( (double)score );
}
double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */
{
int i, j, k;
double c;
int len = strlen( seq[0] );
double score;
double tmpscore;
char *mseq1, *mseq2;
double efficient;
#if DEBUG
FILE *fp;
#endif
score = 0.0;
c = 0.0;
for( i=0; i<s; i++ )
{
if( i == ex ) continue;
efficient = eff[i][ex];
mseq1 = seq[i];
mseq2 = seq[ex];
tmpscore = 0.0;
for( k=0; k<len; k++ )
{
if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
if( mseq1[k] == '-' )
{
tmpscore += penalty;
while( mseq1[++k] == '-' )
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
k--;
if( k > len-2 ) break;
continue;
}
if( mseq2[k] == '-' )
{
tmpscore += penalty;
while( mseq2[++k] == '-' )
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
k--;
if( k > len-2 ) break;
continue;
}
}
score += (double)tmpscore * efficient;
/*
fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient );
*/
}
/*
fprintf( stdout, "total score = %f\n", score );
*/
for( i=0; i<s-1; i++ )
{
for( j=i+1; j<s; j++ )
{
if( i == ex || j == ex ) continue;
efficient = eff[i][j];
mseq1 = seq[i];
mseq2 = seq[j];
tmpscore = 0.0;
for( k=0; k<len; k++ )
{
if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
if( mseq1[k] == '-' )
{
tmpscore += penalty;
while( mseq1[++k] == '-' )
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
k--;
if( k > len-2 ) break;
continue;
}
if( mseq2[k] == '-' )
{
tmpscore += penalty;
while( mseq2[++k] == '-' )
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
k--;
if( k > len-2 ) break;
continue;
}
}
score += (double)tmpscore * efficient;
}
}
/*
fprintf( stderr, "score in score_calc5 = %f\n", score );
*/
return( (double)score );
/*
fprintf( trap_g, "score by fast = %f\n", (float)score );
tmpscore = score = 0.0;
for( i=0; i<s; i++ )
{
if( i == ex ) continue;
tmpscore = Cscore_m_1( seq, i, eff );
fprintf( stdout, "%d %f\n", i, tmpscore );
score += tmpscore;
}
tmpscore = Cscore_m_1( seq, ex, eff );
fprintf( stdout, "ex%d %f\n", i, tmpscore );
score += tmpscore;
return( score );
*/
}
double score_calc4( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */
{
int i, j, k;
double c;
int len = strlen( seq[0] );
double score;
long tmpscore;
char *mseq1, *mseq2;
double efficient;
score = 0.0;
c = 0.0;
/*
printf( "in score_calc4\n" );
for( i=0; i<s; i++ )
{
for( j=0; j<s; j++ )
{
printf( "% 5.3f", eff[i][j] );
}
printf( "\n" );
}
*/
for( i=0; i<s-1; i++ )
{
for( j=i+1; j<s; j++ )
{
efficient = eff[i][j];
if( mix == 1 ) efficient = 1.0;
/*
printf( "weight for %d v.s. %d = %f\n", i, j, efficient );
*/
mseq1 = seq[i];
mseq2 = seq[j];
tmpscore = 0;
for( k=0; k<len; k++ )
{
if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]] + 400 * !scoremtx ;
c += efficient;
if( mseq1[k] == '-' )
{
tmpscore += penalty - n_dis[24][0];
while( mseq1[++k] == '-' )
;
k--;
if( k > len-2 ) break;
continue;
}
if( mseq2[k] == '-' )
{
tmpscore += penalty - n_dis[24][0];
while( mseq2[++k] == '-' )
;
k--;
if( k > len-2 ) break;
continue;
}
}
/*
if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len );
*/
score += (double)tmpscore * efficient;
}
}
score /= c;
return( (double)score );
}
void upg2( int nseq, double **eff, int ***topol, double **len )
{
int i, j, k;
double tmplen[M];
static char **pair = NULL;
if( !pair )
{
pair = AllocateCharMtx( njob, njob );
}
for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
for( i=0; i<nseq; i++ ) pair[i][i] = 1;
for( k=0; k<nseq-1; k++ )
{
float minscore = 9999.0;
int im = -1, jm = -1;
int count;
for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
{
if( eff[i][j] < minscore )
{
minscore = eff[i][j];
im = i; jm = j;
}
}
for( i=0, count=0; i<nseq; i++ )
if( pair[im][i] > 0 )
{
topol[k][0][count] = i;
count++;
}
topol[k][0][count] = -1;
for( i=0, count=0; i<nseq; i++ )
if( pair[jm][i] > 0 )
{
topol[k][1][count] = i;
count++;
}
topol[k][1][count] = -1;
len[k][0] = minscore / 2.0 - tmplen[im];
len[k][1] = minscore / 2.0 - tmplen[jm];
tmplen[im] = minscore / 2.0;
for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
for( i=0; i<nseq; i++ )
{
if( i != im && i != jm )
{
eff[MIN(i,im)][MAX(i,im)] =
( eff[MIN(i,im)][MAX(i,im)] + eff[MIN(i,jm)][MAX(i,jm)] ) / 2.0;
eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
}
eff[im][jm] = 9999.0;
}
#if DEBUG
printf( "STEP-%03d:\n", k+1 );
printf( "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
printf( "\n" );
printf( "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
printf( "\n" );
#endif
}
}
static void setnearest( int nseq, Bchain *acpt, float **eff, float *mindisfrompt, int *nearestpt, int pos )
{
int j;
float tmpfloat;
float **effptpt;
Bchain *acptj;
*mindisfrompt = 999.9;
*nearestpt = -1;
// if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos;
// for( j=pos+1; j<nseq; j++ )
for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next )
{
j = acptj->pos;
// if( (tmpfloat=*effpt++) < *mindisfrompt )
if( (tmpfloat=eff[pos][j-pos]) < *mindisfrompt )
{
*mindisfrompt = tmpfloat;
*nearestpt = j;
}
}
effptpt = eff;
// for( j=0; j<pos; j++ )
for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next )
{
j = acptj->pos;
// if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt )
if( (tmpfloat=eff[j][pos-j]) < *mindisfrompt )
{
*mindisfrompt = tmpfloat;
*nearestpt = j;
}
}
}
static void loadtreeoneline( int *ar, float *len, FILE *fp )
{
static char gett[1000];
fgets( gett, 999, fp );
// fprintf( stderr, "gett=%s\n", gett );
sscanf( gett, "%d %d %a %a", ar, ar+1, len, len+1 );
ar[0]--;
ar[1]--;
if( ar[0] >= ar[1] )
{
fprintf( stderr, "Incorrect guide tree\n" );
exit( 1 );
}
// fprintf( stderr, "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] );
// fprintf( stderr, "len[0] = %f, len[1] = %f\n", len[0], len[1] );
}
void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
static int *hist = NULL;
static Bchain *ac = NULL;
int im = -1, jm = -1;
Bchain *acjmnext, *acjmprev;
int prevnode;
Bchain *acpti;
int *pt1, *pt2, *pt11, *pt22;
static int *nmemar;
int nmemim, nmemjm;
float minscore;
int *nearest = NULL; // by D.Mathog, a guess
float *mindisfrom = NULL; // by D.Mathog, a guess
static char **tree;
static char *treetmp;
static char *nametmp;
FILE *fp;
int node[2];
fp = fopen( "_guidetree", "r" );
if( !fp )
{
fprintf( stderr, "cannot open _guidetree\n" );
exit( 1 );
}
if( !hist )
{
hist = AllocateIntVec( njob );
ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
nmemar = AllocateIntVec( njob );
mindisfrom = AllocateFloatVec( njob );
nearest = AllocateIntVec( njob );
treetmp = AllocateCharVec( njob*50 );
nametmp = AllocateCharVec( 30 );
tree = AllocateCharMtx( njob, njob*50 );
}
for( i=0; i<nseq; i++ )
{
for( j=0; j<30; j++ ) nametmp[j] = 0;
for( j=0; j<30; j++ )
{
if( isalnum( name[i][j] ) )
nametmp[j] = name[i][j];
else
nametmp[j] = '_';
}
nametmp[30] = 0;
// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
}
for( i=0; i<nseq; i++ )
{
ac[i].next = ac+i+1;
ac[i].prev = ac+i-1;
ac[i].pos = i;
}
ac[nseq-1].next = NULL;
for( i=0; i<nseq; i++ )
{
hist[i] = -1;
nmemar[i] = 1;
}
fprintf( stderr, "\n" );
for( k=0; k<nseq-1; k++ )
{
if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
#if 0
minscore = 999.9;
for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
{
i = acpti->pos;
// fprintf( stderr, "k=%d i=%d\n", k, i );
if( mindisfrom[i] < minscore ) // muscle
{
im = i;
minscore = mindisfrom[i];
}
}
jm = nearest[im];
if( jm < im )
{
j=jm; jm=im; im=j;
}
#else
minscore = 0.0;
len[k][0] = len[k][1] = -1.0;
loadtreeoneline( node, len[k], fp );
im = node[0];
jm = node[1];
if( len[k][0] == -1.0 || len[k][1] == -1.0 )
{
fprintf( stderr, "\n\nERROR: Branch length is not given.\n" );
exit( 1 );
}
if( len[k][0] < 0.0 ) len[k][0] = 0.0;
if( len[k][1] < 0.0 ) len[k][1] = 0.0;
#endif
prevnode = hist[im];
nmemim = nmemar[im];
// fprintf( stderr, "prevnode = %d, nmemim = %d\n", prevnode, nmemim );
intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
if( prevnode == -1 )
{
*intpt++ = im;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
nmemjm = nmemar[jm];
prevnode = hist[jm];
// fprintf( stderr, "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm );
intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
if( !intpt )
{
fprintf( stderr, "Cannot reallocate topol\n" );
exit( 1 );
}
if( prevnode == -1 )
{
*intpt++ = jm;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
minscore *= 0.5;
// len[k][0] = ( minscore - tmptmplen[im] );
// len[k][1] = ( minscore - tmptmplen[jm] );
// len[k][0] = -1;
// len[k][1] = -1;
hist[im] = k;
nmemar[im] = nmemim + nmemjm;
mindisfrom[im] = 999.9;
for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
i = acpti->pos;
if( i != im && i != jm )
{
if( i < im )
{
miniim = i;
maxiim = im;
minijm = i;
maxijm = jm;
}
else if( i < jm )
{
miniim = im;
maxiim = i;
minijm = i;
maxijm = jm;
}
else
{
miniim = im;
maxiim = i;
minijm = jm;
maxijm = i;
}
}
}
sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
strcpy( tree[im], treetmp );
// fprintf( stderr, "im,jm=%d,%d\n", im, jm );
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
acjmprev->next = acjmnext;
if( acjmnext != NULL )
acjmnext->prev = acjmprev;
// free( (void *)eff[jm] ); eff[jm] = NULL;
#if 0 // muscle seems to miss this.
for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
i = acpti->pos;
if( nearest[i] == im )
{
// fprintf( stderr, "calling setnearest\n" );
// setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
}
}
#endif
#if 0
fprintf( stdout, "vSTEP-%03d:\n", k+1 );
fprintf( stdout, "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
fprintf( stdout, "\n" );
fprintf( stdout, "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
fprintf( stdout, "\n" );
#endif
}
fclose( fp );
fp = fopen( "infile.tree", "w" );
fprintf( fp, "%s\n", treetmp );
fprintf( fp, "#by loadtree\n" );
fclose( fp );
FreeCharMtx( tree );
free( treetmp );
free( nametmp );
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
free( (void *)nmemar ); nmemar = NULL;
free( mindisfrom );
free( nearest );
}
static float sueff1, sueff05;
static double sueff1_double, sueff05_double;
static float cluster_mix_float( float d1, float d2 )
{
return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 );
}
static float cluster_average_float( float d1, float d2 )
{
return( ( d1 + d2 ) * 0.5 );
}
static float cluster_minimum_float( float d1, float d2 )
{
return( MIN( d1, d2 ) );
}
static double cluster_mix_double( double d1, double d2 )
{
return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double );
}
static double cluster_average_double( double d1, double d2 )
{
return( ( d1 + d2 ) * 0.5 );
}
static double cluster_minimum_double( double d1, double d2 )
{
return( MIN( d1, d2 ) );
}
void loadtop( int nseq, float **eff, int ***topol, float **len ) // computes branch length BUG!!
{
int i, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
static Bchain *ac = NULL;
float eff1, eff0;
static float *tmptmplen = NULL;
static int *hist = NULL;
int im = -1, jm = -1;
Bchain *acjmnext, *acjmprev;
int prevnode;
Bchain *acpti;
int *pt1, *pt2, *pt11, *pt22;
static int *nmemar;
int nmemim, nmemjm;
float minscore;
static char **tree;
static char *treetmp;
FILE *fp;
int node[2];
float dumfl[2];
float (*clusterfuncpt[1])(float,float);
sueff1 = 1 - SUEFF;
sueff05 = SUEFF * 0.5;
if ( treemethod == 'X' )
clusterfuncpt[0] = cluster_mix_float;
else if ( treemethod == 'E' )
clusterfuncpt[0] = cluster_average_float;
else if ( treemethod == 'q' )
clusterfuncpt[0] = cluster_minimum_float;
else
{
fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
exit( 1 );
}
fp = fopen( "_guidetree", "r" );
if( !fp )
{
fprintf( stderr, "cannot open _guidetree\n" );
exit( 1 );
}
if( !hist )
{
treetmp = AllocateCharVec( njob*50 );
tree = AllocateCharMtx( njob, njob*50 );
hist = AllocateIntVec( njob );
tmptmplen = AllocateFloatVec( njob );
ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
nmemar = AllocateIntVec( njob );
}
for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
for( i=0; i<nseq; i++ )
{
ac[i].next = ac+i+1;
ac[i].prev = ac+i-1;
ac[i].pos = i;
}
ac[nseq-1].next = NULL;
for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
for( i=0; i<nseq; i++ )
{
hist[i] = -1;
nmemar[i] = 1;
}
fprintf( stderr, "\n" );
for( k=0; k<nseq-1; k++ )
{
if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
#if 0
minscore = 99999.9;
for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
{
effpt = eff[i=acpti->pos];
// i = acpti->pos;
for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
{
// j=acptj->pos;
// tmpfloat = eff[i][j-i];
// if( tmpfloat < minscore )
if( (tmpfloat= effpt[(j=acptj->pos)-i]) < minscore )
{
minscore = tmpfloat;
im = i; jm = j;
}
}
}
// fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
#else
dumfl[0] = dumfl[1] = -1.0;
loadtreeoneline( node, dumfl, fp );
im = node[0];
jm = node[1];
minscore = eff[im][jm-im];
// fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
if( dumfl[0] != -1.0 || dumfl[1] != -1.0 )
{
fprintf( stderr, "\n\nERROR: Branch length should not be given.\n" );
exit( 1 );
}
#endif
prevnode = hist[im];
nmemim = nmemar[im];
intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
if( prevnode == -1 )
{
*intpt++ = im;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
prevnode = hist[jm];
nmemjm = nmemar[jm];
intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
if( !intpt )
{
fprintf( stderr, "Cannot reallocate topol\n" );
exit( 1 );
}
if( prevnode == -1 )
{
*intpt++ = jm;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
minscore *= 0.5;
len[k][0] = ( minscore - tmptmplen[im] );
len[k][1] = ( minscore - tmptmplen[jm] );
if( len[k][0] < 0.0 ) len[k][0] = 0.0;
if( len[k][1] < 0.0 ) len[k][1] = 0.0;
tmptmplen[im] = minscore;
hist[im] = k;
nmemar[im] = nmemim + nmemjm;
for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
i = acpti->pos;
if( i != im && i != jm )
{
if( i < im )
{
miniim = i;
maxiim = im;
minijm = i;
maxijm = jm;
}
else if( i < jm )
{
miniim = im;
maxiim = i;
minijm = i;
maxijm = jm;
}
else
{
miniim = im;
maxiim = i;
minijm = jm;
maxijm = i;
}
eff0 = eff[miniim][maxiim-miniim];
eff1 = eff[minijm][maxijm-minijm];
#if 0
eff[miniim][maxiim-miniim] =
MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
#else
eff[miniim][maxiim-miniim] =
(clusterfuncpt[0])( eff0, eff1 );
#endif
}
}
// sprintf( treetmp, "(%s,%s)", tree[im], tree[jm] );
sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
strcpy( tree[im], treetmp );
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
acjmprev->next = acjmnext;
if( acjmnext != NULL )
acjmnext->prev = acjmprev;
free( (void *)eff[jm] ); eff[jm] = NULL;
#if 0
fprintf( stdout, "vSTEP-%03d:\n", k+1 );
fprintf( stdout, "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
fprintf( stdout, "\n" );
fprintf( stdout, "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
fprintf( stdout, "\n" );
#endif
}
#if 1
fclose( fp );
fp = fopen( "infile.tree", "w" );
fprintf( fp, "%s\n", treetmp );
fprintf( fp, "by loadtop\n" );
fclose( fp );
#endif
free( (void *)tmptmplen ); tmptmplen = NULL;
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
free( (void *)nmemar ); nmemar = NULL;
}
void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
float tmpfloat;
float eff1, eff0;
static float *tmptmplen = NULL;
static int *hist = NULL;
static Bchain *ac = NULL;
int im = -1, jm = -1;
Bchain *acjmnext, *acjmprev;
int prevnode;
Bchain *acpti;
int *pt1, *pt2, *pt11, *pt22;
static int *nmemar;
int nmemim, nmemjm;
float minscore;
int *nearest = NULL; // by D.Mathog, a guess
float *mindisfrom = NULL; // by D.Mathog, a guess
static char **tree;
static char *treetmp;
static char *nametmp;
FILE *fp;
float (*clusterfuncpt[1])(float,float);
sueff1 = 1 - SUEFF;
sueff05 = SUEFF * 0.5;
if ( treemethod == 'X' )
clusterfuncpt[0] = cluster_mix_float;
else if ( treemethod == 'E' )
clusterfuncpt[0] = cluster_average_float;
else if ( treemethod == 'q' )
clusterfuncpt[0] = cluster_minimum_float;
else
{
fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
exit( 1 );
}
if( !hist )
{
hist = AllocateIntVec( njob );
tmptmplen = AllocateFloatVec( njob );
ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
nmemar = AllocateIntVec( njob );
mindisfrom = AllocateFloatVec( njob );
nearest = AllocateIntVec( njob );
treetmp = AllocateCharVec( njob*50 );
nametmp = AllocateCharVec( 30 );
tree = AllocateCharMtx( njob, njob*50 );
}
for( i=0; i<nseq; i++ )
{
for( j=0; j<30; j++ ) nametmp[j] = 0;
for( j=0; j<30; j++ )
{
if( isalnum( name[i][j] ) )
nametmp[j] = name[i][j];
else
nametmp[j] = '_';
}
nametmp[30] = 0;
// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
}
for( i=0; i<nseq; i++ )
{
ac[i].next = ac+i+1;
ac[i].prev = ac+i-1;
ac[i].pos = i;
}
ac[nseq-1].next = NULL;
for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
for( i=0; i<nseq; i++ )
{
hist[i] = -1;
nmemar[i] = 1;
}
fprintf( stderr, "\n" );
for( k=0; k<nseq-1; k++ )
{
if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
minscore = 999.9;
for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
{
i = acpti->pos;
// fprintf( stderr, "k=%d i=%d\n", k, i );
if( mindisfrom[i] < minscore ) // muscle
{
im = i;
minscore = mindisfrom[i];
}
}
jm = nearest[im];
if( jm < im )
{
j=jm; jm=im; im=j;
}
prevnode = hist[im];
nmemim = nmemar[im];
intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
if( prevnode == -1 )
{
*intpt++ = im;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
prevnode = hist[jm];
nmemjm = nmemar[jm];
intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
if( !intpt )
{
fprintf( stderr, "Cannot reallocate topol\n" );
exit( 1 );
}
if( prevnode == -1 )
{
*intpt++ = jm;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
minscore *= 0.5;
len[k][0] = ( minscore - tmptmplen[im] );
len[k][1] = ( minscore - tmptmplen[jm] );
tmptmplen[im] = minscore;
hist[im] = k;
nmemar[im] = nmemim + nmemjm;
mindisfrom[im] = 999.9;
for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
i = acpti->pos;
if( i != im && i != jm )
{
if( i < im )
{
miniim = i;
maxiim = im;
minijm = i;
maxijm = jm;
}
else if( i < jm )
{
miniim = im;
maxiim = i;
minijm = i;
maxijm = jm;
}
else
{
miniim = im;
maxiim = i;
minijm = jm;
maxijm = i;
}
eff0 = eff[miniim][maxiim-miniim];
eff1 = eff[minijm][maxijm-minijm];
#if 0
tmpfloat = eff[miniim][maxiim-miniim] =
MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
#else
tmpfloat = eff[miniim][maxiim-miniim] =
(clusterfuncpt[0])( eff0, eff1 );
#endif
if( tmpfloat < mindisfrom[i] )
{
mindisfrom[i] = tmpfloat;
nearest[i] = im;
}
if( tmpfloat < mindisfrom[im] )
{
mindisfrom[im] = tmpfloat;
nearest[im] = i;
}
if( nearest[i] == jm )
{
nearest[i] = im;
}
}
}
sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
strcpy( tree[im], treetmp );
// fprintf( stderr, "im,jm=%d,%d\n", im, jm );
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
acjmprev->next = acjmnext;
if( acjmnext != NULL )
acjmnext->prev = acjmprev;
free( (void *)eff[jm] ); eff[jm] = NULL;
#if 1 // muscle seems to miss this.
for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
i = acpti->pos;
if( nearest[i] == im )
{
// fprintf( stderr, "calling setnearest\n" );
setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
}
}
#endif
#if 0
fprintf( stdout, "vSTEP-%03d:\n", k+1 );
fprintf( stdout, "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
fprintf( stdout, "\n" );
fprintf( stdout, "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
fprintf( stdout, "\n" );
#endif
}
fp = fopen( "infile.tree", "w" );
fprintf( fp, "%s\n", treetmp );
fclose( fp );
FreeCharMtx( tree );
free( treetmp );
free( nametmp );
free( (void *)tmptmplen ); tmptmplen = NULL;
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
free( (void *)nmemar ); nmemar = NULL;
free( mindisfrom );
free( nearest );
}
void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int ***topol, float **len )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
float tmpfloat;
float eff1, eff0;
static float *tmptmplen = NULL;
static int *hist = NULL;
static Bchain *ac = NULL;
int im = -1, jm = -1;
Bchain *acjmnext, *acjmprev;
int prevnode;
Bchain *acpti;
int *pt1, *pt2, *pt11, *pt22;
static int *nmemar;
int nmemim, nmemjm;
float minscore;
// float sueff1 = 1 - SUEFF;
// float sueff05 = SUEFF * 0.5;
int *nearest = NULL; // by Mathog, a guess
float *mindisfrom = NULL; // by Mathog, a guess
float (*clusterfuncpt[1])(float,float);
sueff1 = 1 - SUEFF;
sueff05 = SUEFF * 0.5;
if ( treemethod == 'X' )
clusterfuncpt[0] = cluster_mix_float;
else if ( treemethod == 'E' )
clusterfuncpt[0] = cluster_average_float;
else if ( treemethod == 'q' )
clusterfuncpt[0] = cluster_minimum_float;
else
{
fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
exit( 1 );
}
if( !hist )
{
hist = AllocateIntVec( njob );
tmptmplen = AllocateFloatVec( njob );
ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
nmemar = AllocateIntVec( njob );
mindisfrom = AllocateFloatVec( njob );
nearest = AllocateIntVec( njob );
}
for( i=0; i<nseq; i++ )
{
ac[i].next = ac+i+1;
ac[i].prev = ac+i-1;
ac[i].pos = i;
}
ac[nseq-1].next = NULL;
for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
for( i=0; i<nseq; i++ )
{
hist[i] = -1;
nmemar[i] = 1;
}
fprintf( stderr, "\n" );
for( k=0; k<nseq-1; k++ )
{
if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
minscore = 999.9;
for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
{
i = acpti->pos;
// fprintf( stderr, "k=%d i=%d\n", k, i );
if( mindisfrom[i] < minscore ) // muscle
{
im = i;
minscore = mindisfrom[i];
}
}
jm = nearest[im];
if( jm < im )
{
j=jm; jm=im; im=j;
}
prevnode = hist[im];
nmemim = nmemar[im];
intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
if( prevnode == -1 )
{
*intpt++ = im;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
prevnode = hist[jm];
nmemjm = nmemar[jm];
intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
if( !intpt )
{
fprintf( stderr, "Cannot reallocate topol\n" );
exit( 1 );
}
if( prevnode == -1 )
{
*intpt++ = jm;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
minscore *= 0.5;
len[k][0] = ( minscore - tmptmplen[im] );
len[k][1] = ( minscore - tmptmplen[jm] );
tmptmplen[im] = minscore;
hist[im] = k;
nmemar[im] = nmemim + nmemjm;
mindisfrom[im] = 999.9;
for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
i = acpti->pos;
if( i != im && i != jm )
{
if( i < im )
{
miniim = i;
maxiim = im;
minijm = i;
maxijm = jm;
}
else if( i < jm )
{
miniim = im;
maxiim = i;
minijm = i;
maxijm = jm;
}
else
{
miniim = im;
maxiim = i;
minijm = jm;
maxijm = i;
}
eff0 = eff[miniim][maxiim-miniim];
eff1 = eff[minijm][maxijm-minijm];
tmpfloat = eff[miniim][maxiim-miniim] =
#if 0
MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
#else
(clusterfuncpt[0])( eff0, eff1 );
#endif
if( tmpfloat < mindisfrom[i] )
{
mindisfrom[i] = tmpfloat;
nearest[i] = im;
}
if( tmpfloat < mindisfrom[im] )
{
mindisfrom[im] = tmpfloat;
nearest[im] = i;
}
if( nearest[i] == jm )
{
nearest[i] = im;
}
}
}
// fprintf( stderr, "im,jm=%d,%d\n", im, jm );
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
acjmprev->next = acjmnext;
if( acjmnext != NULL )
acjmnext->prev = acjmprev;
free( (void *)eff[jm] ); eff[jm] = NULL;
#if 1 // muscle seems to miss this.
for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
i = acpti->pos;
if( nearest[i] == im )
{
// fprintf( stderr, "calling setnearest\n" );
setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
}
}
#endif
#if 0
fprintf( stdout, "vSTEP-%03d:\n", k+1 );
fprintf( stdout, "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
fprintf( stdout, "\n" );
fprintf( stdout, "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
fprintf( stdout, "\n" );
#endif
}
free( (void *)tmptmplen ); tmptmplen = NULL;
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
free( (void *)nmemar ); nmemar = NULL;
free( mindisfrom );
free( nearest );
}
void veryfastsupg_double_loadtop( int nseq, double **eff, int ***topol, double **len ) // BUG!!!
{
int i, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
double eff1, eff0;
static double *tmptmplen = NULL;
static int *hist = NULL;
static Achain *ac = NULL;
double minscore;
static char **tree;
static char *treetmp;
int im = -1, jm = -1;
int prevnode, acjmnext, acjmprev;
int *pt1, *pt2, *pt11, *pt22;
FILE *fp;
int node[2];
float dumfl[2];
fp = fopen( "_guidetree", "r" );
if( !fp )
{
fprintf( stderr, "cannot open _guidetree\n" );
exit( 1 );
}
if( !hist )
{
treetmp = AllocateCharVec( njob*50 );
tree = AllocateCharMtx( njob, njob*50 );
hist = AllocateIntVec( njob );
tmptmplen = (double *)malloc( njob * sizeof( double ) );
ac = (Achain *)malloc( njob * sizeof( Achain ) );
}
for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
for( i=0; i<nseq; i++ )
{
ac[i].next = i+1;
ac[i].prev = i-1;
// ac[i].curr = i;
}
ac[nseq-1].next = -1;
for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
for( i=0; i<nseq; i++ ) hist[i] = -1;
fprintf( stderr, "\n" );
for( k=0; k<nseq-1; k++ )
{
if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
#if 0
minscore = 99999.9;
for( i=0; ac[i].next!=-1; i=ac[i].next )
{
for( j=ac[i].next; j!=-1; j=ac[j].next )
{
tmpdouble = eff[i][j];
if( tmpdouble < minscore )
{
minscore = tmpdouble;
im = i; jm = j;
}
}
}
#else
dumfl[0] = dumfl[1] = -1.0;
loadtreeoneline( node, dumfl, fp );
im = node[0];
jm = node[1];
minscore = eff[im][jm];
// fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
if( dumfl[0] != -1.0 || dumfl[1] != -1.0 )
{
fprintf( stderr, "\n\nBranch length should not given.\n" );
exit( 1 );
}
#endif
// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
intpt = topol[k][0];
prevnode = hist[im];
if( prevnode == -1 )
{
*intpt++ = im;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
intpt = topol[k][1];
prevnode = hist[jm];
if( prevnode == -1 )
{
*intpt++ = jm;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
minscore *= 0.5;
len[k][0] = minscore - tmptmplen[im];
len[k][1] = minscore - tmptmplen[jm];
if( len[k][0] < 0.0 ) len[k][0] = 0.0;
if( len[k][1] < 0.0 ) len[k][1] = 0.0;
tmptmplen[im] = minscore;
hist[im] = k;
for( i=0; i!=-1; i=ac[i].next )
{
if( i != im && i != jm )
{
if( i < im )
{
miniim = i;
maxiim = im;
minijm = i;
maxijm = jm;
}
else if( i < jm )
{
miniim = im;
maxiim = i;
minijm = i;
maxijm = jm;
}
else
{
miniim = im;
maxiim = i;
minijm = jm;
maxijm = i;
}
eff0 = eff[miniim][maxiim];
eff1 = eff[minijm][maxijm];
eff[miniim][maxiim] =
MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
( eff0 + eff1 ) * 0.5 * SUEFF;
}
}
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
ac[acjmprev].next = acjmnext;
if( acjmnext != -1 )
ac[acjmnext].prev = acjmprev;
sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
strcpy( tree[im], treetmp );
#if 0
fprintf( stdout, "STEP-%03d:\n", k+1 );
fprintf( stdout, "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
fprintf( stdout, "\n" );
fprintf( stdout, "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
fprintf( stdout, "\n" );
#endif
}
fclose( fp );
fp = fopen( "infile.tree", "w" );
fprintf( fp, "%s\n", treetmp );
// fprintf( fp, "by veryfastsupg_double_loadtop\n" );
fclose( fp );
#if 1
fprintf( stderr, "\n" );
free( (void *)tmptmplen ); tmptmplen = NULL;
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
FreeCharMtx( tree );
free( treetmp );
#endif
}
void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len )
{
int i, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
double eff1, eff0;
static double *tmptmplen = NULL;
static int *hist = NULL;
static Achain *ac = NULL;
double minscore;
static char **tree;
static char *treetmp;
int im = -1, jm = -1;
int prevnode, acjmnext, acjmprev;
int *pt1, *pt2, *pt11, *pt22;
FILE *fp;
int node[2];
float lenfl[2];
fp = fopen( "_guidetree", "r" );
if( !fp )
{
fprintf( stderr, "cannot open _guidetree\n" );
exit( 1 );
}
if( !hist )
{
treetmp = AllocateCharVec( njob*50 );
tree = AllocateCharMtx( njob, njob*50 );
hist = AllocateIntVec( njob );
tmptmplen = (double *)malloc( njob * sizeof( double ) );
ac = (Achain *)malloc( njob * sizeof( Achain ) );
}
for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
for( i=0; i<nseq; i++ )
{
ac[i].next = i+1;
ac[i].prev = i-1;
// ac[i].curr = i;
}
ac[nseq-1].next = -1;
for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
for( i=0; i<nseq; i++ ) hist[i] = -1;
fprintf( stderr, "\n" );
for( k=0; k<nseq-1; k++ )
{
if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
#if 0
minscore = 99999.9;
for( i=0; ac[i].next!=-1; i=ac[i].next )
{
for( j=ac[i].next; j!=-1; j=ac[j].next )
{
tmpdouble = eff[i][j];
if( tmpdouble < minscore )
{
minscore = tmpdouble;
im = i; jm = j;
}
}
}
#else
lenfl[0] = lenfl[1] = -1.0;
loadtreeoneline( node, lenfl, fp );
im = node[0];
jm = node[1];
minscore = eff[im][jm];
// fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
if( lenfl[0] == -1.0 || lenfl[1] == -1.0 )
{
fprintf( stderr, "\n\nWARNING: Branch length is not given.\n" );
exit( 1 );
}
if( lenfl[0] < 0.0 ) lenfl[0] = 0.0;
if( lenfl[1] < 0.0 ) lenfl[1] = 0.0;
#endif
// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
intpt = topol[k][0];
prevnode = hist[im];
if( prevnode == -1 )
{
*intpt++ = im;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
intpt = topol[k][1];
prevnode = hist[jm];
if( prevnode == -1 )
{
*intpt++ = jm;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
minscore *= 0.5;
#if 0
len[k][0] = minscore - tmptmplen[im];
len[k][1] = minscore - tmptmplen[jm];
#else
len[k][0] = lenfl[0];
len[k][1] = lenfl[1];
#endif
tmptmplen[im] = minscore;
hist[im] = k;
for( i=0; i!=-1; i=ac[i].next )
{
if( i != im && i != jm )
{
if( i < im )
{
miniim = i;
maxiim = im;
minijm = i;
maxijm = jm;
}
else if( i < jm )
{
miniim = im;
maxiim = i;
minijm = i;
maxijm = jm;
}
else
{
miniim = im;
maxiim = i;
minijm = jm;
maxijm = i;
}
eff0 = eff[miniim][maxiim];
eff1 = eff[minijm][maxijm];
eff[miniim][maxiim] =
MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
( eff0 + eff1 ) * 0.5 * SUEFF;
}
}
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
ac[acjmprev].next = acjmnext;
if( acjmnext != -1 )
ac[acjmnext].prev = acjmprev;
sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
strcpy( tree[im], treetmp );
#if 0
fprintf( stdout, "STEP-%03d:\n", k+1 );
fprintf( stdout, "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
fprintf( stdout, "\n" );
fprintf( stdout, "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
fprintf( stdout, "\n" );
#endif
}
fclose( fp );
fp = fopen( "infile.tree", "w" );
fprintf( fp, "%s\n", treetmp );
// fprintf( fp, "by veryfastsupg_double_loadtree\n" );
fclose( fp );
#if 1
fprintf( stderr, "\n" );
free( (void *)tmptmplen ); tmptmplen = NULL;
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
FreeCharMtx( tree );
free( treetmp );
#endif
}
#if 0
void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
double tmpdouble;
double eff1, eff0;
static double *tmptmplen = NULL;
static int *hist = NULL;
static Achain *ac = NULL;
double minscore;
int im = -1, jm = -1;
int prevnode, acjmnext, acjmprev;
int *pt1, *pt2, *pt11, *pt22;
if( !hist )
{
hist = AllocateIntVec( njob );
tmptmplen = (double *)malloc( njob * sizeof( double ) );
ac = (Achain *)malloc( njob * sizeof( Achain ) );
}
for( i=0; i<nseq; i++ )
{
ac[i].next = i+1;
ac[i].prev = i-1;
// ac[i].curr = i;
}
ac[nseq-1].next = -1;
for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
for( i=0; i<nseq; i++ ) hist[i] = -1;
fprintf( stderr, "\n" );
for( k=0; k<nseq-1; k++ )
{
if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
minscore = 99999.9;
for( i=0; ac[i].next!=-1; i=ac[i].next )
{
for( j=ac[i].next; j!=-1; j=ac[j].next )
{
tmpdouble = eff[i][j];
if( tmpdouble < minscore )
{
minscore = tmpdouble;
im = i; jm = j;
}
}
}
// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
intpt = topol[k][0];
prevnode = hist[im];
if( prevnode == -1 )
{
*intpt++ = im;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
intpt = topol[k][1];
prevnode = hist[jm];
if( prevnode == -1 )
{
*intpt++ = jm;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
minscore *= 0.5;
len[k][0] = minscore - tmptmplen[im];
len[k][1] = minscore - tmptmplen[jm];
tmptmplen[im] = minscore;
hist[im] = k;
for( i=0; i!=-1; i=ac[i].next )
{
if( i != im && i != jm )
{
if( i < im )
{
miniim = i;
maxiim = im;
minijm = i;
maxijm = jm;
}
else if( i < jm )
{
miniim = im;
maxiim = i;
minijm = i;
maxijm = jm;
}
else
{
miniim = im;
maxiim = i;
minijm = jm;
maxijm = i;
}
eff0 = eff[miniim][maxiim];
eff1 = eff[minijm][maxijm];
eff[miniim][maxiim] =
MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
( eff0 + eff1 ) * 0.5 * SUEFF;
}
}
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
ac[acjmprev].next = acjmnext;
if( acjmnext != -1 )
ac[acjmnext].prev = acjmprev;
#if 0
fprintf( stdout, "STEP-%03d:\n", k+1 );
fprintf( stdout, "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
fprintf( stdout, "\n" );
fprintf( stdout, "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
fprintf( stdout, "\n" );
#endif
}
#if 1
fprintf( stderr, "\n" );
free( (void *)tmptmplen ); tmptmplen = NULL;
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
#endif
}
#endif
void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char name[M][B] )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
double tmpdouble;
double eff1, eff0;
static double *tmptmplen = NULL;
static int *hist = NULL;
static Achain *ac = NULL;
double minscore;
static char **tree;
static char *treetmp;
static char *nametmp;
FILE *fpout;
int im = -1, jm = -1;
int prevnode, acjmnext, acjmprev;
int *pt1, *pt2, *pt11, *pt22;
double (*clusterfuncpt[1])(double,double);
sueff1_double = 1 - SUEFF;
sueff05_double = SUEFF * 0.5;
if ( treemethod == 'X' )
clusterfuncpt[0] = cluster_mix_double;
else if ( treemethod == 'E' )
clusterfuncpt[0] = cluster_average_double;
else if ( treemethod == 'q' )
clusterfuncpt[0] = cluster_minimum_double;
else
{
fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
exit( 1 );
}
if( !hist )
{
treetmp = AllocateCharVec( njob*50 );
tree = AllocateCharMtx( njob, njob*50 );
hist = AllocateIntVec( njob );
tmptmplen = (double *)malloc( njob * sizeof( double ) );
ac = (Achain *)malloc( njob * sizeof( Achain ) );
nametmp = AllocateCharVec( 30 );
}
// for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
for( i=0; i<nseq; i++ )
{
for( j=0; j<30; j++ ) nametmp[j] = 0;
for( j=0; j<30; j++ )
{
if( isalnum( name[i][j] ) )
nametmp[j] = name[i][j];
else
nametmp[j] = '_';
}
nametmp[30] = 0;
sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
}
for( i=0; i<nseq; i++ )
{
ac[i].next = i+1;
ac[i].prev = i-1;
// ac[i].curr = i;
}
ac[nseq-1].next = -1;
for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
for( i=0; i<nseq; i++ ) hist[i] = -1;
fprintf( stderr, "\n" );
for( k=0; k<nseq-1; k++ )
{
if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
minscore = 99999.9;
for( i=0; ac[i].next!=-1; i=ac[i].next )
{
for( j=ac[i].next; j!=-1; j=ac[j].next )
{
tmpdouble = eff[i][j];
if( tmpdouble < minscore )
{
minscore = tmpdouble;
im = i; jm = j;
}
}
}
// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
intpt = topol[k][0];
prevnode = hist[im];
if( prevnode == -1 )
{
*intpt++ = im;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
intpt = topol[k][1];
prevnode = hist[jm];
if( prevnode == -1 )
{
*intpt++ = jm;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
minscore *= 0.5;
len[k][0] = minscore - tmptmplen[im];
len[k][1] = minscore - tmptmplen[jm];
tmptmplen[im] = minscore;
hist[im] = k;
for( i=0; i!=-1; i=ac[i].next )
{
if( i != im && i != jm )
{
if( i < im )
{
miniim = i;
maxiim = im;
minijm = i;
maxijm = jm;
}
else if( i < jm )
{
miniim = im;
maxiim = i;
minijm = i;
maxijm = jm;
}
else
{
miniim = im;
maxiim = i;
minijm = jm;
maxijm = i;
}
eff0 = eff[miniim][maxiim];
eff1 = eff[minijm][maxijm];
#if 0
eff[miniim][maxiim] =
MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
( eff0 + eff1 ) * 0.5 * SUEFF;
#else
eff[miniim][maxiim] =
(clusterfuncpt[0])( eff0, eff1 );
#endif
}
}
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
ac[acjmprev].next = acjmnext;
if( acjmnext != -1 )
ac[acjmnext].prev = acjmprev;
sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
strcpy( tree[im], treetmp );
#if 0
fprintf( stdout, "STEP-%03d:\n", k+1 );
fprintf( stdout, "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
fprintf( stdout, "\n" );
fprintf( stdout, "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
fprintf( stdout, "\n" );
#endif
}
fpout = fopen( "infile.tree", "w" );
fprintf( fpout, "%s\n", treetmp );
// fprintf( fpout, "by veryfastsupg_double_outtree\n" );
fclose( fpout );
#if 1
fprintf( stderr, "\n" );
free( (void *)tmptmplen ); tmptmplen = NULL;
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
FreeCharMtx( tree );
free( treetmp );
free( nametmp );
#endif
}
void veryfastsupg( int nseq, double **oeff, int ***topol, double **len )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
int tmpint;
int eff1, eff0;
static double *tmptmplen = NULL;
static int **eff = NULL;
static int *hist = NULL;
static Achain *ac = NULL;
int minscore;
double minscoref;
int im = -1, jm = -1;
int prevnode, acjmnext, acjmprev;
int *pt1, *pt2, *pt11, *pt22;
if( !eff )
{
eff = AllocateIntMtx( njob, njob );
hist = AllocateIntVec( njob );
tmptmplen = (double *)malloc( njob * sizeof( double ) );
ac = (Achain *)malloc( njob * sizeof( Achain ) );
}
for( i=0; i<nseq; i++ )
{
for( j=0; j<nseq; j++ )
{
eff[i][j] = (int)( oeff[i][j] * INTMTXSCALE + 0.5 );
}
}
for( i=0; i<nseq; i++ )
{
ac[i].next = i+1;
ac[i].prev = i-1;
// ac[i].curr = i;
}
ac[nseq-1].next = -1;
for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
for( i=0; i<nseq; i++ ) hist[i] = -1;
fprintf( stderr, "\n" );
for( k=0; k<nseq-1; k++ )
{
if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
minscore = INTMTXSCALE*4;
for( i=0; ac[i].next!=-1; i=ac[i].next )
{
for( j=ac[i].next; j!=-1; j=ac[j].next )
{
tmpint = eff[i][j];
if( tmpint < minscore )
{
minscore = tmpint;
im = i; jm = j;
}
}
}
minscoref = (double)minscore * 0.5 / ( INTMTXSCALE );
// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
#if 1
intpt = topol[k][0];
prevnode = hist[im];
if( prevnode == -1 )
{
*intpt++ = im;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
intpt = topol[k][1];
prevnode = hist[jm];
if( prevnode == -1 )
{
*intpt++ = jm;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
#else
intpt = topol[k][0];
for( i=0; i<nseq; i++ )
if( pair[im][i] > -2 )
*intpt++ = i;
*intpt = -1;
intpt = topol[k][1];
for( i=0; i<nseq; i++ )
if( pair[jm][i] > -2 )
*intpt++ = i;
*intpt = -1;
#endif
len[k][0] = minscoref - tmptmplen[im];
len[k][1] = minscoref - tmptmplen[jm];
tmptmplen[im] = minscoref;
hist[im] = k;
for( i=0; i!=-1; i=ac[i].next )
{
if( i != im && i != jm )
{
if( i < im )
{
miniim = i;
maxiim = im;
minijm = i;
maxijm = jm;
}
else if( i < jm )
{
miniim = im;
maxiim = i;
minijm = i;
maxijm = jm;
}
else
{
miniim = im;
maxiim = i;
minijm = jm;
maxijm = i;
}
eff0 = eff[miniim][maxiim];
eff1 = eff[minijm][maxijm];
eff[miniim][maxiim] =
MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
( eff0 + eff1 ) * 0.5 * SUEFF;
}
}
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
ac[acjmprev].next = acjmnext;
if( acjmnext != -1 )
ac[acjmnext].prev = acjmprev;
#if 0
fprintf( stdout, "STEP-%03d:\n", k+1 );
fprintf( stdout, "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
fprintf( stdout, "\n" );
fprintf( stdout, "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
fprintf( stdout, "\n" );
#endif
}
#if 1
FreeIntMtx( eff ); eff = NULL;
free( (void *)tmptmplen ); tmptmplen = NULL;
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
#endif
}
void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len )
/* len$B$O!"(B oeff$B$,@0?t!#(Blen$B$b<B$O@0?t!#(B
$BI,MW$K1~$8$F3d$C$F;H$&!#(B */
{
int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
int tmpint;
int eff1, eff0;
static int *tmptmplen = NULL;
static int **eff = NULL;
static int *hist = NULL;
static Achain *ac = NULL;
int minscore;
int im = -1, jm = -1;
int prevnode, acjmnext, acjmprev;
int *pt1, *pt2, *pt11, *pt22;
if( !eff )
{
eff = AllocateIntMtx( njob, njob );
hist = AllocateIntVec( njob );
tmptmplen = AllocateIntVec( njob );
ac = (Achain *)malloc( njob * sizeof( Achain ) );
}
for( i=0; i<nseq; i++ )
{
for( j=0; j<nseq; j++ )
{
eff[i][j] = ( oeff[i][j] );
}
}
for( i=0; i<nseq; i++ )
{
ac[i].next = i+1;
ac[i].prev = i-1;
// ac[i].curr = i;
}
ac[nseq-1].next = -1;
for( i=0; i<nseq; i++ ) tmptmplen[i] = 0;
for( i=0; i<nseq; i++ ) hist[i] = -1;
fprintf( stderr, "\n" );
for( k=0; k<nseq-1; k++ )
{
if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
minscore = INTMTXSCALE*4;
for( i=0; ac[i].next!=-1; i=ac[i].next )
{
for( j=ac[i].next; j!=-1; j=ac[j].next )
{
tmpint = eff[i][j];
if( tmpint < minscore )
{
minscore = tmpint;
im = i; jm = j;
}
}
}
// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
intpt = topol[k][0];
prevnode = hist[im];
if( prevnode == -1 )
{
*intpt++ = im;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
intpt = topol[k][1];
prevnode = hist[jm];
if( prevnode == -1 )
{
*intpt++ = jm;
*intpt = -1;
}
else
{
pt1 = topol[prevnode][0];
pt2 = topol[prevnode][1];
if( *pt1 > *pt2 )
{
pt11 = pt2;
pt22 = pt1;
}
else
{
pt11 = pt1;
pt22 = pt2;
}
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
}
minscore *= 0.5;
len[k][0] = (double)( minscore - tmptmplen[im] );
len[k][1] = (double)( minscore - tmptmplen[jm] );
tmptmplen[im] = minscore;
#if 0
free( tmptmplen );
tmptmplen = AllocateIntVec( nseq );
#endif
hist[im] = k;
for( i=0; i!=-1; i=ac[i].next )
{
if( i != im && i != jm )
{
if( i < im )
{
miniim = i;
maxiim = im;
minijm = i;
maxijm = jm;
}
else if( i < jm )
{
miniim = im;
maxiim = i;
minijm = i;
maxijm = jm;
}
else
{
miniim = im;
maxiim = i;
minijm = jm;
maxijm = i;
}
eff0 = eff[miniim][maxiim];
eff1 = eff[minijm][maxijm];
eff[miniim][maxiim] =
(int) ( (float)MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + (float)( eff0 + eff1 ) * 0.5 * SUEFF );
}
}
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
ac[acjmprev].next = acjmnext;
if( acjmnext != -1 )
ac[acjmnext].prev = acjmprev;
#if 0
fprintf( stdout, "STEP-%03d:\n", k+1 );
fprintf( stdout, "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
fprintf( stdout, "\n" );
fprintf( stdout, "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
fprintf( stdout, "\n" );
#endif
}
FreeIntMtx( eff ); eff = NULL;
free( (void *)tmptmplen ); tmptmplen = NULL;
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
}
void fastsupg( int nseq, double **oeff, int ***topol, double **len )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
#if 0
double eff[nseq][nseq];
char pair[njob][njob];
#else
static float *tmplen;
int *intpt;
float tmpfloat;
float eff1, eff0;
static float **eff = NULL;
static char **pair = NULL;
static Achain *ac;
float minscore;
int im = -1, jm = -1;
if( !eff )
{
eff = AllocateFloatMtx( njob, njob );
pair = AllocateCharMtx( njob, njob );
tmplen = AllocateFloatVec( njob );
ac = (Achain *)calloc( njob, sizeof( Achain ) );
}
#endif
for( i=0; i<nseq; i++ )
{
for( j=0; j<nseq; j++ )
{
eff[i][j] = (float)oeff[i][j];
}
}
for( i=0; i<nseq; i++ )
{
ac[i].next = i+1;
ac[i].prev = i-1;
// ac[i].curr = i;
}
ac[nseq-1].next = -1;
for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
for( i=0; i<nseq; i++ ) pair[i][i] = 1;
fprintf( stderr, "\n" );
for( k=0; k<nseq-1; k++ )
{
if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
minscore = 9999.0;
for( i=0; ac[i].next!=-1; i=ac[i].next )
// for( i=0; i<nseq-1; i++ )
{
for( j=ac[i].next; j!=-1; j=ac[j].next )
// for( j=i+1; j<nseq; j++ )
{
tmpfloat = eff[i][j];
if( tmpfloat < minscore )
{
minscore = tmpfloat;
im = i; jm = j;
}
}
}
// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
intpt = topol[k][0];
for( i=0; i<nseq; i++ )
if( pair[im][i] > 0 )
*intpt++ = i;
*intpt = -1;
intpt = topol[k][1];
for( i=0; i<nseq; i++ )
if( pair[jm][i] > 0 )
*intpt++ = i;
*intpt = -1;
minscore /= 2.0;
len[k][0] = (double)minscore - tmplen[im];
len[k][1] = (double)minscore - tmplen[jm];
tmplen[im] = (double)minscore;
for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
// for( i=0; i<nseq; i++ )
for( i=0; i!=-1; i=ac[i].next )
{
if( i != im && i != jm )
{
if( i < im )
{
miniim = i;
maxiim = im;
minijm = i;
maxijm = jm;
}
else if( i < jm )
{
miniim = im;
maxiim = i;
minijm = i;
maxijm = jm;
}
else
{
miniim = im;
maxiim = i;
minijm = jm;
maxijm = i;
}
eff0 = eff[miniim][maxiim];
eff1 = eff[minijm][maxijm];
eff[miniim][maxiim] =
MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
( eff0 + eff1 ) * 0.5 * SUEFF;
// eff[minijm][maxijm] = 9999.0;
}
}
ac[ac[jm].prev].next = ac[jm].next;
ac[ac[jm].next].prev = ac[jm].prev;
// eff[im][jm] = 9999.0;
#if 0
fprintf( stderr, "STEP-%03d:\n", k+1 );
fprintf( stderr, "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] );
fprintf( stderr, "\n" );
fprintf( stderr, "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] );
fprintf( stderr, "\n" );
#endif
}
fprintf( stderr, "\n" );
// FreeFloatMtx( eff );
// FreeCharMtx( pair );
// FreeFloatVec( tmplen );
// free( ac );
}
void supg( int nseq, double **oeff, int ***topol, double **len )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
#if 0
double eff[nseq][nseq];
char pair[njob][njob];
#else
static float *tmplen;
int *intpt;
float **floatptpt;
float *floatpt;
float tmpfloat;
float eff1, eff0;
static float **eff = NULL;
static char **pair = NULL;
if( !eff )
{
eff = AllocateFloatMtx( njob, njob );
pair = AllocateCharMtx( njob, njob );
tmplen = AllocateFloatVec( njob );
}
#endif
for( i=0; i<nseq; i++ )
{
for( j=0; j<nseq; j++ )
{
eff[i][j] = (float)oeff[i][j];
}
}
for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
for( i=0; i<nseq; i++ ) pair[i][i] = 1;
for( k=0; k<nseq-1; k++ )
{
float minscore = 9999.0;
int im = -1, jm = -1;
floatptpt = eff;
for( i=0; i<nseq-1; i++ )
{
floatpt = *floatptpt++ + i + 1;
for( j=i+1; j<nseq; j++ )
{
tmpfloat = *floatpt++;
if( tmpfloat < minscore )
{
minscore = tmpfloat;
im = i; jm = j;
}
}
}
intpt = topol[k][0];
for( i=0; i<nseq; i++ )
if( pair[im][i] > 0 )
*intpt++ = i;
*intpt = -1;
intpt = topol[k][1];
for( i=0; i<nseq; i++ )
if( pair[jm][i] > 0 )
*intpt++ = i;
*intpt = -1;
len[k][0] = (double)minscore / 2.0 - tmplen[im];
len[k][1] = (double)minscore / 2.0 - tmplen[jm];
tmplen[im] = (double)minscore / 2.0;
for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
for( i=0; i<nseq; i++ )
{
if( i != im && i != jm )
{
#if 1
if( i < im )
{
miniim = i;
maxiim = im;
minijm = i;
maxijm = jm;
}
else if( i < jm )
{
miniim = im;
maxiim = i;
minijm = i;
maxijm = jm;
}
else
{
miniim = im;
maxiim = i;
minijm = jm;
maxijm = i;
}
#else
miniim = MIN( i, im );
maxiim = MAX( i, im );
minijm = MIN( i, jm );
maxijm = MAX( i, jm );
#endif
#if 1
eff0 = eff[miniim][maxiim];
eff1 = eff[minijm][maxijm];
eff[miniim][maxiim] =
MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
( eff0 + eff1 ) * 0.5 * SUEFF;
#else
MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - SUEFF ) +
( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * SUEFF;
#endif
eff[minijm][maxijm] = 9999.0;
eff[im][jm] = 9999.0;
}
}
#if DEBUG
printf( "STEP-%03d:\n", k+1 );
printf( "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
printf( "\n" );
printf( "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
printf( "\n" );
#endif
}
}
void spg( int nseq, double **oeff, int ***topol, double **len )
{
int i, j, k;
double tmplen[M];
#if 0
double eff[nseq][nseq];
char pair[njob][njob];
#else
double **eff = NULL;
char **pair = NULL;
if( !eff )
{
eff = AllocateDoubleMtx( njob, njob );
pair = AllocateCharMtx( njob, njob );
}
#endif
for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) eff[i][j] = oeff[i][j];
for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
for( i=0; i<nseq; i++ ) pair[i][i] = 1;
for( k=0; k<nseq-1; k++ )
{
float minscore = 9999.0;
int im = -1, jm = -1;
int count;
for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
{
if( eff[i][j] < minscore )
{
minscore = eff[i][j];
im = i; jm = j;
}
}
for( i=0, count=0; i<nseq; i++ )
if( pair[im][i] > 0 )
{
topol[k][0][count] = i;
count++;
}
topol[k][0][count] = -1;
for( i=0, count=0; i<nseq; i++ )
if( pair[jm][i] > 0 )
{
topol[k][1][count] = i;
count++;
}
topol[k][1][count] = -1;
len[k][0] = minscore / 2.0 - tmplen[im];
len[k][1] = minscore / 2.0 - tmplen[jm];
tmplen[im] = minscore / 2.0;
for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
for( i=0; i<nseq; i++ )
{
if( i != im && i != jm )
{
eff[MIN(i,im)][MAX(i,im)] =
MIN( eff[MIN(i,im)][MAX(i,im)], eff[MIN(i,jm)][MAX(i,jm)] );
eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
}
eff[im][jm] = 9999.0;
}
#if DEBUG
printf( "STEP-%03d:\n", k+1 );
printf( "len0 = %f\n", len[k][0] );
for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
printf( "\n" );
printf( "len1 = %f\n", len[k][1] );
for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
printf( "\n" );
#endif
}
}
double ipower( double x, int n ) /* n > 0 */
{
double r;
r = 1;
while( n != 0 )
{
if( n & 1 ) r *= x;
x *= x; n >>= 1;
}
return( r );
}
void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */
{
int i, j, k, s1, s2;
static double rootnode[M];
if( nseq-2 < 0 )
{
fprintf( stderr, "Too few sequence for countnode: nseq = %d\n", nseq );
exit( 1 );
}
for( i=0; i<nseq; i++ ) rootnode[i] = 0;
for( i=0; i<nseq-2; i++ )
{
for( j=0; topol[i][0][j]>-1; j++ )
rootnode[topol[i][0][j]]++;
for( j=0; topol[i][1][j]>-1; j++ )
rootnode[topol[i][1][j]]++;
for( j=0; topol[i][0][j]>-1; j++ )
{
s1 = topol[i][0][j];
for( k=0; topol[i][1][k]>-1; k++ )
{
s2 = topol[i][1][k];
node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
}
}
}
for( j=0; topol[nseq-2][0][j]>-1; j++ )
{
s1 = topol[nseq-2][0][j];
for( k=0; topol[nseq-2][1][k]>-1; k++ )
{
s2 = topol[nseq-2][1][k];
node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
}
}
}
void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */
{
int i, j, k, s1, s2;
int rootnode[M];
for( i=0; i<nseq; i++ ) rootnode[i] = 0;
for( i=0; i<nseq-2; i++ )
{
for( j=0; topol[i][0][j]>-1; j++ )
rootnode[topol[i][0][j]]++;
for( j=0; topol[i][1][j]>-1; j++ )
rootnode[topol[i][1][j]]++;
for( j=0; topol[i][0][j]>-1; j++ )
{
s1 = topol[i][0][j];
for( k=0; topol[i][1][k]>-1; k++ )
{
s2 = topol[i][1][k];
node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
}
}
}
for( j=0; topol[nseq-2][0][j]>-1; j++ )
{
s1 = topol[nseq-2][0][j];
for( k=0; topol[nseq-2][1][k]>-1; k++ )
{
s2 = topol[nseq-2][1][k];
node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
}
}
for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
node[j][i] = node[i][j];
#if DEBUG
fprintf( stderr, "node[][] in countnode_int" );
for( i=0; i<nseq; i++ )
{
for( j=0; j<nseq; j++ )
{
fprintf( stderr, "%#3d", node[i][j] );
}
fprintf( stderr, "\n" );
}
#endif
}
void counteff_simple_float( int nseq, int ***topol, float **len, double *node )
{
int i, j, s1, s2;
double total;
static double rootnode[M];
static double eff[M];
#if DEBUG
for( i=0; i<nseq; i++ ){
fprintf( stderr, "len0 = %f\n", len[i][0] );
fprintf( stderr, "len1 = %f\n", len[i][1] );
}
#endif
for( i=0; i<nseq; i++ )
{
rootnode[i] = 0.0;
eff[i] = 1.0;
/*
rootnode[i] = 1.0;
*/
}
for( i=0; i<nseq-1; i++ )
{
for( j=0; (s1=topol[i][0][j]) > -1; j++ )
{
rootnode[s1] += (double)len[i][0] * eff[s1];
eff[s1] *= 0.5;
/*
rootnode[s1] *= 0.5;
*/
}
for( j=0; (s2=topol[i][1][j]) > -1; j++ )
{
rootnode[s2] += (double)len[i][1] * eff[s2];
eff[s2] *= 0.5;
/*
rootnode[s2] *= 0.5;
*/
}
}
for( i=0; i<nseq; i++ )
{
#if 1 /* 97.9.29 */
rootnode[i] += GETA3;
#endif
#if 0
fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] );
#endif
}
#if 1
total = 0.0;
for( i=0; i<nseq; i++ )
{
total += rootnode[i];
}
#else
total = 1.0;
#endif
for( i=0; i<nseq; i++ )
{
node[i] = rootnode[i] / total;
}
#if 0
fprintf( stderr, "weight array in counteff_simple\n" );
for( i=0; i<nseq; i++ )
fprintf( stderr, "%f\n", node[i] );
printf( "\n" );
exit( 1 );
#endif
}
void counteff_simple( int nseq, int ***topol, double **len, double *node )
{
int i, j, s1, s2;
double total;
static double rootnode[M];
static double eff[M];
#if DEBUG
for( i=0; i<nseq; i++ ){
fprintf( stderr, "len0 = %f\n", len[i][0] );
fprintf( stderr, "len1 = %f\n", len[i][1] );
}
#endif
for( i=0; i<nseq; i++ )
{
rootnode[i] = 0.0;
eff[i] = 1.0;
/*
rootnode[i] = 1.0;
*/
}
for( i=0; i<nseq-1; i++ )
{
for( j=0; (s1=topol[i][0][j]) > -1; j++ )
{
rootnode[s1] += len[i][0] * eff[s1];
eff[s1] *= 0.5;
/*
rootnode[s1] *= 0.5;
*/
}
for( j=0; (s2=topol[i][1][j]) > -1; j++ )
{
rootnode[s2] += len[i][1] * eff[s2];
eff[s2] *= 0.5;
/*
rootnode[s2] *= 0.5;
*/
}
}
for( i=0; i<nseq; i++ )
{
#if 1 /* 97.9.29 */
rootnode[i] += GETA3;
#endif
#if 0
fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] );
#endif
}
#if 1
total = 0.0;
for( i=0; i<nseq; i++ )
{
total += rootnode[i];
}
#else
total = 1.0;
#endif
for( i=0; i<nseq; i++ )
{
node[i] = rootnode[i] / total;
}
#if 0
fprintf( stderr, "weight array in counteff_simple\n" );
for( i=0; i<nseq; i++ )
fprintf( stderr, "%f\n", node[i] );
printf( "\n" );
exit( 1 );
#endif
}
void counteff( int nseq, int ***topol, double **len, double **node )
{
int i, j, k, s1, s2;
double rootnode[M];
double eff[M];
if( mix )
{
switch( weight )
{
case( 2 ):
weight = 3;
break;
case( 3 ):
weight = 2;
break;
default:
ErrorExit( "mix error" );
break;
}
}
if( weight == 2 )
{
for( i=0; i<nseq; i++ ) rootnode[i] = 0;
for( i=0; i<nseq-2; i++ )
{
for( j=0; topol[i][0][j]>-1; j++ )
rootnode[topol[i][0][j]]++;
for( j=0; topol[i][1][j]>-1; j++ )
rootnode[topol[i][1][j]]++;
for( j=0; topol[i][0][j]>-1; j++ )
{
s1 = topol[i][0][j];
for( k=0; topol[i][1][k]>-1; k++ )
{
s2 = topol[i][1][k];
node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
}
}
}
for( j=0; topol[nseq-2][0][j]>-1; j++ )
{
s1 = topol[nseq-2][0][j];
for( k=0; topol[nseq-2][1][k]>-1; k++ )
{
s2 = topol[nseq-2][1][k];
node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
}
}
for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
node[i][j] = ipower( 0.5, (int)node[i][j] ) + geta2;
for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
node[j][i] = node[i][j];
}
if( weight == 3 )
{
#if DEBUG
for( i=0; i<nseq; i++ ){
fprintf( stderr, "len0 = %f\n", len[i][0] );
fprintf( stderr, "len1 = %f\n", len[i][1] );
}
#endif
for( i=0; i<nseq; i++ )
{
rootnode[i] = 0.0;
eff[i] = 1.0;
/*
rootnode[i] = 1.0;
*/
}
for( i=0; i<nseq-1; i++ )
{
for( j=0; (s1=topol[i][0][j]) > -1; j++ )
{
rootnode[s1] += len[i][0] * eff[s1];
eff[s1] *= 0.5;
/*
rootnode[s1] *= 0.5;
*/
}
for( j=0; (s2=topol[i][1][j]) > -1; j++ )
{
rootnode[s2] += len[i][1] * eff[s2];
eff[s2] *= 0.5;
/*
rootnode[s2] *= 0.5;
*/
}
}
for( i=0; i<nseq; i++ )
{
#if 1 /* 97.9.29 */
rootnode[i] += GETA3;
#endif
#if DEBUG
fprintf( stderr, "rootnode for %d = %f\n", i, rootnode[i] );
#endif
}
for( i=0; i<nseq; i++ )
{
for( j=0; j<nseq; j++ )
if( j != i )
node[i][j] = (double)rootnode[i] * rootnode[j];
else node[i][i] = rootnode[i];
}
}
#if 0
printf( "weight matrix in counteff\n" );
for( i=0; i<nseq; i++ )
{
for( j=0; j<nseq; j++ )
{
printf( "%f ", node[i][j] );
}
printf( "\n" );
}
#endif
}
float score_calcp( char *seq1, char *seq2, int len )
{
int k;
int ms1, ms2;
float tmpscore;
int len2 = len - 2;
tmpscore = 0.0;
for( k=0; k<len; k++ )
{
ms1 = (int)seq1[k];
ms2 = (int)seq2[k];
if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
tmpscore += (float)amino_dis[ms1][ms2];
if( ms1 == (int)'-' )
{
tmpscore += (float)penalty;
tmpscore += (float)amino_dis[ms1][ms2];
while( (ms1=(int)seq1[++k]) == (int)'-' )
tmpscore += (float)amino_dis[ms1][ms2];
k--;
if( k >len2 ) break;
continue;
}
if( ms2 == (int)'-' )
{
tmpscore += (float)penalty;
tmpscore += (float)amino_dis[ms1][ms2];
while( (ms2=(int)seq2[++k]) == (int)'-' )
tmpscore += (float)amino_dis[ms1][ms2];
k--;
if( k > len2 ) break;
continue;
}
}
return( tmpscore );
}
float score_calc1( char *seq1, char *seq2 ) /* method 1 */
{
int k;
float score = 0.0;
int count = 0;
int len = strlen( seq1 );
for( k=0; k<len; k++ )
{
if( seq1[k] != '-' && seq2[k] != '-' )
{
score += (float)amino_dis[(int)seq1[k]][(int)seq2[k]];
count++;
}
}
if( count ) score /= (float)count;
else score = 1.0;
return( score );
}
float substitution_nid( char *seq1, char *seq2 )
{
int k;
float s12;
int len = strlen( seq1 );
s12 = 0.0;
for( k=0; k<len; k++ )
if( seq1[k] != '-' && seq2[k] != '-' )
s12 += ( seq1[k] == seq2[k] );
// fprintf( stdout, "s12 = %f\n", s12 );
return( s12 );
}
float substitution_score( char *seq1, char *seq2 )
{
int k;
float s12;
int len = strlen( seq1 );
s12 = 0.0;
for( k=0; k<len; k++ )
if( seq1[k] != '-' && seq2[k] != '-' )
s12 += amino_dis[(int)seq1[k]][(int)seq2[k]];
// fprintf( stdout, "s12 = %f\n", s12 );
return( s12 );
}
float substitution_hosei( char *seq1, char *seq2 ) /* method 1 */
#if 0
{
int k;
float score = 0.0;
int count = 0;
int len = strlen( seq1 );
for( k=0; k<len; k++ )
{
if( seq1[k] != '-' && seq2[k] != '-' )
{
score += (float)( seq1[k] != seq2[k] );
count++;
}
}
if( count ) score /= (float)count;
else score = 1.0;
if( score < 0.95 ) score = - log( 1.0 - score );
else score = 3.0;
return( score );
}
#else
{
int count = 0;
float score;
int iscore = 0;
char s1, s2;
while( (s1=*seq1++) )
{
s2 = *seq2++;
if( s1 == '-' ) continue;
if( s2 == '-' ) continue;
iscore += ( s1 != s2 );
count++;
}
if( count ) score = (float)iscore / count;
else score = 1.0;
if( score < 0.95 ) score = - log( 1.0 - score );
else score = 3.0;
return( score );
}
#endif
float substitution( char *seq1, char *seq2 ) /* method 1 */
{
int k;
float score = 0.0;
int count = 0;
int len = strlen( seq1 );
for( k=0; k<len; k++ )
{
if( seq1[k] != '-' && seq2[k] != '-' )
{
score += (float)( seq1[k] != seq2[k] );
count++;
}
}
if( count ) score /= (float)count;
else score = 1.0;
return( score );
}
void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff )
{
int i, j;
if( weight > 1 )
{
if( utree == 0 )
{
for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
{
/*
eff[i][j] = (double)score_calc1( seq[i], seq[j] );
*/
eff[i][j] = (double)substitution_hosei( seq[i], seq[j] );
/*
fprintf( stderr, "%f\n", eff[i][j] );
*/
}
/*
fprintf( stderr, "distance matrix\n" );
for( i=0; i<nseq; i++ )
{
for( j=0; j<nseq; j++ )
{
fprintf( stderr, "%f ", eff[i][j] );
}
fprintf( stderr, "\n" );
}
*/
/*
upg( nseq, eff, topol, len );
upg2( nseq, eff, topol, len );
*/
spg( nseq, eff, topol, len );
counteff( nseq, topol, len, eff );
}
}
else
{
for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
eff[i][j] = 1.0;
}
/*
fprintf( stderr, "weight matrix\n" );
for( i=0; i<nseq; i++ )
{
for( j=0; j<nseq; j++ )
{
fprintf( stderr, "%f ", eff[i][j] );
}
fprintf( stderr, "\n" );
}
*/
}
float bscore_calc( char **seq, int s, double **eff ) /* algorithm B */
{
int i, j, k;
int gb1, gb2, gc1, gc2;
int cob;
int nglen;
int len = strlen( seq[0] );
long score;
score = 0;
nglen = 0;
for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
{
double efficient = eff[i][j];
gc1 = 0;
gc2 = 0;
for( k=0; k<len; k++ )
{
gb1 = gc1;
gb2 = gc2;
gc1 = ( seq[i][k] == '-' );
gc2 = ( seq[j][k] == '-' );
cob =
!gb1 * gc1
* !gb2 * !gc2
+ !gb1 * !gc1
* !gb2 * gc2
+ !gb1 * gc1
* gb2 * !gc2
+ gb1 * !gc1
* !gb2 * gc2
+ gb1 * !gc1
* gb2 * gc2 *BEFF
+ gb1 * gc1
* gb2 * !gc2 *BEFF
;
score += (long)cob * penalty * efficient;
score += (long)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * efficient;
nglen += ( !gc1 * !gc2 );
}
}
return( (float)score / nglen + 400.0 * !scoremtx );
}
void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax )
{
*mseq2pt = AllocateCharMtx( njob, locnlenmax+1 );
*mseq1pt = AllocateCharVec( locnlenmax+1 );
}
void FreeTmpSeqs( char **mseq2, char *mseq1 )
{
FreeCharMtx( mseq2 );
free( (char *)mseq1 );
}
void gappick0( char *aseq, char *seq )
{
for( ; *seq != 0; seq++ )
{
if( *seq != '-' )
*aseq++ = *seq;
}
*aseq = 0;
}
void gappick( int nseq, int s, char **aseq, char **mseq2,
double **eff, double *effarr )
{
int i, j, count, countjob, len, allgap;
len = strlen( aseq[0] );
for( i=0, count=0; i<len; i++ )
{
allgap = 1;
for( j=0; j<nseq; j++ ) if( j != s ) allgap *= ( aseq[j][i] == '-' );
if( allgap == 0 )
{
for( j=0, countjob=0; j<nseq; j++ )
{
if( j != s )
{
mseq2[countjob][count] = aseq[j][i];
countjob++;
}
}
count++;
}
}
for( i=0; i<nseq-1; i++ ) mseq2[i][count] = 0;
for( i=0, countjob=0; i<nseq; i++ )
{
if( i != s )
{
effarr[countjob] = eff[s][i];
countjob++;
}
}
/*
fprintf( stdout, "effarr in gappick s = %d\n", s+1 );
for( i=0; i<countjob; i++ )
fprintf( stdout, " %f", effarr[i] );
printf( "\n" );
*/
}
void commongappick_record( int nseq, char **seq, int *map )
{
int i, j, count;
int len = strlen( seq[0] );
for( i=0, count=0; i<=len; i++ )
{
/*
allgap = 1;
for( j=0; j<nseq; j++ )
allgap *= ( seq[j][i] == '-' );
if( !allgap )
*/
for( j=0; j<nseq; j++ )
if( seq[j][i] != '-' ) break;
if( j != nseq )
{
for( j=0; j<nseq; j++ )
{
seq[j][count] = seq[j][i];
}
map[count] = i;
count++;
}
}
}
void commongappick( int nseq, char **seq )
{
int i, j, count;
int len = strlen( seq[0] );
for( i=0, count=0; i<=len; i++ )
{
/*
allgap = 1;
for( j=0; j<nseq; j++ )
allgap *= ( seq[j][i] == '-' );
if( !allgap )
*/
for( j=0; j<nseq; j++ )
if( seq[j][i] != '-' ) break;
if( j != nseq )
{
for( j=0; j<nseq; j++ )
{
seq[j][count] = seq[j][i];
}
count++;
}
}
}
double score_calc0( char **seq, int s, double **eff, int ex )
{
double tmp;
if( scmtd == 3 ) tmp = score_calc3( seq, s, eff, ex );
if( scmtd == 4 ) tmp = score_calc4( seq, s, eff, ex );
if( scmtd == 5 ) tmp = score_calc5( seq, s, eff, ex );
else tmp = score_calc5( seq, s, eff, ex );
return( tmp );
}
/*
float score_m_1( char **seq, int ex, double **eff )
{
int i, j, k;
int len = strlen( seq[0] );
int gb1, gb2, gc1, gc2;
int cob;
int nglen;
double score;
score = 0.0;
nglen = 0;
for( i=0; i<njob; i++ )
{
double efficient = eff[MIN(i,ex)][MAX(i,ex)];
if( i == ex ) continue;
gc1 = 0;
gc2 = 0;
for( k=0; k<len; k++ )
{
gb1 = gc1;
gb2 = gc2;
gc1 = ( seq[i][k] == '-' );
gc2 = ( seq[ex][k] == '-' );
cob =
!gb1 * gc1
* !gb2 * !gc2
+ !gb1 * !gc1
* !gb2 * gc2
+ !gb1 * gc1
* gb2 * !gc2
+ gb1 * !gc1
* !gb2 * gc2
+ gb1 * !gc1
* gb2 * gc2 *BEFF
+ gb1 * gc1
* gb2 * !gc2 *BEFF
;
score += (double)cob * penalty * efficient;
score += (double)amino_dis[seq[i][k]][seq[ex][k]] * efficient;
*
nglen += ( !gc1 * !gc2 );
*
if( !gc1 && !gc2 ) fprintf( stdout, "%f\n", score );
}
}
return( (float)score / nglen + 400.0 * !scoremtx );
}
*/
#if 0
void sitescore( char **seq, double **eff, char sco1[], char sco2[], char sco3[] )
{
int i, j, k;
int len = strlen( seq[0] );
double tmp;
double count;
int ch;
double sco[N];
for( i=0; i<len; i++ )
{
tmp = 0.0; count = 0;
for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
{
/*
if( seq[j][i] != '-' && seq[k][i] != '-' )
*/
{
tmp += amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx;
count++;
}
}
if( count > 0.0 ) tmp /= count;
else( tmp = 0.0 );
ch = (int)( tmp/100.0 - 0.000001 );
sprintf( sco1+i, "%c", ch+0x61 );
}
sco1[len] = 0;
for( i=0; i<len; i++ )
{
tmp = 0.0; count = 0;
for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
{
/*
if( seq[j][i] != '-' && seq[k][i] != '-' )
*/
{
tmp += eff[j][k] * ( amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx );
count += eff[j][k];
}
}
if( count > 0.0 ) tmp /= count;
else( tmp = 0.0 );
tmp = ( tmp - 400 * !scoremtx ) * 2;
if( tmp < 0 ) tmp = 0;
ch = (int)( tmp/100.0 - 0.000001 );
sprintf( sco2+i, "%c", ch+0x61 );
sco[i] = tmp;
}
sco2[len] = 0;
for( i=WIN; i<len-WIN; i++ )
{
tmp = 0.0;
for( j=i-WIN; j<=i+WIN; j++ )
{
tmp += sco[j];
}
for( j=0; j<njob; j++ )
{
if( seq[j][i] == '-' )
{
tmp = 0.0;
break;
}
}
tmp /= WIN * 2 + 1;
ch = (int)( tmp/100.0 - 0.0000001 );
sprintf( sco3+i, "%c", ch+0x61 );
}
for( i=0; i<WIN; i++ ) sco3[i] = '-';
for( i=len-WIN; i<len; i++ ) sco3[i] = '-';
sco3[len] = 0;
}
#endif
void strins( char *str1, char *str2 )
{
char *bk;
int len1 = strlen( str1 );
int len2 = strlen( str2 );
bk = str2;
str2 += len1+len2;
str1 += len1-1;
while( str2 >= bk+len1 ) { *str2 = *(str2-len1); str2--;} // by D.Mathog
while( str2 >= bk ) { *str2-- = *str1--; }
}
int isaligned( int nseq, char **seq )
{
int i;
int len = strlen( seq[0] );
for( i=1; i<nseq; i++ )
{
if( strlen( seq[i] ) != len ) return( 0 );
}
return( 1 );
}
double score_calc_for_score( int nseq, char **seq )
{
int i, j, k, c;
int len = strlen( seq[0] );
double score;
double tmpscore;
char *mseq1, *mseq2;
score = 0.0;
for( i=0; i<nseq-1; i++ )
{
for( j=i+1; j<nseq; j++ )
{
mseq1 = seq[i];
mseq2 = seq[j];
tmpscore = 0.0;
c = 0;
for( k=0; k<len; k++ )
{
if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
c++;
if( mseq1[k] == '-' )
{
tmpscore += penalty - n_dis[0][24];
while( mseq1[++k] == '-' )
;
k--;
if( k > len-2 ) break;
continue;
}
if( mseq2[k] == '-' )
{
tmpscore += penalty - n_dis[0][24];
while( mseq2[++k] == '-' )
;
k--;
if( k > len-2 ) break;
continue;
}
}
score += (double)tmpscore / (double)c;
#if DEBUG
printf( "tmpscore in mltaln9.c = %f\n", tmpscore );
printf( "tmpscore / c = %f\n", tmpscore/(double)c );
#endif
}
}
fprintf( stderr, "raw score = %f\n", score );
score /= (double)nseq * ( nseq-1.0 ) / 2.0;
score += 400.0;
#if DEBUG
printf( "score in mltaln9.c = %f\n", score );
#endif
return( (double)score );
}
void floatncpy( float *vec1, float *vec2, int len )
{
while( len-- )
*vec1++ = *vec2++;
}
float score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */
{
int i, j, k;
int gb1, gb2, gc1, gc2;
int cob;
int nglen;
int len = strlen( seq[0] );
float score;
score = 0;
nglen = 0;
for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
{
double efficient = eff[i][j];
gc1 = 0;
gc2 = 0;
for( k=0; k<len; k++ )
{
gb1 = gc1;
gb2 = gc2;
gc1 = ( seq[i][k] == '-' );
gc2 = ( seq[j][k] == '-' );
cob =
!gb1 * gc1
* !gb2 * !gc2
+ gb1 * !gc1
* !gb2 * !gc2
+ !gb1 * !gc1
* !gb2 * gc2
+ !gb1 * !gc1
* gb2 * !gc2
+ !gb1 * gc1
* gb2 * !gc2
+ gb1 * !gc1
* !gb2 * gc2
+ gb1 * !gc1
* gb2 * gc2
+ gb1 * gc1
* gb2 * !gc2
+ !gb1 * gc1
* gb2 * gc2
+ gb1 * gc1
* !gb2 * gc2
;
score += 0.5 * (float)cob * penalty * efficient;
score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
nglen += ( !gc1 * !gc2 );
}
}
return( (float)score / nglen + 400.0 * !scoremtx );
}
float score_calc_s( char **seq, int s, double **eff ) /* algorithm S, not used */
{
int i, j, k;
int gb1, gb2, gc1, gc2;
int cob;
int nglen;
int len = strlen( seq[0] );
float score;
score = 0;
nglen = 0;
for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
{
double efficient = eff[i][j];
gc1 = 0;
gc2 = 0;
for( k=0; k<len; k++ )
{
gb1 = gc1;
gb2 = gc2;
gc1 = ( seq[i][k] == '-' );
gc2 = ( seq[j][k] == '-' );
cob =
!gb1 * gc1
* !gb2 * !gc2
+ gb1 * !gc1
* !gb2 * !gc2
+ !gb1 * !gc1
* !gb2 * gc2
+ !gb1 * !gc1
* gb2 * !gc2
+ !gb1 * gc1
* gb2 * !gc2
+ gb1 * !gc1
* !gb2 * gc2
#if 0
+ gb1 * !gc1
* gb2 * gc2
+ gb1 * gc1
* gb2 * !gc2
+ !gb1 * gc1
* gb2 * gc2
+ gb1 * gc1
* !gb2 * gc2
#endif
;
score += 0.5 * (float)cob * penalty * efficient;
score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
nglen += ( !gc1 * !gc2 );
}
}
return( (float)score / nglen + 400.0 );
}
double score_calc_for_score_s( int s, char **seq ) /* algorithm S */
{
int i, j, k;
int gb1, gb2, gc1, gc2;
int cob;
int nglen;
int len = strlen( seq[0] );
float score;
score = 0;
nglen = 0;
for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
{
gc1 = 0;
gc2 = 0;
for( k=0; k<len; k++ )
{
gb1 = gc1;
gb2 = gc2;
gc1 = ( seq[i][k] == '-' );
gc2 = ( seq[j][k] == '-' );
cob =
!gb1 * gc1
* !gb2 * !gc2
+ gb1 * !gc1
* !gb2 * !gc2
+ !gb1 * !gc1
* !gb2 * gc2
+ !gb1 * !gc1
* gb2 * !gc2
+ !gb1 * gc1
* gb2 * !gc2
+ gb1 * !gc1
* !gb2 * gc2
#if 0
+ gb1 * !gc1
* gb2 * gc2
+ gb1 * gc1
* gb2 * !gc2
+ !gb1 * gc1
* gb2 * gc2
+ gb1 * gc1
* !gb2 * gc2
#endif
;
score += 0.5 * (float)cob * penalty;
score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
nglen += ( !gc1 * !gc2 );
}
#if 0
fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
fprintf( stderr, "score = %f\n", score );
#endif
}
return( (double)score / nglen + 400.0 );
}
double SSPscore___( int s, char **seq, int ex ) /* algorithm S */
{
int i, j, k;
int gb1, gb2, gc1, gc2;
int cob;
int nglen;
int len = strlen( seq[0] );
float score;
score = 0;
nglen = 0;
i=ex; for( j=0; j<s; j++ )
{
if( j == ex ) continue;
gc1 = 0;
gc2 = 0;
for( k=0; k<len; k++ )
{
gb1 = gc1;
gb2 = gc2;
gc1 = ( seq[i][k] == '-' );
gc2 = ( seq[j][k] == '-' );
cob =
!gb1 * gc1
* !gb2 * !gc2
+ gb1 * !gc1
* !gb2 * !gc2
+ !gb1 * !gc1
* !gb2 * gc2
+ !gb1 * !gc1
* gb2 * !gc2
+ !gb1 * gc1
* gb2 * !gc2 * 2.0
+ gb1 * !gc1
* !gb2 * gc2 * 2.0
#if 0
+ gb1 * !gc1
* gb2 * gc2
+ gb1 * gc1
* gb2 * !gc2
+ !gb1 * gc1
* gb2 * gc2
+ gb1 * gc1
* !gb2 * gc2
#endif
;
score += 0.5 * (float)cob * penalty;
score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
nglen += ( !gc1 * !gc2 ); /* tsukawanai */
}
#if 0
fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
fprintf( stderr, "score = %f\n", score );
#endif
}
return( (double)score );
}
double SSPscore( int s, char **seq ) /* algorithm S */
{
int i, j, k;
int gb1, gb2, gc1, gc2;
int cob;
int nglen;
int len = strlen( seq[0] );
float score;
score = 0;
nglen = 0;
for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
{
gc1 = 0;
gc2 = 0;
for( k=0; k<len; k++ )
{
gb1 = gc1;
gb2 = gc2;
gc1 = ( seq[i][k] == '-' );
gc2 = ( seq[j][k] == '-' );
cob =
!gb1 * gc1
* !gb2 * !gc2
+ gb1 * !gc1
* !gb2 * !gc2
+ !gb1 * !gc1
* !gb2 * gc2
+ !gb1 * !gc1
* gb2 * !gc2
+ !gb1 * gc1
* gb2 * !gc2
+ gb1 * !gc1
* !gb2 * gc2
#if 0
+ gb1 * !gc1
* gb2 * gc2
+ gb1 * gc1
* gb2 * !gc2
+ !gb1 * gc1
* gb2 * gc2
+ gb1 * gc1
* !gb2 * gc2
#endif
;
score += 0.5 * (float)cob * penalty;
score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
nglen += ( !gc1 * !gc2 ); /* tsukawanai */
}
#if 0
fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
fprintf( stderr, "score = %f\n", score );
#endif
}
return( (double)score );
}
double DSPscore( int s, char **seq ) /* method 3 deha nai */
{
int i, j, k;
double c;
int len = strlen( seq[0] );
double score;
double tmpscore;
char *mseq1, *mseq2;
#if DEBUG
FILE *fp;
#endif
score = 0.0;
c = 0.0;
for( i=0; i<s-1; i++ )
{
for( j=i+1; j<s; j++ )
{
mseq1 = seq[i];
mseq2 = seq[j];
tmpscore = 0.0;
for( k=0; k<len; k++ )
{
if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
if( mseq1[k] == '-' )
{
tmpscore += penalty;
while( mseq1[++k] == '-' )
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
k--;
if( k > len-2 ) break;
continue;
}
if( mseq2[k] == '-' )
{
tmpscore += penalty;
while( mseq2[++k] == '-' )
tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
k--;
if( k > len-2 ) break;
continue;
}
}
score += (double)tmpscore;
}
}
return( score );
}
#define SEGMENTSIZE 150
int searchAnchors( int nseq, char **seq, Segment *seg )
{
int i, j, k;
int status;
double score;
int value = 0;
int len;
int length;
static double *stra = NULL;
static int alloclen = 0;
double cumscore;
static double threshold;
len = strlen( seq[0] );
if( alloclen < len )
{
if( alloclen )
{
FreeDoubleVec( stra );
}
else
{
threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize;
}
stra = AllocateDoubleVec( len );
alloclen = len;
}
for( i=0; i<len; i++ )
{
#if 0
/* make prf */
for( j=0; j<26; j++ )
{
prf[j] = 0.0;
}
for( j=0; j<nseq; j++ ) prf[amino_n[seq[j][i]]] += 1.0;
/* make hat */
pre = 26;
for( j=25; j>=0; j-- )
{
if( prf[j] )
{
hat[pre] = j;
pre = j;
}
}
hat[pre] = -1;
/* make site score */
stra[i] = 0.0;
for( k=hat[26]; k!=-1; k=hat[k] )
for( j=hat[26]; j!=-1; j=hat[j] )
stra[i] += n_dis[k][j] * prf[k] * prf[j];
#else
stra[i] = 0.0;
for( k=0; k<nseq-1; k++ ) for( j=k+1; j<nseq; j++ )
stra[i] += n_dis[(int)amino_n[(int)seq[k][i]]][(int)amino_n[(int)seq[j][i]]];
stra[i] /= (double)nseq * ( nseq-1 ) / 2;
#endif
}
(seg+0)->skipForeward = 0;
(seg+1)->skipBackward = 0;
status = 0;
cumscore = 0.0;
score = 0.0;
length = 0; /* modified at 01/09/11 */
for( j=0; j<divWinSize; j++ ) score += stra[j];
for( i=1; i<len-divWinSize; i++ )
{
score = score - stra[i-1] + stra[i+divWinSize-1];
#if DEBUG
fprintf( stderr, "%d %f ? %f", i, score, threshold );
if( score > threshold ) fprintf( stderr, "YES\n" );
else fprintf( stderr, "NO\n" );
#endif
if( score > threshold )
{
if( !status )
{
status = 1;
seg->start = i;
length = 0;
cumscore = 0.0;
}
length++;
cumscore += score;
}
if( score <= threshold || length > SEGMENTSIZE )
{
if( status )
{
seg->end = i;
seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
seg->score = cumscore;
#if DEBUG
fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
#endif
if( length > SEGMENTSIZE )
{
(seg+0)->skipForeward = 1;
(seg+1)->skipBackward = 1;
}
else
{
(seg+0)->skipForeward = 0;
(seg+1)->skipBackward = 0;
}
length = 0;
cumscore = 0.0;
status = 0;
value++;
seg++;
if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!");
}
}
}
if( status )
{
seg->end = i;
seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
seg->score = cumscore;
#if DEBUG
fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
#endif
value++;
}
return( value );
}
void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
{
int i, j;
LocalHom *ptr;
static int *nogaplen = NULL;
if( nogaplen == NULL )
{
nogaplen = AllocateIntVec( nseq );
}
for( i=0; i<nseq; i++ )
{
nogaplen[i] = seqlen( seq[i] );
// fprintf( stderr, "nogaplen[%d] = %d\n", i, nogaplen[i] );
}
for( i=0; i<nseq; i++ )
{
for( j=0; j<nseq; j++ )
{
for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
{
// fprintf( stderr, "i,j=%d,%d,ptr=%p\n", i, j, ptr );
#if 1
ptr->importance = ptr->opt / ptr->overlapaa;
ptr->fimportance = (float)ptr->importance;
#else
ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] );
#endif
}
}
}
}
void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
{
int i, j, pos, len;
static double *importance;
double tmpdouble;
static int *nogaplen = NULL;
LocalHom *tmpptr;
if( importance == NULL )
{
importance = AllocateDoubleVec( nlenmax );
nogaplen = AllocateIntVec( nseq );
}
for( i=0; i<nseq; i++ )
{
nogaplen[i] = seqlen( seq[i] );
// fprintf( stderr, "nogaplen[] = %d\n", nogaplen[i] );
}
#if 0
for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
{
tmpptr = localhom[i]+j;
fprintf( stderr, "%d-%d\n", i, j );
do
{
fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt );
} while( tmpptr=tmpptr->next );
}
#endif
for( i=0; i<nseq; i++ )
{
// fprintf( stderr, "i = %d\n", i );
for( pos=0; pos<nlenmax; pos++ )
importance[pos] = 0.0;
for( j=0; j<nseq; j++ )
{
if( i == j ) continue;
tmpptr = localhom[i]+j;
for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
{
if( tmpptr->opt == -1 ) continue;
for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
#if 1
importance[pos] += eff[j];
#else
importance[pos] += eff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] );
importance[pos] += eff[j] * tmpptr->opt / tmpptr->overlapaa;
#endif
}
}
#if 0
fprintf( stderr, "position specific importance of seq %d:\n", i );
for( pos=0; pos<nlenmax; pos++ )
fprintf( stderr, "%d: %f\n", pos, importance[pos] );
fprintf( stderr, "\n" );
#endif
for( j=0; j<nseq; j++ )
{
// fprintf( stderr, "i=%d, j=%d\n", i, j );
if( i == j ) continue;
if( localhom[i][j].opt == -1.0 ) continue;
#if 1
for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
{
if( tmpptr->opt == -1.0 ) continue;
tmpdouble = 0.0;
len = 0;
for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
{
tmpdouble += importance[pos];
len++;
}
tmpdouble /= (double)len;
tmpptr->importance = tmpdouble * tmpptr->opt;
tmpptr->fimportance = (float)tmpptr->importance;
}
#else
tmpdouble = 0.0;
len = 0;
for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
{
if( tmpptr->opt == -1.0 ) continue;
for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
{
tmpdouble += importance[pos];
len++;
}
}
tmpdouble /= (double)len;
for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
{
if( tmpptr->opt == -1.0 ) continue;
tmpptr->importance = tmpdouble * tmpptr->opt;
// tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //$B$J$+$C$?$3$H$K$9$k(B
}
#endif
// fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
}
}
#if 0
fprintf( stderr, "before averaging:\n" );
for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
{
fprintf( stderr, "%d-%d\n", i, j );
for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
{
fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt );
}
}
#endif
#if 1
// fprintf( stderr, "average?\n" );
for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
{
double imp;
LocalHom *tmpptr1, *tmpptr2;
// fprintf( stderr, "i=%d, j=%d\n", i, j );
tmpptr1 = localhom[i]+j; tmpptr2 = localhom[j]+i;
for( ; tmpptr1 && tmpptr2; tmpptr1 = tmpptr1->next, tmpptr2 = tmpptr2->next)
{
if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 )
{
// fprintf( stderr, "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt );
continue;
}
// fprintf( stderr, "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance );
imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance );
tmpptr1->importance = tmpptr2->importance = imp;
tmpptr1->fimportance = tmpptr2->fimportance = (float)imp;
// fprintf( stderr, "## importance = %f\n", tmpptr1->importance );
}
#if 1
if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) )
{
fprintf( stderr, "ERROR: i=%d, j=%d\n", i, j );
exit( 1 );
}
#endif
}
#endif
#if 0
fprintf( stderr, "after averaging:\n" );
for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
{
fprintf( stderr, "%d-%d\n", i, j );
for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
{
fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt );
}
}
#endif
}
#if 0
void weightimportance( int nseq, double **eff, LocalHom **localhom )
{
int i, j, pos, len;
static double *importance;
double tmpdouble;
LocalHom *tmpptr, *tmpptr1, *tmpptr2;
if( importance == NULL )
importance = AllocateDoubleVec( nlenmax );
fprintf( stderr, "effmtx = :\n" );
for( i=0; i<nseq; i++ )
{
for( j=0; j<nseq; j++ )
{
fprintf( stderr, "%6.3f ", eff[i][j] );
}
fprintf( stderr, "\n" );
}
for( i=0; i<nseq; i++ )
{
for( pos=0; pos<nlenmax; pos++ )
importance[pos] = 0.0;
for( j=0; j<nseq; j++ )
{
if( i == j ) continue;
tmpptr = localhom[i]+j;
while( 1 )
{
fprintf( stderr, "i=%d, j=%d\n", i, j );
for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
// importance[pos] += eff[i][j] * tmpptr->importance;
importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0;
fprintf( stderr, "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance );
tmpptr = tmpptr->next;
if( tmpptr == NULL ) break;
}
}
#if 0
fprintf( stderr, "position specific importance of seq %d:\n", i );
for( pos=0; pos<nlenmax; pos++ )
fprintf( stderr, "%d: %f\n", pos, importance[pos] );
fprintf( stderr, "\n" );
#endif
for( j=0; j<nseq; j++ )
{
fprintf( stderr, "i=%d, j=%d\n", i, j );
if( i == j ) continue;
tmpptr = localhom[i]+j;
do
{
tmpdouble = 0.0;
len = 0;
for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
{
tmpdouble += importance[pos];
len++;
}
tmpdouble /= (double)len;
tmpptr->importance = tmpdouble;
fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
tmpptr = tmpptr->next;
} while( tmpptr );
}
}
#if 1
for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
{
fprintf( stderr, "i = %d, j=%d\n", i, j );
tmpptr1 = localhom[i]+j;
tmpptr2 = localhom[j]+i;
while( tmpptr1 && tmpptr2 )
{
tmpptr1->importance += tmpptr2->importance;
tmpptr1->importance *= 0.5;
tmpptr2->importance *= tmpptr1->importance;
fprintf( stderr, "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance );
tmpptr1 = tmpptr1->next;
tmpptr2 = tmpptr2->next;
fprintf( stderr, "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 );
}
}
#endif
}
void weightimportance2( int nseq, double *eff, LocalHom **localhom )
{
int i, j, pos, len;
static double *wimportance;
double tmpdouble;
if( wimportance == NULL )
wimportance = AllocateDoubleVec( nlenmax );
fprintf( stderr, "effmtx = :\n" );
for( i=0; i<nseq; i++ )
{
for( j=0; j<nseq; j++ )
{
fprintf( stderr, "%6.3f ", eff[i] * eff[j] );
}
fprintf( stderr, "\n" );
}
for( i=0; i<nseq; i++ )
{
fprintf( stderr, "i = %d\n", i );
for( pos=0; pos<nlenmax; pos++ )
wimportance[pos] = 0.0;
for( j=0; j<nseq; j++ )
{
if( i == j ) continue;
for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
// wimportance[pos] += eff[i][j];
wimportance[pos] += eff[i] * eff[j] / (double)nseq * localhom[i][j].importance / 1.0;
}
#if 0
fprintf( stderr, "position specific wimportance of seq %d:\n", i );
for( pos=0; pos<nlenmax; pos++ )
fprintf( stderr, "%d: %f\n", pos, wimportance[pos] );
fprintf( stderr, "\n" );
#endif
for( j=0; j<nseq; j++ )
{
if( i == j ) continue;
tmpdouble = 0.0;
len = 0;
for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
{
tmpdouble += wimportance[pos];
len++;
}
tmpdouble /= (double)len;
localhom[i][j].wimportance = tmpdouble;
fprintf( stderr, "wimportance of match between %d - %d = %f\n", i, j, tmpdouble );
}
}
#if 1
for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
{
localhom[i][j].wimportance += localhom[j][i].wimportance;
localhom[i][j].wimportance = 0.5 * ( localhom[i][j].wimportance );
}
for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
{
localhom[j][i].wimportance = localhom[i][j].wimportance;
}
#endif
}
void weightimportance4( int clus1, int clus2, double *eff1, double *eff2, LocalHom ***localhom )
{
int i, j, pos, len;
static double *wimportance;
LocalHom *tmpptr, *tmpptr1, *tmpptr2;
if( wimportance == NULL )
wimportance = AllocateDoubleVec( nlenmax );
#if 0
fprintf( stderr, "effarr1 = :\n" );
for( i=0; i<clus1; i++ )
fprintf( stderr, "%6.3f\n", eff1[i] );
fprintf( stderr, "effarr2 = :\n" );
for( i=0; i<clus2; i++ )
fprintf( stderr, "%6.3f\n", eff2[i] );
#endif
for( i=0; i<clus1; i++ )
{
for( j=0; j<clus2; j++ )
{
// fprintf( stderr, "i=%d, j=%d\n", i, j );
tmpptr = localhom[i][j];
do
{
tmpptr->wimportance = tmpptr->importance * eff1[i] * eff2[j];
tmpptr = tmpptr->next;
} while( tmpptr );
}
}
}
static void addlocalhom_e( LocalHom *localhom, int start1, int start2, int end1, int end2, double opt )
{
LocalHom *tmpptr;
tmpptr = localhom;
fprintf( stderr, "adding localhom\n" );
while( tmpptr->next )
tmpptr = tmpptr->next;
fprintf( stderr, "allocating localhom\n" );
tmpptr->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
fprintf( stderr, "done\n" );
tmpptr = tmpptr->next;
tmpptr->start1 = start1;
tmpptr->start2 = start2;
tmpptr->end1 = end1;
tmpptr->end2 = end2;
tmpptr->opt = opt;
fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
}
#if 0
#endif
void extendlocalhom( int nseq, LocalHom **localhom )
{
int i, j, k, pos0, pos1, pos2, st;
int start1, start2, end1, end2;
static int *tmpint1 = NULL;
static int *tmpint2 = NULL;
static int *tmpdouble1 = NULL;
static int *tmpdouble2 = NULL;
double opt;
LocalHom *tmpptr;
if( tmpint1 == NULL )
{
tmpint1 = AllocateIntVec( nlenmax );
tmpint2 = AllocateIntVec( nlenmax );
tmpdouble1 = AllocateIntVec( nlenmax );
tmpdouble2 = AllocateIntVec( nlenmax );
}
for( k=0; k<nseq; k++ )
{
for( i=0; i<nseq-1; i++ )
{
if( i == k ) continue;
for( pos0=0; pos0<nlenmax; pos0++ )
tmpint1[pos0] = -1;
tmpptr=localhom[k]+i;
do
{
pos0 = tmpptr->start1;
pos1 = tmpptr->start2;
while( pos0<=tmpptr->end1 )
{
tmpint1[pos0] = pos1++;
tmpdouble1[pos0] = tmpptr->opt;
pos0++;
}
} while( tmpptr = tmpptr->next );
for( j=i+1; j<nseq; j++ )
{
if( j == k ) continue;
for( pos1=0; pos1<nlenmax; pos1++ ) tmpint2[pos1] = -1;
tmpptr=localhom[k]+j;
do
{
pos0 = tmpptr->start1;
pos2 = tmpptr->start2;
while( pos0<=tmpptr->end1 )
{
tmpint2[pos0] = pos2++;
tmpdouble2[pos0++] = tmpptr->opt;
}
} while( tmpptr = tmpptr->next );
#if 0
fprintf( stderr, "i,j=%d,%d\n", i, j );
for( pos0=0; pos0<nlenmax; pos0++ )
fprintf( stderr, "%d ", tmpint1[pos0] );
fprintf( stderr, "\n" );
for( pos0=0; pos0<nlenmax; pos0++ )
fprintf( stderr, "%d ", tmpint2[pos0] );
fprintf( stderr, "\n" );
#endif
st = 0;
for( pos0=0; pos0<nlenmax; pos0++ )
{
// fprintf( stderr, "pos0 = %d/%d, st = %d, tmpint1[pos0] = %d, tmpint2[pos0] = %d\n", pos0, nlenmax, st, tmpint1[pos0], tmpint2[pos0] );
if( tmpint1[pos0] >= 0 && tmpint2[pos0] >= 0 )
{
if( st == 0 )
{
st = 1;
start1 = tmpint1[pos0];
start2 = tmpint2[pos0];
opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
}
else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 )
{
addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
start1 = tmpint1[pos0];
start2 = tmpint2[pos0];
opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
}
}
if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 )
{
if( st == 1 )
{
st = 0;
addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
}
}
}
}
}
}
}
#endif
static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm )
{
// dokka machigatteru
if( pt != lh ) // susumeru
{
pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
pt = pt->next;
pt->next = NULL;
lh->last = pt;
}
else // sonomamatsukau
{
lh->last = pt;
}
lh->nokori++;
// fprintf( stderr, "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj );
pt->start1 = sti;
pt->start2 = stj;
pt->end1 = eni;
pt->end2 = enj;
pt->opt = opt;
pt->extended = interm;
pt->overlapaa = overlp;
#if 0
fprintf( stderr, "i: %d-%d\n", sti, eni );
fprintf( stderr, "j: %d-%d\n", stj, enj );
fprintf( stderr, "opt=%f\n", opt );
fprintf( stderr, "overlp=%d\n", overlp );
#endif
}
void extendlocalhom2( int nseq, LocalHom **localhom, double **dist )
{
int overlp, plim;
int i, j, k;
int pi, pj, pk, len;
int status, sti, stj;
int *ipt;
int co;
static int *ini = NULL;
static int *inj = NULL;
LocalHom *pt;
sti = 0; // by D.Mathog, a guess
stj = 0; // by D.Mathog, a guess
if( ini == NULL )
{
ini = AllocateIntVec( nlenmax+1 );
inj = AllocateIntVec( nlenmax+1 );
}
for( i=0; i<nseq-1; i++ )
{
for( j=i+1; j<nseq; j++ )
{
#if 0
for( k=0; k<nseq; k++ ) sai[k] = 0;
numint = ncons;
while( 1 )
{
k = (int)( rnd() * nseq );
if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii
if( numint-- == 0 ) break;
if( sai[k] ) continue;
sai[k] = 1;
#else
for( k=0; k<nseq; k++ )
{
#endif
// fprintf( stderr, "i=%d, j=%d, k=%d, dists = %f,%f,%f thrinter=%f\n", i, j, k, dist[i][j], dist[MIN(i,k)][MAX(i,k)], dist[MIN(j,k)][MAX(j,k)], thrinter );
if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii
if( dist[MIN(i,k)][MAX(i,k)] > dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue;
ipt = ini; co = nlenmax+1;
while( co-- ) *ipt++ = -1;
ipt = inj; co = nlenmax+1;
while( co-- ) *ipt++ = -1;
overlp = 0;
{
for( pt=localhom[i]+k; pt; pt=pt->next )
{
// fprintf( stderr, "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended );
if( pt->opt == -1 )
{
fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt );
}
if( pt->extended > -1 ) break;
pi = pt->start1;
pk = pt->start2;
len = pt->end1 - pt->start1 + 1;
ipt = ini + pk;
while( len-- ) *ipt++ = pi++;
}
}
{
for( pt=localhom[j]+k; pt; pt=pt->next )
{
if( pt->opt == -1 )
{
fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt );
}
if( pt->extended > -1 ) break;
pj = pt->start1;
pk = pt->start2;
len = pt->end1 - pt->start1 + 1;
ipt = inj + pk;
while( len-- ) *ipt++ = pj++;
}
}
#if 0
fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k );
overlp = 0;
for( pk = 0; pk < nlenmax; pk++ )
{
if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
fprintf( stderr, " %d", inj[pk] );
}
fprintf( stderr, "\n" );
fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k );
overlp = 0;
for( pk = 0; pk < nlenmax; pk++ )
{
if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
fprintf( stderr, " %d", ini[pk] );
}
fprintf( stderr, "\n" );
#endif
overlp = 0;
plim = nlenmax+1;
for( pk = 0; pk < plim; pk++ )
if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
status = 0;
plim = nlenmax+1;
for( pk=0; pk<plim; pk++ )
{
// fprintf( stderr, "%d %d: %d-%d\n", i, j, ini[pk], inj[pk] );
if( status )
{
if( ini[pk] == -1 || inj[pk] == -1 || ini[pk-1] != ini[pk] - 1 || inj[pk-1] != inj[pk] - 1 ) // saigonoshori
{
status = 0;
// fprintf( stderr, "end here!\n" );
pt = localhom[i][j].last;
// fprintf( stderr, "in ex (ba), pt = %p, nokori=%d, i,j,k=%d,%d,%d\n", pt, localhom[i][j].nokori, i, j, k );
addlocalhom2_e( pt, localhom[i]+j, sti, stj, ini[pk-1], inj[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
// fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
pt = localhom[j][i].last;
// fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next );
// fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k );
addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
// fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
}
}
if( !status ) // else deha arimasenn.
{
if( ini[pk] == -1 || inj[pk] == -1 ) continue;
sti = ini[pk];
stj = inj[pk];
// fprintf( stderr, "start here!\n" );
status = 1;
}
}
// if( status ) fprintf( stderr, "end here\n" );
// exit( 1 );
// fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
}
#if 0
for( pt=localhomtable[i]+j; pt; pt=pt->next )
{
if( tmpptr->opt == -1.0 ) continue;
fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
}
#endif
}
}
}
int makelocal( char *s1, char *s2, int thr )
{
int start, maxstart, maxend;
char *pt1, *pt2;
double score;
double maxscore;
pt1 = s1;
pt2 = s2;
maxend = 0; // by D.Mathog, a guess
// fprintf( stderr, "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 );
maxscore = 0.0;
score = 0.0;
start = 0;
maxstart = 0;
while( *pt1 )
{
// fprintf( stderr, "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 );
if( *pt1 == '-' || *pt2 == '-' )
{
// fprintf( stderr, "penalty = %d\n", penalty );
score += penalty;
while( *pt1 == '-' || *pt2 == '-' )
{
pt1++; pt2++;
}
continue;
}
score += ( amino_dis[(int)*pt1++][(int)*pt2++] - thr );
// score += ( amino_dis[(int)*pt1++][(int)*pt2++] );
if( score > maxscore )
{
// fprintf( stderr, "score = %f\n", score );
maxscore = score;
maxstart = start;
// fprintf( stderr, "## max! maxstart = %d, start = %d\n", maxstart, start );
}
if( score < 0.0 )
{
// fprintf( stderr, "## resetting, start = %d, maxstart = %d\n", start, maxstart );
if( start == maxstart )
{
maxend = pt1 - s1;
// fprintf( stderr, "maxend = %d\n", maxend );
}
score = 0.0;
start = pt1 - s1;
}
}
if( start == maxstart )
maxend = pt1 - s1 - 1;
// fprintf( stderr, "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore );
s1[maxend+1] = 0;
s2[maxend+1] = 0;
return( maxstart );
}
void resetlocalhom( int nseq, LocalHom **lh )
{
int i, j;
LocalHom *pt;
for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
{
for( pt=lh[i]+j; pt; pt=pt->next )
pt->opt = 1.0;
}
}
void gapireru( char *res, char *ori, char *gt )
{
char g;
while( (g = *gt++) )
{
if( g == '-' )
{
*res++ = '-';
}
else
{
*res++ = *ori++;
}
}
*res = 0;
}
void getkyokaigap( char *g, char **s, int pos, int n )
{
// char *bk = g;
// while( n-- ) *g++ = '-';
while( n-- ) *g++ = (*s++)[pos];
// fprintf( stderr, "bk = %s\n", bk );
}
void new_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat )
#if 0
{
int i, j, gc, gb;
float feff;
for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
for( j=0; j<clus; j++ )
{
feff = (float)eff[j];
gc = ( sgappat[j] == '-' );
for( i=0; i<len; i++ )
{
gb = gc;
gc = ( seq[j][i] == '-' );
if( !gb * gc ) ogcp[i] += feff;
}
}
}
#else
{
int i, j, gc, gb;
float feff;
float *fpt;
char *spt;
fpt = ogcp;
i = len;
while( i-- ) *fpt++ = 0.0;
for( j=0; j<clus; j++ )
{
feff = (float)eff[j];
spt = seq[j];
fpt = ogcp;
gc = ( sgappat[j] == '-' );
i = len;
while( i-- )
{
gb = gc;
gc = ( *spt++ == '-' );
{
if( !gb * gc ) *fpt += feff;
fpt++;
}
}
}
}
#endif
void new_OpeningGapCount_zure( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
#if 0
{
int i, j, gc, gb;
float feff;
for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
for( j=0; j<clus; j++ )
{
feff = (float)eff[j];
gc = ( sgappat[j] == '-' );
for( i=0; i<len; i++ )
{
gb = gc;
gc = ( seq[j][i] == '-' );
if( !gb * gc ) ogcp[i] += feff;
}
{
gb = gc;
gc = ( egappat[j] == '-' );
if( !gb * gc ) ogcp[i] += feff;
}
}
}
#else
{
int i, j, gc, gb;
float feff;
float *fpt;
char *spt;
fpt = ogcp;
i = len+2;
while( i-- ) *fpt++ = 0.0;
for( j=0; j<clus; j++ )
{
feff = (float)eff[j];
spt = seq[j];
fpt = ogcp;
gc = ( sgappat[j] == '-' );
i = len;
while( i-- )
{
gb = gc;
gc = ( *spt++ == '-' );
{
if( !gb * gc ) *fpt += feff;
fpt++;
}
}
{
gb = gc;
gc = ( egappat[j] == '-' );
if( !gb * gc ) *fpt += feff;
}
}
}
#endif
void new_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
#if 0
{
int i, j, gc, gb;
float feff;
for( i=0; i<len+1; i++ ) fgcp[i] = 0.0;
for( j=0; j<clus; j++ )
{
feff = (float)eff[j];
gc = ( sgappat[j] == '-' );
for( i=0; i<len; i++ )
{
gb = gc;
gc = ( seq[j][i] == '-' );
{
if( gb * !gc ) fgcp[i] += feff;
}
}
{
gb = gc;
gc = ( egappat[j] == '-' );
{
if( gb * !gc ) fgcp[len] += feff;
}
}
}
}
#else
{
int i, j, gc, gb;
float feff;
float *fpt;
char *spt;
fpt = fgcp;
i = len+2;
while( i-- ) *fpt++ = 0.0;
for( j=0; j<clus; j++ )
{
feff = (float)eff[j];
fpt = fgcp;
spt = seq[j];
gc = ( sgappat[j] == '-' );
i = len;
while( i-- )
{
gb = gc;
gc = ( *spt++ == '-' );
{
if( gb * !gc ) *fpt += feff;
fpt++;
}
}
{
gb = gc;
gc = ( egappat[j] == '-' );
{
if( gb * !gc ) *fpt += feff;
}
}
}
}
#endif
void new_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len, char *egappat )
#if 0
{
int i, j, gc, gb;
float feff;
for( i=0; i<len; i++ ) fgcp[i] = 0.0;
for( j=0; j<clus; j++ )
{
feff = (float)eff[j];
gc = ( seq[j][0] == '-' );
for( i=1; i<len; i++ )
{
gb = gc;
gc = ( seq[j][i] == '-' );
{
if( gb * !gc ) fgcp[i-1] += feff;
}
}
{
gb = gc;
gc = ( egappat[j] == '-' );
{
if( gb * !gc ) fgcp[len-1] += feff;
}
}
}
}
#else
{
int i, j, gc, gb;
float feff;
float *fpt;
char *spt;
fpt = fgcp;
i = len;
while( i-- ) *fpt++ = 0.0;
for( j=0; j<clus; j++ )
{
feff = (float)eff[j];
fpt = fgcp;
spt = seq[j];
gc = ( *spt == '-' );
i = len;
while( i-- )
{
gb = gc;
gc = ( *++spt == '-' );
{
if( gb * !gc ) *fpt += feff;
fpt++;
}
}
{
gb = gc;
gc = ( egappat[j] == '-' );
{
if( gb * !gc ) *fpt += feff;
}
}
}
}
#endif
void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len )
{
int i, j, gc, gb;
float feff;
float *fpt;
char *spt;
fpt = ogcp;
i = len;
while( i-- ) *fpt++ = 0.0;
for( j=0; j<clus; j++ )
{
feff = (float)eff[j];
spt = seq[j];
fpt = ogcp;
gc = 0;
// gc = 1;
i = len;
while( i-- )
{
gb = gc;
gc = ( *spt++ == '-' );
{
if( !gb * gc ) *fpt += feff;
fpt++;
}
}
}
ogcp[len] = 0.0;
}
void st_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len )
{
int i, j, gc, gb;
float feff;
float *fpt;
char *spt;
fpt = fgcp;
i = len+1;
while( i-- ) *fpt++ = 0.0;
for( j=0; j<clus; j++ )
{
feff = (float)eff[j];
fpt = fgcp+1;
spt = seq[j];
gc = ( *spt == '-' );
i = len;
// for( i=1; i<len; i++ )
while( i-- )
{
gb = gc;
gc = ( *++spt == '-' );
{
if( gb * !gc ) *fpt += feff;
fpt++;
}
}
{
gb = gc;
gc = 0;
// gc = 1;
{
if( gb * !gc ) *fpt += feff;
}
}
}
}
void st_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len )
{
int i, j, gc, gb;
float feff;
float *fpt;
char *spt;
fpt = fgcp;
i = len;
while( i-- ) *fpt++ = 0.0;
for( j=0; j<clus; j++ )
{
feff = (float)eff[j];
fpt = fgcp;
spt = seq[j];
gc = ( *spt == '-' );
i = len;
// for( i=1; i<len; i++ )
while( i-- )
{
gb = gc;
gc = ( *++spt == '-' );
{
if( gb * !gc ) *fpt += feff;
fpt++;
}
}
{
gb = gc;
gc = 0;
// gc = 1;
{
if( gb * !gc ) *fpt += feff;
}
}
}
}
void getGapPattern( float *fgcp, int clus, char **seq, double *eff, int len, char *xxx )
{
int i, j, gc, gb;
float feff;
float *fpt;
char *spt;
fpt = fgcp;
i = len+1;
while( i-- ) *fpt++ = 0.0;
for( j=0; j<clus; j++ )
{
feff = (float)eff[j];
fpt = fgcp;
spt = seq[j];
gc = ( *spt == '-' );
i = len+1;
while( i-- )
{
gb = gc;
gc = ( *++spt == '-' );
{
if( gb * !gc ) *fpt += feff;
fpt++;
}
}
#if 0
{
gb = gc;
gc = ( egappat[j] == '-' );
{
if( gb * !gc ) *fpt += feff;
}
}
#endif
}
for( j=0; j<len; j++ )
{
fprintf( stderr, "%d, %f\n", j, fgcp[j] );
}
}
void getdigapfreq_st( float *freq, int clus, char **seq, double *eff, int len )
{
int i, j;
float feff;
for( i=0; i<len+1; i++ ) freq[i] = 0.0;
for( i=0; i<clus; i++ )
{
feff = eff[i];
if( 0 && seq[i][0] == '-' ) // machigai kamo
freq[0] += feff;
for( j=1; j<len; j++ )
{
if( seq[i][j] == '-' && seq[i][j-1] == '-' )
freq[j] += feff;
}
if( 0 && seq[i][len-1] == '-' )
freq[len] += feff;
}
// fprintf( stderr, "\ndigapf = \n" );
// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
}
void getdiaminofreq_x( float *freq, int clus, char **seq, double *eff, int len )
{
int i, j;
float feff;
for( i=0; i<len+2; i++ ) freq[i] = 0.0;
for( i=0; i<clus; i++ )
{
feff = eff[i];
if( seq[i][0] != '-' ) // tadashii
freq[0] += feff;
for( j=1; j<len; j++ )
{
if( seq[i][j] != '-' && seq[i][j-1] != '-' )
freq[j] += feff;
}
if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
freq[len] += feff;
}
// fprintf( stderr, "\ndiaaf = \n" );
// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
}
void getdiaminofreq_st( float *freq, int clus, char **seq, double *eff, int len )
{
int i, j;
float feff;
for( i=0; i<len+1; i++ ) freq[i] = 0.0;
for( i=0; i<clus; i++ )
{
feff = eff[i];
if( seq[i][0] != '-' )
freq[0] += feff;
for( j=1; j<len; j++ )
{
if( seq[i][j] != '-' && seq[i][j-1] != '-' )
freq[j] += feff;
}
// if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
freq[len] += feff;
}
// fprintf( stderr, "\ndiaaf = \n" );
// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
}
void getdigapfreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
{
int i, j;
float feff;
for( i=0; i<len+2; i++ ) freq[i] = 0.0;
for( i=0; i<clus; i++ )
{
feff = eff[i];
// if( seq[i][0] == '-' )
if( seq[i][0] == '-' && sgappat[i] == '-' )
freq[0] += feff;
for( j=1; j<len; j++ )
{
if( seq[i][j] == '-' && seq[i][j-1] == '-' )
freq[j] += feff;
}
// if( seq[i][len] == '-' && seq[i][len-1] == '-' ) // xxx wo tsukawanaitoki arienai
if( egappat[i] == '-' && seq[i][len-1] == '-' )
freq[len] += feff;
}
// fprintf( stderr, "\ndigapf = \n" );
// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
}
void getdiaminofreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
{
int i, j;
float feff;
for( i=0; i<len+2; i++ ) freq[i] = 0.0;
for( i=0; i<clus; i++ )
{
feff = eff[i];
if( seq[i][0] != '-' && sgappat[i] != '-' )
freq[0] += feff;
for( j=1; j<len; j++ )
{
if( seq[i][j] != '-' && seq[i][j-1] != '-' )
freq[j] += feff;
}
// if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
if( egappat[i] != '-' && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
freq[len] += feff;
}
// fprintf( stderr, "\ndiaaf = \n" );
// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
}
void getgapfreq_zure_part( float *freq, int clus, char **seq, double *eff, int len, char *sgap )
{
int i, j;
float feff;
for( i=0; i<len+2; i++ ) freq[i] = 0.0;
for( i=0; i<clus; i++ )
{
feff = eff[i];
if( sgap[i] == '-' )
freq[0] += feff;
for( j=0; j<len; j++ )
{
if( seq[i][j] == '-' )
freq[j+1] += feff;
}
// if( egap[i] == '-' )
// freq[len+1] += feff;
}
// fprintf( stderr, "\ngapf = \n" );
// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
}
void getgapfreq_zure( float *freq, int clus, char **seq, double *eff, int len )
{
int i, j;
float feff;
for( i=0; i<len+1; i++ ) freq[i] = 0.0;
for( i=0; i<clus; i++ )
{
feff = eff[i];
for( j=0; j<len; j++ )
{
if( seq[i][j] == '-' )
freq[j+1] += feff;
}
}
freq[len+1] = 0.0;
// fprintf( stderr, "\ngapf = \n" );
// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
}
void getgapfreq( float *freq, int clus, char **seq, double *eff, int len )
{
int i, j;
float feff;
for( i=0; i<len+1; i++ ) freq[i] = 0.0;
for( i=0; i<clus; i++ )
{
feff = eff[i];
for( j=0; j<len; j++ )
{
if( seq[i][j] == '-' )
freq[j] += feff;
}
}
freq[len] = 0.0;
// fprintf( stderr, "\ngapf = \n" );
// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
}
void st_getGapPattern( Gappat **pat, int clus, char **seq, double *eff, int len )
{
int i, j, k, gb, gc;
int known;
float feff;
Gappat **fpt;
char *spt;
int gaplen;
fpt = pat;
i = len+1;
while( i-- )
{
if( *fpt ) free( *fpt );
*fpt++ = NULL;
}
for( j=0; j<clus; j++ )
{
// fprintf( stderr, "seq[%d] = %s\n", j, seq[j] );
feff = (float)eff[j];
fpt = pat;
*fpt = NULL; // Falign.c kara yobareru tokiha chigau.
spt = seq[j];
gc = 0;
gaplen = 0;
for( i=0; i<len+1; i++ )
// while( i-- )
{
// fprintf( stderr, "i=%d, gaplen = %d\n", i, gaplen );
gb = gc;
gc = ( i != len && *spt++ == '-' );
if( gc )
gaplen++;
else
{
if( gb && gaplen )
{
k = 1;
known = 0;
if( *fpt ) for( ; (*fpt)[k].len != -1; k++ )
{
if( (*fpt)[k].len == gaplen )
{
// fprintf( stderr, "known\n" );
known = 1;
break;
}
}
if( known == 0 )
{
*fpt = (Gappat *)realloc( *fpt, (k+3) * sizeof( Gappat ) ); // mae1 (total), ato2 (len0), term
if( !*fpt )
{
fprintf( stderr, "Cannot allocate gappattern!'n" );
fprintf( stderr, "Use an approximate method, with the --mafft5 option.\n" );
exit( 1 );
}
(*fpt)[k].freq = 0.0;
(*fpt)[k].len = gaplen;
(*fpt)[k+1].len = -1;
(*fpt)[k+1].freq = 0.0; // iranai
// fprintf( stderr, "gaplen=%d, Unknown, %f\n", gaplen, (*fpt)[k].freq );
}
// fprintf( stderr, "adding pos %d, len=%d, k=%d, freq=%f->", i, gaplen, k, (*fpt)[k].freq );
(*fpt)[k].freq += feff;
// fprintf( stderr, "%f\n", (*fpt)[k].freq );
gaplen = 0;
}
}
fpt++;
}
}
#if 1
for( j=0; j<len+1; j++ )
{
if( pat[j] )
{
// fprintf( stderr, "j=%d\n", j );
// for( i=1; pat[j][i].len!=-1; i++ )
// fprintf( stderr, "pos=%d, i=%d, len=%d, freq=%f\n", j, i, pat[j][i].len, pat[j][i].freq );
pat[j][0].len = 0; // iminashi
pat[j][0].freq = 0.0;
for( i=1; pat[j][i].len!=-1;i++ )
{
pat[j][0].freq += pat[j][i].freq;
// fprintf( stderr, "totaling, i=%d, result = %f\n", i, pat[j][0].freq );
}
// fprintf( stderr, "totaled, result = %f\n", pat[j][0].freq );
pat[j][i].freq = 1.0 - pat[j][0].freq;
pat[j][i].len = 0; // imiari
pat[j][i+1].len = -1;
}
else
{
pat[j] = (Gappat *)calloc( 3, sizeof( Gappat ) );
pat[j][0].freq = 0.0;
pat[j][0].len = 0; // iminashi
pat[j][1].freq = 1.0 - pat[j][0].freq;
pat[j][1].len = 0; // imiari
pat[j][2].len = -1;
}
}
#endif
}
static void commongappickpair( char *r1, char *r2, char *i1, char *i2 )
{
// strcpy( r1, i1 );
// strcpy( r2, i2 );
// return; // not SP
while( *i1 )
{
if( *i1 == '-' && *i2 == '-' )
{
i1++;
i2++;
}
else
{
*r1++ = *i1++;
*r2++ = *i2++;
}
}
*r1 = 0;
*r2 = 0;
}
float naiveRpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
{
// return( 0 );
int i, j;
float val;
float valf;
int pv;
double deff;
char *p1, *p2, *p1p, *p2p;
val = 0.0;
for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
{
deff = eff1[i] * eff2[j];
// fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
// fprintf( stderr, "i1 = %s\n", seq1[i] );
// fprintf( stderr, "i2 = %s\n", seq2[j] );
// fprintf( stderr, "s1 = %s\n", s1 );
// fprintf( stderr, "s2 = %s\n", s2 );
// fprintf( stderr, "penal = %d\n", penal );
valf = 0;
p1 = seq1[i]; p2 = seq2[j];
pv = 0;
if( *p1 == '-' && *p2 != '-' )
pv = penal;
if( *p1 != '-' && *p2 == '-' )
pv = penal;
// if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
p1p = p1; p2p = p2;
valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
while( *p1p )
{
pv = 0;
if( *p1p != '-' && *p2p != '-' )
{
if( *p1 == '-' && *p2 != '-' )
pv = penal;
if( *p1 != '-' && *p2 == '-' )
pv = penal;
if( *p1 != '-' && *p2 != '-' )
;
if( *p1 == '-' && *p2 == '-' )
;
}
if( *p1p == '-' && *p2p == '-' )
{
if( *p1 == '-' && *p2 != '-' )
pv = penal;
// ;
if( *p1 != '-' && *p2 == '-' )
pv = penal;
// ;
if( *p1 != '-' && *p2 != '-' )
;
if( *p1 == '-' && *p2 == '-' )
;
}
if( *p1p != '-' && *p2p == '-' )
{
if( *p1 == '-' && *p2 != '-' )
pv = penal * 2; // ??
// ;
if( *p1 != '-' && *p2 == '-' )
;
if( *p1 != '-' && *p2 != '-' )
pv = penal;
// ;
if( *p1 == '-' && *p2 == '-' )
pv = penal;
// ;
}
if( *p1p == '-' && *p2p != '-' )
{
if( *p1 == '-' && *p2 != '-' )
;
if( *p1 != '-' && *p2 == '-' )
pv = penal * 2; // ??
// ;
if( *p1 != '-' && *p2 != '-' )
pv = penal;
// ;
if( *p1 == '-' && *p2 == '-' )
pv = penal;
// ;
}
// fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
// if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
p1p++; p2p++;
}
// fprintf( stderr, "valf = %d\n", valf );
val += deff * ( valf );
}
fprintf( stderr, "val = %f\n", val );
return( val );
// exit( 1 );
}
float naiveQpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
{
int i, j;
float val;
float valf;
int pv;
double deff;
char *p1, *p2, *p1p, *p2p;
return( 0 );
val = 0.0;
for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
{
deff = eff1[i] * eff2[j];
// fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
// fprintf( stderr, "i1 = %s\n", seq1[i] );
// fprintf( stderr, "i2 = %s\n", seq2[j] );
// fprintf( stderr, "s1 = %s\n", s1 );
// fprintf( stderr, "s2 = %s\n", s2 );
// fprintf( stderr, "penal = %d\n", penal );
valf = 0;
p1 = seq1[i]; p2 = seq2[j];
pv = 0;
if( *p1 == '-' && *p2 != '-' )
pv = penal;
if( *p1 != '-' && *p2 == '-' )
pv = penal;
// if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
p1p = p1; p2p = p2;
valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
while( *p1p )
{
pv = 0;
if( *p1p != '-' && *p2p != '-' )
{
if( *p1 == '-' && *p2 != '-' )
pv = penal;
if( *p1 != '-' && *p2 == '-' )
pv = penal;
if( *p1 != '-' && *p2 != '-' )
;
if( *p1 == '-' && *p2 == '-' )
;
}
if( *p1p == '-' && *p2p == '-' )
{
if( *p1 == '-' && *p2 != '-' )
// pv = penal;
;
if( *p1 != '-' && *p2 == '-' )
// pv = penal;
;
if( *p1 != '-' && *p2 != '-' )
;
if( *p1 == '-' && *p2 == '-' )
;
}
if( *p1p != '-' && *p2p == '-' )
{
if( *p1 == '-' && *p2 != '-' )
pv = penal * 2; // ??
// ;
if( *p1 != '-' && *p2 == '-' )
;
if( *p1 != '-' && *p2 != '-' )
pv = penal;
// ;
if( *p1 == '-' && *p2 == '-' )
// pv = penal;
;
}
if( *p1p == '-' && *p2p != '-' )
{
if( *p1 == '-' && *p2 != '-' )
;
if( *p1 != '-' && *p2 == '-' )
pv = penal * 2; // ??
// ;
if( *p1 != '-' && *p2 != '-' )
pv = penal;
// ;
if( *p1 == '-' && *p2 == '-' )
// pv = penal;
;
}
// fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
// if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
p1p++; p2p++;
}
// fprintf( stderr, "valf = %d\n", valf );
val += deff * ( valf );
}
fprintf( stderr, "val = %f\n", val );
return( val );
// exit( 1 );
}
float naiveHpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
{
int i, j;
float val;
float valf;
int pv;
// float feff = 0.0; // by D.Mathog, a guess
double deff;
char *p1, *p2, *p1p, *p2p;
val = 0.0;
for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
{
deff = eff1[i] * eff2[j];
// fprintf( stderr, "i1 = %s\n", seq1[i] );
// fprintf( stderr, "i2 = %s\n", seq2[j] );
// fprintf( stderr, "s1 = %s\n", s1 );
// fprintf( stderr, "s2 = %s\n", s2 );
// fprintf( stderr, "penal = %d\n", penal );
valf = 0;
p1 = seq1[i]; p2 = seq2[j];
pv = 0;
if( *p1 == '-' && *p2 != '-' )
pv = penal;
if( *p1 != '-' && *p2 == '-' )
pv = penal;
if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, (int)(p1-seq1[i]), (int)(p2-seq2[j]) );
p1p = p1; p2p = p2;
valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
while( *p1p )
{
pv = 0;
if( *p1p != '-' && *p2p != '-' )
{
if( *p1 == '-' && *p2 != '-' )
pv = penal;
if( *p1 != '-' && *p2 == '-' )
pv = penal;
if( *p1 != '-' && *p2 != '-' )
;
if( *p1 == '-' && *p2 == '-' )
;
}
if( *p1p == '-' && *p2p == '-' )
{
if( *p1 == '-' && *p2 != '-' )
// pv = penal;
;
if( *p1 != '-' && *p2 == '-' )
// pv = penal;
;
if( *p1 != '-' && *p2 != '-' )
;
if( *p1 == '-' && *p2 == '-' )
;
}
if( *p1p != '-' && *p2p == '-' )
{
if( *p1 == '-' && *p2 != '-' )
// pv = penal;
;
if( *p1 != '-' && *p2 == '-' )
;
if( *p1 != '-' && *p2 != '-' )
pv = penal;
if( *p1 == '-' && *p2 == '-' )
// pv = penal;
;
}
if( *p1p == '-' && *p2p != '-' )
{
if( *p1 == '-' && *p2 != '-' )
;
if( *p1 != '-' && *p2 == '-' )
// pv = penal;
;
if( *p1 != '-' && *p2 != '-' )
pv = penal;
if( *p1 == '-' && *p2 == '-' )
// pv = penal;
;
}
// fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
// if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
p1p++; p2p++;
}
// fprintf( stderr, "valf = %d\n", valf );
val += deff * ( valf );
}
fprintf( stderr, "val = %f\n", val );
return( val );
// exit( 1 );
}
int naivepairscore11( char *seq1, char *seq2, int penal )
{
int vali;
int len = strlen( seq1 );
char *s1, *s2, *p1, *p2;
s1 = calloc( len+1, sizeof( char ) );
s2 = calloc( len+1, sizeof( char ) );
{
vali = 0;
commongappickpair( s1, s2, seq1, seq2 );
// fprintf( stderr, "i1 = %s\n", seq1[i] );
// fprintf( stderr, "i2 = %s\n", seq2[j] );
// fprintf( stderr, "s1 = %s\n", s1 );
// fprintf( stderr, "s2 = %s\n", s2 );
// fprintf( stderr, "penal = %d\n", penal );
p1 = s1; p2 = s2;
while( *p1 )
{
if( *p1 == '-' )
{
// fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
vali += penal;
// while( *p1 == '-' || *p2 == '-' )
while( *p1 == '-' ) // SP
{
p1++;
p2++;
}
continue;
}
if( *p2 == '-' )
{
// fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
vali += penal;
// while( *p2 == '-' || *p1 == '-' )
while( *p2 == '-' ) // SP
{
p1++;
p2++;
}
continue;
}
// fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
vali += amino_dis[(int)*p1++][(int)*p2++];
}
}
free( s1 );
free( s2 );
// fprintf( stderr, "vali = %d\n", vali );
return( vali );
// exit( 1 );
}
float naivepairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
{
// return( 0.0 );
int i, j;
float val;
int vali;
float feff;
int len = strlen( seq1[0] );
char *s1, *s2, *p1, *p2;
s1 = calloc( len+1, sizeof( char ) );
s2 = calloc( len+1, sizeof( char ) );
val = 0.0;
for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
{
vali = 0;
feff = eff1[i] * eff2[j];
// fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
commongappickpair( s1, s2, seq1[i], seq2[j] );
// fprintf( stderr, "i1 = %s\n", seq1[i] );
// fprintf( stderr, "i2 = %s\n", seq2[j] );
// fprintf( stderr, "s1 = %s\n", s1 );
// fprintf( stderr, "s2 = %s\n", s2 );
// fprintf( stderr, "penal = %d\n", penal );
p1 = s1; p2 = s2;
while( *p1 )
{
if( *p1 == '-' )
{
// fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
vali += penal;
// while( *p1 == '-' || *p2 == '-' )
while( *p1 == '-' ) // SP
{
p1++;
p2++;
}
continue;
}
if( *p2 == '-' )
{
// fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
vali += penal;
// while( *p2 == '-' || *p1 == '-' )
while( *p2 == '-' ) // SP
{
p1++;
p2++;
}
continue;
}
// fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
vali += amino_dis[(int)*p1++][(int)*p2++];
}
// fprintf( stderr, "vali = %d\n", vali );
val += feff * vali;
}
free( s1 );
free( s2 );
fprintf( stderr, "val = %f\n", val );
return( val );
// exit( 1 );
}