/* This code was modified from PHYLIP sources */

#include "mynhmlg.h"

#define nmlngth 10 /* for phylip input sequence formats, number of characters in species name */

/* NOTE(review): no function named gettch is visible in this part of the file
 * (the reader defined below is gettc) -- confirm this alias is intended. */
#define getch gettch

/* Input stream shared by the readers in this file. */
FILE *infile;

/*
 * Read one character from file.
 *
 * End of file is treated as fatal (message on stdout, exit status -1) so that
 * callers not expecting EOF never see it.  A carriage return, optionally
 * followed by a line feed, is folded into a single '\n'.
 */
char gettc(FILE* file)
{
  int ch = getc(file);

  if (ch == EOF) {
    puts("Unexpected End of File");
    exit(-1);
  }

  if (ch == '\r') {
    /* swallow the '\n' of a CRLF pair; anything else goes back on the stream */
    ch = getc(file);
    if (ch != '\n')
      ungetc(ch, file);
    ch = '\n';
  }
  return (char)ch;
} /* gettc */

/*
 * Convert *ch to upper case in place; other characters are left untouched.
 */
void uppercase(char *ch)
{
  /* <ctype.h> functions require a value representable as unsigned char */
  unsigned char c = (unsigned char)*ch;

  if (islower(c))
    *ch = (char)toupper(c);
} /* uppercase */

/*
 * Return TRUE when f is at end of file, FALSE otherwise.
 * The stream position is unchanged: a character read while probing is
 * pushed back.
 */
int eoff(FILE *f)
{
  int ch;

  if (feof(f))
    return TRUE;

  ch = getc(f);
  if (ch == EOF)
    return TRUE; /* nothing to push back: ungetc(EOF, f) does nothing */

  ungetc(ch, f);
  return FALSE;
} /*eoff*/

/*
 * Return nonzero when the next character of f is '\n' or '\r', or when f is
 * at end of file.  The probed character is pushed back.
 */
int eoln(FILE *f)
{
  int ch = getc(f);

  if (ch == EOF)
    return TRUE;

  ungetc(ch, f);
  return ((ch == '\n') || (ch == '\r'));
} /*eoln*/

/*
 * Discard the rest of the current line of f, including its terminator,
 * stopping early at end of file.
 */
void scan_eoln(FILE *f)
{
  while (!eoff(f) && !eoln(f))
    gettc(f);

  /* Consume the terminator through gettc so that a CRLF pair is eaten as one
   * unit; a plain getc would leave the '\n' behind and the next eoln() would
   * report a spurious empty line.  eoff() guards gettc's fatal EOF path. */
  if (!eoff(f))
    (void)gettc(f);
} /* scan_eoln */

/*
 * Read the nmlngth-character name field of species i (0-based) from the
 * global infile into seqname[i].
 *
 * A blank in the field is stored as '\0', so the name seen as a C string
 * ends at the first blank.  This is what the previous code produced when the
 * buffer started out zero-filled, which it relied on.  The field is
 * terminated at index nmlngth.
 * NOTE(review): seqname[i] must hold at least nmlngth + 1 chars; the
 * allocation is not visible here -- confirm.
 *
 * Exits with status -1 if the line or file ends inside the field, or if the
 * name contains one of ( ) : ; , [ ].
 */
void initname(long i, char **seqname)
{
  long j;
  char tempch;

  for (j = 0; j < nmlngth; j++) {
    if (eoff(infile) || eoln(infile)) {
      printf("\n\nERROR: end-of-line or end-of-file");
      printf(" in the middle of species name for species %ld\n\n", i+1);
      exit(-1);
    }

    tempch = gettc(infile);
    /* always assign the slot so that no indeterminate byte is left behind */
    seqname[i][j] = (tempch != ' ') ? tempch : '\0';

    switch (tempch) {
    case '(':
    case ')':
    case ':':
    case ',':
    case ';':
    case '[':
    case ']':
      printf("\nERROR: Species name may not contain characters ( ) : ; , [ ] \n");
      printf(" In name of species number %ld there is character %c\n\n", i+1, tempch);
      exit(-1);
    default:
      break;
    }
  }

  /* j == nmlngth here: terminate right after the field (was written one
   * slot too far, at index nmlngth + 1) */
  seqname[i][j] = '\0';
} /* initname */

// read a phylip format aligned sequence file and get the names and
/* NOTE(review): the line below is damaged and is kept exactly as found.
 *  - It opens with the words "sequences for each species", which are the tail
 *    of the line comment begun at the end of the previous line.
 *  - The text between "for (i=0;i" and "= '0' && charstate <= '9'))" is
 *    missing, so the function body is incomplete and cannot compile as
 *    written.  Restore the lost text from the upstream file before changing
 *    any code here.
 *
 * readSequencefile, as far as the surviving text shows:
 *  - opens filename for reading into the global infile; on failure it prints
 *    "File not found" to stderr and returns 0;
 *  - reads up to 255 characters of the first line and parses two integers
 *    from it into spp and seqLen;
 *  - obtains spp char* slots from check_alloc for each of *seqnamep and *seqp;
 *  - upper-cases each sequence character and exits with status -1 when it is
 *    not one of "ABCDEFGHIKLMNPQRSTVWXYZ*?-", or when a sequence is out of
 *    alignment;
 *  - returns spp.
 * NOTE(review): the results of fgets/sscanf are not checked, and no
 * fclose(infile) appears in the surviving text -- confirm against the
 * complete source.
 */
sequences for each species int readSequencefile(char *filename, char ***seqp, char ***seqnamep, int interleaved) { int spp, seqLen, allread, done; char **seq, **seqname; char line[256]; long i, j, basesread, basesnew; char charstate; if( (infile=fopen(filename,"r")) == NULL) { fprintf(stderr,"File not found:%s\n",filename); return 0; } fgets(line,256,infile); sscanf(line,"%d%d",&spp,&seqLen); *seqnamep = (char **) check_alloc(spp,sizeof(char*)); *seqp = (char **) check_alloc(spp,sizeof(char*)); seqname = *seqnamep; seq = *seqp; for (i=0;i= '0' && charstate <= '9')) continue; uppercase(&charstate); if ((strchr("ABCDEFGHIKLMNPQRSTVWXYZ*?-", charstate)) == NULL) { printf("ERROR: bad amino acid: %c at position %ld of species %ld. Check sequence format.\n", charstate, j+1, i); if (charstate == '.') { printf(" Periods (.) may not be used as gap characters.\n"); printf(" The correct gap character is (-)\n"); } exit(-1); } j++; seq[i - 1][j - 1] = charstate; } if (interleaved) continue; if (j < seqLen) scan_eoln(infile); else if (j == seqLen) done = TRUE; } if (interleaved && i == 1) basesnew = j; scan_eoln(infile); if ((interleaved && j != basesnew) || (!interleaved && j != seqLen)) { printf("ERROR: SEQUENCES OUT OF ALIGNMENT AT POSITION %ld. Check sequence format.\n", j); exit(-1); } i++; } if (interleaved) { basesread = basesnew; allread = (basesread == seqLen); } else allread = (i > spp); } /* print the alignment to an outfile, not useful for (i = 1; i <= ((seqLen - 1) / 60 + 1); i++) { for (j = 1; j <= spp; j++) { for (k = 0; k < nmlngth; k++) putc(seqname[j - 1][k], outfile); fprintf(outfile, " "); l = i * 60; if (l > seqLen) l = seqLen; for (k = (i - 1) * 60 + 1; k <= l; k++) { if (j > 1 && seq[j - 1][k - 1] == seq[0][k - 1]) charstate = '.'; else charstate = seq[j - 1][k - 1]; putc(charstate, outfile); if (k % 10 == 0 && k % 60 != 0) putc(' ', outfile); } putc('\n', outfile); } putc('\n', outfile); } putc('\n', outfile); */ return (spp); } /* read infile */