/* PATTERN

   This program looks for a pattern of motifs defined by the user in the
   sequence(s) contained in the database file specified.
 
   The program can be invoked either by use of command line parameters or
   in an interactive mode when the user is prompted for replies.
   Help on the command line parameters can be obtained by typing:
 
       PATTERN HELP or PATTERN ?.
 
   The program is written in standard ANSI C.  In this source file conditional
   compliation for the Borland Turbo C complier (IBM PC) or the
   Waterloo compiler for an IBM 3090 mainframe can be selected by setting
   the appropriate define to TRUE and the other to FALSE.  Most constants
   set by the conditional compliation are concerned with the underlying
   file naming conventions on the different systems and with the amount of
   memory available.  The code is written to be able to run on either
   ASCII or EBCDIC encoded machines.
 
   Please acknowledge the original description of this program in any
   publications:
 
       Cockwell & Giles (1989) Comput Applic Biosci  5, 227-232
*/

/*  9.11.89:  Ported to VAX C by R. Fuchs (FUCHS@EMBL). Uses Curses, so make
              sure that LNK$LIBRARY is defined as SYS$LIBRARY:VAXCCURSE and
              LNK$LIBRARY_1 as SYS$LIBRARY:VAXCRTL.  */
 
#include <stdio.h>
#include <string.h>
#include <ctype.h>
 
#define TRUE 1
#define FALSE 0
 
#define waterloo FALSE
#define turboc FALSE
#define VAX TRUE

#define MAXQ 50
#define ARRSIZE 256
#define MAXMATRIX 30
#define MAXLINE 128

#if (turboc)
#define MAXSEQ 10000
#define SCORE 1000
#define LIMIT 4
#define ERRLOG "CON"
#define RESFILE "PATTERN.OUT"
#define SCANNAME "SCA"
#define FILENAMELEN 80
#include <conio.h>
#define cls clrscr
#define index strchr
#endif

#if (waterloo)
#define SCORE 1500
#define LIMIT 6
#define ERRLOG "TERMINAL"
#define RESFILE "PATTERN OUTPUT A"
#define SCANNAME "SCANFILE"
#define FILENAMELEN 20
#include <stdlib.h>
#include <file.h>
#define MAXSEQ 200000
#define cls clear
#endif

#if (VAX)
#include <curses.h>
#define SCORE 1500
#define LIMIT 6
#define RESFILE "PATTERN.OUT"
#define ERRLOG "screen"
#define SCANNAME "SCA"
#define MAXSEQ  200000
#define FILENAMELEN 80
#define cls clear
#define index strchr
#endif
 
int   dlength, matches[LIMIT][SCORE], qlength[LIMIT];
char  setmatrix[LIMIT][ARRSIZE][MAXMATRIX];
int   max[LIMIT-1], min[LIMIT-1];
int   left, right;
int   total, totalseq, sum;
int   totmotif, minmotif, maxmotif;
char  qseq[LIMIT][MAXQ], *dseq, uptable[256];
char  rname[FILENAMELEN], lname[FILENAMELEN], ename[FILENAMELEN];
char  dbname[71];
FILE  *rfil, *lfil, *efil;
 
main(argc,argv)
   int argc;
   char *argv[];
{
    int i, count, test;
    char *calloc();

#if (VAX)
    initscr();
#endif 
    left = right = 0;
    settranstable();
/*  get input parameters and find number of motifs in pattern definition
*/
    if(argc >1) count = getnames(argc,argv);
    else count = interractive();
 
    test = databasecheck(lname);
    openfiles(count);
    totmotif = count;
 
    checkminmax(count);
 
    for(i=0;i<count;i++){             /* set lookup setmatrixs and */
         if(right==1){
              printf("Right end indicator '.' must be at right end!!");
               exit(0);
               }
         iub(i);
        qlength[i] =  makesetmatrix(i);  /* get lengths of all qseqs */
         }
 
     minmotif = maxmotif = qlength[count];
     for(i=0;i<count;i++){
         minmotif = minmotif + min[i] + qlength[i];
         maxmotif = maxmotif + max[i] + qlength[i];
         }
 
    checkseq(count);
 
    sum = qlength[count-1];
    for(i=0;i<count-1;i++)
         sum = sum  + min[i] +qlength[i+1];
 
    if((dseq = calloc(MAXSEQ,sizeof(char)))==0){
         fprintf(efil,"ERROR: cannot allocate sequence array.\n");
         exit(0);
         }
 
    printf("Executing ....\n");
 
    if(test == 1) /* scan database using fast read routines */
      getdbseq(count,test);
    else          /* scan database using the GETSEQ routine */
      getdata(count);
 
    if (efil!=stdout)
      fclose(efil);
    if(rfil!=stdout)
      fclose(rfil);
    fclose(lfil);
    free(dseq);

#if (VAX)
    endwin();
#endif

    return(0);
}
 
interractive()
{
    int i, j, qcount;
    char cut[5], temp[5];
    *lname = *temp = *cut = 0;
 
    cls();
    printf("PATTERN\n~~~~~~~\n");
    printf("  A utility to search for a pattern of motifs within a \
sequence.\n\n");
    qcount = 1;
 
    while(*qseq[0] == 0){
         printf("Motif %d ?: ",qcount);
         gets(qseq[0]);
         }
    while(*temp == 0){
         printf("Minimum gap value %d ?: ",qcount);
         gets(temp);
         for(i=0;i<strlen(temp);i++)
              if(isdigit(temp[i]) == 0) *temp = 0;
         }
     sscanf(temp,"%d",&min[0]);
 
     while(*cut == 0){
         printf("Maximum gap value %d ?: ",qcount);
         gets(cut);
         for(i=0;i<strlen(cut);i++)
             if(isdigit(cut[i]) == 0) *cut = 0;
         }
     sscanf(cut,"%d",&max[0]);
 
     qcount++;
     while(*qseq[1] == 0){
         printf("Motif %d ?: ",qcount);
         gets(qseq[1]);
         }
 
     for(i=2;i<LIMIT;i++){
         *temp = *cut = 0;
          printf("Minimum gap value %d ?: ",i);
          gets(temp);
          sscanf(temp,"%d",&min[i-1]);
          if(min[i-1] == 0) break;
          qcount++;
          while(*cut == 0){
              printf("Maximum gap value %d ?: ",i);
              gets(cut);
              for(j=0;j<strlen(cut);j++)
                   if(isdigit(cut[j]) == 0) *cut = 0;
              }
         sscanf(cut,"%d",&max[i-1]);
         while(*qseq[i] == 0){
              printf("Motif %d ?: ",i+1);
              gets(qseq[i]);
              }
         }
 
    while (*lname == 0){
         printf("Library file name ?: ");
         gets(lname);
         }
    printf("Results file name (Default: %s) ?: ",RESFILE);
    gets(rname);
    if (*rname == 0) strcpy(rname,RESFILE);
    printf("File to receive error messages (Default: %s (screen)) ?: "
    ,ERRLOG);
    gets(ename);
    if (*ename == 0) strcpy(ename,ERRLOG);
 
    for(i=0;i<qcount;i++)
         qlength[i] = strlen(qseq[i]);
 
    return(qcount);
}
 
/* getnames needs at least 5 arguments;  2*qseq, min gap, max gap
and a database file name.  If less arguments are passed in, a helpscreen
is given */
 
getnames(argc,argv)
int argc;
char *argv[];
{
    int i ,j, k, qcount, gapcount;
    char key, arg[256], *eq;
    char temp[5];
 
    i = 1;
    qcount = 0;                         /* count for qseqs and gap values */
    gapcount = 0;
    strcpy(rname,RESFILE);     /* set default results file name */
    strcpy(ename,ERRLOG);
 
    if (argc < 4) helpscreen();
    else if (argc > 3) {
         for(i=1;i<argc-3;i=i+2){
              strcpy(arg, argv[i]);
              if(!strchr(argv[i+1],',')) break;
              strcpy(qseq[qcount++],arg);
              strcpy(arg,argv[i+1]);
              eq = strchr(arg,',');
              sscanf(eq+1,"%d",&max[gapcount++]);
         for(j=0;j<5;j++)
              temp[j] = ' ';
              for(j=0;j<(strlen(arg)-strlen(eq));j++){
                   if(j>4) {
                        printf("ERROR: minimum gap too large.\n");
                        exit(0);
                        }
                   if(((int)arg[j] >= (int)'0')
                   &&((int)arg[j] <= (int)'9'))
                   temp[j] = arg[j];
                   else break;
                   }
              sscanf(temp,"%d",&min[gapcount-1]);
              }
         strcpy(qseq[qcount++],argv[i++]);
 
         while( i < argc) {
              strcpy(arg, argv[i]);
                   eq = strchr(arg,'=');
                   strncpy(&key,arg,1);
                  key = uptable[key];
                   switch (key) {
 
                   case 'O':             /* get results file */
                   strcpy(rname, eq+1 );
                   for(k = 1;k <= 2; k++) {
                        if(index(argv[i+1], '=')) break;
                        if(i+1 >= argc) break;
                        else {
                             strcpy( arg, argv[i + 1]);
                             i++;
                             strcat(rname," ");
                             strcat(rname, arg);
                             }
                        }
                   break;
 
                  case 'E':             /* get error log file */
                   strcpy(ename, eq+1 );
                   for(k = 1;k <= 2; k++) {
                        if(i+1 >= argc) break;
                        else {
                             strcpy( arg, argv[i + 1]);
                             i++;
                             strcat(ename," ");
                             strcat(ename, arg);
                             }
                        }
                   break;
 
                  case 'S':             /* get sequence file */
                   strcpy(lname, eq+1 );
                   for(k = 1;k <= 2; k++) {
                        if(index(argv[i+1], '=')) break;
                        if(i+1 >= argc) break;
                        else {
                             strcpy( arg, argv[i + 1]);
                             i++;
                             strcat(lname," ");
                             strcat(lname, arg);
                             }
                        }
                    break;
 
              default:
                  printf(" WARNING: Unknown argument %s \n",argv[i]);
              }
              i++;
         }
    }
 
           /* set initial length of query sequence */
    for(i=0;i<qcount;i++)
         qlength[i] = strlen(qseq[i]);
    if(qcount != gapcount+1) {
         printf("ERROR: unequal arguments.\n");
         exit(0);
         }
   return(qcount);   /* return number of query sequences */
}
 
/* This routine sets up a translation table, used when reading sequence data,
   that forces all lower case chars to upper case, and U to T.  This routine
   does not rely on ASCII character encoding
*/
settranstable()
{
    int i;
    char high[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ*-@";
    char low[]  = "abcdefghijklmnopqrstuvwxyz*-@";
 
    for(i=0;i<256;i++)
         uptable[i] = 0;
    for(i=0;i<29;i++)
       uptable[high[i]] = uptable[low[i]] = (char)high[i];
    uptable['u'] = uptable['U'] = (char)'T';
}
 
/* This sets up translation used in defining motif for lower case letters
   which are used as the IUB abbreviations for nucleotides.  This routine
   effectively translates lower case IUB symbols into our motif definition
   format; eg r become [AG].
*/
iub(q)
  int q;
{
    int i, len;
    char qtemp[MAXMATRIX];
 
    char *r = "[AG]";
    char *y = "[TC]";
    char *m = "[AG]";
    char *k = "[TC]";
    char *s = "[CG]";
    char *w = "[AT]";
    char *h = "[ACT]";
    char *b = "[CGT]";
    char *v = "[ACT]";
    char *d = "[AGT]";
 
    *qtemp = 0;
    len = strlen(qseq[q]);
    for(i=0;i<len;i++){
         if(islower(qseq[q][i]) == 0) strncat(qtemp,qseq[q]+i,1);
         else switch(qseq[q][i]) {
 
              case 'r':
                   strcat(qtemp,r);
                   break;
 
              case 'y':
                   strcat(qtemp,y);
                   break;
 
              case 'm':
                   strcat(qtemp,m);
                   break;
 
              case 'k':
                   strcat(qtemp,k);
                   break;
 
              case 's':
                   strcat(qtemp,s);
                   break;
 
              case 'w':
                   strcat(qtemp,w);
                   break;
 
              case 'h':
                   strcat(qtemp,h);
                   break;
 
              case 'b':
                   strcat(qtemp,b);
                   break;
 
              case 'v':
                   strcat(qtemp,v);
                   break;
 
              case 'd':
                   strcat(qtemp,d);
                   break;
 
              case 'n':
                   strcat(qtemp,"X");
                   break;
 
              case 'x':
                   strcat(qtemp,"X");
                   break;
 
              default :
                   fprintf(efil,"ERROR: unrecognised IUB symbol %c.\n",qseq[i]);
                   exit(0);
                   }
        }
    strcpy(qseq[q],qtemp);
    qlength[q] = strlen(qseq[q]);
}
 
checkminmax(qcount)
  int qcount;
{
    int i;
 /*  from zero; check that max > min */
    for(i=0;i<qcount-1;i++) {
         if(max[i] < min[i]) {
              fprintf(efil,"ERROR: maximum gap %d less than minimum gap %d.\n",
              max[i],min[i]);
              exit(0);
              }
        if ((min[i] <=0 )||(max[i] <=0 )){
             fprintf(efil,"ERROR: gap values must be greater than 0.\n");
              exit(0);
              }
        if ((min[i] >1000 )||(max[i] >1000 )){
             fprintf(efil,"ERROR: gap values must be less than 1000.\n");
              exit(0);
              }
         }
 
    for(i=0;i<qcount;i++)
         if(strlen(qseq[i]) >= MAXQ) {
              fprintf(efil,"ERROR: motif %s too long.\n",qseq[i]);
              exit(0);
              }
}
 
checkseq(qcount)
    int qcount;
{
    int i;
    for(i=0;i<qcount;i++)
         if(qlength[i] >= MAXMATRIX){
              fprintf(efil,"ERROR: motif %s too long.\n",qseq[i]);
              exit(0);
              }
}
 
openfiles(count)
int count;
{
    int i;
 
    if(strcmp(ename,"screen")) {
        efil = fopen(ename,"w");
        if (efil == 0) {
            printf("ERROR: cannot open error log file %s\n",ename);
            exit(0);
            }
        }
    else
        efil = stdout;
 
   if(strcmp(rname,"screen")) {
        rfil = fopen(rname,"w");
        if (rfil == 0) {
            fprintf(efil,"ERROR: cannot open results file %s \n",rname);
            exit(0);
            }
        }
    else
        rfil = stdout;
 
    lfil = fopen(lname,"r");
    if (lfil == 0) {
         fprintf(efil,"ERROR: cannot open sequence file %s.\n",lname);
         exit(0);
        }
#ifndef waterloo
    if (setvbuf(lfil, NULL, _IOFBF, 32767) != 0) {
       fprintf(efil,"WARNING: insufficient memory for large database buffer\n");
       }
#endif
 
    for(i=0;i<count;i++)
         fprintf(rfil,"Motif %d = %s\n",i+1,qseq[i]);
    for(i=0;i<count-1;i++) {
         fprintf(rfil,"Minimum gap %d = %d\n",i+1,min[i]);
         fprintf(rfil,"Maximum gap %d = %d \n",i+1,max[i]);
         }
    fprintf(rfil,"File to be searched = %s\n",lname);
    fprintf(rfil,"Results file = %s\n\n",rname);
}
 
/* construct set matrix for each motif */
makesetmatrix(num)
int num;             /* counter for number of qseqs */
{
    int i, j, k, bracket, count, not;
    char temp[MAXQ];
 
    count = 0;       /* counter for position in qseq */
    bracket = 0;     /* flag for bracket (0 = off) */
    not = 0;         /* flag for not (0 = off) */
 
    for(i = 0;i<qlength[num];i++)
         for(j=0;j<ARRSIZE;j++)
              setmatrix[num][j][i] = 0;   /* set whole setmatrix to zero */
 
    if((num==0)&&(qseq[num][0]=='.')){
         left = 1;       /* left end marker */
         strcpy(temp,qseq[num]+1);
         *qseq[num] = 0;
         strcpy(qseq[num],temp);
         qlength[num]--;
         }
    else if((num!=0)&&(qseq[num][0]=='.')){
         printf("Left hand indicator '.' must be at left end!");
         exit(0);
         }
 
    if (qseq[num][strlen(qseq[num])-1]=='.'){
         right = 1;       /* right end marker */
         strncpy(temp,qseq[num],strlen(qseq[num])-1);
         temp[strlen(qseq[num])-1] = '\0';
         *qseq[num] = 0;
         strcpy(qseq[num],temp);
         qlength[num]--;
         }
 
    for(i=0; i<qlength[num]; i++) {   /* set values in qseq to 1 */
         if (qseq[num][i] == '^') {
              not = 1;
              continue;
              }
         if (qseq[num][i] == '[') {  /* if a bracket occurs, do not */
              bracket = 1;           /* increment the counter */
              continue;
              }
         if (qseq[num][i] ==']') {
              bracket = 0;
              if ((bracket == 0) &&(not == 1))
                   not = donot(not,count+1,num);
              count++;
              continue;
              }
        if((qseq[num][i] == 'X')||(qseq[num][i] == '-')) {
              for(k=0;k<ARRSIZE;k++)
                   setmatrix[num][k][count] = 1;
              }
         j = (int)qseq[num][i];
         if (bracket == 0) {
              setmatrix[num][j][count] = 1;
              count++;
              }
         if (bracket  == 1) {
              setmatrix[num][j][count] = 1;
              continue;
              }
         if (not == 1) not = donot(not,count,num);
         }
/* check bracket not left open */
    if(bracket == 1) {
         fprintf(efil,"ERROR: bracket not closed in %s\n",qseq[num]);
         exit(0);
         }
        return(count);
}
 
donot(not,count,num)      /* inverts a column of the setmatrix */
int not, count, num;
{
    int k;
 
     for (k=0;k<ARRSIZE;k++) {
         if (setmatrix[num][k][count-1] == 0) setmatrix[num][k][count-1] = 1;
         else if (setmatrix[num][k][count-1] == 1)
              setmatrix[num][k][count-1] = 0;
         }
    not = 0;
    return(not);
}
 
databasecheck(name)
   char name[];
{
    int i, test;
    char *type;
 
    test = 0;
 
    type = strchr(name,' ');
    if (type == 0) type = strchr(name,'.');
    if (type == 0) return(0);
 
    for(i=1;i<=strlen(SCANNAME);i++)
         type[i] = uptable[type[i]];
 
    if(strncmp(type+1,SCANNAME,strlen(SCANNAME)) == 0) test++;
 
    return(test);
}
 
getdbseq(count,test)
int count, test;
{
/* case for scanfile format database */
    int i, flag;
    char string[MAXLINE];
 
    total = 0;
    fgets(string,MAXLINE,lfil);     /* read in first name */
    if (string[0] == '>') strncpy(dbname,string,70);
    else {
         fprintf(efil,"Warning: SCANFILE in unrecognised format\n");
         test = 0;
         }
    if(test == 0) return(0);
    while ((dlength = getlib(string)) != 0) {
         if (dlength >= sum) compare(0);
         for(i=1;i<count;i++)
              if (matches[i-1][0] !=0) compare(i);
 
    /* flag indicates matches have occured for that query seqence
       only call subtract if all query sequences have matches
       and the database sequence is longer than the query sequence
       (sum) */
 
          flag = 0;
          for(i=0;i<count;i++)
               if(matches[i][0] != 0) flag++;
                    if(flag == count) subtract(count,dbname);
          }
    if (total == 0) fprintf(rfil,"no matches found\n");
    else fprintf(rfil,"\ntotal of %d matches in %d sequences\n"
           ,total,totalseq);
}
 
getdata(count)
   int count;
{
    int i, flag;
 
    total = 0;
 
    while ((dlength = getseq(dbname)) != 0) {
         if (dlength >= sum) compare(0);
         for(i=1;i<count;i++)
              if (matches[i-1][0] != 0) compare(i);
         flag = 0;
         for(i=0;i<count;i++)
              if(matches[i][0] != 0) flag++;
              if(flag == count) subtract(count,dbname);
         }
    if (total == 0) fprintf(rfil,"no matches found\n");
    else fprintf(rfil,"\ntotal of %d matches in %d sequences\n"
           ,total,totalseq);
}
 
/* This routine is called if file is a SCANFILE, and so no checking of file
   format is performed
*/
getlib(string)              /* read in library sequences */
   char string[];
{
    int i, n, len, stop;
 
    n = stop = 0;
    if (string[0] == '>') strncpy(dbname,string,70);
    if(*dbname != 0)
         if(strlen(dbname) >= 70) dbname[70] = '\n';
    while(fgets(string,MAXLINE,lfil) != 0) {
         if (string[0] == '>') {
              break;
             }
        if(stop == 1) break;
        len = strlen(string)-1;
        if(((len+n)>=MAXSEQ)&&(stop == 0)){
  fprintf(efil,"WARNING: sequence over %d long; truncated.\n",MAXSEQ)
 ;
             len = MAXSEQ - n;
             stop = 1;
             }
        for(i=0;i<len;i++){
             if(string[i] == ' ') break;
             dseq[n++] = string[i];
             }
         }
    return(n);
}
 
/* This routine will read in one sequence from a database file.  The
   sequence can be in any of the supported formats.
*/
getseq(dbname)
    char dbname[];
{
    int i, j, len;
    int embl, genbank, staden, fastp, codata;
    char temp[MAXLINE], split;
    int offset;
 
    embl = genbank = staden = fastp = codata = FALSE;
    offset = j = 0;
 
    if(fgets(temp,MAXLINE,lfil) == 0) return(0);
    if ((temp[0] == '<')&&(temp[19] == '>')) staden = TRUE;
    else if(strncmp(temp,"ID   ",5) == 0) embl = TRUE;
    else if(strncmp(temp,"LOCUS     ",10) == 0) genbank = TRUE;
    else if(strncmp(temp,"ENTRY",5) == 0) codata = TRUE;
    else if (temp[0] == '>') fastp = TRUE;
 
    if (embl){
        strncpy(dbname,temp+5,70);
         while(temp[0] != ' ')
              fgets(temp,MAXLINE,lfil);
         }
 
    if (genbank){
         while(strncmp(temp,"ORIGIN",6) !=0){
              fgets(temp,MAXLINE,lfil);
              if(strncmp(temp,"DEFINITION",10) == 0)
              strncpy(dbname,temp+12,70);
              }
         fgets(temp,MAXLINE,lfil);
         }
 
    if(codata){
         strncpy(dbname,temp+6,70);
         while(strncmp(temp,"SEQUENCE",8) != 0)
              fgets(temp,MAXLINE,lfil);
         fgets(temp,MAXLINE,lfil);
         }
 
    if (fastp){
         strncpy(dbname,temp+1,70 );
         fgets(temp,MAXLINE,lfil);
         }
 
    if(staden){
         strncpy(dbname,temp+1,18);
         offset = 20;
         }
 
    if(*dbname != 0)
        if(strlen(dbname) >= 70) dbname[70] = '\n';
 
    do{
        if(strncmp(temp,"//",2) == 0) break;
        len = strlen(temp);
        for(i=offset;i<len;i++){
             if(j==MAXSEQ) break;
              split = uptable[temp[i]];
             if(split == '@'){
                 while(fgets(temp,MAXLINE,lfil) != 0) ;
                 return (j);
                 }
             if((isalpha(split)!=FALSE)||(split == '*')||(split == '-'))
                        dseq[j++] = split;
              else if(temp[i] == '\n')break;
              }
         if(staden) offset = 0;
         if(fgets(temp,MAXLINE,lfil) == 0) break;
         }
    while (TRUE);
 
   if(j==MAXSEQ)
       fprintf(efil,"\nWARNING: sequence %s over %d long; truncated\n",
       dbname,MAXSEQ);
 
    return(j);
}
 
compare(q)
   int q;
{
    int i, j, k, total, count, multi, n, postemp;
    int stop;
 
    multi = count = 0;
    for(i=1;i<=matches[0][0];i++)
       matches[q][i] = 0;
    matches[q][0] = 0;
 
    postemp = matches[0][0];
 
    stop = dlength - minmotif + 1;
    if(q==0) for(i=0;i<stop;i++) {
         if((left==1)&&(i==1)) break;
        if((right==1)&&(i==0)) i = dlength-maxmotif;
         for(j=0,total=0;j<qlength[q];j++) {
              total = total + setmatrix[q][(int)dseq[i+j]][j];
              if (total <= j){   /* break out as soon as an amino*/
                   break;        /* acid does not match */
                   }
              }
  /* if all aa's match record a match in matches; matches[x][0]
     contains the number of matches in matches[x]       */
 
        if(total == qlength[q]) {
              count++;
              if(matches[q][0] >= SCORE) {
                   fprintf(efil,"WARNING matches overflow.\n");
                   return;
                   }
              matches[q][0] = matches[q][0] + 1;
              matches[q][count] = i+1;   /* offset by one to count from
                                       one instead of zero */
              }
         }
 
   /* add one to total for each match, as soon as a match does not occur
    break out of the loop */
 
    else if(q>0){
        count = matches[0][0];
        for(i=0;i<count;i++){
            multi = 0;
            stop = matches[q-1][i+1]+max[q-1]+qlength[q-1]-1;
           for(k=matches[q-1][i+1]+min[q-1]+qlength[q-1]-1; k<=stop ;k++){
                 if(k>dlength) break;
                if(matches[q-1][i+1] <= 0) break;
                 for(j=0,total=0;j<qlength[q];j++) {
                        total = total + setmatrix[q][(int)dseq[k+j]][j];
                        if (total <= j) break;
                        }
                   if(total == qlength[q]) {
                       if(multi>0){
                            postemp++;
                            for(n=0;n<q;n++)
                                 matches[n][postemp] = matches[n][i+1];
                           matches[q][0]++;
                            matches[q][postemp] = k+1;
                           }
                        else{
                           matches[q][0] = matches[q][0] + 1;
                            matches[q][i+1] = k+1;
                            multi++;
                           }
      /* offset by one to count from one instead of zero */
      /* multi checks for more than one match to last motif */
                       }
                  }
              }
          matches[0][0] = postemp;
          }
}
 
subtract(q,dbname)
int q;
char dbname[];
{
    char *calloc();
    int i, k, j, next;
 
    next = 0;
 
    for(j=0;j<matches[0][0];j++){
         if (matches[q-1][j+1] > 0) {
              next++;
   /* print out matched sequence name */
              if(next == 1) {
                   fprintf(rfil,"\n%s",dbname);
                   totalseq++;
                   }
              total++;
              fprintf(rfil,"match at;");
              for(i=0;i<q;i++)
                   fprintf(rfil,"%d ",matches[i][j+1]);
              fprintf(rfil,"\n");
 
   /* print out matched sequence */
              for(i=matches[0][j+1];i<(matches[q-1][j+1]
              +qlength[q-1]);i++)
                   fprintf(rfil,"%c",dseq[i-1]);
   /* allow for offset of numbers in match arrays */
              fprintf(rfil,"\n");
 
   /* underline matched amino acids */
              for(k=0;k<q;k++) {
                   for(i=matches[k][j+1];i<(matches[k][j+1]
                   + qlength[k]);i++)
                        fprintf(rfil,"~");
                   if (k == q-1) break;
                   for(i=0;i<matches[k+1][j+1]
                   -matches[k][j+1]-qlength[k];i++)
                         fprintf(rfil," ");
                   }
              fprintf(rfil,"\n");
              }
         }
}
 
helpscreen()
{
   cls();
    printf("PATTERN\n~~~~~~~\n");
    printf("   A utility to search for a pattern of motifs \
within a sequence or set of\n");
    printf("sequences.  The motifs are defined as single \
elements; ABCD, or ");
    printf("bracketed\nelements AB[DEF]C, where the third element \
can be one of D,");
    printf(" E or F.  A\nclaret (^) before an element signifies 'not'.");
    printf("  The minimum and maximum gap\nvalues for each pair of\
 query motifs must be ");
    printf("given between the pair of motifs,\nseparated by a \
comma (eg 5,7).");
#if (waterloo)
    printf("  The program should be run in the batch stream.\n");
#endif
    printf("\n   PARAMETERS\n   ~~~~~~~~~~\n");
    printf("   motif       eg. AKILK                        \
Mandatory parameter\n");
    printf("   minimum amd maximum gap values eg 5,9        \
Mandatory parameter\n\n");
    printf("   SEQ=        The sequence searched through.   \
Mandatory parameter\n");
    printf("   OUT=        The results file.                \
DEFAULT: %s\n",RESFILE);
    printf("   ERROR=      File to receive run time errors. \
DEFAULT: %s",ERRLOG);
    printf("\n\n A minimum of two query motifs, a maximum and\
 minimum gap value and");
    printf(" a data \nbase file name must be given.\n");
    printf(" eg. PATTERN MNEST 5,7 LLKI SEQ=DATA SCANFILE\n");
    exit(0);
}
