From news@ulrik.uio.no Wed Feb  9 04:51:19 1994
Received: from net.bio.net by sunflower.bio.indiana.edu
	(4.1/9.7jsm) id AA17015; Wed, 9 Feb 94 04:51:13 EST
Received: from nac.no by net.bio.net (8.6.5/IG-2.0) with SMTP 
	id BAA24220; Wed, 9 Feb 1994 01:46:36 -0800
Received: from pat.uio.no by nac.no with SMTP (PP) id <23776-0@nac.no>;
          Wed, 9 Feb 1994 10:44:12 +0100
Received: from ulrik.uio.no by pat.uio.no with local-SMTP (PP) 
          id <22498-0@pat.uio.no>; Wed, 9 Feb 1994 10:43:51 +0100
Received: by hermod.uio.no ; Wed, 9 Feb 1994 10:43:49 +0100
To: bionet-software-sources@nac.no
Path: biomaster.uio.no!rodrigol
From: rodrigo.lopez@biotek.uio.no (Rodrigo Lopez)
Newsgroups: bionet.software.gcg,bionet.software.sources
Subject: fastacheck.f
Date: 9 Feb 1994 09:43:48 GMT
Organization: The Norwegian EMBnet node
Lines: 259
Distribution: world
Message-Id: <2jab8k$3rn@hermod.uio.no>
Nntp-Posting-Host: biomaster.uio.no
Status: R


	Program FastaCheck

	Implicit None

	Integer HITLISTSIZE
	Parameter (HITLISTSIZE = 1000)

	Character HitList(512,HITLISTSIZE)
	Character DocLines(512,HITLISTSIZE)
	Character OutLine(512)

	Integer InFile, OutFile, ILen
	Integer IHits, NHits
	Integer IDoc, NDoc
	Character InFName(256), OutFName(256)
	Character Line(512)
	Integer NSeq, NRes, IWord
	Character DbName(256), SeqName(256)
	Real meann, mean1, sdn, sd1
	Integer init1, initn, opt
	Real SDs
	Real hitpct
	Integer HitRes
	Real ScorePct
	Logical Check1, Checkn
	Real MinSd1/3./, MinSdn/3./
	Logical Accepted /.false./
	Integer i
	Integer AcceptCnt

	Logical CLNoInteract, CLGetOldFName, CLGetNewFName
	Logical StrMatch, CLGetReal
	Integer ReadString, CIStrFind, GetString
	Logical AcceptScore, CheckSd

	Call Doc ('eGenRunDoc:fastacheck.txt')
	Call CLComCheck ('eGenRunDoc:fastacheck.cmd')

	Check1 = CLGetReal('MINSD1',0., 99., MinSd1)
	Checkn = CLGetReal('MINSDN',0., 99., MinSdn)

	If (.not.CLGetOldFName('INfile1', 1, InFName) .and.
     &	      .not.CLNoInteract()                         ) Then
	  Call WriteF ('\n FASTACHECK of what FASTA output file ?  ')
	  If (GetString(InFName).eq.0) Stop ' '
	End If
	Call OpenF (InFile, InFName, 'r')

	Call StrCopy (OutFName, InFName)
	Call BaseName (OutFName)
	Call NewFileType (OutFName, '.check')
	If (.not.CLGetNewFName('OUTfile1', 2, OutFName) .and.
     &	      .not.CLNoInteract()                         ) Then
	  Call WriteF ('\n What should I call the output file'//
     &		  ' (* %s *) ?  ', OutFName)
	  Call GetString(OutFName)
	End If
	Call OpenF (OutFile, OutFName, 'w')

	Call ReadString(InFile, Line, ILen)
	If (Ilen .lt. 0) stop ' '

!* read file heading

	Do While (CIStrFind('(Peptide) ', Line) .eq. 0)
	  Call ReadString(InFile, Line, ILen)
	  If (Ilen .lt. 0) stop ' '
	End Do
	Call SReadF (Line, 'of: %s', SeqName)

!* copy to summary line

	Do While (CIStrFind('Word Size', Line) .eq. 0)
	  Call FWriteF (OutFile, '%s\n', Line)
	  Call ReadString(InFile, Line, ILen)
	  If (Ilen .lt. 0) stop ' '
	End Do

	Call SReadF (Line, 'TO: %s Sequences: %d '//
     &		  'Symbols: %d Word Size: %d',
     &		  DbName, NSeq, NRes, IWord)

!* get mean scores and standard deviations

	Do While (CIStrFind('mean init', Line) .eq. 0)
	  Call ReadString(InFile, Line, ILen)
	  If (Ilen .lt. 0) stop ' '
	End Do

	Call SReadF (Line, 'score: %f (%f)', meann, sdn)
	Call ReadString(InFile, Line, ILen)
	If (Ilen .lt. 0) stop ' '
	Call SReadF (Line, 'score: %f (%f)', mean1, sd1)

!* read one-line descriptions from best scores list

	Do While (CIStrFind('The best scores are', Line) .eq. 0)
	  Call ReadString(InFile, Line, ILen)
	  If (Ilen .lt. 0) stop ' '
	End Do

	Call ReadString(InFile, Line, ILen)
	If (Ilen .lt. 0) stop ' '

	Do While (ReadString(InFile, Line, ILen) .gt. 0)
	  NHits = NHits +  1
	  Call StrCopy (HitList(1,NHits), Line)
	End Do

	IHits = 0
	Do While (ReadString(InFile, Line, ILen) .ge. 0)
	  Do While (.not.StrMatch(SeqName, Line))
	    If (CIStrFind('CPU time:',Line) .ne. 0) GoTo 999
	    If (Accepted) Call FWriteF (OutFile, '%s\n', Line)
	    Call ReadString(InFile, Line, ILen)
	    If (Ilen .lt. 0) stop ' '
	  End Do
	  NDoc = 0
	  IHits = IHits + 1

	  Call ReadString(InFile, Line, ILen)
	  If (Ilen .lt. 0) stop ' '

	  Do While (CIStrFind('SCORES  ', Line) .eq. 0)
	    NDoc = NDoc + 1
	    Call StrCopy (DocLines(1,NDoc),Line)
	    Call ReadString(InFile, Line, ILen)
	    If (Ilen .lt. 0) stop ' '
	  End Do

!* read scoring details

	  Call SReadF (Line, 'Init1: %d Initn: %d Opt: %d',
     &		  init1, initn, opt)
	  Call ReadString(InFile, Line, ILen)
	  If (Ilen .lt. 0) stop ' '
	  Call SReadF (Line, '%f identity in %d aa overlap',
     &		  hitpct, hitres)

!* check for acceptability

	  OutLine(1) = Char(0)
	  Accepted = .false.
	  If (AcceptScore (HitPct, HitRes, ScorePct)) Then
	    Call SWriteF (OutLine,'Minimum identity: %.1f%%',
     &		  ScorePct)
	    Accepted = .true.
	  End If
	  If (Check1 .and. CheckSD(mean1, sd1, init1,
     &		  minsd1, sds)) Then
	    If (Accepted) Call SWriteF (OutLine, '~, ')
	    Call SWriteF (OutLine, '~Init1 SDs: +%.1f', sds)
	    Accepted = .true.
	  End If
	  If (Checkn .and. CheckSD(meann, sdn, initn,
     &		  minsdn, sds)) Then
	    If (Accepted) Call SWriteF (OutLine, '~, ')
	    Call SWriteF (OutLine, '~Initn SDs: +%.1f', sds)
	    Accepted = .true.
	  End If
	  If (Accepted) Then
	    Call FWriteF (OutFile, '(%d) %s\n',
     &		      IHits, HitList(1,IHits))
	    i = 3
	    Do While (i .le. NDoc)
	      Call FWriteF (OutFile, '%s\n', DocLines(1,i))
	      i = i + 1
	    End Do
	    Call FWriteF (OutFile, '\nSCORES     Init1: %d '//
     &		  'Initn: %d Opt: %d\n', Init1, Initn, Opt)
	    Call FWriteF (OutFile,   '           %.2f%% identity '//
     &		  'in %d aa overlap\n', HitPct, HitRes)
	    Call FWriteF (OutFile,   '           %s\n\n', OutLine)
	    AcceptCnt = AcceptCnt + 1
	  End If
	End Do

  999	Call CloseF (InFile)
	Call CloseF (OutFile)

	Call WriteF ('\n\n Hits checked: %d\n     Accepted: %d\n'//
     &		  '     Rejected: %d\n\n',
     &		  NHits, AcceptCnt, NHits-AcceptCnt)

	Call Account ('FastaCheck')

	Stop ' '
	End

!***** AcceptScore ************************************************************
!*
!******************************************************************************

	Logical Function AcceptScore (PctId, Length, ScorePct)

	Implicit None

	Real PctId
	Integer Length
	Real ScorePct
	Real Factor /290.15/
	Real Exponent /-0.56158/
	Integer MinLen /10/
	Integer MaxLen /80/
	Real MaxPct /25.0/
	Logical FirstCall /.true./

	If (FirstCall) Then
	  Call CLGetReal('FACTor', -1E10, 1E10, Factor)
	  Call CLGetReal('EXPonent', -1E10, 1E10, Exponent)
	  Call CLGetInt('MINLen', 0, 1000, MinLen)
	  Call CLGetInt('MAXLen', MinLen, 10000, MaxLen)
	  MaxPct = Factor*(Float(MaxLen)**exponent)
	  Call CLGetReal('MAXPct', 0.0, 100.0, MaxPct)
	End If

	AcceptScore = .false.
	If (Length .lt. MinLen) Return
	If (Length .ge. MaxLen) Then
	  If (PctId .lt. MaxPct) Return
	  ScorePct = MaxPct
	  AcceptScore = .true.
	  Return
	End If

	ScorePct = Factor*(Float(Length)**exponent)
	If (PctId .lt. ScorePct) Return
	AcceptScore = .true.
	Return

	End

	Logical Function CheckSD (MeanScore, Sd, IScore, MinSd, Sds)

	Implicit None

	Real MeanScore, Sd
	Integer IScore
	Real MinSd, Sds

	Real Score

	Score = Float(IScore)

	CheckSD = .false.
	Sds = (Score-MeanScore)/Sd

	If (Sds .ge. MinSd) CheckSD = .true.
	Return

	End


-- 
***************************************************************************
* RODRIGO LOPEZ SERRANO  rodrigol@biomed.uio.no - rodrigol@biotek.uio.no  * 
* Norwegian EMBnet node  Tel:xx-47-22958756 Fax:xx-47-22694130            *
***************************************************************************   

