From MATHOG@SEQVAX.CALTECH.EDU Mon May 24 11:29:03 1993
Received: from SEQVAX.CALTECH.EDU (seqvax.bio.caltech.edu) by sunflower.bio.indiana.edu
	(4.1/9.7jsm) id AA19332; Mon, 24 May 93 11:29:02 EST
Date: Mon, 24 May 1993 9:28:23 -0700 (PDT)
From: David Mathog <MATHOG@SEQVAX.CALTECH.EDU>
To: Archive@Bio.Indiana.Edu
Message-Id: <930524092823.20a030cb@SEQVAX.CALTECH.EDU>
Subject: TFDTOGCG
Status: R

This is a program submission to your software Archive.

TFDTOGCG is a small Fortran program that converts TFD site.dat files to GCG
format.  The resulting files are, as near as I can tell, in exactly the same
format as those provided by GCG.  There is no readme, but the comments at the
front of the program could be chopped out and made into one.  The program has
only been tested under VMS.

The same program was posted to bionet.software about a month ago.

Thanks,

David Mathog
mathog@seqvax.bio.caltech.edu
Manager, sequence analysis facility, biology division, Caltech

C	TFDTOGCG.FOR
C	9-APR-1993 David Mathog, Division of Biology, Caltech
C	mathog@seqvax.bio.caltech.edu
C
C	This little program takes one of David Ghosh's site.dat
C	files and reformats it for GCG usage.
C
C	Put the output file into the GCG system as: GENMOREDATA:TFSITES.DAT
C
C	FTP to NCBI.NLM.NIH.GOV and look in repository/TFD/tfd.ascii
C	for the site.dat file.
C
C	This works with GCG 7.2 and VMS 5.5-2 and *may* work on other 
C	systems (not tested).
C
C	Instructions for building an executable:
C
C	   $ for/nolis  tfdtogcg
C	   $ link/nomap tfdtogcg
C
C	Example session (program's prompts not shown):
C
C	   $ run tfdtogcg
C          site.dat
C	   temporary.out
C	   9-APR-1993, Converted TFD X.Y to GCG format
C
C	   $ copy temporary.out genmoredata:tfsites.dat
C	   $ set file/prot=w:re genmoredata:tfsites.dat
C	   $ delete temporary.out.
C	   $ purge genmoredata:tfsites.dat
C
C	There is practically NO error checking, so watch out!
C
C
	implicit none
c
c  Main program.  Prompts on unit 5/6 for an input file name, an
c  output file name and any number of comment lines, then copies
c  every record of the input file to the output file, rearranged
c  from the ghosh_record layout into the gcg_record layout.
c
c  inline  - one comment line typed by the user
c  infile  - name of the file to read  (unit 10)
c  outfile - name of the file to write (unit 11)
c  inlen   - character count returned by the Q edit descriptor
c  istat   - IOSTAT of the last read on unit 10
c  nrec    - number of site records converted so far
c  ok      - .true. while comment lines are still being entered
c
	character*2048 inline,infile,outfile
	integer*4 inlen,istat,nrec
	logical   ok
c
c  Ghosh lays out site records like this
c
	structure /ghosh_record/
	  character SITE_ID*6
	  character FAC_NAME*25
	  character SEQ_NAME*30
	  character NA_SEQ*45
	  character SEQ_TYPE*1
	  character SYSTEM*10
	  character GENOME*1
	  character TRN_UNIT*20
	  character COMMENTS*80
	  character MAIN_REF*60
	  character FAC_SOURCE*16
	  character LOCAT_REF*20
	  character LOCATION*20
	  character METHOD*11
	  character N_PROB*8
	  character REF_N*8
	  character STRAND*1
	  character BINDING*1
	end structure
c
c  GCG lays out TFSITES.DAT records like this
c
	structure /GCG_RECORD/
	  character SEQ_NAME*31
	  character SPACER1*2
	  character NA_SEQ*45
	  character SPACER2*5
	  character FAC_NAME*25
	  character SPACER3*1
	  character MAIN_REF*60
	end structure
c
	record /ghosh_record/ ghosh
	record /gcg_record/   gcg
c
c	Init the spacers for the GCG record.  They are set once and
c	written unchanged with every output record.
c
	gcg.spacer1 = '0 '
	gcg.spacer2 = ' 0 ! '
	gcg.spacer3 = ' '
c
	write(6,*)'TFDtoGCG'
	write(6,*)'This program converts one of David Ghosh''s site'
	write(6,*)'  files to GCG''s format'
c
c	The Q edit descriptor returns the number of characters left
c	in the input record, so inlen is the length of what was typed.
c
	write(6,*)'Input the name of the file to process'
	read(5,'(q,a)')inlen,infile(1:inlen)
c
	open(unit=10,file=infile(1:inlen)
	1 ,form='UNFORMATTED',organization='SEQUENTIAL',status='OLD'
	1 ,recordtype='VARIABLE', READONLY)
c
	write(6,*)'Input the name of the output file'
	read(5,'(q,a)')inlen,outfile(1:inlen)
c
	open(unit=11,file=outfile(1:inlen)
	1 ,form='UNFORMATTED',status='NEW',organization='SEQUENTIAL'
	2 ,recordtype='STREAM_LF',recl = 255)
c
c	get the comments; each one is written with a leading blank,
c	and an empty line ends the list
c
	write(6,*)'Enter as many lines of comments as you would like'
	write(6,*)'  End each line with a <return>'
	write(6,*)'  End the last line with <return><return>'
	ok = .true.
	do while (ok)
	   read(5,'(q,a)')inlen,inline(1:inlen)
	   if(inlen.eq.0)then
	       ok = .false.
	   else
	       write(11)' '//inline(1:inlen)
	   end if
	end do
	write(6,*)'Working ...'
c
c	write a title line, this one is *easy*
c
	GCG.SEQ_NAME = 'NAME'
	GCG.FAC_NAME = 'FACTOR'
	GCG.NA_SEQ   = 'SEQUENCE'
	GCG.MAIN_REF = 'REFERENCE'
	write(11)gcg
c
c	Now write the divider
c
	write(11)'..'	
c
c	Convert the site records.  IOSTAT is zero for a good read,
c	negative at end of file and positive for an error.  Only a
c	zero status may be converted: after a failed read the contents
c	of ghosh are not a valid record, and retrying an error that
c	does not go away would never end.
c
	nrec  = 0
	istat = 0
	do while(istat.eq.0)
	    read(10,iostat=istat)ghosh
	    if(istat.eq.0)then
	       nrec = nrec + 1
c	       the extra blank widens the 30 character TFD name to
c	       the 31 character GCG field, leaving room for a ";"
	       GCG.SEQ_NAME = GHOSH.SEQ_NAME//' '
	       GCG.FAC_NAME = GHOSH.FAC_NAME
	       GCG.NA_SEQ   = GHOSH.NA_SEQ
	       GCG.MAIN_REF = GHOSH.MAIN_REF
	       call fixseqname(GCG.SEQ_NAME,GHOSH.N_PROB)
	       write(11)gcg
	    end if
	end do
	close(unit=10)
	close(unit=11)
c
c	a positive status means the input ended on an error, not at
c	end of file, so the output file is incomplete
c
	if(istat.gt.0)then
	   write(6,*)'TFDtoGCG: error reading input, IOSTAT =',istat
	   write(6,*)'  site records converted before error:',nrec
	   stop 'TFDtoGCG: stopped on input error'
	end if
	stop 'TFDtoGCG: normal completion' 
	end

	subroutine fixseqname(NAME,N_PROB)
c
c	Rewrite a site name in place:
c
c	  1. an all blank NAME becomes "UNKNOWN"
c	  2. if the number held in N_PROB is greater than LIMIT a ";"
c	     is put in front of the name (the last character of NAME
c	     is dropped to make room) to indicate a frequent motif
c	  3. blanks inside the name become underscores; the blank
c	     padding after the last nonblank character is left alone
c
c	NAME   (in/out) the name to fix, any length
c	N_PROB (in)     character field holding a real number; an all
c	                blank field reads as zero
c
c	If N_PROB cannot be read as a number a warning is written to
c	unit 6 and no ";" is added.
c
	implicit none
	character name*(*)
	character N_prob*(*)
	integer i,last,inlen,nlen,istat
	real limit,value
	character vfmt*16
	parameter (limit = 5.0e-4)
c
c	Do the length this way so that it will still work if the
c	length of NAME, N_PROB change.
c
	inlen=len(NAME)
	nlen =len(N_PROB)
	if(inlen.le.0)return
c
c	Read the probability.  The format is built at run time as
c	"(Fnnn.2)" with nnn the width of N_PROB; blanks inside a
c	format specification are ignored.
c
	value = 0.0
	if(nlen.gt.0)then
	   write(vfmt,1000)nlen
1000	   format('(F',I6,'.2)')
	   read(N_prob,vfmt,iostat=istat)value
	   if(istat.ne.0)then
	      value = 0.0
	      write(6,*)'fixseqname: cannot read N_PROB "',N_prob,'"'
	   end if
	end if
c
c	An empty name is replaced before the ";" goes in, otherwise
c	a frequent motif without a name would come out as a bare ";".
c
	if(name.eq.' ')name = 'UNKNOWN'
c
c	put in a ";", if needed to indicate a frequent motif
c
	if(value.gt.limit)NAME = ';'//NAME(1:inlen-1)
c
c	find the last nonblank character, then convert only the
c	blanks in front of it, so that underscores which were already
c	at the end of the name are kept
c
	last = 0
	do i = 1, inlen
	  if(name(i:i).ne.' ')last = i
	end do
	do i = 1, last
	  if(name(i:i).eq.' ')name(i:i)='_'
	end do
c
	return
	end


