IUBio GIL .. BIOSCI/Bionet News .. Biosequences .. Software .. FTP

Phi-Psi angles from Brookhaven Protein Data (C code)

Rick MacDonald rbm8p at darwin.clas.Virginia.EDU
Tue Jan 26 12:48:53 EST 1993



George Johnson <geojohn at casbah.acns.nwu.edu> recently asked for a
routine to calculate phi and psi angles from Brookhaven (pdb) 
coordinates.   

The short C program that follows calculates phi-psi and alpha-carbon
bend and torsion angles for Brookhaven protein data files.  The
people who manage the data bank also have FORTRAN routines to do
this.

I have run it on SGI 4D and IBM RS6000's, but you may have to fiddle
with your local math library routines.

To Build: cc sifi.c -o sifi -lm  

Usage: sifi <pdbfile>

/*********************   CUT HERE      ********************************/

/***********************************************************************
This program was written by 

Rick MacDonald
University of Virginia Biophysics

rbm8p at virginia.edu

This program will print for each amino acid residue in a pdb file: 
    psi
    phi
    alpha carbon bend
    alpha carbon dihedral

To Build: cc sifi.c -o sifi -lm  

Usage: sifi <pdbfile>
***********************************************************************/
#include <stdio.h>
#include <math.h>

#define TRUE	1
#define FALSE	0

#define LINELEN 100

#define MAXATOMS 10000
#define MAXRES   1000

typedef struct {
	int	num;
	char	a_type[6];
	char	r_type[4];
	char	subunit;
	int	resnum;
	float	p[3];	    /* position x,y,z */
} ATOM;

typedef struct {
	int	nitrog;
	int	alphac;
	int	carbon;
	int	oxygen;
	char	*r_type;    /* 3 letter code */
	char	res_type;   /* 1 letter code */
	char	subunit;
	float	phi;
	float	psi;
	float	cab;
	float	cad;
	int 	resnum;
} AMINO;

ATOM	atom[MAXATOMS];
AMINO	res[MAXRES];

int 	err;


main(int argc, char **argv)
{
char	*strstr(), *strncpy(), *strcat();
char	*next_str(char *string);
float	angle(float *v1, float *v2);
float	dot(float *v1, float *v2);
float	dihedral(float *p1, float *p2, float *p3, float *p4);
void	cross(float *v1, float *v2, float *p);
float	len(float *v1);
int 	cmpstr(char *s1, char *s2);
void	read_atom_record(char *rec, ATOM *atom1);
char	single_letter_code(char *string); 

    register i;

    FILE    *Fpdb;

    char    tname[80], fname[80], line[LINELEN], *temp, WAS, WUZ, WLB;

    float  v1[3], v2[3];
    int	    ndx, n_atoms, n_res, rno, is, was;
    
/*-----------------------------------------------------------------------
Open file specified by command line, if it exists 
-----------------------------------------------------------------------*/
    if(argc<2) err = printf("Usage: sifi <pdbfile>\n");

    if(argc!=2) exit(1);

    temp = strncpy(fname,argv[argc-1],sizeof(tname));
    if ((Fpdb = fopen(fname,"r"))==NULL)
    	err = printf("Could not open file %s\n",fname);

/*-----------------------------------------------------------------------
Read through the PDB format file to extract ATOM records
-----------------------------------------------------------------------*/

    ndx = 0;
    while (fgets(line,LINELEN,Fpdb)!=0) {
    	if (strstr(line,"ATOM  ")!=NULL) {
    	    read_atom_record(line,&atom[ndx]);
    	    ndx++;
    	}
    }
    n_atoms = ndx;

/*-----------------------------------------------------------------------
Now we've got the ATOMs. Re-order them into residues. 
-----------------------------------------------------------------------*/
    rno = 0;
    was = atom[0].resnum;
    for (ndx = 0; ndx < n_atoms; ndx++) {
	is = atom[ndx].resnum;
    	if (is != was) rno++;
	if (cmpstr(atom[ndx].a_type,"CA")!=NULL) {
		res[rno].resnum = atom[ndx].resnum;
		res[rno].alphac = ndx;
		res[rno].r_type = atom[ndx].r_type;
		res[rno].subunit= atom[ndx].subunit;
	}
	else if (cmpstr(atom[ndx].a_type,"N")!=NULL)
		res[rno].nitrog = ndx;
	else if (cmpstr(atom[ndx].a_type,"C")!=NULL)
		res[rno].carbon = ndx;
	else if (cmpstr(atom[ndx].a_type,"O")!=NULL)
		res[rno].oxygen = ndx;
	was = is;
    }
    
    n_res = rno; 

/*-----------------------------------------------------------------------
Find for each residue:
    	single letter residue code
	phi angle
	psi angle
	cab angle = bend angle between adjacent alpha carbons 
	cad angle = is the dihedral() or torsion angle
-----------------------------------------------------------------------*/

    res[0].res_type = single_letter_code(res[0].r_type);
    res[0].psi	    = -(dihedral(atom[res[1].nitrog].p,
				    	 atom[res[1].alphac].p,
				    	 atom[res[1].carbon].p,
				    	 atom[res[2].nitrog].p));
    res[1].res_type = single_letter_code(res[1].r_type);
    res[1].phi	    = -(dihedral(atom[res[0].carbon].p,
				    	 atom[res[1].nitrog].p,
				    	 atom[res[1].alphac].p,
				    	 atom[res[1].carbon].p));
    res[1].psi	    = -(dihedral(atom[res[1].nitrog].p,
				    	 atom[res[1].alphac].p,
				    	 atom[res[1].carbon].p,
				    	 atom[res[2].nitrog].p));

    for (rno = 2; rno <= n_res; rno++) {
	res[rno].res_type   = single_letter_code(res[rno].r_type);
	
	res[rno].phi	    = -(dihedral(atom[res[rno-1].carbon].p,
				    	 atom[res[rno  ].nitrog].p,
				    	 atom[res[rno  ].alphac].p,
				    	 atom[res[rno  ].carbon].p));
					
	res[rno].psi	    = -(dihedral(atom[res[rno  ].nitrog].p,
				    	 atom[res[rno  ].alphac].p,
				    	 atom[res[rno  ].carbon].p,
				    	 atom[res[rno+1].nitrog].p));
					
    	res[rno].cad	    = dihedral( atom[res[rno-2].alphac].p,
    	    	    	    	    	atom[res[rno-1].alphac].p,
				    	atom[res[rno  ].alphac].p,
				    	atom[res[rno+1].alphac].p);
    	for (i=0;i<=2;i++) {
    	    v1[i] = atom[res[rno-1].alphac].p[i] - atom[res[rno].alphac].p[i];
    	    v2[i] = atom[res[rno+1].alphac].p[i] - atom[res[rno].alphac].p[i];
    	}    
    	res[rno].cab	    = angle(v1, v2);
    }
			

/*-----------------------------------------------------------------------
Print this stuff out as a table.
-----------------------------------------------------------------------*/
    	printf("%s\n", fname);
    	printf("   #     resid   phi  psi    Cab  Cat\n");

    	WAS = 'x';
	WUZ = 'x';
	WLB = res[0].subunit;
	res[n_res+1].subunit = 'x';
    	for (rno = 0; rno <= n_res; rno++) {
	
	    if (res[rno].subunit != WAS) {  	/* This gibberish flags the meaningless      */
		res[rno].phi = 999.;	    	/* values at the ends of each subunit as 999 */
	    }
	    if (res[rno].subunit != WUZ) {
		res[rno].cab = 999.;
		res[rno].cad = 999.;
	    }
	    WUZ = WAS;
	    WAS = res[rno].subunit;
	    WLB = res[rno+1].subunit;
	    if (res[rno].subunit != WLB) {
		res[rno].psi = 999.;
		res[rno].cab = 999.;
		res[rno].cad = 999.;
	    }
	    
	
	    printf("%4d %c   %3s %c ", 
	    	res[rno].resnum, res[rno].subunit, res[rno].r_type, res[rno].res_type);
	    printf("%5.0f", res[rno].phi);
	    printf("%5.0f", res[rno].psi);
	    printf("  %5.0f", res[rno].cab);
	    printf("%5.0f", res[rno].cad);
	    printf("\n");
    	}

} /* end main */

/***********************************************************************

***********************************************************************/
void read_atom_record(char *rec, ATOM *atom1) 
{
	char	*next_str(char *string);
	char	*ptr;

	float	fl;

	ptr = rec;
	
	err = sscanf( (ptr = next_str(ptr)), "%d",  &atom1->num    );
	err = sscanf( (ptr = next_str(ptr)), "%3s",  atom1->a_type );
	err = sscanf( (ptr = next_str(ptr)), "%3s",   atom1->r_type );
	ptr = ptr + 4;
	atom1->subunit = *ptr;
	err = sscanf( (ptr = next_str(ptr)), "%d",  &atom1->resnum );


/*  Remove this comment to print atom records as read from pdb for debug
printf("\n%5d %4s %s %c %d ", atom1->num, atom1->a_type, atom1->r_type, atom1->subunit, atom1->resnum);
*/


/* This is a silly patch --- but I can't make %Lf read a float on the SGI! */
	err = sscanf( (ptr = next_str(ptr)), "%f", &fl   );
		atom1->p[0] = fl;
	err = sscanf( (ptr = next_str(ptr)), "%f", &fl   );
		atom1->p[1] = fl;
	err = sscanf( (ptr = next_str(ptr)), "%f", &fl   );
		atom1->p[2] = fl;

    	return;
}

/***********************************************************************

***********************************************************************/
char single_letter_code(char *string) 
{
    	     if (cmpstr(string,"CYS")!=NULL) return 'C';
	else if (cmpstr(string,"HIS")!=NULL) return 'H';
	else if (cmpstr(string,"ILE")!=NULL) return 'I';
	else if (cmpstr(string,"MET")!=NULL) return 'M';
	else if (cmpstr(string,"SER")!=NULL) return 'S';
	else if (cmpstr(string,"VAL")!=NULL) return 'V';
	else if (cmpstr(string,"ALA")!=NULL) return 'A';
	else if (cmpstr(string,"GLY")!=NULL) return 'G';
	else if (cmpstr(string,"LEU")!=NULL) return 'L';
	else if (cmpstr(string,"PRO")!=NULL) return 'P';
	else if (cmpstr(string,"THR")!=NULL) return 'T';
	else if (cmpstr(string,"PHE")!=NULL) return 'F';
	else if (cmpstr(string,"ARG")!=NULL) return 'R';
	else if (cmpstr(string,"TYR")!=NULL) return 'Y';
	else if (cmpstr(string,"TRP")!=NULL) return 'W';
	else if (cmpstr(string,"ASN")!=NULL) return 'N';
	else if (cmpstr(string,"GLU")!=NULL) return 'E';
	else if (cmpstr(string,"GLN")!=NULL) return 'Q';
	else if (cmpstr(string,"LYS")!=NULL) return 'K';
	else if (cmpstr(string,"ASP")!=NULL) return 'D';
    return '*';
}

/***********************************************************************
 cmpstr does a simple compare of a string s1 with string s2 and returns a  
 pointer to the beginning of s1 if same or NULL if not same
***********************************************************************/
int cmpstr(char *s1, char *s2)
{
	loop:  if(*s1 != *s2) return(0);
	if(*s1 == '\0') return(1);
	s1++;   s2++;
	goto loop;
}

/***********************************************************************

***********************************************************************/
char *next_str(char *string)
{
	char	c;
	while (((c = *string) != ' ') && (c != '\t')) string++;
	while (((c = *string) == ' ') || (c



More information about the Bio-soft mailing list

Send comments to us at archive@iubioarchive.bio.net