Context Navigation

source: branches/port5/PERL_SCRIPTS/ARBTOOLS/IFTHELP/format_dssp.pl

Visit:

Last change on this file was 5858, checked in by westram, 16 years ago
fixed errors and warnings in perl scripts
Property svn:eol-style set to `native`; property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 3.1 KB

Line
#!/usr/bin/perl
#
# Convert a DSSP file (read from STDIN) into one flat record per protein
# chain segment (written to STDOUT).
#
# The input is processed in three phases:
#   0  header   - pick up the title line and the REFERENCE, HEADER, COMPND,
#                 SOURCE and AUTHOR entries (the AUTHOR entry ends this phase)
#   1  skip     - ignore everything up to the line containing "RESIDUE AA"
#   2  residues - collect amino acid and secondary structure, one residue
#                 per line; a '!' residue (chain break) starts a new segment
#
# Every record consists of the lines
#   title line, REFERENCE, PDB_ID, DATE, HEADER, COMPND, SOURCE, AUTHOR,
#   SECSTRUCT, SEQUENCE
# where PDB_ID gets "_<chain>" appended unless the chain id is a blank.
#
# Header values are emitted without the blank padding (and without the
# trailing semicolon) that precedes the '.' closing each input line.
#
# Dies if a required header entry is missing, if a line of the residue
# table does not match the expected column layout, or if the residue table
# is never reached.

use warnings;
use strict;

# Return $text without trailing semicolons and whitespace.
# The entry patterns below capture greedily, so the captured text still
# carries (almost all of) the padding in front of the closing '.'.
sub strip_entry_tail {
    my ($text) = @_;
    $text =~ s/[;\s]+\z//;
    return $text;
}

# Parser phases (see file header).
my ($PHASE_HEADER, $PHASE_SKIP, $PHASE_RESIDUES) = (0, 1, 2);

my $titleline;
my $pdb_id;
my $header;
my $compnd;
my $source;
my $date;
my $author;
my $reference;

# One entry per chain segment; the first segment exists from the start so
# that input without any residue still yields one (empty) record.
my @segments = ({ chain => ' ', sequence => '', secstruct => '' });
my $phase    = $PHASE_HEADER;

# Read line by line instead of slurping all of STDIN into a list.
while (my $line = <STDIN>) {
    chomp $line;
    $line =~ s/\r\z//;    # tolerate CRLF line ends; '\.$' would not match otherwise

    if ($phase == $PHASE_HEADER) {
        if ($line =~ /^(==== Secondary Structure Definition.+)\s+\.$/) {
            $titleline = $1;
            $titleline =~ s/\s+\z//;    # drop padding kept by the greedy capture
        }
        elsif ($line =~ /^REFERENCE\s+(.+);*\s+\.$/) {
            $reference = strip_entry_tail($1);
        }
        elsif ($line =~ /^HEADER\s+(.+)\s+(\d\d-\w\w\w-\d\d)\s+(\w{4})\s+\.$/) {
            ($header, $date, $pdb_id) = (strip_entry_tail($1), $2, $3);
        }
        elsif ($line =~ /^COMPND\s+\d?\s+(.+);*\s+\.$/) {
            $compnd = strip_entry_tail($1);
        }
        elsif ($line =~ /^SOURCE\s+\d?\s+(.+);*\s+\.$/) {
            $source = strip_entry_tail($1);
        }
        elsif ($line =~ /^AUTHOR\s+(.+);*\s+\.$/) {
            $author = strip_entry_tail($1);
            $phase  = $PHASE_SKIP;      # AUTHOR is the last entry collected
        }
    }
    elsif ($phase == $PHASE_SKIP) {
        # The residue table starts after its column header line.
        $phase = $PHASE_RESIDUES if $line =~ /RESIDUE AA/;
    }
    elsif ($phase == $PHASE_RESIDUES) {
        # Character 12 is the chain id, 14 the amino acid ('!' = chain break)
        # and 17 the secondary structure code (blank = none). Matching is
        # case-insensitive, so lower-case residue letters are accepted too.
        if ($line =~ /^.{11}(.)\s([A-Z!])..([A-Z\s])/i) {
            my ($chain, $residue, $structure) = ($1, $2, $3);

            if ($residue eq '!') {
                # chain break encountered (-> start new protein sequence)
                push @segments, { chain => ' ', sequence => '', secstruct => '' };
            }
            else {
                # append protein sequence and secondary structure
                my $segment = $segments[-1];
                $segment->{sequence}  .= $residue;
                $segment->{secstruct} .= $structure eq ' ' ? '-' : $structure;
                $segment->{chain}      = $chain;   # chain id of the latest residue wins
            }
        }
        else {
            die "Can't parse '$line'";
        }
    }
}

if (not defined $titleline) { die "Could not find title line"; }
if (not defined $pdb_id)    { die "Could not extract PDB_ID entry from HEADER"; }
if (not defined $header)    { die "Could not find HEADER entry"; }
if (not defined $compnd)    { die "Could not find COMPND entry"; }
if (not defined $source)    { die "Could not find SOURCE entry"; }
if (not defined $date)      { die "Could not extract DATE entry from HEADER"; }
if (not defined $author)    { die "Could not find AUTHOR entry"; }
if (not defined $reference) { die "Could not find REFERENCE entry"; }

# All header entries were found but the residue table was never reached.
if ($phase != $PHASE_RESIDUES) { die "Unknown parse error"; }

# Emit one complete record per chain segment; the header part is repeated
# for every segment.
for my $segment (@segments) {
    print "$titleline\n";
    print "REFERENCE $reference\n";
    if ($segment->{chain} ne ' ') {
        print "PDB_ID ${pdb_id}_$segment->{chain}\n";
    }
    else {
        print "PDB_ID $pdb_id\n";
    }
    print "DATE $date\n";
    print "HEADER $header\n";
    print "COMPND $compnd\n";
    print "SOURCE $source\n";
    print "AUTHOR $author\n";
    print "SECSTRUCT $segment->{secstruct}\n";
    print "SEQUENCE\n$segment->{sequence}\n";
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: