Context Navigation

source: tags/arb_5.2/EDIT4/ED4_protein_2nd_structure.cxx

Visit:

Last change on this file was 6100, checked in by westram, 16 years ago
fix warning "format not a string literal and no format arguments" GB_export_error → GB_export_error/GB_export_errorf
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 53.2 KB

Line
1	/** \file ED4_protein_2nd_structure.cxx
2	* \brief Implements the functions defined in ed4_protein_2nd_structure.hxx.
3	* \author Markus Urban
4	* \date 2008-02-08
5	* \sa Refer to ed4_protein_2nd_structure.hxx for details, please.
6	*/
7
8	#include <iostream>
9	#if !defined(DARWIN)
10	#include <malloc.h>
11	#endif // DARWIN
12
13	#include "arbdb.h"
14	#include "arbdbt.h"
15	#include "ed4_class.hxx"
16	#include "ed4_awars.hxx"
17	#include "ed4_protein_2nd_structure.hxx"
18
19	#include <inline.h>
20
21	#ifndef ARB_ASSERT_H
22	#include <arb_assert.h>
23	#endif
24	#define e4_assert(bed) arb_assert(bed)
25
26	using namespace std;
27
28	// --------------------------------------------------------------------------------
29	// exported data
30
31	/// Awars for the match type; binds the #PFOLD_MATCH_TYPE to the corresponding awar name.
32	name_value_pair pfold_match_type_awars[] = {
33	{ "Perfect_match", STRUCT_PERFECT_MATCH },
34	{ "Good_match", STRUCT_GOOD_MATCH },
35	{ "Medium_match", STRUCT_MEDIUM_MATCH },
36	{ "Bad_match", STRUCT_BAD_MATCH },
37	{ "No_match", STRUCT_NO_MATCH },
38	{ "Unknown_match", STRUCT_UNKNOWN },
39	{ 0, PFOLD_MATCH_TYPE_COUNT }
40	};
41
42	/// Symbols for the match quality (defined by #PFOLD_MATCH_TYPE) as used for match methods #SECSTRUCT_SECSTRUCT and #SECSTRUCT_SEQUENCE_PREDICT in ED4_pfold_calculate_secstruct_match().
43	char *pfold_pair_chars[6] = {
44	strdup(" "), // STRUCT_PERFECT_MATCH
45	strdup("-"), // STRUCT_GOOD_MATCH
46	strdup("~"), // STRUCT_MEDIUM_MATCH
47	strdup("+"), // STRUCT_BAD_MATCH
48	strdup("#"), // STRUCT_NO_MATCH
49	strdup("?") // STRUCT_UNKNOWN
50	};
51
52	/// Match pair definition (see #PFOLD_MATCH_TYPE) as used for match methods #SECSTRUCT_SECSTRUCT and #SECSTRUCT_SEQUENCE_PREDICT in ED4_pfold_calculate_secstruct_match().
53	char *pfold_pairs[6] = {
54	strdup("HH GG II TT EE BB SS -- -. .."), // STRUCT_PERFECT_MATCH
55	strdup("HG HI HS EB ES TS H- G- I- T- E- B- S-"), // STRUCT_GOOD_MATCH
56	strdup("HT GT IT"), // STRUCT_MEDIUM_MATCH
57	strdup("ET BT"), // STRUCT_BAD_MATCH
58	strdup("EH BH EG EI"), // STRUCT_NO_MATCH
59	strdup("") // STRUCT_UNKNOWN
60	};
61
62	// --------------------------------------------------------------------------------
63
64	/** \brief Specifies the characters used for amino acid one letter code.
65	*
66	* These are the characters that represent amino acids in one letter code.
67	* The order is important as the array initializes #char2AA which is used to
68	* access array elements in the tables #cf_parameters and #cf_parameters_norm.
69	*/
70	static const char *amino_acids = "ARDNCEQGHILKMFPSTWYV";
71
72	/** \brief Maps character to amino acid one letter code.
73	*
74	* This array maps a character to an integer value. It is initialized with the
75	* function ED4_pfold_init_statics() which creates an array of the size 256
76	* (for ISO/IEC 8859-1 character encoding). Characters that represent an amino
77	* acid get values from 0 to 19 (according to their position in #amino_acids)
78	* and all others get the value -1. That way, it can be used to get parameters
79	* from the tables #cf_parameters and #cf_parameters_norm or to check if a
80	* certain character represents an amino acid.
81	*/
82	static int *char2AA = 0;
83
84	/** \brief Characters representing protein secondary structure.
85	*
86	* Defines the characters representing secondary structure as output by the
87	* function ED4_pfold_predict_structure(). According to common standards,
88	* these are: <BR>
89	* H = alpha-helix, <BR>
90	* E = beta-sheet, <BR>
91	* T = beta-turn.
92	*/
93	static char structure_chars[3] = {'H', 'E', 'T'};
94
95	/// Amino acids that break a certain structure (#ALPHA_HELIX or #BETA_SHEET) as used in ED4_pfold_extend_nucleation_sites().
96	static char *structure_breaker[2] = {
97	strdup("NYPG"),
98	strdup("PDESGK")
99	};
100
101	/// Amino acids that are indifferent for a certain structure (#ALPHA_HELIX or #BETA_SHEET) as used in ED4_pfold_extend_nucleation_sites().
102	static char *structure_indifferent[2] = {
103	strdup("RTSC"),
104	strdup("RNHA")
105	};
106
107	/// Awars for the match method; binds the #PFOLD_MATCH_METHOD to the corresponding name that is used to create the menu in ED4_pfold_create_props_window().
108	static name_value_pair pfold_match_method_awars[4] = {
109	{ "Secondary Structure <-> Secondary Structure", SECSTRUCT_SECSTRUCT },
110	{ "Secondary Structure <-> Sequence", SECSTRUCT_SEQUENCE },
111	{ "Secondary Structure <-> Sequence (Full Prediction)", SECSTRUCT_SEQUENCE_PREDICT },
112	{ 0, PFOLD_MATCH_METHOD_COUNT }
113	};
114
115	static double max_former_value[3] = { 1.42, 1.62, 156 }; ///< Maximum former value for alpha-helix, beta-sheet (in #cf_parameters) and beta-turn (in #cf_parameters_norm).
116	static double min_former_value[3] = { 0.0, 0.0, 47 }; ///< Minimum former value for alpha-helix, beta-sheet (in #cf_parameters) and beta-turn (in #cf_parameters_norm).
117	static double max_breaker_value[3] = { 1.21, 2.03, 0.0 }; ///< Maximum breaker value for alpha-helix, beta-sheet (in #cf_parameters) and beta-turn (no breaker values => 0).
118
119	// --------------------------------------------------------------------------------
120
121	//TODO: is there a way to prevent doxygen from stripping the comments from the table?
122	// I simply added the parameter table as verbatim environment to show the comments in
123	// the documentation.
124	/** \brief Former and breaker values for alpha-helices and beta-sheets (= strands).
125	*
126	* \hideinitializer
127	* \par Initial value:
128	* \verbatim
129	{
130	// Helix Former Strand Former Helix Breaker Strand Breaker Amino
131	// Value Value Value Value Acid
132	// -----------------------------------------------------------------------
133	{ 1.34, 0.00, 0.00, 0.00 }, // A
134	{ 0.00, 0.00, 0.00, 0.00 }, // R
135	{ 0.50, 0.00, 0.00, 1.39 }, // D
136	{ 0.00, 0.00, 1.03, 0.00 }, // N
137	{ 0.00, 1.13, 0.00, 0.00 }, // C
138	{ 1.42, 0.00, 0.00, 2.03 }, // E
139	{ 1.05, 1.05, 0.00, 0.00 }, // Q
140	{ 0.00, 0.00, 1.21, 1.00 }, // G
141	{ 0.50, 0.00, 0.00, 0.00 }, // H
142	{ 1.02, 1.52, 0.00, 0.00 }, // I
143	{ 1.14, 1.24, 0.00, 0.00 }, // L
144	{ 1.09, 0.00, 0.00, 1.01 }, // K
145	{ 1.37, 1.00, 0.00, 0.00 }, // M
146	{ 1.07, 1.31, 0.00, 0.00 }, // F
147	{ 0.00, 0.00, 1.21, 1.36 }, // P
148	{ 0.00, 0.00, 0.00, 1.00 }, // S
149	{ 0.00, 1.13, 0.00, 0.00 }, // T
150	{ 1.02, 1.30, 0.00, 0.00 }, // W
151	{ 0.00, 1.40, 1.00, 0.00 }, // Y
152	{ 1.00, 1.62, 0.00, 0.00 }}; // V
153	\endverbatim
154	*
155	* The former and breaker values are used to find alpha-helix and beta-sheet
156	* nucleation sites in ED4_pfold_find_nucleation_sites() and to resolve overlaps
157	* in ED4_pfold_resolve_overlaps(). Addressing the array with the enums
158	* #ALPHA_HELIX or #BETA_SHEET as second index gives the former values and
159	* addressing it with #ALPHA_HELIX+2 or #BETA_SHEET+2 gives the breaker values.
160	* The first index is for the amino acid. Use #char2AA to convert an amino acid
161	* character to the corresponding index.
162	*
163	* \sa Refer to the definition in the source code for commented table.
164	*/
165	static double cf_parameters[20][4] = {
166	/*Helix Former Strand Former Helix Breaker Strand Breaker Amino
167	Value Value Value Value Acid
168	-----------------------------------------------------------------------*/
169	{ 1.34, 0.00, 0.00, 0.00 }, // A
170	{ 0.00, 0.00, 0.00, 0.00 }, // R
171	{ 0.50, 0.00, 0.00, 1.39 }, // D
172	{ 0.00, 0.00, 1.03, 0.00 }, // N
173	{ 0.00, 1.13, 0.00, 0.00 }, // C
174	{ 1.42, 0.00, 0.00, 2.03 }, // E
175	{ 1.05, 1.05, 0.00, 0.00 }, // Q
176	{ 0.00, 0.00, 1.21, 1.00 }, // G
177	{ 0.50, 0.00, 0.00, 0.00 }, // H
178	{ 1.02, 1.52, 0.00, 0.00 }, // I
179	{ 1.14, 1.24, 0.00, 0.00 }, // L
180	{ 1.09, 0.00, 0.00, 1.01 }, // K
181	{ 1.37, 1.00, 0.00, 0.00 }, // M
182	{ 1.07, 1.31, 0.00, 0.00 }, // F
183	{ 0.00, 0.00, 1.21, 1.36 }, // P
184	{ 0.00, 0.00, 0.00, 1.00 }, // S
185	{ 0.00, 1.13, 0.00, 0.00 }, // T
186	{ 1.02, 1.30, 0.00, 0.00 }, // W
187	{ 0.00, 1.40, 1.00, 0.00 }, // Y
188	{ 1.00, 1.62, 0.00, 0.00 }}; // V
189
190	/** \brief Normalized former values for alpha-helices, beta-sheets (= strands)
191	* and beta-turns as well as beta-turn probabilities.
192	*
193	* \hideinitializer
194	* \par Initial value:
195	* \verbatim
196	{
197	// P(a) P(b) P(turn) f(i) f(i+1) f(i+2) f(i+3) Amino Acid
198	// --------------------------------------------------------------------
199	{ 142, 83, 66, 0.060, 0.076, 0.035, 0.058 }, // A
200	{ 98, 93, 95, 0.070, 0.106, 0.099, 0.085 }, // R
201	{ 101, 54, 146, 0.147, 0.110, 0.179, 0.081 }, // D
202	{ 67, 89, 156, 0.161, 0.083, 0.191, 0.091 }, // N
203	{ 70, 119, 119, 0.149, 0.050, 0.117, 0.128 }, // C
204	{ 151, 37, 74, 0.056, 0.060, 0.077, 0.064 }, // E
205	{ 111, 110, 98, 0.074, 0.098, 0.037, 0.098 }, // Q
206	{ 57, 75, 156, 0.102, 0.085, 0.190, 0.152 }, // G
207	{ 100, 87, 95, 0.140, 0.047, 0.093, 0.054 }, // H
208	{ 108, 160, 47, 0.043, 0.034, 0.013, 0.056 }, // I
209	{ 121, 130, 59, 0.061, 0.025, 0.036, 0.070 }, // L
210	{ 116, 74, 101, 0.055, 0.115, 0.072, 0.095 }, // K
211	{ 145, 105, 60, 0.068, 0.082, 0.014, 0.055 }, // M
212	{ 113, 138, 60, 0.059, 0.041, 0.065, 0.065 }, // F
213	{ 57, 55, 152, 0.102, 0.301, 0.034, 0.068 }, // P
214	{ 77, 75, 143, 0.120, 0.139, 0.125, 0.106 }, // S
215	{ 83, 119, 96, 0.086, 0.108, 0.065, 0.079 }, // T
216	{ 108, 137, 96, 0.077, 0.013, 0.064, 0.167 }, // W
217	{ 69, 147, 114, 0.082, 0.065, 0.114, 0.125 }, // Y
218	{ 106, 170, 50, 0.062, 0.048, 0.028, 0.053 }}; // V
219	\endverbatim
220	*
221	* The normalized former values are used to find beta-turns in an amino acid
222	* sequence in ED4_pfold_find_turns(). Addressing the array with the enums
223	* #ALPHA_HELIX, #BETA_SHEET or #BETA_TURN as second index gives the former
224	* values and addressing it with #BETA_TURN+i \f$(1 <= i <= 4)\f$ gives the
225	* turn probabilities. The first index is for the amino acid. Use #char2AA to
226	* convert an amino acid character to the corresponding index.
227	*
228	* \sa Refer to the definition in the source code for commented table.
229	*/
230	static double cf_parameters_norm[20][7] = {
231	/*P(a) P(b) P(turn) f(i) f(i+1) f(i+2) f(i+3) Amino Acid
232	--------------------------------------------------------------------*/
233	{ 142, 83, 66, 0.060, 0.076, 0.035, 0.058 }, // A
234	{ 98, 93, 95, 0.070, 0.106, 0.099, 0.085 }, // R
235	{ 101, 54, 146, 0.147, 0.110, 0.179, 0.081 }, // D
236	{ 67, 89, 156, 0.161, 0.083, 0.191, 0.091 }, // N
237	{ 70, 119, 119, 0.149, 0.050, 0.117, 0.128 }, // C
238	{ 151, 37, 74, 0.056, 0.060, 0.077, 0.064 }, // E
239	{ 111, 110, 98, 0.074, 0.098, 0.037, 0.098 }, // Q
240	{ 57, 75, 156, 0.102, 0.085, 0.190, 0.152 }, // G
241	{ 100, 87, 95, 0.140, 0.047, 0.093, 0.054 }, // H
242	{ 108, 160, 47, 0.043, 0.034, 0.013, 0.056 }, // I
243	{ 121, 130, 59, 0.061, 0.025, 0.036, 0.070 }, // L
244	{ 116, 74, 101, 0.055, 0.115, 0.072, 0.095 }, // K
245	{ 145, 105, 60, 0.068, 0.082, 0.014, 0.055 }, // M
246	{ 113, 138, 60, 0.059, 0.041, 0.065, 0.065 }, // F
247	{ 57, 55, 152, 0.102, 0.301, 0.034, 0.068 }, // P
248	{ 77, 75, 143, 0.120, 0.139, 0.125, 0.106 }, // S
249	{ 83, 119, 96, 0.086, 0.108, 0.065, 0.079 }, // T
250	{ 108, 137, 96, 0.077, 0.013, 0.064, 0.167 }, // W
251	{ 69, 147, 114, 0.082, 0.065, 0.114, 0.125 }, // Y
252	{ 106, 170, 50, 0.062, 0.048, 0.028, 0.053 }}; // V
253
254	// --------------------------------------------------------------------------------
255
256	/** \brief Symmetric arithmetic rounding of a double value to an integer value.
257	*
258	* \param[in] d Value to be rounded
259	* \return Rounded value
260	*
261	* Rounds a double value to an integer value using symmetric arithmetic rounding,
262	* i.e. a number \f$x.y\f$ is rounded to \f$x\f$ if \f$y < 5\f$ and to \f$x+1\f$
263	* otherwise.
264	*/
265	inline int ED4_pfold_round_sym(double d) {
266	return int(d + .5);
267	}
268
269
270	/** \brief Initializes static variables.
271	*
272	* So far, this function only concerns #char2AA which gets initialized here.
273	* See #char2AA for details on the values. It is called by
274	* ED4_pfold_predict_structure() and ED4_pfold_calculate_secstruct_match().
275	*
276	* \attention If any other prediction function is used alone before calling one
277	* of the mentioned functions, this function has to be called first.
278	*/
279	static void ED4_pfold_init_statics() {
280	// specify the characters used for amino acid one letter code
281	if (!char2AA) {
282	char2AA = new int [256];
283	for (int i = 0; i < 256; i++) {
284	char2AA[i] = -1;
285	}
286	for (int i = 0; amino_acids[i]; i++) {
287	char2AA[(unsigned char)amino_acids[i]] = i;
288	}
289	}
290	}
291
292
293	/** \brief Finds nucleation sites that initiate the specified structure.
294	*
295	* \param[in] sequence Amino acid sequence
296	* \param[out] structure Predicted secondary structure
297	* \param[in] length Size of \a sequence and \a structure
298	* \param[in] s Secondary structure type (either #ALPHA_HELIX or #BETA_SHEET)
299	*
300	* This function finds nucleation sites that initiate the specified structure
301	* (alpha-helix or beta-sheet). A window of a fixed size is moved over the
302	* sequence and former and breaker values (as defined by #cf_parameters) for
303	* the amino acids in the window are summed up. If the former values in this
304	* region reach a certain value and the breaker values do not exceed a certain
305	* limit a nucleation site is formed, i.e. the region is assumed to be the
306	* corresponding secondary structure. The result is stored in \a structure.
307	*/
308	static void ED4_pfold_find_nucleation_sites(const unsigned char sequence, char structure, int length, const PFOLD_STRUCTURE s) {
309	#ifdef SHOW_PROGRESS
310	cout << endl << "Searching for nucleation sites:" << endl;
311	#endif
312	e4_assert(s == ALPHA_HELIX \|\| s == BETA_SHEET); // incorrect value for s
313	e4_assert(char2AA); // char2AA not initialized; ED4_pfold_init_statics() failed or hasn't been called yet
314
315	char *gap_chars = ED4_ROOT->aw_root->awar(ED4_AWAR_GAP_CHARS)->read_string(); // gap characters
316	int windowSize = (s == ALPHA_HELIX ? 6 : 5); // window size used for finding nucleation sites
317	double sumOfFormVal = 0, sumOfBreakVal = 0; // sum of former resp. breaker values in window
318	int pos; // current position in sequence
319	int count; // number of amino acids found in window
320
321	for (int i = 0; i < ((length + 1) - windowSize); i++) {
322	int aa = 0; // amino acid
323
324	pos = i;
325	for (count = 0; count < windowSize; count++) {
326	// skip gaps
327	while ( pos < ((length + 1) - windowSize) &&
328	strchr(gap_chars, sequence[pos + count]) ) {
329	pos++;
330	}
331	aa = char2AA[sequence[pos + count]];
332	if (aa == -1) break; // unknown character found
333
334	// compute former and breaker values
335	sumOfFormVal += cf_parameters[aa][s];
336	sumOfBreakVal += cf_parameters[aa][s+2];
337	}
338
339	// assign sequence and save start and end of nucleation site for later extension
340	if ((sumOfFormVal > (windowSize - 2)) && (sumOfBreakVal < 2)) {
341	for (int j = i; j < (pos + count); j++) {
342	if (char2AA[sequence[j]] != -1) structure[j] = structure_chars[s];
343	}
344	}
345	if (aa == -1) i = pos + count; // skip unknown character
346	sumOfFormVal = 0, sumOfBreakVal = 0;
347	}
348
349	free(gap_chars);
350	#ifdef SHOW_PROGRESS
351	cout << structure << endl;
352	#endif
353	}
354
355
356	/** \brief Extends the found nucleation sites in both directions.
357	*
358	* \param[in] sequence Amino acid sequence
359	* \param[out] structure Predicted secondary structure
360	* \param[in] length Size of \a sequence and \a structure
361	* \param[in] s Secondary structure type (either #ALPHA_HELIX or #BETA_SHEET)
362	*
363	* The function extends the nucleation sites found by ED4_pfold_find_nucleation_sites()
364	* in both directions. Extension continues until a certain amino acid constellation
365	* is found. The amino acid 'P' breaks an alpha-helix and 'P' as well as 'E' break
366	* a beta-sheet. Also, two successive breakers or one breaker followed by an
367	* indifferent amino acid (as defined by #structure_breaker and #structure_indifferent)
368	* break the structure. The result is stored in \a structure.
369	*/
370	static void ED4_pfold_extend_nucleation_sites(const unsigned char sequence, char structure, int length, const PFOLD_STRUCTURE s) {
371	#ifdef SHOW_PROGRESS
372	cout << endl << "Extending nucleation sites:" << endl;
373	#endif
374	e4_assert(s == ALPHA_HELIX \|\| s == BETA_SHEET); // incorrect value for s
375	e4_assert(char2AA); // char2AA not initialized; ED4_pfold_init_statics() failed or hasn't been called yet
376
377	bool break_structure = false; // break the current structure
378	int start = 0, end = 0; // start and end of nucleation site
379	int neighbour = 0; // neighbour of start or end
380
381	char *gap_chars = ED4_ROOT->aw_root->awar(ED4_AWAR_GAP_CHARS)->read_string(); // gap characters
382
383	// find nucleation sites and extend them in both directions (for whole sequence)
384	for (int indStruct = 0; indStruct < length; indStruct++) {
385
386	// search start and end of nucleated region
387	while (indStruct < length &&
388	((structure[indStruct] == ' ') \|\| strchr(gap_chars, sequence[indStruct]))
389	) indStruct++;
390
391	if (indStruct >= length) break;
392	// get next amino acid that is not included in nucleation site
393	start = indStruct - 1;
394	while ( indStruct < length &&
395	(structure[indStruct] != ' ' \|\| strchr(gap_chars, sequence[indStruct])) ) {
396	indStruct++;
397	}
398	// get next amino acid that is not included in nucleation site
399	end = indStruct;
400
401	// extend nucleated region in both directions
402	// left side:
403	while (start > 1 && strchr(gap_chars, sequence[start])) {
404	start--; // skip gaps
405	}
406	// break if no amino acid is found
407	if (start >= 0) break_structure = (char2AA[sequence[start]] == -1);
408	while (!break_structure && (start > 1) && (structure[start] == ' ')) {
409	// break if absolute breaker (P or E) is found
410	break_structure = (sequence[start] == 'P');
411	if (s == BETA_SHEET) break_structure \|= (sequence[start] == 'E');
412	if (break_structure) break;
413	// check for breaker at current position
414	break_structure = (strchr(structure_breaker[s], sequence[start]) != 0);
415	neighbour = start - 1; // get neighbour
416	while (neighbour > 0 && strchr(gap_chars, sequence[neighbour])) {
417	neighbour--; // skip gaps
418	}
419	// break if out of bounds or no amino acid is found
420	if (neighbour <= 0 \|\| char2AA[sequence[neighbour]] == -1) {
421	break;
422	}
423	// break if another breaker or indifferent amino acid is found
424	break_structure &=
425	(strchr(structure_breaker[s], sequence[neighbour]) != 0) \|\|
426	(strchr(structure_indifferent[s], sequence[neighbour]) != 0);
427	if (!break_structure) {
428	structure[start] = structure_chars[s];
429	}
430	start = neighbour; // continue with neighbour
431	}
432
433	// right side:
434	while (end < (length - 2) && strchr(gap_chars, sequence[end])) {
435	end++; // skip gaps
436	}
437	// break if no amino acid is found
438	if (end <= (length - 1)) break_structure = (char2AA[sequence[end]] == -1);
439	while (!break_structure && (end < (length - 2))) {
440	// break if absolute breaker (P or E) is found
441	break_structure = (sequence[end] == 'P');
442	if (s == BETA_SHEET) break_structure \|= (sequence[end] == 'E');
443	if (break_structure) break;
444	// check for breaker at current position
445	break_structure = (strchr(structure_breaker[s], sequence[end]) != 0);
446	neighbour = end + 1; // get neighbour
447	while (neighbour < (length - 2) && strchr(gap_chars, sequence[neighbour])) {
448	neighbour++; // skip gaps
449	}
450	// break if out of bounds or no amino acid is found
451	if (neighbour >= (length - 1) \|\| char2AA[sequence[neighbour]] == -1) {
452	end = neighbour;
453	break;
454	}
455	// break if another breaker or indifferent amino acid is found
456	break_structure &=
457	(strchr(structure_breaker[s], sequence[neighbour]) != 0) \|\|
458	(strchr(structure_indifferent[s], sequence[neighbour]) != 0);
459	if (!break_structure) {
460	structure[end] = structure_chars[s];
461	}
462	end = neighbour; // continue with neighbour
463	}
464	indStruct = end; // continue with end
465	}
466
467	free(gap_chars);
468	#ifdef SHOW_PROGRESS
469	cout << structure << endl;
470	#endif
471	}
472
473
474	/** \brief Predicts beta-turns from the given amino acid sequence
475	*
476	* \param[in] sequence Amino acid sequence
477	* \param[out] structure Predicted secondary structure
478	* \param[in] length Size of \a sequence and \a structure
479	*
480	* A window of a fixed size is moved over the sequence and former values for
481	* alpha-helices, beta-sheets and beta-turns are summed up. In addition,
482	* beta-turn probabilities are multiplied. The values are specified in
483	* #cf_parameters_norm. If the former values for beta-turn are greater than
484	* the ones for alpha-helix and beta-sheet and the turn probabilities
485	* exceed a certain limit the region is assumed to be a beta-turn. The result
486	* is stored in \a structure.
487	*/
488	static void ED4_pfold_find_turns(const unsigned char sequence, char structure, int length) {
489	#ifdef SHOW_PROGRESS
490	cout << endl << "Searching for beta-turns: " << endl;
491	#endif
492	e4_assert(char2AA); // char2AA not initialized; ED4_pfold_init_statics() failed or hasn't been called yet
493
494	char *gap_chars = ED4_ROOT->aw_root->awar(ED4_AWAR_GAP_CHARS)->read_string(); // gap characters
495	const int windowSize = 4; // window size
496	double P_a = 0, P_b = 0, P_turn = 0; // former values for helix, sheet and beta-turn
497	double p_t = 1; // probability for beta-turn
498	int pos; // position in sequence
499	int count; // position in window
500	int aa; // amino acid
501
502	for (int i = 0; i < ((length + 1) - windowSize); i++) {
503	pos = i;
504	for (count = 0; count < windowSize; count++) {
505	// skip gaps
506	while ( pos < ((length + 1) - windowSize) &&
507	strchr(gap_chars, sequence[pos + count]) ) {
508	pos++;
509	}
510	aa = char2AA[sequence[pos + count]];
511	if (aa == -1) break; // unknown character found
512
513	// compute former values and turn probability
514	P_a += cf_parameters_norm[aa][0];
515	P_b += cf_parameters_norm[aa][1];
516	P_turn += cf_parameters_norm[aa][2];
517	p_t *= cf_parameters_norm[aa][3 + count];
518	}
519	if (count != 0) {
520	//if (count == 4) {
521	P_a /= count;
522	P_b /= count;
523	P_turn /= count;
524	if ((p_t > 0.000075) && (P_turn > 100) && (P_turn > P_a) && (P_turn > P_b)) {
525	for (int j = i; j < (pos + count); j++) {
526	if (char2AA[sequence[j]] != -1) structure[j] = structure_chars[BETA_TURN];
527	}
528	}
529	}
530	if (aa == -1) i = pos + count; // skip unknown character
531	p_t = 1, P_a = 0, P_b = 0, P_turn = 0;
532	}
533
534	free(gap_chars);
535	#ifdef SHOW_PROGRESS
536	cout << structure << endl;
537	#endif
538	}
539
540
541	/** \brief Resolves overlaps of predicted secondary structures and creates structure summary.
542	*
543	* \param[in] sequence Amino acid sequence
544	* \param[in,out] structures Predicted secondary structures (#ALPHA_HELIX, #BETA_SHEET,
545	* #BETA_TURN and #STRUCTURE_SUMMARY, in this order)
546	* \param[in] length Size of \a sequence and \a structures[i]
547	*
548	* The function takes the given predicted structures (alpha-helix, beta-sheet
549	* and beta-turn) and searches for overlapping regions. If a beta-turn is found
550	* the structure summary is assumed to be a beta-turn. For overlapping alpha-helices
551	* and beta-sheets the former values are summed up for this region and the
552	* structure summary is assumed to be the structure type with the higher former
553	* value. The result is stored in \a structures[3] (= \a structures[#STRUCTURE_SUMMARY]).
554	*
555	* \attention I couldn't find a standard procedure for resolving overlaps and
556	* there might be other (possibly better) ways to do that.
557	*/
558	static void ED4_pfold_resolve_overlaps(const unsigned char sequence, char structures[4], int length) {
559	#ifdef SHOW_PROGRESS
560	cout << endl << "Resolving overlaps: " << endl;
561	#endif
562	e4_assert(char2AA); // char2AA not initialized; ED4_pfold_init_statics() failed or hasn't been called yet
563
564	int start = -1; // start of overlap
565	int end = -1; // end of overlap
566	double P_a = 0; // sum of former values for alpha-helix in overlapping regions
567	double P_b = 0; // sum of former values for beta-sheet in overlapping regions
568	PFOLD_STRUCTURE s; // structure with the highest former values
569	char *gap_chars = ED4_ROOT->aw_root->awar(ED4_AWAR_GAP_CHARS)->read_string(); // gap characters
570
571	// scan structures for overlaps
572	for (int pos = 0; pos < length; pos++) {
573
574	// if beta-turn is found at position pos -> summary is beta-turn
575	if (structures[BETA_TURN][pos] != ' ') {
576	structures[STRUCTURE_SUMMARY][pos] = structure_chars[BETA_TURN];
577
578	// if helix and sheet are overlapping and no beta-turn is found -> check which structure has the highest sum of former values
579	} else if ((structures[ALPHA_HELIX][pos] != ' ') && (structures[BETA_SHEET][pos] != ' ')) {
580
581	// search start and end of overlap (as long as no beta-turn is found)
582	start = pos;
583	end = pos;
584	while ( structures[ALPHA_HELIX][end] != ' ' && structures[BETA_SHEET][end] != ' ' &&
585	structures[BETA_TURN][end] == ' ' ) {
586	end++;
587	}
588
589	// calculate P_a and P_b for overlap
590	for (int i = start; i < end; i++) {
591	// skip gaps
592	while (i < end && strchr(gap_chars, sequence[i])) {
593	i++;
594	}
595	int aa = char2AA[sequence[i]];
596	if (aa != -1) {
597	P_a += cf_parameters[aa][ALPHA_HELIX];
598	P_b += cf_parameters[aa][BETA_SHEET];
599	}
600	}
601
602	// check which structure is more likely and set s appropriately
603	s = (P_a > P_b) ? ALPHA_HELIX : BETA_SHEET;
604
605	// set structure for overlapping region
606	for (int i = start; i < end; i++) {
607	structures[STRUCTURE_SUMMARY][i] = structure_chars[s];
608	}
609
610	// set variables for next pass of loop resp. end of loop
611	P_a = 0, P_b = 0;
612	pos = end - 1;
613
614	// if helix and sheet are not overlapping and no beta-turn is found -> set structure accordingly
615	} else {
616	// summary at position pos is helix resp. sheet
617	if (structures[ALPHA_HELIX][pos] != ' ') {
618	structures[STRUCTURE_SUMMARY][pos] = structure_chars[ALPHA_HELIX];
619	} else if (structures[BETA_SHEET][pos] != ' ') {
620	structures[STRUCTURE_SUMMARY][pos] = structure_chars[BETA_SHEET];
621	}
622	}
623	}
624
625	free(gap_chars);
626	#ifdef SHOW_PROGRESS
627	cout << structures[summary] << endl;
628	#endif
629	}
630
631
632	/** \brief Predicts protein secondary structures from the amino acid sequence.
633	*
634	* \param[in] sequence Amino acid sequence
635	* \param[out] structures Predicted secondary structures (#ALPHA_HELIX, #BETA_SHEET,
636	* #BETA_TURN and #STRUCTURE_SUMMARY, in this order)
637	* \param[in] length Size of \a sequence and \a structures[i]
638	* \return Error description, if an error occured; 0 otherwise
639	*
640	* This function predicts the protein secondary structures from the amino acid
641	* sequence according to the Chou-Fasman algorithm. In a first step, nucleation sites
642	* for alpha-helices and beta-sheets are found using ED4_pfold_find_nucleation_sites().
643	* In a next step, the found structures are extended obeying certain rules with
644	* ED4_pfold_extend_nucleation_sites(). Beta-turns are found with the function
645	* ED4_pfold_find_turns(). In a final step, overlapping regions are identified and
646	* resolved to create a structure summary with ED4_pfold_resolve_overlaps().
647	* The results are written to \a structures[i] and can be accessed via the enums
648	* #ALPHA_HELIX, #BETA_SHEET, #BETA_TURN and #STRUCTURE_SUMMARY.
649	*/
650	static GB_ERROR ED4_pfold_predict_structure(const unsigned char sequence, char structures[4], int length) {
651	#ifdef SHOW_PROGRESS
652	cout << endl << "Predicting secondary structure for sequence:" << endl << sequence << endl;
653	#endif
654	GB_ERROR error = 0;
655	e4_assert((int)strlen((const char *)sequence) == length);
656
657	// init memory
658	ED4_pfold_init_statics();
659	e4_assert(char2AA);
660
661	// predict structure
662	ED4_pfold_find_nucleation_sites(sequence, structures[ALPHA_HELIX], length, ALPHA_HELIX);
663	ED4_pfold_find_nucleation_sites(sequence, structures[BETA_SHEET], length, BETA_SHEET);
664	ED4_pfold_extend_nucleation_sites(sequence, structures[ALPHA_HELIX], length, ALPHA_HELIX);
665	ED4_pfold_extend_nucleation_sites(sequence, structures[BETA_SHEET], length, BETA_SHEET);
666	ED4_pfold_find_turns(sequence, structures[BETA_TURN], length);
667	ED4_pfold_resolve_overlaps(sequence, structures, length);
668
669	return error;
670	}
671
672	#if 0
673	/** \brief Predicts protein secondary structure from the amino acid sequence.
674	*
675	* \param[in] sequence Amino acid sequence
676	* \param[out] structure Predicted secondary structure summary
677	* \param[in] length Size of \a sequence and \a structure
678	* \return Error description, if an error occured; 0 otherwise
679	*
680	* Basically the same as
681	* ED4_pfold_predict_structure(const char sequence, char structures[4], int length)
682	* except that it returns only the secondary structure summary in \a structure.
683	*/
684	static GB_ERROR ED4_pfold_predict_structure(const char sequence, char structure, int length) {
685	GB_ERROR error = 0;
686
687	// allocate memory
688	char *structures[4];
689	for (int i = 0; i < 4; i++) {
690	structures[i] = new char [length + 1];
691	if (!structures[i]) {
692	error = GB_export_error("Out of memory.");
693	return error;
694	}
695	for (int j = 0; j < length; j++) {
696	structures[i][j] = ' ';
697	}
698	structures[i][length] = '\0';
699	}
700
701	// predict structure
702	error = ED4_pfold_predict_structure(sequence, structures, length);
703
704	// write predicted summary to result_buffer
705	if (structure) {
706	for (int i = 0; i < length; i++) {
707	structure[i] = structures[STRUCTURE_SUMMARY][i];
708	}
709	structure[length] = '\0';
710	}
711
712	// free buffer and return
713	for (int i = 0; i < 4; i++) {
714	if (structures[i]) {
715	delete structures[i];
716	structures[i] = 0;
717	}
718	}
719	return error;
720	}
721	#endif
722
723
724
725	GB_ERROR ED4_pfold_calculate_secstruct_match(const unsigned char structure_sai, const unsigned char structure_cmp, int start, int end, char result_buffer, PFOLD_MATCH_METHOD match_method /= SECSTRUCT_SEQUENCE*/) {
726	GB_ERROR error = 0;
727	e4_assert(structure_sai);
728	e4_assert(structure_cmp);
729	e4_assert(start >= 0);
730	e4_assert(result_buffer);
731	e4_assert(match_method >= 0 && match_method < PFOLD_MATCH_METHOD_COUNT);
732	ED4_pfold_init_statics();
733	e4_assert(char2AA);
734
735	size_t length = strlen((const char *)structure_sai);
736	int match_end = min( min(end - start, length), (int) strlen((const char *)structure_cmp) );
737
738	enum {BEND = 3, NOSTRUCT = 4};
739	char *struct_chars[] = {
740	strdup("HGI"), // helical structures (enum ALPHA_HELIX)
741	strdup("EB"), // sheet-like structures (enum BETA_SHEET)
742	strdup("T"), // beta-turn (enum BETA_TURN)
743	strdup("S"), // bends (enum BEND)
744	strdup("") // no structure (enum NOSTRUCT)
745	};
746
747	// init awars
748	char *gap_chars = ED4_ROOT->aw_root->awar(ED4_AWAR_GAP_CHARS)->read_string();
749	char *pairs[PFOLD_MATCH_TYPE_COUNT] = {0};
750	char *pair_chars[PFOLD_MATCH_TYPE_COUNT] = {0};
751	char *pair_chars_2 = ED4_ROOT->aw_root->awar(PFOLD_AWAR_SYMBOL_TEMPLATE_2)->read_string();
752	char awar[256];
753	for (int i = 0; pfold_match_type_awars[i].name; i++) {
754	sprintf(awar, PFOLD_AWAR_PAIR_TEMPLATE, pfold_match_type_awars[i].name);
755	pairs[i] = strdup(ED4_ROOT->aw_root->awar(awar)->read_string());
756	sprintf(awar, PFOLD_AWAR_SYMBOL_TEMPLATE, pfold_match_type_awars[i].name);
757	pair_chars[i] = strdup(ED4_ROOT->aw_root->awar(awar)->read_string());
758	}
759
760	int struct_start = start;
761	int struct_end = start;
762	int count = 0;
763	int current_struct = 4;
764	int aa = -1;
765	double prob = 0;
766
767	//TODO: move this check to callback for the corresponding field?
768	if (strlen(pair_chars_2) != 10) {
769	error = GB_export_error("You have to define 10 match symbols.");
770	}
771
772	if (!error) {
773	switch (match_method) {
774
775	case SECSTRUCT_SECSTRUCT:
776	//TODO: one could try to find out, if structure_cmp is really a secondary structure and not a sequence (define awar for allowed symbols in secondary structure)
777	for (count = 0; count < match_end; count++) {
778	result_buffer[count] = *pair_chars[STRUCT_UNKNOWN];
779	for (int n_pt = 0; n_pt < PFOLD_MATCH_TYPE_COUNT; n_pt++) {
780	int len = strlen(pairs[n_pt])-1;
781	char *p = pairs[n_pt];
782	for (int j = 0; j < len; j += 3) {
783	if ( (p[j] == structure_sai[count + start] && p[j+1] == structure_cmp[count + start]) \|\|
784	(p[j] == structure_cmp[count + start] && p[j+1] == structure_sai[count + start]) ) {
785	result_buffer[count] = *pair_chars[n_pt];
786	n_pt = PFOLD_MATCH_TYPE_COUNT; // stop searching the pair types
787	break; // stop searching the pairs array
788	}
789	}
790	}
791	}
792
793	// fill the remaining buffer with spaces
794	while (count <= end - start) {
795	result_buffer[count] = ' ';
796	count++;
797	}
798	break;
799
800	case SECSTRUCT_SEQUENCE:
801	// clear result buffer
802	for (int i = 0; i <= end - start; i++) result_buffer[i] = ' ';
803
804	// skip gaps
805	while ( structure_sai[struct_start] != '\0' && structure_cmp[struct_start] != '\0' &&
806	strchr(gap_chars, structure_sai[struct_start]) &&
807	strchr(gap_chars, structure_cmp[struct_start]) ) {
808	struct_start++;
809	}
810	if (structure_sai[struct_start] == '\0' \|\| structure_cmp[struct_start] == '\0') break;
811
812	// check structure at the first displayed position and find out where it starts
813	for (current_struct = 0; current_struct < 4 && !strchr(struct_chars[current_struct], structure_sai[struct_start]); current_struct++) {
814	;
815	}
816	if (current_struct != BEND && current_struct != NOSTRUCT) {
817	struct_start--; // check structure left of start
818	while (struct_start >= 0) {
819	// skip gaps
820	while ( struct_start > 0 &&
821	strchr(gap_chars, structure_sai[struct_start]) &&
822	strchr(gap_chars, structure_cmp[struct_start]) ) {
823	struct_start--;
824	}
825	aa = char2AA[structure_cmp[struct_start]];
826	if (struct_start == 0 && aa == -1) { // nothing was found
827	break;
828	} else if (strchr(struct_chars[current_struct], structure_sai[struct_start]) && aa != -1) {
829	prob += cf_former(aa, current_struct) - cf_breaker(aa, current_struct); // sum up probabilities
830	struct_start--;
831	count++;
832	} else {
833	break;
834	}
835	}
836	}
837
838	// parse structures
839	struct_start = start;
840	// skip gaps
841	while ( structure_sai[struct_start] != '\0' && structure_cmp[struct_start] != '\0' &&
842	strchr(gap_chars, structure_sai[struct_start]) &&
843	strchr(gap_chars, structure_cmp[struct_start]) ) {
844	struct_start++;
845	}
846	if (structure_sai[struct_start] == '\0' \|\| structure_cmp[struct_start] == '\0') break;
847	struct_end = struct_start;
848	while (struct_end < end ) {
849	aa = char2AA[structure_cmp[struct_end]];
850	if (current_struct == NOSTRUCT) { // no structure found -> move on
851	struct_end++;
852	} else if (aa == -1) { // structure found but no corresponding amino acid -> doesn't fit at all
853	result_buffer[struct_end - start] = pair_chars_2[0];
854	struct_end++;
855	} else if (current_struct == BEND) { // bend found -> fits perfectly everywhere
856	result_buffer[struct_end - start] = pair_chars_2[9];
857	struct_end++;
858	} else { // helix, sheet or beta-turn found -> while structure doesn't change: sum up probabilities
859	while (structure_sai[struct_end] != '\0') {
860	// skip gaps
861	while ( strchr(gap_chars, structure_sai[struct_end]) &&
862	strchr(gap_chars, structure_cmp[struct_end]) &&
863	structure_sai[struct_end] != '\0' && structure_cmp[struct_end] != '\0' ) {
864	struct_end++;
865	}
866	aa = char2AA[structure_cmp[struct_end]];
867	if ( structure_sai[struct_end] != '\0' && structure_cmp[struct_end] != '\0' &&
868	strchr(struct_chars[current_struct], structure_sai[struct_end]) && aa != -1 ) {
869	prob += cf_former(aa, current_struct) - cf_breaker(aa, current_struct); // sum up probabilities
870	struct_end++;
871	count++;
872	} else {
873	break;
874	}
875	}
876
877	if (count != 0) {
878	// compute average and normalize probability
879	prob /= count;
880	prob = (prob + max_breaker_value[current_struct] - min_former_value[current_struct]) / (max_breaker_value[current_struct] + max_former_value[current_struct] - min_former_value[current_struct]);
881
882	// map to match characters and store in result_buffer
883	int prob_normalized = ED4_pfold_round_sym(prob * 9);
884	//e4_assert(prob_normalized >= 0 && prob_normalized <= 9); // if this happens check if normalization is correct or some undefined charachters mess everything up
885	char prob_symbol = *pair_chars[STRUCT_UNKNOWN];
886	if (prob_normalized >= 0 && prob_normalized <= 9) {
887	prob_symbol = pair_chars_2[prob_normalized];
888	}
889	for (int i = struct_start - start; i < struct_end - start && i < (end - start); i++) {
890	if (char2AA[structure_cmp[i + start]] != -1) result_buffer[i] = prob_symbol;
891	}
892	}
893	}
894
895	// find next structure type
896	if (structure_sai[struct_end] == '\0' \|\| structure_cmp[struct_end] == '\0') {
897	break;
898	} else {
899	prob = 0;
900	count = 0;
901	struct_start = struct_end;
902	for (current_struct = 0; current_struct < 4 && !strchr(struct_chars[current_struct], structure_sai[struct_start]); current_struct++) {
903	;
904	}
905	}
906	}
907	break;
908
909	case SECSTRUCT_SEQUENCE_PREDICT:
910	// predict structures from structure_cmp and compare with structure_sai
911	char *structures[4];
912	for (int i = 0; i < 4; i++) {
913	structures[i] = new char [length + 1];
914	if (!structures[i]) {
915	error = GB_export_error("Out of memory.");
916	return error;
917	}
918	for (size_t j = 0; j < length; j++) {
919	structures[i][j] = ' ';
920	}
921	structures[i][length] = '\0';
922	}
923	error = ED4_pfold_predict_structure(structure_cmp, structures, length);
924	if (!error) {
925	for (count = 0; count < match_end; count++) {
926	result_buffer[count] = *pair_chars[STRUCT_UNKNOWN];
927	if (!strchr(gap_chars, structure_sai[count + start]) && strchr(gap_chars, structure_cmp[count + start])) {
928	result_buffer[count] = *pair_chars[STRUCT_NO_MATCH];
929	} else if ( strchr(gap_chars, structure_sai[count + start]) \|\|
930	(structures[ALPHA_HELIX][count + start] == ' ' && structures[BETA_SHEET][count + start] == ' ' && structures[BETA_TURN][count + start] == ' ') ) {
931	result_buffer[count] = *pair_chars[STRUCT_PERFECT_MATCH];
932	} else {
933	// search for good match first
934	// if found: stop searching
935	// otherwise: continue searching for a less good match
936	for (int n_pt = 0; n_pt < PFOLD_MATCH_TYPE_COUNT; n_pt++) {
937	int len = strlen(pairs[n_pt])-1;
938	char *p = pairs[n_pt];
939	for (int n_struct = 0; n_struct < 3; n_struct++) {
940	for (int j = 0; j < len; j += 3) {
941	if ( (p[j] == structures[n_struct][count + start] && p[j+1] == structure_sai[count + start]) \|\|
942	(p[j] == structure_sai[count + start] && p[j+1] == structures[n_struct][count + start]) ) {
943	result_buffer[count] = *pair_chars[n_pt];
944	n_struct = 3; // stop searching the structures
945	n_pt = PFOLD_MATCH_TYPE_COUNT; // stop searching the pair types
946	break; // stop searching the pairs array
947	}
948	}
949	}
950	}
951	}
952	}
953	// fill the remaining buffer with spaces
954	while (count <= end - start) {
955	result_buffer[count] = ' ';
956	count++;
957	}
958	}
959	// free buffer
960	for (int i = 0; i < 4; i++) {
961	if (structures[i]) {
962	delete structures[i];
963	structures[i] = 0;
964	}
965	}
966	break;
967
968	default:
969	e4_assert(0); // function called with invalid argument for 'match_method'
970	break;
971	}
972	}
973
974	free(gap_chars);
975	free(pair_chars_2);
976	for (int i = 0; pfold_match_type_awars[i].name; i++) {
977	free(pairs[i]);
978	free(pair_chars[i]);
979	}
980	if (error) for (int i = 0; i <= end - start; i++) result_buffer[i] = ' '; // clear result buffer
981	return error;
982	}
983
984
985	GB_ERROR ED4_pfold_set_SAI(char *protstruct, GBDATA gb_main, const char alignment_name, long protstruct_len /= 0/) {
986	GB_ERROR error = 0;
987	GB_transaction ta(gb_main);
988	AW_root *aw_root = ED4_ROOT->aw_root;
989	char *SAI_name = aw_root->awar(PFOLD_AWAR_SELECTED_SAI)->read_string();
990	GBDATA *gb_protstruct = GBT_find_SAI(gb_main, SAI_name);
991
992	freeset(*protstruct, 0);
993
994	if (gb_protstruct) {
995	GBDATA *gb_data = GBT_read_sequence(gb_protstruct, alignment_name);
996	if (gb_data) *protstruct = GB_read_string(gb_data);
997	}
998
999	if (*protstruct) {
1000	if (protstruct_len) protstruct_len = (long)strlen(protstruct);
1001	}
1002	else {
1003	if (protstruct_len) protstruct_len = 0;
1004	if (aw_root->awar(PFOLD_AWAR_ENABLE)->read_int()) {
1005	error = GB_export_errorf( "SAI \"%s\" does not exist.\nDisabled protein structure display!", SAI_name );
1006	aw_root->awar(PFOLD_AWAR_ENABLE)->write_int(0);
1007	}
1008	}
1009
1010	free(SAI_name);
1011	return error;
1012	}
1013
1014	/** \brief Callback function to select the reference protein structure SAI and to
1015	* update the SAI option menu.
1016	*
1017	* \param[in] aww The calling window
1018	* \param[in,out] oms The SAI option menu
1019	* \param[in] set_sai Specifies if SAI should be updated
1020	*
1021	* The function is called whenever the selected SAI or the SAI filter is changed
1022	* in the "Protein Match Settings" dialog (see ED4_pfold_create_props_window()).
1023	* It can be called with \a set_sai defined to update the reference protein secondary
1024	* structure SAI in the editor via ED4_pfold_set_SAI() and to update the selection in
1025	* the SAI option menu. If \a set_sai is 0 only the option menu is updated. This is
1026	* necessary if only the SAI filter changed but not the selected SAI.
1027	*/
1028
1029	static void ED4_pfold_select_SAI_and_update_option_menu(AW_window *aww, AW_CL oms, AW_CL set_sai) {
1030	e4_assert(aww);
1031	AW_option_menu_struct _oms = ((AW_option_menu_struct)oms);
1032	e4_assert(_oms);
1033	char *selected_sai = ED4_ROOT->aw_root->awar(PFOLD_AWAR_SELECTED_SAI)->read_string();
1034	char *sai_filter = ED4_ROOT->aw_root->awar(PFOLD_AWAR_SAI_FILTER)->read_string();
1035
1036	if (set_sai) {
1037	const char *err = ED4_pfold_set_SAI(&ED4_ROOT->protstruct, GLOBAL_gb_main, ED4_ROOT->alignment_name, &ED4_ROOT->protstruct_len);
1038	if (err) aw_message(err);
1039	}
1040
1041	aww->clear_option_menu(_oms);
1042	aww->insert_default_option(selected_sai, "", selected_sai);
1043	GB_transaction dummy(GLOBAL_gb_main);
1044
1045	for (GBDATA *sai = GBT_first_SAI(GLOBAL_gb_main);
1046	sai;
1047	sai = GBT_next_SAI(sai))
1048	{
1049	const char *sai_name = GBT_read_name(sai);
1050	if (strcmp(sai_name, selected_sai) != 0 && strstr(sai_name, sai_filter) != 0) {
1051	aww->callback(ED4_pfold_select_SAI_and_update_option_menu, (AW_CL)_oms, true);
1052	aww->insert_option(sai_name, "", sai_name);
1053	}
1054	}
1055
1056	free(selected_sai);
1057	free(sai_filter);
1058	aww->update_option_menu();
1059	ED4_expose_all_windows();
1060	}
1061
1062
1063	AW_window ED4_pfold_create_props_window(AW_root awr, AW_cb_struct *awcbs) {
1064	AW_window_simple *aws = new AW_window_simple;
1065	aws->init( awr, "PFOLD_PROPS", "PROTEIN_MATCH_SETTINGS");
1066
1067	// create close button
1068	aws->at(10, 10);
1069	aws->auto_space(5, 2);
1070	aws->callback(AW_POPDOWN);
1071	aws->create_button("CLOSE", "CLOSE", "C");
1072
1073	// create help button
1074	aws->callback(AW_POPUP_HELP, (AW_CL)"pfold_props.hlp");
1075	aws->create_button("HELP", "HELP");
1076	aws->at_newline();
1077
1078	aws->label_length(27);
1079	int ex = 0, ey = 0;
1080	char awar[256];
1081
1082	// create toggle field for showing the protein structure match
1083	aws->label("Show protein structure match?");
1084	aws->callback(awcbs);
1085	aws->create_toggle(PFOLD_AWAR_ENABLE);
1086	aws->at_newline();
1087
1088	// create SAI option menu
1089	aws->label_length(30);
1090	AW_option_menu_struct *oms_sai = aws->create_option_menu(PFOLD_AWAR_SELECTED_SAI, "Selected Protein Structure SAI", "");
1091	ED4_pfold_select_SAI_and_update_option_menu(aws, (AW_CL)oms_sai, 0);
1092	aws->at_newline();
1093	aws->label("-> Filter SAI names for");
1094	aws->callback(ED4_pfold_select_SAI_and_update_option_menu, (AW_CL)oms_sai, 0);
1095	aws->create_input_field(PFOLD_AWAR_SAI_FILTER, 10);
1096	aws->at_newline();
1097
1098	// create match method option menu
1099	PFOLD_MATCH_METHOD match_method = (PFOLD_MATCH_METHOD) ED4_ROOT->aw_root->awar(PFOLD_AWAR_MATCH_METHOD)->read_int();
1100	aws->label_length(12);
1101	aws->create_option_menu(PFOLD_AWAR_MATCH_METHOD, "Match Method", "");
1102	for (int i = 0; const char *mm_aw = pfold_match_method_awars[i].name; i++) {
1103	aws->callback(awcbs);
1104	if (match_method == pfold_match_method_awars[i].value) {
1105	aws->insert_default_option(mm_aw, "", match_method);
1106	} else {
1107	aws->insert_option(mm_aw, "", pfold_match_method_awars[i].value);
1108	}
1109	}
1110	aws->update_option_menu();
1111	aws->at_newline();
1112
1113	// create match symbols and/or match types input fields
1114	//TODO: show only fields that are relevant for current match method -> bind to callback function?
1115	//if (match_method == SECSTRUCT_SEQUENCE) {
1116	aws->label_length(40);
1117	aws->label("Match Symbols (Range 0-100% in steps of 10%)");
1118	aws->callback(awcbs);
1119	aws->create_input_field(PFOLD_AWAR_SYMBOL_TEMPLATE_2, 10);
1120	aws->at_newline();
1121	//} else {
1122	for (int i = 0; pfold_match_type_awars[i].name; i++) {
1123	aws->label_length(12);
1124	sprintf(awar, PFOLD_AWAR_PAIR_TEMPLATE, pfold_match_type_awars[i].name);
1125	aws->label(pfold_match_type_awars[i].name);
1126	aws->callback(awcbs);
1127	aws->create_input_field(awar, 30);
1128	//TODO: is it possible to disable input field for STRUCT_UNKNOWN?
1129	//if (pfold_match_type_awars[i].value == STRUCT_UNKNOWN)
1130	if (!i) aws->get_at_position(&ex, &ey);
1131	sprintf(awar, PFOLD_AWAR_SYMBOL_TEMPLATE, pfold_match_type_awars[i].name);
1132	aws->callback(awcbs);
1133	aws->create_input_field(awar, 3);
1134	aws->at_newline();
1135	}
1136	//}
1137
1138	aws->window_fit();
1139	return (AW_window *)aws;
1140	}
1141
1142	#if 0
1143
1144	/** \brief Predicts the specified secondary structure type from the amino acid sequence.
1145	*
1146	* \param[in] sequence Amino acid sequence
1147	* \param[out] structure Predicted secondary structure
1148	* \param[in] length Size of \a sequence and \a structure
1149	* \param[in] s Secondary structure type (#ALPHA_HELIX, #BETA_SHEET or #BETA_TURN)
1150	*
1151	* The function calls ED4_pfold_find_nucleation_sites() and ED4_pfold_extend_nucleation_sites()
1152	* if s is #ALPHA_HELIX or #BETA_SHEET and ED4_pfold_find_turns() if s is #BETA_TURN.
1153	*/
1154	static void ED4_pfold_find_structure(const char sequence, char structure, int length, const PFOLD_STRUCTURE s) {
1155	e4_assert(s == ALPHA_HELIX \|\| s == BETA_SHEET \|\| s == BETA_TURN); // incorrect value for s
1156	e4_assert(char2AA); // char2AA not initialized; ED4_pfold_init_statics() failed or hasn't been called yet
1157	if (s == BETA_TURN) {
1158	ED4_pfold_find_turns(sequence, structure, length);
1159	} else {
1160	ED4_pfold_find_nucleation_sites(sequence, structure, length, s);
1161	ED4_pfold_extend_nucleation_sites(sequence, structure, length, s);
1162	}
1163	}
1164
1165	#endif
1166
1167
1168

Note: See TracBrowser for help on using the repository browser.

Download in other formats: