Context Navigation

source: branches/profile/EDIT4/ED4_protein_2nd_structure.cxx

Visit:

Last change on this file was 12803, checked in by westram, 11 years ago
merge from 'alilink' into 'trunk' adds: log:branches/alilink@12801:12802
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 51.1 KB

Line
1	// =============================================================== //
2	// //
3	// File : ED4_protein_2nd_structure.cxx //
4	// Purpose : //
5	// //
6	// Institute of Microbiology (Technical University Munich) //
7	// http://www.arb-home.de/ //
8	// //
9	// =============================================================== //
10
11
12	/*! \file ED4_protein_2nd_structure.cxx
13	* \brief Implements the functions declared in ed4_protein_2nd_structure.hxx.
14	* \author Markus Urban
15	* \date 2008-02-08
16	* \sa Refer to ed4_protein_2nd_structure.hxx for details, please.
17	*/
18
19	#include "ed4_protein_2nd_structure.hxx"
20	#include "ed4_class.hxx"
21	#include "ed4_awars.hxx"
22
23	#include <aw_awar.hxx>
24	#include <aw_msg.hxx>
25	#include <aw_root.hxx>
26	#include "arbdbt.h"
27
28	#include <iostream>
29
30	#define e4_assert(bed) arb_assert(bed)
31
32	using namespace std;
33
34	// --------------------------------------------------------------------------------
35	// exported data
36
37	//! Awars for the match type; binds the #PFOLD_MATCH_TYPE to the corresponding awar name.
// The names are inserted into the awar name templates by
// ED4_pfold_calculate_secstruct_match(), which walks this table until it
// reaches the entry whose name is null.
38	name_value_pair pfold_match_type_awars[] = {
39	{ "Perfect_match", STRUCT_PERFECT_MATCH },
40	{ "Good_match", STRUCT_GOOD_MATCH },
41	{ "Medium_match", STRUCT_MEDIUM_MATCH },
42	{ "Bad_match", STRUCT_BAD_MATCH },
43	{ "No_match", STRUCT_NO_MATCH },
44	{ "Unknown_match", STRUCT_UNKNOWN },
45	{ 0, PFOLD_MATCH_TYPE_COUNT } // terminator: the null name ends the iteration
46	};
47
48	//! Symbols for the match quality (defined by #PFOLD_MATCH_TYPE) as used for match methods #SECSTRUCT_SECSTRUCT and #SECSTRUCT_SEQUENCE_PREDICT in ED4_pfold_calculate_secstruct_match().
// Every entry is a heap copy created with strdup(); the copies are released by
// the destructor of pfold_mem_handler (see the static object pfold_dealloc).
49	char *pfold_pair_chars[PFOLD_PAIRS] = {
50	strdup(" "), // STRUCT_PERFECT_MATCH
51	strdup("-"), // STRUCT_GOOD_MATCH
52	strdup("~"), // STRUCT_MEDIUM_MATCH
53	strdup("+"), // STRUCT_BAD_MATCH
54	strdup("#"), // STRUCT_NO_MATCH
55	strdup("?") // STRUCT_UNKNOWN
56	};
57
58	//! Match pair definition (see #PFOLD_MATCH_TYPE) as used for match methods #SECSTRUCT_SECSTRUCT and #SECSTRUCT_SEQUENCE_PREDICT in ED4_pfold_calculate_secstruct_match().
// Each string lists two-character pairs separated by a single blank; the entry
// for STRUCT_UNKNOWN defines no pairs. Every entry is a heap copy created with
// strdup() and is released by the destructor of pfold_mem_handler.
59	char *pfold_pairs[PFOLD_PAIRS] = {
60	strdup("HH GG II TT EE BB SS -- -. .."), // STRUCT_PERFECT_MATCH
61	strdup("HG HI HS EB ES TS H- G- I- T- E- B- S-"), // STRUCT_GOOD_MATCH
62	strdup("HT GT IT"), // STRUCT_MEDIUM_MATCH
63	strdup("ET BT"), // STRUCT_BAD_MATCH
64	strdup("EH BH EG EI"), // STRUCT_NO_MATCH
65	strdup("") // STRUCT_UNKNOWN
66	};
67
// Cleanup helper for the exported string tables: the destructor of the single
// static instance pfold_dealloc passes every entry of pfold_pairs and
// pfold_pair_chars to freenull().
// NOTE(review): freenull() is defined elsewhere; presumably it frees the string
// and resets the pointer - confirm.
68	static struct pfold_mem_handler {
69	~pfold_mem_handler() {
70	for (int i = 0; i<PFOLD_PAIRS; ++i) {
71	freenull(pfold_pairs[i]);
72	freenull(pfold_pair_chars[i]);
73	}
74	}
75	} pfold_dealloc;
76
77	// --------------------------------------------------------------------------------
78
79	/*! \brief Specifies the characters used for amino acid one letter code.
80	*
81	* These are the characters that represent amino acids in one letter code.
82	* The order is important as the array initializes #char2AA which is used to
83	* access array elements in the tables #cf_parameters and #cf_parameters_norm.
* The string holds 20 characters, one for each of the 20 rows of these tables;
* the position of a character in the string is its row index.
84	*/
85	static const char *amino_acids = "ARDNCEQGHILKMFPSTWYV";
86
87	/*! \brief Maps character to amino acid one letter code.
88	*
89	* This array maps a character to an integer value. It is initialized with the
90	* function ED4_pfold_init_statics() which creates an array of the size 256
91	* (for ISO/IEC 8859-1 character encoding). Characters that represent an amino
92	* acid get values from 0 to 19 (according to their position in #amino_acids)
93	* and all others get the value -1. That way, it can be used to get parameters
94	* from the tables #cf_parameters and #cf_parameters_norm or to check if a
95	* certain character represents an amino acid.
*
* The pointer stays null until ED4_pfold_init_statics() has been called; the
* prediction functions assert that it is set. The array is allocated with
* new[]; no matching delete[] is visible in this part of the file.
96	*/
97	static int *char2AA = 0;
98
99	/*! \brief Characters representing protein secondary structure.
100	*
101	* Defines the characters representing secondary structure as output by the
102	* function ED4_pfold_predict_structure(). According to common standards,
103	* these are: <BR>
104	* H = alpha-helix, <BR>
105	* E = beta-sheet, <BR>
106	* T = beta-turn.
*
* The array is indexed with the structure enums (#ALPHA_HELIX, #BETA_SHEET,
* #BETA_TURN) by the prediction functions.
107	*/
108	static char structure_chars[3] = { 'H', 'E', 'T' };
109
110	//! Amino acids that break a certain structure (#ALPHA_HELIX or #BETA_SHEET) as used in ED4_pfold_extend_nucleation_sites().
// The array is indexed with the structure enum. NOTE(review): the per-entry
// labels below assume ALPHA_HELIX == 0 and BETA_SHEET == 1, as the column order
// of cf_parameters suggests - confirm against the enum definition.
111	static const char *structure_breaker[2] = {
112	"NYPG", // alpha-helix breakers (assumed index 0)
113	"PDESGK" // beta-sheet breakers (assumed index 1)
114	};
115
116	//! Amino acids that are indifferent for a certain structure (#ALPHA_HELIX or #BETA_SHEET) as used in ED4_pfold_extend_nucleation_sites().
// The array is indexed with the structure enum. NOTE(review): the per-entry
// labels below assume ALPHA_HELIX == 0 and BETA_SHEET == 1, as the column order
// of cf_parameters suggests - confirm against the enum definition.
117	static const char *structure_indifferent[2] = {
118	"RTSC", // indifferent for alpha-helix (assumed index 0)
119	"RNHA" // indifferent for beta-sheet (assumed index 1)
120	};
121
122	//! Awars for the match method; binds the #PFOLD_MATCH_METHOD to the corresponding name that is used to create the menu in ED4_pfold_create_props_window().
// Three named methods followed by an entry with a null name.
// NOTE(review): the null-name entry presumably terminates the table in the same
// way as in pfold_match_type_awars; the consuming code is not visible here.
123	static name_value_pair pfold_match_method_awars[4] = {
124	{ "Secondary Structure <-> Secondary Structure", SECSTRUCT_SECSTRUCT },
125	{ "Secondary Structure <-> Sequence", SECSTRUCT_SEQUENCE },
126	{ "Secondary Structure <-> Sequence (Full Prediction)", SECSTRUCT_SEQUENCE_PREDICT },
127	{ 0, PFOLD_MATCH_METHOD_COUNT }
128	};
129
// Extreme values of the parameter tables, stored in the order alpha-helix,
// beta-sheet, beta-turn. The numbers equal the column maxima/minima of
// cf_parameters (helix, sheet) and of the P(turn) column of cf_parameters_norm,
// so they have to be updated whenever those tables change.
130	static double max_former_value[3] = { 1.42, 1.62, 156 }; //!< Maximum former value for alpha-helix, beta-sheet (in #cf_parameters) and beta-turn (in #cf_parameters_norm).
131	static double min_former_value[3] = { 0.0, 0.0, 47 }; //!< Minimum former value for alpha-helix, beta-sheet (in #cf_parameters) and beta-turn (in #cf_parameters_norm).
132	static double max_breaker_value[3] = { 1.21, 2.03, 0.0 }; //!< Maximum breaker value for alpha-helix, beta-sheet (in #cf_parameters) and beta-turn (no breaker values => 0).
133
134	// --------------------------------------------------------------------------------
135
136	// TODO: is there a way to prevent doxygen from stripping the comments from the table?
137	// I simply added the parameter table as verbatim environment to show the comments in
138	// the documentation.
139	/*! \brief Former and breaker values for alpha-helices and beta-sheets (= strands).
140	*
141	* \hideinitializer
142	* \par Initial value:
143	* \verbatim
144	{
145	// Helix Former Strand Former Helix Breaker Strand Breaker Amino
146	// Value Value Value Value Acid
147	// -----------------------------------------------------------------------
148	{ 1.34, 0.00, 0.00, 0.00 }, // A
149	{ 0.00, 0.00, 0.00, 0.00 }, // R
150	{ 0.50, 0.00, 0.00, 1.39 }, // D
151	{ 0.00, 0.00, 1.03, 0.00 }, // N
152	{ 0.00, 1.13, 0.00, 0.00 }, // C
153	{ 1.42, 0.00, 0.00, 2.03 }, // E
154	{ 1.05, 1.05, 0.00, 0.00 }, // Q
155	{ 0.00, 0.00, 1.21, 1.00 }, // G
156	{ 0.50, 0.00, 0.00, 0.00 }, // H
157	{ 1.02, 1.52, 0.00, 0.00 }, // I
158	{ 1.14, 1.24, 0.00, 0.00 }, // L
159	{ 1.09, 0.00, 0.00, 1.01 }, // K
160	{ 1.37, 1.00, 0.00, 0.00 }, // M
161	{ 1.07, 1.31, 0.00, 0.00 }, // F
162	{ 0.00, 0.00, 1.21, 1.36 }, // P
163	{ 0.00, 0.00, 0.00, 1.00 }, // S
164	{ 0.00, 1.13, 0.00, 0.00 }, // T
165	{ 1.02, 1.30, 0.00, 0.00 }, // W
166	{ 0.00, 1.40, 1.00, 0.00 }, // Y
167	{ 1.00, 1.62, 0.00, 0.00 }}; // V
168	\endverbatim
169	*
170	* The former and breaker values are used to find alpha-helix and beta-sheet
171	* nucleation sites in ED4_pfold_find_nucleation_sites() and to resolve overlaps
172	* in ED4_pfold_resolve_overlaps(). Addressing the array with the enums
173	* #ALPHA_HELIX or #BETA_SHEET as second index gives the former values and
174	* addressing it with #ALPHA_HELIX+2 or #BETA_SHEET+2 gives the breaker values.
175	* The first index is for the amino acid. Use #char2AA to convert an amino acid
176	* character to the corresponding index.
177	*
178	* \sa Refer to the definition in the source code for commented table.
179	*/
// One row per amino acid, in the order of the string amino_acids (use char2AA
// to obtain the row index). Columns 0 and 1 hold the former values for helix
// and strand, columns 2 and 3 the corresponding breaker values; the prediction
// code reads them as cf_parameters[aa][s] and cf_parameters[aa][s+2].
180	static double cf_parameters[20][4] = {
181	/* Helix Former Strand Former Helix Breaker Strand Breaker Amino
182	Value Value Value Value Acid
183	----------------------------------------------------------------------- */
184	{ 1.34, 0.00, 0.00, 0.00 }, // A
185	{ 0.00, 0.00, 0.00, 0.00 }, // R
186	{ 0.50, 0.00, 0.00, 1.39 }, // D
187	{ 0.00, 0.00, 1.03, 0.00 }, // N
188	{ 0.00, 1.13, 0.00, 0.00 }, // C
189	{ 1.42, 0.00, 0.00, 2.03 }, // E
190	{ 1.05, 1.05, 0.00, 0.00 }, // Q
191	{ 0.00, 0.00, 1.21, 1.00 }, // G
192	{ 0.50, 0.00, 0.00, 0.00 }, // H
193	{ 1.02, 1.52, 0.00, 0.00 }, // I
194	{ 1.14, 1.24, 0.00, 0.00 }, // L
195	{ 1.09, 0.00, 0.00, 1.01 }, // K
196	{ 1.37, 1.00, 0.00, 0.00 }, // M
197	{ 1.07, 1.31, 0.00, 0.00 }, // F
198	{ 0.00, 0.00, 1.21, 1.36 }, // P
199	{ 0.00, 0.00, 0.00, 1.00 }, // S
200	{ 0.00, 1.13, 0.00, 0.00 }, // T
201	{ 1.02, 1.30, 0.00, 0.00 }, // W
202	{ 0.00, 1.40, 1.00, 0.00 }, // Y
203	{ 1.00, 1.62, 0.00, 0.00 } }; // V
204
205	/*! \brief Normalized former values for alpha-helices, beta-sheets (= strands)
206	* and beta-turns as well as beta-turn probabilities.
207	*
208	* \hideinitializer
209	* \par Initial value:
210	* \verbatim
211	{
212	// P(a) P(b) P(turn) f(i) f(i+1) f(i+2) f(i+3) Amino Acid
213	// --------------------------------------------------------------------
214	{ 142, 83, 66, 0.060, 0.076, 0.035, 0.058 }, // A
215	{ 98, 93, 95, 0.070, 0.106, 0.099, 0.085 }, // R
216	{ 101, 54, 146, 0.147, 0.110, 0.179, 0.081 }, // D
217	{ 67, 89, 156, 0.161, 0.083, 0.191, 0.091 }, // N
218	{ 70, 119, 119, 0.149, 0.050, 0.117, 0.128 }, // C
219	{ 151, 37, 74, 0.056, 0.060, 0.077, 0.064 }, // E
220	{ 111, 110, 98, 0.074, 0.098, 0.037, 0.098 }, // Q
221	{ 57, 75, 156, 0.102, 0.085, 0.190, 0.152 }, // G
222	{ 100, 87, 95, 0.140, 0.047, 0.093, 0.054 }, // H
223	{ 108, 160, 47, 0.043, 0.034, 0.013, 0.056 }, // I
224	{ 121, 130, 59, 0.061, 0.025, 0.036, 0.070 }, // L
225	{ 116, 74, 101, 0.055, 0.115, 0.072, 0.095 }, // K
226	{ 145, 105, 60, 0.068, 0.082, 0.014, 0.055 }, // M
227	{ 113, 138, 60, 0.059, 0.041, 0.065, 0.065 }, // F
228	{ 57, 55, 152, 0.102, 0.301, 0.034, 0.068 }, // P
229	{ 77, 75, 143, 0.120, 0.139, 0.125, 0.106 }, // S
230	{ 83, 119, 96, 0.086, 0.108, 0.065, 0.079 }, // T
231	{ 108, 137, 96, 0.077, 0.013, 0.064, 0.167 }, // W
232	{ 69, 147, 114, 0.082, 0.065, 0.114, 0.125 }, // Y
233	{ 106, 170, 50, 0.062, 0.048, 0.028, 0.053 }}; // V
234	\endverbatim
235	*
236	* The normalized former values are used to find beta-turns in an amino acid
237	* sequence in ED4_pfold_find_turns(). Addressing the array with the enums
238	* #ALPHA_HELIX, #BETA_SHEET or #BETA_TURN as second index gives the former
239	* values and addressing it with #BETA_TURN+i \f$(1 <= i <= 4)\f$ gives the
240	* turn probabilities. The first index is for the amino acid. Use #char2AA to
241	* convert an amino acid character to the corresponding index.
242	*
243	* \sa Refer to the definition in the source code for commented table.
244	*/
// One row per amino acid, in the order of the string amino_acids (use char2AA
// to obtain the row index). Columns 0-2 hold the normalized former values for
// helix, sheet and turn; columns 3-6 hold the turn probabilities for the four
// positions of a turn window (read as cf_parameters_norm[aa][3 + count] in
// ED4_pfold_find_turns()).
245	static double cf_parameters_norm[20][7] = {
246	/* P(a) P(b) P(turn) f(i) f(i+1) f(i+2) f(i+3) Amino Acid
247	-------------------------------------------------------------------- */
248	{ 142, 83, 66, 0.060, 0.076, 0.035, 0.058 }, // A
249	{ 98, 93, 95, 0.070, 0.106, 0.099, 0.085 }, // R
250	{ 101, 54, 146, 0.147, 0.110, 0.179, 0.081 }, // D
251	{ 67, 89, 156, 0.161, 0.083, 0.191, 0.091 }, // N
252	{ 70, 119, 119, 0.149, 0.050, 0.117, 0.128 }, // C
253	{ 151, 37, 74, 0.056, 0.060, 0.077, 0.064 }, // E
254	{ 111, 110, 98, 0.074, 0.098, 0.037, 0.098 }, // Q
255	{ 57, 75, 156, 0.102, 0.085, 0.190, 0.152 }, // G
256	{ 100, 87, 95, 0.140, 0.047, 0.093, 0.054 }, // H
257	{ 108, 160, 47, 0.043, 0.034, 0.013, 0.056 }, // I
258	{ 121, 130, 59, 0.061, 0.025, 0.036, 0.070 }, // L
259	{ 116, 74, 101, 0.055, 0.115, 0.072, 0.095 }, // K
260	{ 145, 105, 60, 0.068, 0.082, 0.014, 0.055 }, // M
261	{ 113, 138, 60, 0.059, 0.041, 0.065, 0.065 }, // F
262	{ 57, 55, 152, 0.102, 0.301, 0.034, 0.068 }, // P
263	{ 77, 75, 143, 0.120, 0.139, 0.125, 0.106 }, // S
264	{ 83, 119, 96, 0.086, 0.108, 0.065, 0.079 }, // T
265	{ 108, 137, 96, 0.077, 0.013, 0.064, 0.167 }, // W
266	{ 69, 147, 114, 0.082, 0.065, 0.114, 0.125 }, // Y
267	{ 106, 170, 50, 0.062, 0.048, 0.028, 0.053 } }; // V
268
269	// --------------------------------------------------------------------------------
270
/*! \brief Symmetric arithmetic rounding of a double value to an integer value.
 *
 * \param[in] d Value to be rounded
 * \return      Rounded value
 *
 * Rounds a double value to the nearest integer using symmetric arithmetic
 * rounding, i.e. a number \f$x.y\f$ is rounded to \f$x\f$ if \f$y < 5\f$ and
 * away from zero otherwise. Negative values are handled symmetrically to
 * positive ones (-1.5 gives -2, -1.4 gives -1).
 */
inline int ED4_pfold_round_sym(double d) {
    // The conversion to int truncates toward zero, so the offset has to point
    // away from zero: adding .5 to a negative value would round it wrongly.
    return (d < 0) ? int(d - .5) : int(d + .5);
}
283
284
285	/*! \brief Initializes static variables.
286	*
287	* So far, this function only concerns #char2AA which gets initialized here.
288	* See #char2AA for details on the values. It is called by
289	* ED4_pfold_predict_structure() and ED4_pfold_calculate_secstruct_match().
290	*
291	* \attention If any other prediction function is used alone before calling one
292	* of the mentioned functions, this function has to be called first.
293	*/
294	static void ED4_pfold_init_statics() {
295	// specify the characters used for amino acid one letter code
296	if (!char2AA) {
297	char2AA = new int [256];
298	for (int i = 0; i < 256; i++) {
299	char2AA[i] = -1;
300	}
301	for (int i = 0; amino_acids[i]; i++) {
302	char2AA[(unsigned char)amino_acids[i]] = i;
303	}
304	}
305	}
306
307
308	/*! \brief Finds nucleation sites that initiate the specified structure.
309	*
310	* \param[in] sequence Amino acid sequence
311	* \param[out] structure Predicted secondary structure
312	* \param[in] length Size of \a sequence and \a structure
313	* \param[in] s Secondary structure type (either #ALPHA_HELIX or #BETA_SHEET)
314	*
315	* This function finds nucleation sites that initiate the specified structure
316	* (alpha-helix or beta-sheet). A window of a fixed size is moved over the
317	* sequence and former and breaker values (as defined by #cf_parameters) for
318	* the amino acids in the window are summed up. If the former values in this
319	* region reach a certain value and the breaker values do not exceed a certain
320	* limit a nucleation site is formed, i.e. the region is assumed to be the
321	* corresponding secondary structure. The result is stored in \a structure.
322	*/
323	static void ED4_pfold_find_nucleation_sites(const unsigned char sequence, char structure, int length, const PFOLD_STRUCTURE s) {
324	#ifdef SHOW_PROGRESS
325	cout << endl << "Searching for nucleation sites:" << endl;
326	#endif
327	e4_assert(s == ALPHA_HELIX \|\| s == BETA_SHEET); // incorrect value for s
328	e4_assert(char2AA); // char2AA not initialized; ED4_pfold_init_statics() failed or hasn't been called yet
329
330	char *gap_chars = ED4_ROOT->aw_root->awar(ED4_AWAR_GAP_CHARS)->read_string(); // gap characters
331	int windowSize = (s == ALPHA_HELIX ? 6 : 5); // window size used for finding nucleation sites
332	double sumOfFormVal = 0, sumOfBreakVal = 0; // sum of former resp. breaker values in window
333	int pos; // current position in sequence
334	int count; // number of amino acids found in window
335
336	for (int i = 0; i < ((length + 1) - windowSize); i++) {
337	int aa = 0; // amino acid
338
339	pos = i;
340	for (count = 0; count < windowSize; count++) {
341	// skip gaps
342	while (pos < ((length + 1) - windowSize) &&
343	strchr(gap_chars, sequence[pos + count])) {
344	pos++;
345	}
346	aa = char2AA[sequence[pos + count]];
347	if (aa == -1) break; // unknown character found
348
349	// compute former and breaker values
350	sumOfFormVal += cf_parameters[aa][s];
351	sumOfBreakVal += cf_parameters[aa][s+2];
352	}
353
354	// assign sequence and save start and end of nucleation site for later extension
355	if ((sumOfFormVal > (windowSize - 2)) && (sumOfBreakVal < 2)) {
356	for (int j = i; j < (pos + count); j++) {
357	if (char2AA[sequence[j]] != -1) structure[j] = structure_chars[s];
358	}
359	}
360	if (aa == -1) i = pos + count; // skip unknown character
361	sumOfFormVal = 0, sumOfBreakVal = 0;
362	}
363
364	free(gap_chars);
365	#ifdef SHOW_PROGRESS
366	cout << structure << endl;
367	#endif
368	}
369
370
371	/*! \brief Extends the found nucleation sites in both directions.
372	*
373	* \param[in] sequence Amino acid sequence
374	* \param[out] structure Predicted secondary structure
375	* \param[in] length Size of \a sequence and \a structure
376	* \param[in] s Secondary structure type (either #ALPHA_HELIX or #BETA_SHEET)
377	*
378	* The function extends the nucleation sites found by ED4_pfold_find_nucleation_sites()
379	* in both directions. Extension continues until a certain amino acid constellation
380	* is found. The amino acid 'P' breaks an alpha-helix and 'P' as well as 'E' break
381	* a beta-sheet. Also, two successive breakers or one breaker followed by an
382	* indifferent amino acid (as defined by #structure_breaker and #structure_indifferent)
383	* break the structure. The result is stored in \a structure.
384	*/
385	static void ED4_pfold_extend_nucleation_sites(const unsigned char sequence, char structure, int length, const PFOLD_STRUCTURE s) {
386	#ifdef SHOW_PROGRESS
387	cout << endl << "Extending nucleation sites:" << endl;
388	#endif
389	e4_assert(s == ALPHA_HELIX \|\| s == BETA_SHEET); // incorrect value for s
390	e4_assert(char2AA); // char2AA not initialized; ED4_pfold_init_statics() failed or hasn't been called yet
391
392	bool break_structure = false; // break the current structure
393	int start = 0, end = 0; // start and end of nucleation site
394	int neighbour = 0; // neighbour of start or end
395
396	char *gap_chars = ED4_ROOT->aw_root->awar(ED4_AWAR_GAP_CHARS)->read_string(); // gap characters
397
398	// find nucleation sites and extend them in both directions (for whole sequence)
399	for (int indStruct = 0; indStruct < length; indStruct++) {
400
401	// search start and end of nucleated region
402	while (indStruct < length &&
403	((structure[indStruct] == ' ') \|\| strchr(gap_chars, sequence[indStruct]))
404	) indStruct++;
405
406	if (indStruct >= length) break;
407	// get next amino acid that is not included in nucleation site
408	start = indStruct - 1;
409	while (indStruct < length &&
410	(structure[indStruct] != ' ' \|\| strchr(gap_chars, sequence[indStruct]))) {
411	indStruct++;
412	}
413	// get next amino acid that is not included in nucleation site
414	end = indStruct;
415
416	// extend nucleated region in both directions
417	// left side:
418	while (start > 1 && strchr(gap_chars, sequence[start])) {
419	start--; // skip gaps
420	}
421	// break if no amino acid is found
422	if (start >= 0) break_structure = (char2AA[sequence[start]] == -1);
423	while (!break_structure && (start > 1) && (structure[start] == ' ')) {
424	// break if absolute breaker (P or E) is found
425	break_structure = (sequence[start] == 'P');
426	if (s == BETA_SHEET) break_structure \|= (sequence[start] == 'E');
427	if (break_structure) break;
428	// check for breaker at current position
429	break_structure = (strchr(structure_breaker[s], sequence[start]) != 0);
430	neighbour = start - 1; // get neighbour
431	while (neighbour > 0 && strchr(gap_chars, sequence[neighbour])) {
432	neighbour--; // skip gaps
433	}
434	// break if out of bounds or no amino acid is found
435	if (neighbour <= 0 \|\| char2AA[sequence[neighbour]] == -1) {
436	break;
437	}
438	// break if another breaker or indifferent amino acid is found
439	break_structure &=
440	(strchr(structure_breaker[s], sequence[neighbour]) != 0) \|\|
441	(strchr(structure_indifferent[s], sequence[neighbour]) != 0);
442	if (!break_structure) {
443	structure[start] = structure_chars[s];
444	}
445	start = neighbour; // continue with neighbour
446	}
447
448	// right side:
449	while (end < (length - 2) && strchr(gap_chars, sequence[end])) {
450	end++; // skip gaps
451	}
452	// break if no amino acid is found
453	if (end <= (length - 1)) break_structure = (char2AA[sequence[end]] == -1);
454	while (!break_structure && (end < (length - 2))) {
455	// break if absolute breaker (P or E) is found
456	break_structure = (sequence[end] == 'P');
457	if (s == BETA_SHEET) break_structure \|= (sequence[end] == 'E');
458	if (break_structure) break;
459	// check for breaker at current position
460	break_structure = (strchr(structure_breaker[s], sequence[end]) != 0);
461	neighbour = end + 1; // get neighbour
462	while (neighbour < (length - 2) && strchr(gap_chars, sequence[neighbour])) {
463	neighbour++; // skip gaps
464	}
465	// break if out of bounds or no amino acid is found
466	if (neighbour >= (length - 1) \|\| char2AA[sequence[neighbour]] == -1) {
467	end = neighbour;
468	break;
469	}
470	// break if another breaker or indifferent amino acid is found
471	break_structure &=
472	(strchr(structure_breaker[s], sequence[neighbour]) != 0) \|\|
473	(strchr(structure_indifferent[s], sequence[neighbour]) != 0);
474	if (!break_structure) {
475	structure[end] = structure_chars[s];
476	}
477	end = neighbour; // continue with neighbour
478	}
479	indStruct = end; // continue with end
480	}
481
482	free(gap_chars);
483	#ifdef SHOW_PROGRESS
484	cout << structure << endl;
485	#endif
486	}
487
488
489	/*! \brief Predicts beta-turns from the given amino acid sequence
490	*
491	* \param[in] sequence Amino acid sequence
492	* \param[out] structure Predicted secondary structure
493	* \param[in] length Size of \a sequence and \a structure
494	*
495	* A window of a fixed size is moved over the sequence and former values for
496	* alpha-helices, beta-sheets and beta-turns are summed up. In addition,
497	* beta-turn probabilities are multiplied. The values are specified in
498	* #cf_parameters_norm. If the former values for beta-turn are greater than
499	* the ones for alpha-helix and beta-sheet and the turn probabilities
500	* exceed a certain limit the region is assumed to be a beta-turn. The result
501	* is stored in \a structure.
502	*/
503	static void ED4_pfold_find_turns(const unsigned char sequence, char structure, int length) {
504	#ifdef SHOW_PROGRESS
505	cout << endl << "Searching for beta-turns: " << endl;
506	#endif
507	e4_assert(char2AA); // char2AA not initialized; ED4_pfold_init_statics() failed or hasn't been called yet
508
509	char *gap_chars = ED4_ROOT->aw_root->awar(ED4_AWAR_GAP_CHARS)->read_string(); // gap characters
510	const int windowSize = 4; // window size
511	double P_a = 0, P_b = 0, P_turn = 0; // former values for helix, sheet and beta-turn
512	double p_t = 1; // probability for beta-turn
513	int pos; // position in sequence
514	int count; // position in window
515	int aa; // amino acid
516
517	for (int i = 0; i < ((length + 1) - windowSize); i++) {
518	pos = i;
519	for (count = 0; count < windowSize; count++) {
520	// skip gaps
521	while (pos < ((length + 1) - windowSize) &&
522	strchr(gap_chars, sequence[pos + count])) {
523	pos++;
524	}
525	aa = char2AA[sequence[pos + count]];
526	if (aa == -1) break; // unknown character found
527
528	// compute former values and turn probability
529	P_a += cf_parameters_norm[aa][0];
530	P_b += cf_parameters_norm[aa][1];
531	P_turn += cf_parameters_norm[aa][2];
532	p_t *= cf_parameters_norm[aa][3 + count];
533	}
534	if (count != 0) {
535	P_a /= count;
536	P_b /= count;
537	P_turn /= count;
538	if ((p_t > 0.000075) && (P_turn > 100) && (P_turn > P_a) && (P_turn > P_b)) {
539	for (int j = i; j < (pos + count); j++) {
540	if (char2AA[sequence[j]] != -1) structure[j] = structure_chars[BETA_TURN];
541	}
542	}
543	}
544	if (aa == -1) i = pos + count; // skip unknown character
545	p_t = 1, P_a = 0, P_b = 0, P_turn = 0;
546	}
547
548	free(gap_chars);
549	#ifdef SHOW_PROGRESS
550	cout << structure << endl;
551	#endif
552	}
553
554
555	/*! \brief Resolves overlaps of predicted secondary structures and creates structure summary.
556	*
557	* \param[in] sequence Amino acid sequence
558	* \param[in,out] structures Predicted secondary structures (#ALPHA_HELIX, #BETA_SHEET,
559	* #BETA_TURN and #STRUCTURE_SUMMARY, in this order)
560	* \param[in] length Size of \a sequence and \a structures[i]
561	*
562	* The function takes the given predicted structures (alpha-helix, beta-sheet
563	* and beta-turn) and searches for overlapping regions. If a beta-turn is found
564	* the structure summary is assumed to be a beta-turn. For overlapping alpha-helices
565	* and beta-sheets the former values are summed up for this region and the
566	* structure summary is assumed to be the structure type with the higher former
567	* value. The result is stored in \a structures[3] (= \a structures[#STRUCTURE_SUMMARY]).
568	*
569	* \attention I couldn't find a standard procedure for resolving overlaps and
570	* there might be other (possibly better) ways to do that.
571	*/
572	static void ED4_pfold_resolve_overlaps(const unsigned char sequence, char structures[4], int length) {
573	#ifdef SHOW_PROGRESS
574	cout << endl << "Resolving overlaps: " << endl;
575	#endif
576	e4_assert(char2AA); // char2AA not initialized; ED4_pfold_init_statics() failed or hasn't been called yet
577
578	int start = -1; // start of overlap
579	int end = -1; // end of overlap
580	double P_a = 0; // sum of former values for alpha-helix in overlapping regions
581	double P_b = 0; // sum of former values for beta-sheet in overlapping regions
582	PFOLD_STRUCTURE s; // structure with the highest former values
583	char *gap_chars = ED4_ROOT->aw_root->awar(ED4_AWAR_GAP_CHARS)->read_string(); // gap characters
584
585	// scan structures for overlaps
586	for (int pos = 0; pos < length; pos++) {
587
588	// if beta-turn is found at position pos -> summary is beta-turn
589	if (structures[BETA_TURN][pos] != ' ') {
590	structures[STRUCTURE_SUMMARY][pos] = structure_chars[BETA_TURN];
591
592	// if helix and sheet are overlapping and no beta-turn is found -> check which structure has the highest sum of former values
593	}
594	else if ((structures[ALPHA_HELIX][pos] != ' ') && (structures[BETA_SHEET][pos] != ' ')) {
595
596	// search start and end of overlap (as long as no beta-turn is found)
597	start = pos;
598	end = pos;
599	while (structures[ALPHA_HELIX][end] != ' ' && structures[BETA_SHEET][end] != ' ' &&
600	structures[BETA_TURN][end] == ' ') {
601	end++;
602	}
603
604	// calculate P_a and P_b for overlap
605	for (int i = start; i < end; i++) {
606	// skip gaps
607	while (i < end && strchr(gap_chars, sequence[i])) {
608	i++;
609	}
610	int aa = char2AA[sequence[i]];
611	if (aa != -1) {
612	P_a += cf_parameters[aa][ALPHA_HELIX];
613	P_b += cf_parameters[aa][BETA_SHEET];
614	}
615	}
616
617	// check which structure is more likely and set s appropriately
618	s = (P_a > P_b) ? ALPHA_HELIX : BETA_SHEET;
619
620	// set structure for overlapping region
621	for (int i = start; i < end; i++) {
622	structures[STRUCTURE_SUMMARY][i] = structure_chars[s];
623	}
624
625	// set variables for next pass of loop resp. end of loop
626	P_a = 0, P_b = 0;
627	pos = end - 1;
628
629	// if helix and sheet are not overlapping and no beta-turn is found -> set structure accordingly
630	}
631	else {
632	// summary at position pos is helix resp. sheet
633	if (structures[ALPHA_HELIX][pos] != ' ') {
634	structures[STRUCTURE_SUMMARY][pos] = structure_chars[ALPHA_HELIX];
635	}
636	else if (structures[BETA_SHEET][pos] != ' ') {
637	structures[STRUCTURE_SUMMARY][pos] = structure_chars[BETA_SHEET];
638	}
639	}
640	}
641
642	free(gap_chars);
643	#ifdef SHOW_PROGRESS
644	cout << structures[summary] << endl;
645	#endif
646	}
647
648
649	/*! \brief Predicts protein secondary structures from the amino acid sequence.
650	*
651	* \param[in] sequence Amino acid sequence
652	* \param[out] structures Predicted secondary structures (#ALPHA_HELIX, #BETA_SHEET,
653	* #BETA_TURN and #STRUCTURE_SUMMARY, in this order)
654	* \param[in] length Size of \a sequence and \a structures[i]
655	* \return Error description, if an error occurred; 0 otherwise
656	*
657	* This function predicts the protein secondary structures from the amino acid
658	* sequence according to the Chou-Fasman algorithm. In a first step, nucleation sites
659	* for alpha-helices and beta-sheets are found using ED4_pfold_find_nucleation_sites().
660	* In a next step, the found structures are extended obeying certain rules with
661	* ED4_pfold_extend_nucleation_sites(). Beta-turns are found with the function
662	* ED4_pfold_find_turns(). In a final step, overlapping regions are identified and
663	* resolved to create a structure summary with ED4_pfold_resolve_overlaps().
664	* The results are written to \a structures[i] and can be accessed via the enums
665	* #ALPHA_HELIX, #BETA_SHEET, #BETA_TURN and #STRUCTURE_SUMMARY.
666	*/
667	static GB_ERROR ED4_pfold_predict_structure(const unsigned char sequence, char structures[4], int length) {
668	#ifdef SHOW_PROGRESS
669	cout << endl << "Predicting secondary structure for sequence:" << endl << sequence << endl;
670	#endif
671	GB_ERROR error = 0;
672	e4_assert((int)strlen((const char *)sequence) == length);
673
674	// init memory
675	ED4_pfold_init_statics();
676	e4_assert(char2AA);
677
678	// predict structure
679	ED4_pfold_find_nucleation_sites(sequence, structures[ALPHA_HELIX], length, ALPHA_HELIX);
680	ED4_pfold_find_nucleation_sites(sequence, structures[BETA_SHEET], length, BETA_SHEET);
681	ED4_pfold_extend_nucleation_sites(sequence, structures[ALPHA_HELIX], length, ALPHA_HELIX);
682	ED4_pfold_extend_nucleation_sites(sequence, structures[BETA_SHEET], length, BETA_SHEET);
683	ED4_pfold_find_turns(sequence, structures[BETA_TURN], length);
684	ED4_pfold_resolve_overlaps(sequence, structures, length);
685
686	return error;
687	}
688
689	GB_ERROR ED4_pfold_calculate_secstruct_match(const unsigned char structure_sai, const unsigned char structure_cmp, const int start, const int end, char *result_buffer, PFOLD_MATCH_METHOD match_method) {
690	GB_ERROR error = 0;
691	e4_assert(structure_sai);
692	e4_assert(structure_cmp);
693	e4_assert(start >= 0);
694	e4_assert(result_buffer);
695	e4_assert(match_method >= 0 && match_method < PFOLD_MATCH_METHOD_COUNT);
696	ED4_pfold_init_statics();
697	e4_assert(char2AA);
698
699	e4_assert(end >= start);
700
701	size_t end_minus_start = size_t(end-start); // @@@ use this
702
703	size_t length = strlen((const char *)structure_sai);
704	size_t match_end = std::min(std::min(end_minus_start, length), strlen((const char *)structure_cmp));
705
706	enum { BEND = 3, NOSTRUCT = 4 };
707	char *struct_chars[] = {
708	strdup("HGI"), // helical structures (enum ALPHA_HELIX)
709	strdup("EB"), // sheet-like structures (enum BETA_SHEET)
710	strdup("T"), // beta-turn (enum BETA_TURN)
711	strdup("S"), // bends (enum BEND)
712	strdup("") // no structure (enum NOSTRUCT)
713	};
714
715	// init awars
716	AW_root *awr = ED4_ROOT->aw_root;
717	char *gap_chars = awr->awar(ED4_AWAR_GAP_CHARS)->read_string();
718	char *pairs[PFOLD_MATCH_TYPE_COUNT] = { 0 };
719	char *pair_chars[PFOLD_MATCH_TYPE_COUNT] = { 0 };
720	char *pair_chars_2 = awr->awar(PFOLD_AWAR_SYMBOL_TEMPLATE_2)->read_string();
721	char awar[256];
722
723	for (int i = 0; pfold_match_type_awars[i].name; i++) {
724	sprintf(awar, PFOLD_AWAR_PAIR_TEMPLATE, pfold_match_type_awars[i].name);
725	pairs[i] = awr->awar(awar)->read_string();
726	sprintf(awar, PFOLD_AWAR_SYMBOL_TEMPLATE, pfold_match_type_awars[i].name);
727	pair_chars[i] = awr->awar(awar)->read_string();
728	}
729
730	int struct_start = start;
731	int struct_end = start;
732	size_t count = 0;
733	int current_struct = 4;
734	int aa = -1;
735	double prob = 0;
736
737	// TODO: move this check to callback for the corresponding field?
738	if (strlen(pair_chars_2) != 10) {
739	error = GB_export_error("You have to define 10 match symbols.");
740	}
741
742	if (!error) {
743	switch (match_method) {
744
745	case SECSTRUCT_SECSTRUCT:
746	// TODO: one could try to find out, if structure_cmp is really a secondary structure and not a sequence (define awar for allowed symbols in secondary structure)
747	for (count = 0; count < match_end; count++) {
748	result_buffer[count] = *pair_chars[STRUCT_UNKNOWN];
749	for (int n_pt = 0; n_pt < PFOLD_MATCH_TYPE_COUNT; n_pt++) {
750	int len = strlen(pairs[n_pt])-1;
751	char *p = pairs[n_pt];
752	for (int j = 0; j < len; j += 3) {
753	if ((p[j] == structure_sai[count + start] && p[j+1] == structure_cmp[count + start]) \|\|
754	(p[j] == structure_cmp[count + start] && p[j+1] == structure_sai[count + start])) {
755	result_buffer[count] = *pair_chars[n_pt];
756	n_pt = PFOLD_MATCH_TYPE_COUNT; // stop searching the pair types
757	break; // stop searching the pairs array
758	}
759	}
760	}
761	}
762
763	// fill the remaining buffer with spaces
764	while (count <= end_minus_start) {
765	result_buffer[count] = ' ';
766	count++;
767	}
768	break;
769
770	case SECSTRUCT_SEQUENCE:
771	// clear result buffer
772	for (size_t i = 0; i <= end_minus_start; i++) result_buffer[i] = ' ';
773
774	// skip gaps
775	while (structure_sai[struct_start] != '\0' && structure_cmp[struct_start] != '\0' &&
776	strchr(gap_chars, structure_sai[struct_start]) &&
777	strchr(gap_chars, structure_cmp[struct_start])) {
778	struct_start++;
779	}
780	if (structure_sai[struct_start] == '\0' \|\| structure_cmp[struct_start] == '\0') break;
781
782	// check structure at the first displayed position and find out where it starts
783	for (current_struct = 0; current_struct < 4 && !strchr(struct_chars[current_struct], structure_sai[struct_start]); current_struct++) {
784	;
785	}
786	if (current_struct != BEND && current_struct != NOSTRUCT) {
787	struct_start--; // check structure left of start
788	while (struct_start >= 0) {
789	// skip gaps
790	while (struct_start > 0 &&
791	strchr(gap_chars, structure_sai[struct_start]) &&
792	strchr(gap_chars, structure_cmp[struct_start])) {
793	struct_start--;
794	}
795	aa = char2AA[structure_cmp[struct_start]];
796	if (struct_start == 0 && aa == -1) { // nothing was found
797	break;
798	}
799	else if (strchr(struct_chars[current_struct], structure_sai[struct_start]) && aa != -1) {
800	prob += cf_former(aa, current_struct) - cf_breaker(aa, current_struct); // sum up probabilities
801	struct_start--;
802	count++;
803	}
804	else {
805	break;
806	}
807	}
808	}
809
810	// parse structures
811	struct_start = start;
812	// skip gaps
813	while (structure_sai[struct_start] != '\0' && structure_cmp[struct_start] != '\0' &&
814	strchr(gap_chars, structure_sai[struct_start]) &&
815	strchr(gap_chars, structure_cmp[struct_start])) {
816	struct_start++;
817	}
818	if (structure_sai[struct_start] == '\0' \|\| structure_cmp[struct_start] == '\0') break;
819	struct_end = struct_start;
820	while (struct_end < end) {
821	aa = char2AA[structure_cmp[struct_end]];
822	if (current_struct == NOSTRUCT) { // no structure found -> move on
823	struct_end++;
824	}
825	else if (aa == -1) { // structure found but no corresponding amino acid -> doesn't fit at all
826	result_buffer[struct_end - start] = pair_chars_2[0];
827	struct_end++;
828	}
829	else if (current_struct == BEND) { // bend found -> fits perfectly everywhere
830	result_buffer[struct_end - start] = pair_chars_2[9];
831	struct_end++;
832	}
833	else { // helix, sheet or beta-turn found -> while structure doesn't change: sum up probabilities
834	while (structure_sai[struct_end] != '\0') {
835	// skip gaps
836	while (strchr(gap_chars, structure_sai[struct_end]) &&
837	strchr(gap_chars, structure_cmp[struct_end]) &&
838	structure_sai[struct_end] != '\0' && structure_cmp[struct_end] != '\0') {
839	struct_end++;
840	}
841	aa = char2AA[structure_cmp[struct_end]];
842	if (structure_sai[struct_end] != '\0' && structure_cmp[struct_end] != '\0' &&
843	strchr(struct_chars[current_struct], structure_sai[struct_end]) && aa != -1) {
844	prob += cf_former(aa, current_struct) - cf_breaker(aa, current_struct); // sum up probabilities
845	struct_end++;
846	count++;
847	}
848	else {
849	break;
850	}
851	}
852
853	if (count != 0) {
854	// compute average and normalize probability
855	prob /= count;
856	prob = (prob + max_breaker_value[current_struct] - min_former_value[current_struct]) / (max_breaker_value[current_struct] + max_former_value[current_struct] - min_former_value[current_struct]);
857
858	#if 0 // code w/o effect
859	// map to match characters and store in result_buffer
860	int prob_normalized = ED4_pfold_round_sym(prob * 9);
861	// e4_assert(prob_normalized >= 0 && prob_normalized <= 9); // if this happens check if normalization is correct or some undefined characters mess everything up
862	char prob_symbol = *pair_chars[STRUCT_UNKNOWN];
863	if (prob_normalized >= 0 && prob_normalized <= 9) {
864	prob_symbol = pair_chars_2[prob_normalized];
865	}
866	#endif
867	}
868	}
869
870	// find next structure type
871	if (structure_sai[struct_end] == '\0' \|\| structure_cmp[struct_end] == '\0') {
872	break;
873	}
874	else {
875	prob = 0;
876	count = 0;
877	struct_start = struct_end;
878	for (current_struct = 0; current_struct < 4 && !strchr(struct_chars[current_struct], structure_sai[struct_start]); current_struct++) {
879	;
880	}
881	}
882	}
883	break;
884
885	case SECSTRUCT_SEQUENCE_PREDICT:
886	// predict structures from structure_cmp and compare with structure_sai
887	char *structures[4];
888	for (int i = 0; i < 4 && !error; i++) {
889	structures[i] = new char [length + 1];
890	if (!structures[i]) {
891	error = "Out of memory";
892	}
893	else {
894	for (size_t j = 0; j < length; j++) {
895	structures[i][j] = ' ';
896	}
897	structures[i][length] = '\0';
898	}
899	}
900	if (!error) error = ED4_pfold_predict_structure(structure_cmp, structures, length);
901	if (!error) {
902	for (count = 0; count < match_end; count++) {
903	result_buffer[count] = *pair_chars[STRUCT_UNKNOWN];
904	if (!strchr(gap_chars, structure_sai[count + start]) && strchr(gap_chars, structure_cmp[count + start])) {
905	result_buffer[count] = *pair_chars[STRUCT_NO_MATCH];
906	} else if (strchr(gap_chars, structure_sai[count + start]) \|\|
907	(structures[ALPHA_HELIX][count + start] == ' ' && structures[BETA_SHEET][count + start] == ' ' && structures[BETA_TURN][count + start] == ' ')) {
908	result_buffer[count] = *pair_chars[STRUCT_PERFECT_MATCH];
909	}
910	else {
911	// search for good match first
912	// if found: stop searching
913	// otherwise: continue searching for a less good match
914	for (int n_pt = 0; n_pt < PFOLD_MATCH_TYPE_COUNT; n_pt++) {
915	int len = strlen(pairs[n_pt])-1;
916	char *p = pairs[n_pt];
917	for (int n_struct = 0; n_struct < 3; n_struct++) {
918	for (int j = 0; j < len; j += 3) {
919	if ((p[j] == structures[n_struct][count + start] && p[j+1] == structure_sai[count + start]) \|\|
920	(p[j] == structure_sai[count + start] && p[j+1] == structures[n_struct][count + start])) {
921	result_buffer[count] = *pair_chars[n_pt];
922	n_struct = 3; // stop searching the structures
923	n_pt = PFOLD_MATCH_TYPE_COUNT; // stop searching the pair types
924	break; // stop searching the pairs array
925	}
926	}
927	}
928	}
929	}
930	}
931	// fill the remaining buffer with spaces
932	while (count <= end_minus_start) {
933	result_buffer[count] = ' ';
934	count++;
935	}
936	}
937	// free buffer
938	for (int i = 0; i < 4; i++) {
939	if (structures[i]) {
940	delete structures[i];
941	structures[i] = 0;
942	}
943	}
944	break;
945
946	default:
947	e4_assert(0); // function called with invalid argument for 'match_method'
948	break;
949	}
950	}
951
952	free(gap_chars);
953	free(pair_chars_2);
954	for (int i = 0; pfold_match_type_awars[i].name; i++) {
955	free(pairs[i]);
956	free(pair_chars[i]);
957	}
958	if (error) for (size_t i = 0; i <= end_minus_start; i++) result_buffer[i] = ' '; // clear result buffer
959	return error;
960	}
961
962
963	GB_ERROR ED4_pfold_set_SAI(char *protstruct, GBDATA gb_main, const char alignment_name, long protstruct_len) {
964	GB_ERROR error = 0;
965	GB_transaction ta(gb_main);
966	AW_root *aw_root = ED4_ROOT->aw_root;
967	char *SAI_name = aw_root->awar(PFOLD_AWAR_SELECTED_SAI)->read_string();
968	GBDATA *gb_protstruct = GBT_find_SAI(gb_main, SAI_name);
969
970	freenull(*protstruct);
971
972	if (gb_protstruct) {
973	GBDATA *gb_data = GBT_find_sequence(gb_protstruct, alignment_name);
974	if (gb_data) *protstruct = GB_read_string(gb_data);
975	}
976
977	if (*protstruct) {
978	if (protstruct_len) protstruct_len = (long)strlen(protstruct);
979	}
980	else {
981	if (protstruct_len) protstruct_len = 0;
982	if (aw_root->awar(PFOLD_AWAR_ENABLE)->read_int()) {
983	error = GBS_global_string("SAI \"%s\" does not exist.\nDisabled protein structure display!", SAI_name);
984	aw_root->awar(PFOLD_AWAR_ENABLE)->write_int(0);
985	}
986	}
987
988	free(SAI_name);
989	return error;
990	}
991
992	/*! \brief Callback function to select the reference protein structure SAI and to
993	* update the SAI option menu.
994	*
995	* \param[in] aww The calling window
996	* \param[in,out] oms The SAI option menu
997	* \param[in] set_sai Specifies if SAI should be updated
998	*
999	* The function is called whenever the selected SAI or the SAI filter is changed
1000	* in the "Protein Match Settings" dialog (see ED4_pfold_create_props_window()).
1001	* It can be called with \a set_sai defined to update the reference protein secondary
1002	* structure SAI in the editor via ED4_pfold_set_SAI() and to update the selection in
1003	* the SAI option menu. If \a set_sai is 0 only the option menu is updated. This is
1004	* necessary if only the SAI filter changed but not the selected SAI.
1005	*/
1006
1007	static void ED4_pfold_select_SAI_and_update_option_menu(AW_window *aww, AW_CL oms, AW_CL set_sai) {
1008	e4_assert(aww);
1009	AW_option_menu_struct _oms = ((AW_option_menu_struct)oms);
1010	e4_assert(_oms);
1011	char *selected_sai = ED4_ROOT->aw_root->awar(PFOLD_AWAR_SELECTED_SAI)->read_string();
1012	char *sai_filter = ED4_ROOT->aw_root->awar(PFOLD_AWAR_SAI_FILTER)->read_string();
1013
1014	if (set_sai) {
1015	const char *err = ED4_pfold_set_SAI(&ED4_ROOT->protstruct, GLOBAL_gb_main, ED4_ROOT->alignment_name, &ED4_ROOT->protstruct_len);
1016	if (err) aw_message(err);
1017	}
1018
1019	aww->clear_option_menu(_oms);
1020	aww->insert_default_option(selected_sai, "", selected_sai);
1021	GB_transaction ta(GLOBAL_gb_main);
1022
1023	for (GBDATA *sai = GBT_first_SAI(GLOBAL_gb_main);
1024	sai;
1025	sai = GBT_next_SAI(sai))
1026	{
1027	const char *sai_name = GBT_read_name(sai);
1028	if (strcmp(sai_name, selected_sai) != 0 && strstr(sai_name, sai_filter) != 0) {
1029	aww->callback(ED4_pfold_select_SAI_and_update_option_menu, (AW_CL)_oms, true);
1030	aww->insert_option(sai_name, "", sai_name);
1031	}
1032	}
1033
1034	free(selected_sai);
1035	free(sai_filter);
1036	aww->update_option_menu();
1037	// ED4_expose_all_windows();
1038	// @@@ need update here ?
1039	}
1040
1041
1042	AW_window ED4_pfold_create_props_window(AW_root awr, void (cb)(AW_window)) {
1043	AW_window_simple *aws = new AW_window_simple;
1044	aws->init(awr, "PFOLD_PROPS", "PROTEIN_MATCH_SETTINGS");
1045
1046	// create close button
1047	aws->at(10, 10);
1048	aws->auto_space(5, 2);
1049	aws->callback(AW_POPDOWN);
1050	aws->create_button("CLOSE", "CLOSE", "C");
1051
1052	// create help button
1053	aws->callback(makeHelpCallback("pfold_props.hlp"));
1054	aws->create_button("HELP", "HELP");
1055	aws->at_newline();
1056
1057	aws->label_length(27);
1058	int ex = 0, ey = 0;
1059	char awar[256];
1060
1061	// create toggle field for showing the protein structure match
1062	aws->label("Show protein structure match?");
1063	aws->callback(makeWindowCallback(cb));
1064	aws->create_toggle(PFOLD_AWAR_ENABLE);
1065	aws->at_newline();
1066
1067	// create SAI option menu
1068	aws->label_length(30);
1069	aws->label("Selected Protein Structure SAI");
1070	AW_option_menu_struct *oms_sai = aws->create_option_menu(PFOLD_AWAR_SELECTED_SAI, true);
1071	ED4_pfold_select_SAI_and_update_option_menu(aws, (AW_CL)oms_sai, 0);
1072	aws->at_newline();
1073	aws->label("-> Filter SAI names for");
1074	aws->callback(ED4_pfold_select_SAI_and_update_option_menu, (AW_CL)oms_sai, 0);
1075	aws->create_input_field(PFOLD_AWAR_SAI_FILTER, 10);
1076	aws->at_newline();
1077
1078	// create match method option menu
1079	PFOLD_MATCH_METHOD match_method = (PFOLD_MATCH_METHOD) ED4_ROOT->aw_root->awar(PFOLD_AWAR_MATCH_METHOD)->read_int();
1080	aws->label_length(12);
1081	aws->label("Match Method");
1082	aws->create_option_menu(PFOLD_AWAR_MATCH_METHOD, true);
1083	for (int i = 0; const char *mm_aw = pfold_match_method_awars[i].name; i++) {
1084	aws->callback(makeWindowCallback(cb));
1085	if (match_method == pfold_match_method_awars[i].value) {
1086	aws->insert_default_option(mm_aw, "", match_method);
1087	}
1088	else {
1089	aws->insert_option(mm_aw, "", pfold_match_method_awars[i].value);
1090	}
1091	}
1092	aws->update_option_menu();
1093	aws->at_newline();
1094
1095	// create match symbols and/or match types input fields
1096	// TODO: show only fields that are relevant for current match method -> bind to callback function?
1097	aws->label_length(40);
1098	aws->label("Match Symbols (Range 0-100% in steps of 10%)");
1099	aws->callback(makeWindowCallback(cb));
1100	aws->create_input_field(PFOLD_AWAR_SYMBOL_TEMPLATE_2, 10);
1101	aws->at_newline();
1102	for (int i = 0; pfold_match_type_awars[i].name; i++) {
1103	aws->label_length(12);
1104	sprintf(awar, PFOLD_AWAR_PAIR_TEMPLATE, pfold_match_type_awars[i].name);
1105	aws->label(pfold_match_type_awars[i].name);
1106	aws->callback(makeWindowCallback(cb));
1107	aws->create_input_field(awar, 30);
1108	// TODO: is it possible to disable input field for STRUCT_UNKNOWN?
1109	// if (pfold_match_type_awars[i].value == STRUCT_UNKNOWN)
1110	if (!i) aws->get_at_position(&ex, &ey);
1111	sprintf(awar, PFOLD_AWAR_SYMBOL_TEMPLATE, pfold_match_type_awars[i].name);
1112	aws->callback(makeWindowCallback(cb));
1113	aws->create_input_field(awar, 3);
1114	aws->at_newline();
1115	}
1116
1117	aws->window_fit();
1118	return (AW_window *)aws;
1119	}
1120

Note: See TracBrowser for help on using the repository browser.

Download in other formats: