Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

ED4_protein_2nd_structure.cxx

Visit:

Last change on this file was 19206, checked in by westram, 3 years ago
reintegrates 'ali' into 'trunk' adds: log:branches/ali@19183:19205
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 52.4 KB

Line
1	// =============================================================== //
2	// //
3	// File : ED4_protein_2nd_structure.cxx //
4	// Purpose : //
5	// //
6	// Institute of Microbiology (Technical University Munich) //
7	// http://www.arb-home.de/ //
8	// //
9	// =============================================================== //
10
11
12	/*! \file ED4_protein_2nd_structure.cxx
13	* \brief Implements the functions defined in ed4_protein_2nd_structure.hxx.
14	* \author Markus Urban
15	* \date 2008-02-08
16	* \sa Refer to ed4_protein_2nd_structure.hxx for details, please.
17	*/
18
19	#include "ed4_protein_2nd_structure.hxx"
20	#include "ed4_class.hxx"
21	#include "ed4_awars.hxx"
22
23	#include <aw_awar.hxx>
24	#include <aw_msg.hxx>
25	#include <aw_root.hxx>
26	#include "arbdbt.h"
27
28	#include <iostream>
29	#include <awt_config_manager.hxx>
30
31	#define e4_assert(bed) arb_assert(bed)
32
33	using namespace std;
34
35	// --------------------------------------------------------------------------------
36	// exported data
37
38	//! Awars for the match type; binds the #PFOLD_MATCH_TYPE to the corresponding awar name.
39	name_value_pair pfold_match_type_awars[] = {
40	{ "Perfect_match", STRUCT_PERFECT_MATCH },
41	{ "Good_match", STRUCT_GOOD_MATCH },
42	{ "Medium_match", STRUCT_MEDIUM_MATCH },
43	{ "Bad_match", STRUCT_BAD_MATCH },
44	{ "No_match", STRUCT_NO_MATCH },
45	{ "Unknown_match", STRUCT_UNKNOWN },
46	{ NULp, PFOLD_MATCH_TYPE_COUNT }
47	};
48
49	//! Symbols for the match quality (defined by #PFOLD_MATCH_TYPE) as used for match methods #SECSTRUCT_SECSTRUCT and #SECSTRUCT_SEQUENCE_PREDICT in ED4_pfold_calculate_secstruct_match().
50	char *pfold_pair_chars[PFOLD_PAIRS] = {
51	ARB_strdup(" "), // STRUCT_PERFECT_MATCH
52	ARB_strdup("-"), // STRUCT_GOOD_MATCH
53	ARB_strdup("~"), // STRUCT_MEDIUM_MATCH
54	ARB_strdup("+"), // STRUCT_BAD_MATCH
55	ARB_strdup("#"), // STRUCT_NO_MATCH
56	ARB_strdup("?") // STRUCT_UNKNOWN
57	};
58
59	//! Match pair definition (see #PFOLD_MATCH_TYPE) as used for match methods #SECSTRUCT_SECSTRUCT and #SECSTRUCT_SEQUENCE_PREDICT in ED4_pfold_calculate_secstruct_match().
60	char *pfold_pairs[PFOLD_PAIRS] = {
61	ARB_strdup("HH GG II TT EE BB SS -- -. .."), // STRUCT_PERFECT_MATCH
62	ARB_strdup("HG HI HS EB ES TS H- G- I- T- E- B- S-"), // STRUCT_GOOD_MATCH
63	ARB_strdup("HT GT IT"), // STRUCT_MEDIUM_MATCH
64	ARB_strdup("ET BT"), // STRUCT_BAD_MATCH
65	ARB_strdup("EH BH EG EI"), // STRUCT_NO_MATCH
66	ARB_strdup("") // STRUCT_UNKNOWN
67	};
68
69	static struct pfold_mem_handler {
70	~pfold_mem_handler() {
71	for (int i = 0; i<PFOLD_PAIRS; ++i) {
72	freenull(pfold_pairs[i]);
73	freenull(pfold_pair_chars[i]);
74	}
75	}
76	} pfold_dealloc;
77
78	// --------------------------------------------------------------------------------
79
80	/*! \brief Specifies the characters used for amino acid one letter code.
81	*
82	* These are the characters that represent amino acids in one letter code.
83	* The order is important as the array initializes #char2AA which is used to
84	* access array elements in the tables #cf_parameters and #cf_parameters_norm.
85	*/
86	static const char *amino_acids = "ARDNCEQGHILKMFPSTWYV";
87
88	/*! \brief Maps character to amino acid one letter code.
89	*
90	* This array maps a character to an integer value. It is initialized with the
91	* function ED4_pfold_init_statics() which creates an array of the size 256
92	* (for ISO/IEC 8859-1 character encoding). Characters that represent an amino
93	* acid get values from 0 to 19 (according to their position in #amino_acids)
94	* and all others get the value -1. That way, it can be used to get parameters
95	* from the tables #cf_parameters and #cf_parameters_norm or to check if a
96	* certain character represents an amino acid.
97	*/
98	static int *char2AA = NULp;
99
100	/*! \brief Characters representing protein secondary structure.
101	*
102	* Defines the characters representing secondary structure as output by the
103	* function ED4_pfold_predict_structure(). According to common standards,
104	* these are: <BR>
105	* H = alpha-helix, <BR>
106	* E = beta-sheet, <BR>
107	* T = beta-turn.
108	*/
109	static char structure_chars[3] = { 'H', 'E', 'T' };
110
111	//! Amino acids that break a certain structure (#ALPHA_HELIX or #BETA_SHEET) as used in ED4_pfold_extend_nucleation_sites().
112	static const char *structure_breaker[2] = {
113	"NYPG",
114	"PDESGK"
115	};
116
117	//! Amino acids that are indifferent for a certain structure (#ALPHA_HELIX or #BETA_SHEET) as used in ED4_pfold_extend_nucleation_sites().
118	static const char *structure_indifferent[2] = {
119	"RTSC",
120	"RNHA"
121	};
122
123	//! Awars for the match method; binds the #PFOLD_MATCH_METHOD to the corresponding name that is used to create the menu in ED4_pfold_create_props_window().
124	static name_value_pair pfold_match_method_awars[4] = {
125	{ "Secondary Structure <-> Secondary Structure", SECSTRUCT_SECSTRUCT },
126	{ "Secondary Structure <-> Sequence", SECSTRUCT_SEQUENCE },
127	{ "Secondary Structure <-> Sequence (Full Prediction)", SECSTRUCT_SEQUENCE_PREDICT },
128	{ NULp, PFOLD_MATCH_METHOD_COUNT }
129	};
130
131	static double max_former_value[3] = { 1.42, 1.62, 156 }; //!< Maximum former value for alpha-helix, beta-sheet (in #cf_parameters) and beta-turn (in #cf_parameters_norm).
132	static double min_former_value[3] = { 0.0, 0.0, 47 }; //!< Minimum former value for alpha-helix, beta-sheet (in #cf_parameters) and beta-turn (in #cf_parameters_norm).
133	static double max_breaker_value[3] = { 1.21, 2.03, 0.0 }; //!< Maximum breaker value for alpha-helix, beta-sheet (in #cf_parameters) and beta-turn (no breaker values => 0).
134
135	// --------------------------------------------------------------------------------
136
137	// TODO: is there a way to prevent doxygen from stripping the comments from the table?
138	// I simply added the parameter table as verbatim environment to show the comments in
139	// the documentation.
140	/*! \brief Former and breaker values for alpha-helices and beta-sheets (= strands).
141	*
142	* \hideinitializer
143	* \par Initial value:
144	* \verbatim
145	{
146	// Helix Former Strand Former Helix Breaker Strand Breaker Amino
147	// Value Value Value Value Acid
148	// -----------------------------------------------------------------------
149	{ 1.34, 0.00, 0.00, 0.00 }, // A
150	{ 0.00, 0.00, 0.00, 0.00 }, // R
151	{ 0.50, 0.00, 0.00, 1.39 }, // D
152	{ 0.00, 0.00, 1.03, 0.00 }, // N
153	{ 0.00, 1.13, 0.00, 0.00 }, // C
154	{ 1.42, 0.00, 0.00, 2.03 }, // E
155	{ 1.05, 1.05, 0.00, 0.00 }, // Q
156	{ 0.00, 0.00, 1.21, 1.00 }, // G
157	{ 0.50, 0.00, 0.00, 0.00 }, // H
158	{ 1.02, 1.52, 0.00, 0.00 }, // I
159	{ 1.14, 1.24, 0.00, 0.00 }, // L
160	{ 1.09, 0.00, 0.00, 1.01 }, // K
161	{ 1.37, 1.00, 0.00, 0.00 }, // M
162	{ 1.07, 1.31, 0.00, 0.00 }, // F
163	{ 0.00, 0.00, 1.21, 1.36 }, // P
164	{ 0.00, 0.00, 0.00, 1.00 }, // S
165	{ 0.00, 1.13, 0.00, 0.00 }, // T
166	{ 1.02, 1.30, 0.00, 0.00 }, // W
167	{ 0.00, 1.40, 1.00, 0.00 }, // Y
168	{ 1.00, 1.62, 0.00, 0.00 }}; // V
169	\endverbatim
170	*
171	* The former and breaker values are used to find alpha-helix and beta-sheet
172	* nucleation sites in ED4_pfold_find_nucleation_sites() and to resolve overlaps
173	* in ED4_pfold_resolve_overlaps(). Addressing the array with the enums
174	* #ALPHA_HELIX or #BETA_SHEET as second index gives the former values and
175	* addressing it with #ALPHA_HELIX+2 or #BETA_SHEET+2 gives the breaker values.
176	* The first index is for the amino acid. Use #char2AA to convert an amino acid
177	* character to the corresponding index.
178	*
179	* \sa Refer to the definition in the source code for commented table.
180	*/
181	static double cf_parameters[20][4] = {
182	/* Helix Former Strand Former Helix Breaker Strand Breaker Amino
183	Value Value Value Value Acid
184	----------------------------------------------------------------------- */
185	{ 1.34, 0.00, 0.00, 0.00 }, // A
186	{ 0.00, 0.00, 0.00, 0.00 }, // R
187	{ 0.50, 0.00, 0.00, 1.39 }, // D
188	{ 0.00, 0.00, 1.03, 0.00 }, // N
189	{ 0.00, 1.13, 0.00, 0.00 }, // C
190	{ 1.42, 0.00, 0.00, 2.03 }, // E
191	{ 1.05, 1.05, 0.00, 0.00 }, // Q
192	{ 0.00, 0.00, 1.21, 1.00 }, // G
193	{ 0.50, 0.00, 0.00, 0.00 }, // H
194	{ 1.02, 1.52, 0.00, 0.00 }, // I
195	{ 1.14, 1.24, 0.00, 0.00 }, // L
196	{ 1.09, 0.00, 0.00, 1.01 }, // K
197	{ 1.37, 1.00, 0.00, 0.00 }, // M
198	{ 1.07, 1.31, 0.00, 0.00 }, // F
199	{ 0.00, 0.00, 1.21, 1.36 }, // P
200	{ 0.00, 0.00, 0.00, 1.00 }, // S
201	{ 0.00, 1.13, 0.00, 0.00 }, // T
202	{ 1.02, 1.30, 0.00, 0.00 }, // W
203	{ 0.00, 1.40, 1.00, 0.00 }, // Y
204	{ 1.00, 1.62, 0.00, 0.00 } }; // V
205
206	/*! \brief Normalized former values for alpha-helices, beta-sheets (= strands)
207	* and beta-turns as well as beta-turn probabilities.
208	*
209	* \hideinitializer
210	* \par Initial value:
211	* \verbatim
212	{
213	// P(a) P(b) P(turn) f(i) f(i+1) f(i+2) f(i+3) Amino Acid
214	// --------------------------------------------------------------------
215	{ 142, 83, 66, 0.060, 0.076, 0.035, 0.058 }, // A
216	{ 98, 93, 95, 0.070, 0.106, 0.099, 0.085 }, // R
217	{ 101, 54, 146, 0.147, 0.110, 0.179, 0.081 }, // D
218	{ 67, 89, 156, 0.161, 0.083, 0.191, 0.091 }, // N
219	{ 70, 119, 119, 0.149, 0.050, 0.117, 0.128 }, // C
220	{ 151, 37, 74, 0.056, 0.060, 0.077, 0.064 }, // E
221	{ 111, 110, 98, 0.074, 0.098, 0.037, 0.098 }, // Q
222	{ 57, 75, 156, 0.102, 0.085, 0.190, 0.152 }, // G
223	{ 100, 87, 95, 0.140, 0.047, 0.093, 0.054 }, // H
224	{ 108, 160, 47, 0.043, 0.034, 0.013, 0.056 }, // I
225	{ 121, 130, 59, 0.061, 0.025, 0.036, 0.070 }, // L
226	{ 116, 74, 101, 0.055, 0.115, 0.072, 0.095 }, // K
227	{ 145, 105, 60, 0.068, 0.082, 0.014, 0.055 }, // M
228	{ 113, 138, 60, 0.059, 0.041, 0.065, 0.065 }, // F
229	{ 57, 55, 152, 0.102, 0.301, 0.034, 0.068 }, // P
230	{ 77, 75, 143, 0.120, 0.139, 0.125, 0.106 }, // S
231	{ 83, 119, 96, 0.086, 0.108, 0.065, 0.079 }, // T
232	{ 108, 137, 96, 0.077, 0.013, 0.064, 0.167 }, // W
233	{ 69, 147, 114, 0.082, 0.065, 0.114, 0.125 }, // Y
234	{ 106, 170, 50, 0.062, 0.048, 0.028, 0.053 }}; // V
235	\endverbatim
236	*
237	* The normalized former values are used to find beta-turns in an amino acid
238	* sequence in ED4_pfold_find_turns(). Addressing the array with the enums
239	* #ALPHA_HELIX, #BETA_SHEET or #BETA_TURN as second index gives the former
240	* values and addressing it with #BETA_TURN+i \f$(1 <= i <= 4)\f$ gives the
241	* turn probabilities. The first index is for the amino acid. Use #char2AA to
242	* convert an amino acid character to the corresponding index.
243	*
244	* \sa Refer to the definition in the source code for commented table.
245	*/
246	static double cf_parameters_norm[20][7] = {
247	/* P(a) P(b) P(turn) f(i) f(i+1) f(i+2) f(i+3) Amino Acid
248	-------------------------------------------------------------------- */
249	{ 142, 83, 66, 0.060, 0.076, 0.035, 0.058 }, // A
250	{ 98, 93, 95, 0.070, 0.106, 0.099, 0.085 }, // R
251	{ 101, 54, 146, 0.147, 0.110, 0.179, 0.081 }, // D
252	{ 67, 89, 156, 0.161, 0.083, 0.191, 0.091 }, // N
253	{ 70, 119, 119, 0.149, 0.050, 0.117, 0.128 }, // C
254	{ 151, 37, 74, 0.056, 0.060, 0.077, 0.064 }, // E
255	{ 111, 110, 98, 0.074, 0.098, 0.037, 0.098 }, // Q
256	{ 57, 75, 156, 0.102, 0.085, 0.190, 0.152 }, // G
257	{ 100, 87, 95, 0.140, 0.047, 0.093, 0.054 }, // H
258	{ 108, 160, 47, 0.043, 0.034, 0.013, 0.056 }, // I
259	{ 121, 130, 59, 0.061, 0.025, 0.036, 0.070 }, // L
260	{ 116, 74, 101, 0.055, 0.115, 0.072, 0.095 }, // K
261	{ 145, 105, 60, 0.068, 0.082, 0.014, 0.055 }, // M
262	{ 113, 138, 60, 0.059, 0.041, 0.065, 0.065 }, // F
263	{ 57, 55, 152, 0.102, 0.301, 0.034, 0.068 }, // P
264	{ 77, 75, 143, 0.120, 0.139, 0.125, 0.106 }, // S
265	{ 83, 119, 96, 0.086, 0.108, 0.065, 0.079 }, // T
266	{ 108, 137, 96, 0.077, 0.013, 0.064, 0.167 }, // W
267	{ 69, 147, 114, 0.082, 0.065, 0.114, 0.125 }, // Y
268	{ 106, 170, 50, 0.062, 0.048, 0.028, 0.053 } }; // V
269
270	// --------------------------------------------------------------------------------
271
272	/*! \brief Symmetric arithmetic rounding of a double value to an integer value.
273	*
274	* \param[in] d Value to be rounded
275	* \return Rounded value
276	*
277	* Rounds a double value to an integer value using symmetric arithmetic rounding,
278	* i.e. a number \f$x.y\f$ is rounded to \f$x\f$ if \f$y < 5\f$ and to \f$x+1\f$
279	* otherwise.
280	*/
281	inline int ED4_pfold_round_sym(double d) {
282	return int(d + .5);
283	}
284
285
286	/*! \brief Initializes static variables.
287	*
288	* So far, this function only concerns #char2AA which gets initialized here.
289	* See #char2AA for details on the values. It is called by
290	* ED4_pfold_predict_structure() and ED4_pfold_calculate_secstruct_match().
291	*
292	* \attention If any other prediction function is used alone before calling one
293	* of the mentioned functions, this function has to be called first.
294	*/
295	static void ED4_pfold_init_statics() {
296	// specify the characters used for amino acid one letter code
297	if (!char2AA) {
298	char2AA = new int [256];
299	for (int i = 0; i < 256; i++) {
300	char2AA[i] = -1;
301	}
302	for (int i = 0; amino_acids[i]; i++) {
303	char2AA[(unsigned char)amino_acids[i]] = i;
304	}
305	}
306	}
307
308
309	/*! \brief Finds nucleation sites that initiate the specified structure.
310	*
311	* \param[in] sequence Amino acid sequence
312	* \param[out] structure Predicted secondary structure
313	* \param[in] length Size of \a sequence and \a structure
314	* \param[in] s Secondary structure type (either #ALPHA_HELIX or #BETA_SHEET)
315	*
316	* This function finds nucleation sites that initiate the specified structure
317	* (alpha-helix or beta-sheet). A window of a fixed size is moved over the
318	* sequence and former and breaker values (as defined by #cf_parameters) for
319	* the amino acids in the window are summed up. If the former values in this
320	* region reach a certain value and the breaker values do not exceed a certain
321	* limit a nucleation site is formed, i.e. the region is assumed to be the
322	* corresponding secondary structure. The result is stored in \a structure.
323	*/
324	static void ED4_pfold_find_nucleation_sites(const unsigned char sequence, char structure, int length, const PFOLD_STRUCTURE s) {
325	#ifdef SHOW_PROGRESS
326	cout << endl << "Searching for nucleation sites:" << endl;
327	#endif
328	e4_assert(s == ALPHA_HELIX \|\| s == BETA_SHEET); // incorrect value for s
329	e4_assert(char2AA); // char2AA not initialized; ED4_pfold_init_statics() failed or hasn't been called yet
330
331	char *gap_chars = ED4_ROOT->aw_root->awar(ED4_AWAR_GAP_CHARS)->read_string(); // gap characters
332	int windowSize = (s == ALPHA_HELIX ? 6 : 5); // window size used for finding nucleation sites
333	double sumOfFormVal = 0, sumOfBreakVal = 0; // sum of former resp. breaker values in window
334	int pos; // current position in sequence
335	int count; // number of amino acids found in window
336
337	for (int i = 0; i < ((length + 1) - windowSize); i++) {
338	int aa = 0; // amino acid
339
340	pos = i;
341	for (count = 0; count < windowSize; count++) {
342	// skip gaps
343	while (pos < ((length + 1) - windowSize) &&
344	strchr(gap_chars, sequence[pos + count])) {
345	pos++;
346	}
347	aa = char2AA[sequence[pos + count]];
348	if (aa == -1) break; // unknown character found
349
350	// compute former and breaker values
351	sumOfFormVal += cf_parameters[aa][s];
352	sumOfBreakVal += cf_parameters[aa][s+2];
353	}
354
355	// assign sequence and save start and end of nucleation site for later extension
356	if ((sumOfFormVal > (windowSize - 2)) && (sumOfBreakVal < 2)) {
357	for (int j = i; j < (pos + count); j++) {
358	if (char2AA[sequence[j]] != -1) structure[j] = structure_chars[s];
359	}
360	}
361	if (aa == -1) i = pos + count; // skip unknown character
362	sumOfFormVal = 0, sumOfBreakVal = 0;
363	}
364
365	free(gap_chars);
366	#ifdef SHOW_PROGRESS
367	cout << structure << endl;
368	#endif
369	}
370
371
372	/*! \brief Extends the found nucleation sites in both directions.
373	*
374	* \param[in] sequence Amino acid sequence
375	* \param[out] structure Predicted secondary structure
376	* \param[in] length Size of \a sequence and \a structure
377	* \param[in] s Secondary structure type (either #ALPHA_HELIX or #BETA_SHEET)
378	*
379	* The function extends the nucleation sites found by ED4_pfold_find_nucleation_sites()
380	* in both directions. Extension continues until a certain amino acid constellation
381	* is found. The amino acid 'P' breaks an alpha-helix and 'P' as well as 'E' break
382	* a beta-sheet. Also, two successive breakers or one breaker followed by an
383	* indifferent amino acid (as defined by #structure_breaker and #structure_indifferent)
384	* break the structure. The result is stored in \a structure.
385	*/
386	static void ED4_pfold_extend_nucleation_sites(const unsigned char sequence, char structure, int length, const PFOLD_STRUCTURE s) {
387	#ifdef SHOW_PROGRESS
388	cout << endl << "Extending nucleation sites:" << endl;
389	#endif
390	e4_assert(s == ALPHA_HELIX \|\| s == BETA_SHEET); // incorrect value for s
391	e4_assert(char2AA); // char2AA not initialized; ED4_pfold_init_statics() failed or hasn't been called yet
392
393	bool break_structure = false; // break the current structure
394	int start = 0, end = 0; // start and end of nucleation site
395	int neighbour = 0; // neighbour of start or end
396
397	char *gap_chars = ED4_ROOT->aw_root->awar(ED4_AWAR_GAP_CHARS)->read_string(); // gap characters
398
399	// find nucleation sites and extend them in both directions (for whole sequence)
400	for (int indStruct = 0; indStruct < length; indStruct++) {
401
402	// search start and end of nucleated region
403	while (indStruct < length &&
404	((structure[indStruct] == ' ') \|\| strchr(gap_chars, sequence[indStruct]))
405	) indStruct++;
406
407	if (indStruct >= length) break;
408	// get next amino acid that is not included in nucleation site
409	start = indStruct - 1;
410	while (indStruct < length &&
411	(structure[indStruct] != ' ' \|\| strchr(gap_chars, sequence[indStruct]))) {
412	indStruct++;
413	}
414	// get next amino acid that is not included in nucleation site
415	end = indStruct;
416
417	// extend nucleated region in both directions
418	// left side:
419	while (start > 1 && strchr(gap_chars, sequence[start])) {
420	start--; // skip gaps
421	}
422	// break if no amino acid is found
423	if (start >= 0) break_structure = (char2AA[sequence[start]] == -1);
424	while (!break_structure && (start > 1) && (structure[start] == ' ')) {
425	// break if absolute breaker (P or E) is found
426	break_structure = (sequence[start] == 'P');
427	if (s == BETA_SHEET) break_structure \|= (sequence[start] == 'E');
428	if (break_structure) break;
429	// check for breaker at current position
430	break_structure = strchr(structure_breaker[s], sequence[start]);
431	neighbour = start - 1; // get neighbour
432	while (neighbour > 0 && strchr(gap_chars, sequence[neighbour])) {
433	neighbour--; // skip gaps
434	}
435	// break if out of bounds or no amino acid is found
436	if (neighbour <= 0 \|\| char2AA[sequence[neighbour]] == -1) {
437	break;
438	}
439	// break if another breaker or indifferent amino acid is found
440	break_structure &=
441	strchr(structure_breaker[s], sequence[neighbour]) \|\|
442	strchr(structure_indifferent[s], sequence[neighbour]);
443	if (!break_structure) {
444	structure[start] = structure_chars[s];
445	}
446	start = neighbour; // continue with neighbour
447	}
448
449	// right side:
450	while (end < (length - 2) && strchr(gap_chars, sequence[end])) {
451	end++; // skip gaps
452	}
453	// break if no amino acid is found
454	if (end <= (length - 1)) break_structure = (char2AA[sequence[end]] == -1);
455	while (!break_structure && (end < (length - 2))) {
456	// break if absolute breaker (P or E) is found
457	break_structure = (sequence[end] == 'P');
458	if (s == BETA_SHEET) break_structure \|= (sequence[end] == 'E');
459	if (break_structure) break;
460	// check for breaker at current position
461	break_structure = strchr(structure_breaker[s], sequence[end]);
462	neighbour = end + 1; // get neighbour
463	while (neighbour < (length - 2) && strchr(gap_chars, sequence[neighbour])) {
464	neighbour++; // skip gaps
465	}
466	// break if out of bounds or no amino acid is found
467	if (neighbour >= (length - 1) \|\| char2AA[sequence[neighbour]] == -1) {
468	end = neighbour;
469	break;
470	}
471	// break if another breaker or indifferent amino acid is found
472	break_structure &=
473	strchr(structure_breaker[s], sequence[neighbour]) \|\|
474	strchr(structure_indifferent[s], sequence[neighbour]);
475	if (!break_structure) {
476	structure[end] = structure_chars[s];
477	}
478	end = neighbour; // continue with neighbour
479	}
480	indStruct = end; // continue with end
481	}
482
483	free(gap_chars);
484	#ifdef SHOW_PROGRESS
485	cout << structure << endl;
486	#endif
487	}
488
489
490	/*! \brief Predicts beta-turns from the given amino acid sequence
491	*
492	* \param[in] sequence Amino acid sequence
493	* \param[out] structure Predicted secondary structure
494	* \param[in] length Size of \a sequence and \a structure
495	*
496	* A window of a fixed size is moved over the sequence and former values for
497	* alpha-helices, beta-sheets and beta-turns are summed up. In addition,
498	* beta-turn probabilities are multiplied. The values are specified in
499	* #cf_parameters_norm. If the former values for beta-turn are greater than
500	* the ones for alpha-helix and beta-sheet and the turn probabilities
501	* exceed a certain limit the region is assumed to be a beta-turn. The result
502	* is stored in \a structure.
503	*/
504	static void ED4_pfold_find_turns(const unsigned char sequence, char structure, int length) {
505	#ifdef SHOW_PROGRESS
506	cout << endl << "Searching for beta-turns: " << endl;
507	#endif
508	e4_assert(char2AA); // char2AA not initialized; ED4_pfold_init_statics() failed or hasn't been called yet
509
510	char *gap_chars = ED4_ROOT->aw_root->awar(ED4_AWAR_GAP_CHARS)->read_string(); // gap characters
511	const int windowSize = 4; // window size
512	double P_a = 0, P_b = 0, P_turn = 0; // former values for helix, sheet and beta-turn
513	double p_t = 1; // probability for beta-turn
514	int pos; // position in sequence
515	int count; // position in window
516	int aa; // amino acid
517
518	for (int i = 0; i < ((length + 1) - windowSize); i++) {
519	pos = i;
520	for (count = 0; count < windowSize; count++) {
521	// skip gaps
522	while (pos < ((length + 1) - windowSize) &&
523	strchr(gap_chars, sequence[pos + count])) {
524	pos++;
525	}
526	aa = char2AA[sequence[pos + count]];
527	if (aa == -1) break; // unknown character found
528
529	// compute former values and turn probability
530	P_a += cf_parameters_norm[aa][0];
531	P_b += cf_parameters_norm[aa][1];
532	P_turn += cf_parameters_norm[aa][2];
533	p_t *= cf_parameters_norm[aa][3 + count];
534	}
535	if (count != 0) {
536	P_a /= count;
537	P_b /= count;
538	P_turn /= count;
539	if ((p_t > 0.000075) && (P_turn > 100) && (P_turn > P_a) && (P_turn > P_b)) {
540	for (int j = i; j < (pos + count); j++) {
541	if (char2AA[sequence[j]] != -1) structure[j] = structure_chars[BETA_TURN];
542	}
543	}
544	}
545	if (aa == -1) i = pos + count; // skip unknown character
546	p_t = 1, P_a = 0, P_b = 0, P_turn = 0;
547	}
548
549	free(gap_chars);
550	#ifdef SHOW_PROGRESS
551	cout << structure << endl;
552	#endif
553	}
554
555
556	/*! \brief Resolves overlaps of predicted secondary structures and creates structure summary.
557	*
558	* \param[in] sequence Amino acid sequence
559	* \param[in,out] structures Predicted secondary structures (#ALPHA_HELIX, #BETA_SHEET,
560	* #BETA_TURN and #STRUCTURE_SUMMARY, in this order)
561	* \param[in] length Size of \a sequence and \a structures[i]
562	*
563	* The function takes the given predicted structures (alpha-helix, beta-sheet
564	* and beta-turn) and searches for overlapping regions. If a beta-turn is found
565	* the structure summary is assumed to be a beta-turn. For overlapping alpha-helices
566	* and beta-sheets the former values are summed up for this region and the
567	* structure summary is assumed to be the structure type with the higher former
568	* value. The result is stored in \a structures[3] (= \a structures[#STRUCTURE_SUMMARY]).
569	*
570	* \attention I couldn't find a standard procedure for resolving overlaps and
571	* there might be other (possibly better) ways to do that.
572	*/
573	static void ED4_pfold_resolve_overlaps(const unsigned char sequence, char structures[4], int length) {
574	#ifdef SHOW_PROGRESS
575	cout << endl << "Resolving overlaps: " << endl;
576	#endif
577	e4_assert(char2AA); // char2AA not initialized; ED4_pfold_init_statics() failed or hasn't been called yet
578
579	int start = -1; // start of overlap
580	int end = -1; // end of overlap
581	double P_a = 0; // sum of former values for alpha-helix in overlapping regions
582	double P_b = 0; // sum of former values for beta-sheet in overlapping regions
583	PFOLD_STRUCTURE s; // structure with the highest former values
584	char *gap_chars = ED4_ROOT->aw_root->awar(ED4_AWAR_GAP_CHARS)->read_string(); // gap characters
585
586	// scan structures for overlaps
587	for (int pos = 0; pos < length; pos++) {
588
589	// if beta-turn is found at position pos -> summary is beta-turn
590	if (structures[BETA_TURN][pos] != ' ') {
591	structures[STRUCTURE_SUMMARY][pos] = structure_chars[BETA_TURN];
592
593	// if helix and sheet are overlapping and no beta-turn is found -> check which structure has the highest sum of former values
594	}
595	else if ((structures[ALPHA_HELIX][pos] != ' ') && (structures[BETA_SHEET][pos] != ' ')) {
596
597	// search start and end of overlap (as long as no beta-turn is found)
598	start = pos;
599	end = pos;
600	while (structures[ALPHA_HELIX][end] != ' ' && structures[BETA_SHEET][end] != ' ' &&
601	structures[BETA_TURN][end] == ' ') {
602	end++;
603	}
604
605	// calculate P_a and P_b for overlap
606	for (int i = start; i < end; i++) {
607	// skip gaps
608	while (i < end && strchr(gap_chars, sequence[i])) {
609	i++;
610	}
611	int aa = char2AA[sequence[i]];
612	if (aa != -1) {
613	P_a += cf_parameters[aa][ALPHA_HELIX];
614	P_b += cf_parameters[aa][BETA_SHEET];
615	}
616	}
617
618	// check which structure is more likely and set s appropriately
619	s = (P_a > P_b) ? ALPHA_HELIX : BETA_SHEET;
620
621	// set structure for overlapping region
622	for (int i = start; i < end; i++) {
623	structures[STRUCTURE_SUMMARY][i] = structure_chars[s];
624	}
625
626	// set variables for next pass of loop resp. end of loop
627	P_a = 0, P_b = 0;
628	pos = end - 1;
629
630	// if helix and sheet are not overlapping and no beta-turn is found -> set structure accordingly
631	}
632	else {
633	// summary at position pos is helix resp. sheet
634	if (structures[ALPHA_HELIX][pos] != ' ') {
635	structures[STRUCTURE_SUMMARY][pos] = structure_chars[ALPHA_HELIX];
636	}
637	else if (structures[BETA_SHEET][pos] != ' ') {
638	structures[STRUCTURE_SUMMARY][pos] = structure_chars[BETA_SHEET];
639	}
640	}
641	}
642
643	free(gap_chars);
644	#ifdef SHOW_PROGRESS
645	cout << structures[summary] << endl;
646	#endif
647	}
648
649
650	/*! \brief Predicts protein secondary structures from the amino acid sequence.
651	*
652	* \param[in] sequence Amino acid sequence
653	* \param[out] structures Predicted secondary structures (#ALPHA_HELIX, #BETA_SHEET,
654	* #BETA_TURN and #STRUCTURE_SUMMARY, in this order)
655	* \param[in] length Size of \a sequence and \a structures[i]
656	* \return Error description, if an error occurred; 0 otherwise
657	*
658	* This function predicts the protein secondary structures from the amino acid
659	* sequence according to the Chou-Fasman algorithm. In a first step, nucleation sites
660	* for alpha-helices and beta-sheets are found using ED4_pfold_find_nucleation_sites().
661	* In a next step, the found structures are extended obeying certain rules with
662	* ED4_pfold_extend_nucleation_sites(). Beta-turns are found with the function
663	* ED4_pfold_find_turns(). In a final step, overlapping regions are identified and
664	* resolved to create a structure summary with ED4_pfold_resolve_overlaps().
665	* The results are written to \a structures[i] and can be accessed via the enums
666	* #ALPHA_HELIX, #BETA_SHEET, #BETA_TURN and #STRUCTURE_SUMMARY.
667	*/
668	static GB_ERROR ED4_pfold_predict_structure(const unsigned char sequence, char structures[4], int length) {
669	#ifdef SHOW_PROGRESS
670	cout << endl << "Predicting secondary structure for sequence:" << endl << sequence << endl;
671	#endif
672	GB_ERROR error = NULp;
673	e4_assert((int)strlen((const char *)sequence) == length);
674
675	// init memory
676	ED4_pfold_init_statics();
677	e4_assert(char2AA);
678
679	// predict structure
680	ED4_pfold_find_nucleation_sites(sequence, structures[ALPHA_HELIX], length, ALPHA_HELIX);
681	ED4_pfold_find_nucleation_sites(sequence, structures[BETA_SHEET], length, BETA_SHEET);
682	ED4_pfold_extend_nucleation_sites(sequence, structures[ALPHA_HELIX], length, ALPHA_HELIX);
683	ED4_pfold_extend_nucleation_sites(sequence, structures[BETA_SHEET], length, BETA_SHEET);
684	ED4_pfold_find_turns(sequence, structures[BETA_TURN], length);
685	ED4_pfold_resolve_overlaps(sequence, structures, length);
686
687	return error;
688	}
689
690	GB_ERROR ED4_pfold_calculate_secstruct_match(const unsigned char structure_sai, const unsigned char structure_cmp, const int start, const int end, char *result_buffer, PFOLD_MATCH_METHOD match_method) {
691	GB_ERROR error = NULp;
692	e4_assert(structure_sai);
693	e4_assert(structure_cmp);
694	e4_assert(start >= 0);
695	e4_assert(result_buffer);
696	e4_assert(match_method >= 0 && match_method < PFOLD_MATCH_METHOD_COUNT);
697	ED4_pfold_init_statics();
698	e4_assert(char2AA);
699
700	e4_assert(end >= start);
701
702	size_t end_minus_start = size_t(end-start); // @@@ use this
703
704	size_t length = strlen((const char *)structure_sai);
705	size_t match_end = std::min(std::min(end_minus_start, length), strlen((const char *)structure_cmp));
706
707	enum { BEND = 3, NOSTRUCT = 4 };
708	char *struct_chars[] = {
709	ARB_strdup("HGI"), // helical structures (enum ALPHA_HELIX)
710	ARB_strdup("EB"), // sheet-like structures (enum BETA_SHEET)
711	ARB_strdup("T"), // beta-turn (enum BETA_TURN)
712	ARB_strdup("S"), // bends (enum BEND)
713	ARB_strdup("") // no structure (enum NOSTRUCT)
714	};
715
716	// init awars
717	AW_root *awr = ED4_ROOT->aw_root;
718	char *gap_chars = awr->awar(ED4_AWAR_GAP_CHARS)->read_string();
719	char *pairs[PFOLD_MATCH_TYPE_COUNT] = { NULp };
720	char *pair_chars[PFOLD_MATCH_TYPE_COUNT] = { NULp };
721	char *pair_chars_2 = awr->awar(PFOLD_AWAR_SYMBOL_TEMPLATE_2)->read_string();
722	char awar[256];
723
724	for (int i = 0; pfold_match_type_awars[i].name; i++) {
725	sprintf(awar, PFOLD_AWAR_PAIR_TEMPLATE, pfold_match_type_awars[i].name);
726	pairs[i] = awr->awar(awar)->read_string();
727	sprintf(awar, PFOLD_AWAR_SYMBOL_TEMPLATE, pfold_match_type_awars[i].name);
728	pair_chars[i] = awr->awar(awar)->read_string();
729	}
730
731	int struct_start = start;
732	int struct_end = start;
733	size_t count = 0;
734	int current_struct = 4;
735	int aa = -1;
736	double prob = 0;
737
738	// TODO: move this check to callback for the corresponding field?
739	if (strlen(pair_chars_2) != 10) {
740	error = GB_export_error("You have to define 10 match symbols.");
741	}
742
743	if (!error) {
744	switch (match_method) {
745
746	case SECSTRUCT_SECSTRUCT:
747	// TODO: one could try to find out, if structure_cmp is really a secondary structure and not a sequence (define awar for allowed symbols in secondary structure)
748	for (count = 0; count < match_end; count++) {
749	result_buffer[count] = *pair_chars[STRUCT_UNKNOWN];
750	for (int n_pt = 0; n_pt < PFOLD_MATCH_TYPE_COUNT; n_pt++) {
751	int len = strlen(pairs[n_pt])-1;
752	char *p = pairs[n_pt];
753	for (int j = 0; j < len; j += 3) {
754	if ((p[j] == structure_sai[count + start] && p[j+1] == structure_cmp[count + start]) \|\|
755	(p[j] == structure_cmp[count + start] && p[j+1] == structure_sai[count + start])) {
756	result_buffer[count] = *pair_chars[n_pt];
757	n_pt = PFOLD_MATCH_TYPE_COUNT; // stop searching the pair types
758	break; // stop searching the pairs array
759	}
760	}
761	}
762	}
763
764	// fill the remaining buffer with spaces
765	while (count <= end_minus_start) { // LOOP_VECTORIZED[!<720]
766	result_buffer[count] = ' ';
767	count++;
768	}
769	break;
770
771	case SECSTRUCT_SEQUENCE:
772	// clear result buffer
773	for (size_t i = 0; i <= end_minus_start; i++) result_buffer[i] = ' '; // LOOP_VECTORIZED[!<720]
774
775	// skip gaps
776	while (structure_sai[struct_start] != '\0' && structure_cmp[struct_start] != '\0' &&
777	strchr(gap_chars, structure_sai[struct_start]) &&
778	strchr(gap_chars, structure_cmp[struct_start])) {
779	struct_start++;
780	}
781	if (structure_sai[struct_start] == '\0' \|\| structure_cmp[struct_start] == '\0') break;
782
783	// check structure at the first displayed position and find out where it starts
784	for (current_struct = 0; current_struct < 4 && !strchr(struct_chars[current_struct], structure_sai[struct_start]); current_struct++) {
785	;
786	}
787	if (current_struct != BEND && current_struct != NOSTRUCT) {
788	struct_start--; // check structure left of start
789	while (struct_start >= 0) {
790	// skip gaps
791	while (struct_start > 0 &&
792	strchr(gap_chars, structure_sai[struct_start]) &&
793	strchr(gap_chars, structure_cmp[struct_start])) {
794	struct_start--;
795	}
796	aa = char2AA[structure_cmp[struct_start]];
797	if (struct_start == 0 && aa == -1) { // nothing was found
798	break;
799	}
800	else if (strchr(struct_chars[current_struct], structure_sai[struct_start]) && aa != -1) {
801	prob += cf_former(aa, current_struct) - cf_breaker(aa, current_struct); // sum up probabilities
802	struct_start--;
803	count++;
804	}
805	else {
806	break;
807	}
808	}
809	}
810
811	// parse structures
812	struct_start = start;
813	// skip gaps
814	while (structure_sai[struct_start] != '\0' && structure_cmp[struct_start] != '\0' &&
815	strchr(gap_chars, structure_sai[struct_start]) &&
816	strchr(gap_chars, structure_cmp[struct_start])) {
817	struct_start++;
818	}
819	if (structure_sai[struct_start] == '\0' \|\| structure_cmp[struct_start] == '\0') break;
820	struct_end = struct_start;
821	while (struct_end < end) {
822	aa = char2AA[structure_cmp[struct_end]];
823	if (current_struct == NOSTRUCT) { // no structure found -> move on
824	struct_end++;
825	}
826	else if (aa == -1) { // structure found but no corresponding amino acid -> doesn't fit at all
827	result_buffer[struct_end - start] = pair_chars_2[0];
828	struct_end++;
829	}
830	else if (current_struct == BEND) { // bend found -> fits perfectly everywhere
831	result_buffer[struct_end - start] = pair_chars_2[9];
832	struct_end++;
833	}
834	else { // helix, sheet or beta-turn found -> while structure doesn't change: sum up probabilities
835	while (structure_sai[struct_end] != '\0') {
836	// skip gaps
837	while (strchr(gap_chars, structure_sai[struct_end]) &&
838	strchr(gap_chars, structure_cmp[struct_end]) &&
839	structure_sai[struct_end] != '\0' && structure_cmp[struct_end] != '\0') {
840	struct_end++;
841	}
842	aa = char2AA[structure_cmp[struct_end]];
843	if (structure_sai[struct_end] != '\0' && structure_cmp[struct_end] != '\0' &&
844	strchr(struct_chars[current_struct], structure_sai[struct_end]) && aa != -1) {
845	prob += cf_former(aa, current_struct) - cf_breaker(aa, current_struct); // sum up probabilities
846	struct_end++;
847	count++;
848	}
849	else {
850	break;
851	}
852	}
853
854	if (count != 0) {
855	// compute average and normalize probability
856	prob /= count;
857	prob = (prob + max_breaker_value[current_struct] - min_former_value[current_struct]) / (max_breaker_value[current_struct] + max_former_value[current_struct] - min_former_value[current_struct]);
858
859	#if 0 // code w/o effect
860	// map to match characters and store in result_buffer
861	int prob_normalized = ED4_pfold_round_sym(prob * 9);
862	// e4_assert(prob_normalized >= 0 && prob_normalized <= 9); // if this happens check if normalization is correct or some undefined characters mess everything up
863	char prob_symbol = *pair_chars[STRUCT_UNKNOWN];
864	if (prob_normalized >= 0 && prob_normalized <= 9) {
865	prob_symbol = pair_chars_2[prob_normalized];
866	}
867	#endif
868	}
869	}
870
871	// find next structure type
872	if (structure_sai[struct_end] == '\0' \|\| structure_cmp[struct_end] == '\0') {
873	break;
874	}
875	else {
876	prob = 0;
877	count = 0;
878	struct_start = struct_end;
879	for (current_struct = 0; current_struct < 4 && !strchr(struct_chars[current_struct], structure_sai[struct_start]); current_struct++) {
880	;
881	}
882	}
883	}
884	break;
885
886	case SECSTRUCT_SEQUENCE_PREDICT:
887	// predict structures from structure_cmp and compare with structure_sai
888	char *structures[4];
889	for (int i = 0; i < 4 && !error; i++) {
890	structures[i] = new char [length + 1];
891	if (!structures[i]) {
892	error = "Out of memory";
893	}
894	else {
895	for (size_t j = 0; j < length; j++) { // LOOP_VECTORIZED[!<810]
896	structures[i][j] = ' ';
897	}
898	structures[i][length] = '\0';
899	}
900	}
901	if (!error) error = ED4_pfold_predict_structure(structure_cmp, structures, length);
902	if (!error) {
903	for (count = 0; count < match_end; count++) {
904	result_buffer[count] = *pair_chars[STRUCT_UNKNOWN];
905	if (!strchr(gap_chars, structure_sai[count + start]) && strchr(gap_chars, structure_cmp[count + start])) {
906	result_buffer[count] = *pair_chars[STRUCT_NO_MATCH];
907	} else if (strchr(gap_chars, structure_sai[count + start]) \|\|
908	(structures[ALPHA_HELIX][count + start] == ' ' && structures[BETA_SHEET][count + start] == ' ' && structures[BETA_TURN][count + start] == ' ')) {
909	result_buffer[count] = *pair_chars[STRUCT_PERFECT_MATCH];
910	}
911	else {
912	// search for good match first
913	// if found: stop searching
914	// otherwise: continue searching for a less good match
915	for (int n_pt = 0; n_pt < PFOLD_MATCH_TYPE_COUNT; n_pt++) {
916	int len = strlen(pairs[n_pt])-1;
917	char *p = pairs[n_pt];
918	for (int n_struct = 0; n_struct < 3; n_struct++) {
919	for (int j = 0; j < len; j += 3) {
920	if ((p[j] == structures[n_struct][count + start] && p[j+1] == structure_sai[count + start]) \|\|
921	(p[j] == structure_sai[count + start] && p[j+1] == structures[n_struct][count + start])) {
922	result_buffer[count] = *pair_chars[n_pt];
923	n_struct = 3; // stop searching the structures
924	n_pt = PFOLD_MATCH_TYPE_COUNT; // stop searching the pair types
925	break; // stop searching the pairs array
926	}
927	}
928	}
929	}
930	}
931	}
932	// fill the remaining buffer with spaces
933	while (count <= end_minus_start) { // LOOP_VECTORIZED[!<720]
934	result_buffer[count] = ' ';
935	count++;
936	}
937	}
938	// free buffer
939	for (int i = 0; i < 4; i++) {
940	if (structures[i]) {
941	delete structures[i];
942	structures[i] = NULp;
943	}
944	}
945	break;
946
947	default:
948	e4_assert(0); // function called with invalid argument for 'match_method'
949	break;
950	}
951	}
952
953	free(gap_chars);
954	free(pair_chars_2);
955	for (int i = 0; pfold_match_type_awars[i].name; i++) {
956	free(pairs[i]);
957	free(pair_chars[i]);
958	}
959	if (error) {
960	// clear result buffer
961	for (size_t i = 0; i <= end_minus_start; i++) result_buffer[i] = ' '; // LOOP_VECTORIZED[!<720]
962	}
963	return error;
964	}
965
966
967	GB_ERROR ED4_pfold_set_SAI(char *protstruct, GBDATA gb_main, const char alignment_name, long protstruct_len) { // @@@ use references for protstruct + protstruct_len
968	GB_ERROR error = NULp;
969	GB_transaction ta(gb_main);
970	AW_root *aw_root = ED4_ROOT->aw_root;
971	char *SAI_name = aw_root->awar(PFOLD_AWAR_SELECTED_SAI)->read_string();
972	GBDATA *gb_protstruct = GBT_find_SAI(gb_main, SAI_name);
973
974	freenull(*protstruct);
975
976	if (gb_protstruct) {
977	GBDATA *gb_data = GBT_find_sequence(gb_protstruct, alignment_name);
978	if (gb_data) *protstruct = GB_read_string(gb_data); // @@@ NOT_ALL_SAI_HAVE_DATA
979	}
980
981	if (*protstruct) {
982	if (protstruct_len) protstruct_len = (long)strlen(protstruct);
983	}
984	else {
985	if (protstruct_len) protstruct_len = NULp;
986	if (aw_root->awar(PFOLD_AWAR_ENABLE)->read_int()) {
987	error = GBS_global_string("SAI \"%s\" does not exist.\nDisabled protein structure display!", SAI_name);
988	aw_root->awar(PFOLD_AWAR_ENABLE)->write_int(0);
989	}
990	}
991
992	free(SAI_name);
993	return error;
994	}
995
996	/*! \brief Callback function to select the reference protein structure SAI and to
997	* update the SAI option menu.
998	*
999	* \param[in] aww The calling window
1000	* \param[in,out] oms The SAI option menu
1001	* \param[in] set_sai Specifies if SAI should be updated
1002	*
1003	* The function is called whenever the selected SAI or the SAI filter is changed
1004	* in the "Protein Match Settings" dialog (see ED4_pfold_create_props_window()).
1005	* It can be called with \a set_sai defined to update the reference protein secondary
1006	* structure SAI in the editor via ED4_pfold_set_SAI() and to update the selection in
1007	* the SAI option menu. If \a set_sai is 0 only the option menu is updated. This is
1008	* necessary if only the SAI filter changed but not the selected SAI.
1009	*/
1010
1011	static void ED4_pfold_select_SAI_and_update_option_menu(AW_window aww, AW_option_menu_struct oms, bool set_sai) {
1012	e4_assert(aww);
1013	e4_assert(oms);
1014	char *selected_sai = ED4_ROOT->aw_root->awar(PFOLD_AWAR_SELECTED_SAI)->read_string();
1015	char *sai_filter = ED4_ROOT->aw_root->awar(PFOLD_AWAR_SAI_FILTER)->read_string();
1016
1017	GBDATA *gb_main = ED4_ROOT->get_gb_main();
1018	if (set_sai) {
1019	const char *err = ED4_pfold_set_SAI(&ED4_ROOT->protstruct, gb_main, ED4_ROOT->get_alignment_name(), &ED4_ROOT->protstruct_len);
1020	if (err) aw_message(err);
1021	}
1022
1023	aww->clear_option_menu(oms);
1024	aww->insert_default_option(selected_sai, "", selected_sai);
1025	GB_transaction ta(gb_main);
1026
1027	for (GBDATA *sai = GBT_first_SAI(gb_main);
1028	sai;
1029	sai = GBT_next_SAI(sai))
1030	{
1031	const char *sai_name = GBT_get_name_or_description(sai);
1032	if (strcmp(sai_name, selected_sai) != 0 && strstr(sai_name, sai_filter)) {
1033	aww->callback(makeWindowCallback(ED4_pfold_select_SAI_and_update_option_menu, oms, true)); // @@@ used as OPTIONMENU_SELECT_CB (see #559)
1034	aww->insert_option(sai_name, "", sai_name);
1035	}
1036	}
1037
1038	free(selected_sai);
1039	free(sai_filter);
1040	aww->update_option_menu();
1041	// ED4_expose_all_windows();
1042	// @@@ need update here ?
1043	}
1044
1045	static void setup_pfold_config(AWT_config_definition& cdef) {
1046	cdef.add(PFOLD_AWAR_ENABLE, "enable");
1047	cdef.add(PFOLD_AWAR_SELECTED_SAI, "sai");
1048	cdef.add(PFOLD_AWAR_SAI_FILTER, "saifilter");
1049	cdef.add(PFOLD_AWAR_MATCH_METHOD, "matchmethod");
1050	cdef.add(PFOLD_AWAR_SYMBOL_TEMPLATE_2, "matchsymbols2");
1051
1052	char awar[256];
1053	for (int i = 0; pfold_match_type_awars[i].name; i++) {
1054	const char *name = pfold_match_type_awars[i].name;
1055	sprintf(awar, PFOLD_AWAR_PAIR_TEMPLATE, name);
1056	cdef.add(awar, name);
1057	sprintf(awar, PFOLD_AWAR_SYMBOL_TEMPLATE, name);
1058	cdef.add(awar, GBS_global_string("%s_symbol", name));
1059	}
1060	}
1061
1062	AW_window ED4_pfold_create_props_window(AW_root awr, const WindowCallback *refreshCallback) {
1063	AW_window_simple *aws = new AW_window_simple;
1064	aws->init(awr, "PFOLD_PROPS", "PROTEIN_MATCH_SETTINGS");
1065
1066	// create close button
1067	aws->at(10, 10);
1068	aws->auto_space(5, 2);
1069	aws->callback(AW_POPDOWN);
1070	aws->create_button("CLOSE", "CLOSE", "C");
1071
1072	// create help button
1073	aws->callback(makeHelpCallback("pfold_props.hlp"));
1074	aws->create_button("HELP", "HELP");
1075
1076	AWT_insert_config_manager(aws, AW_ROOT_DEFAULT, "pfold", makeConfigSetupCallback(setup_pfold_config));
1077
1078	aws->at_newline();
1079
1080	aws->label_length(27);
1081	int ex = 0, ey = 0;
1082	char awar[256];
1083
1084	// create toggle field for showing the protein structure match
1085	aws->label("Show protein structure match?");
1086	aws->callback(*refreshCallback); // @@@ used as TOGGLE_CLICK_CB (see #559)
1087	aws->create_toggle(PFOLD_AWAR_ENABLE);
1088	aws->at_newline();
1089
1090	// create SAI option menu
1091	aws->label_length(30);
1092	aws->label("Selected Protein Structure SAI");
1093	AW_option_menu_struct *oms_sai = aws->create_option_menu(PFOLD_AWAR_SELECTED_SAI);
1094	ED4_pfold_select_SAI_and_update_option_menu(aws, oms_sai, false);
1095	aws->at_newline();
1096	aws->label("-> Filter SAI names for");
1097	aws->callback(makeWindowCallback(ED4_pfold_select_SAI_and_update_option_menu, oms_sai, false)); // @@@ used as INPUTFIELD_CB (see #559)
1098	aws->create_input_field(PFOLD_AWAR_SAI_FILTER, 10);
1099	aws->at_newline();
1100
1101	// create match method option menu
1102	PFOLD_MATCH_METHOD match_method = (PFOLD_MATCH_METHOD) ED4_ROOT->aw_root->awar(PFOLD_AWAR_MATCH_METHOD)->read_int();
1103	aws->label_length(12);
1104	aws->label("Match Method");
1105	aws->create_option_menu(PFOLD_AWAR_MATCH_METHOD);
1106	for (int i = 0; const char *mm_aw = pfold_match_method_awars[i].name; i++) {
1107	aws->callback(*refreshCallback); // @@@ used as OPTIONMENU_SELECT_CB (see #559)
1108	if (match_method == pfold_match_method_awars[i].value) {
1109	aws->insert_default_option(mm_aw, "", match_method);
1110	}
1111	else {
1112	aws->insert_option(mm_aw, "", pfold_match_method_awars[i].value);
1113	}
1114	}
1115	aws->update_option_menu();
1116	aws->at_newline();
1117
1118	// create match symbols and/or match types input fields
1119	// TODO: show only fields that are relevant for current match method -> bind to callback function?
1120	aws->label_length(40);
1121	aws->label("Match Symbols (Range 0-100% in steps of 10%)");
1122	aws->callback(*refreshCallback); // @@@ used as INPUTFIELD_CB (see #559)
1123	aws->create_input_field(PFOLD_AWAR_SYMBOL_TEMPLATE_2, 10);
1124	aws->at_newline();
1125	for (int i = 0; pfold_match_type_awars[i].name; i++) {
1126	aws->label_length(12);
1127	sprintf(awar, PFOLD_AWAR_PAIR_TEMPLATE, pfold_match_type_awars[i].name);
1128	aws->label(pfold_match_type_awars[i].name);
1129	aws->callback(*refreshCallback); // @@@ used as INPUTFIELD_CB (see #559)
1130	aws->create_input_field(awar, 30);
1131	// TODO: is it possible to disable input field for STRUCT_UNKNOWN?
1132	// if (pfold_match_type_awars[i].value == STRUCT_UNKNOWN)
1133	if (!i) aws->get_at_position(&ex, &ey);
1134	sprintf(awar, PFOLD_AWAR_SYMBOL_TEMPLATE, pfold_match_type_awars[i].name);
1135	aws->callback(*refreshCallback); // @@@ used as INPUTFIELD_CB (see #559)
1136	aws->create_input_field(awar, 3);
1137	aws->at_newline();
1138	}
1139
1140	aws->window_fit();
1141	return aws;
1142	}
1143

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/lib/EDIT4/ED4_protein_2nd_structure.cxx

Download in other formats: