1 | #if DEBUG && !_DEBUG |
---|
2 | #define _DEBUG 1 |
---|
3 | #endif |
---|
4 | |
---|
5 | #if _DEBUG && !DEBUG |
---|
6 | #define DEBUG 1 |
---|
7 | #endif |
---|
8 | |
---|
9 | #if _MSC_VER |
---|
10 | #define TIMING 0 |
---|
11 | #endif |
---|
12 | |
---|
13 | #define VER_3_52 0 |
---|
14 | |
---|
15 | #ifdef _MSC_VER // Miscrosoft compiler |
---|
16 | #pragma warning(disable : 4800) // int-bool conversion |
---|
17 | #pragma warning(disable : 4996) // deprecated names like strdup, isatty. |
---|
18 | #endif |
---|
19 | |
---|
20 | extern const char *MUSCLE_LONG_VERSION; |
---|
21 | #define SHORT_VERSION "3.8" |
---|
22 | |
---|
23 | #include <stdlib.h> |
---|
24 | #include <string.h> |
---|
25 | #include <ctype.h> |
---|
26 | #include <stdarg.h> |
---|
27 | #include <stdio.h> |
---|
28 | |
---|
29 | #define DOUBLE_AFFINE 0 |
---|
30 | #define SINGLE_AFFINE 1 |
---|
31 | #define PAF 0 |
---|
32 | |
---|
33 | #include "types.h" |
---|
34 | #include "intmath.h" |
---|
35 | #include "alpha.h" |
---|
36 | #include "params.h" |
---|
37 | |
---|
38 | #ifndef _WIN32 |
---|
39 | #define stricmp strcasecmp |
---|
40 | #define strnicmp strncasecmp |
---|
41 | #define _snprintf snprintf |
---|
42 | #define _fsopen(name, mode, share) fopen((name), (mode)) |
---|
43 | #endif |
---|
44 | |
---|
45 | #if DEBUG |
---|
46 | #undef assert |
---|
47 | #define assert(b) Call_MY_ASSERT(__FILE__, __LINE__, b, #b) |
---|
48 | void Call_MY_ASSERT(const char *file, int line, bool b, const char *msg); |
---|
49 | #else |
---|
50 | #define assert(exp) ((void)0) |
---|
51 | #endif |
---|
52 | |
---|
53 | extern int g_argc; |
---|
54 | extern char **g_argv; |
---|
55 | |
---|
56 | #define Rotate(a, b, c) { SCORE *tmp = a; a = b; b = c; c = tmp; } |
---|
57 | |
---|
58 | const double VERY_LARGE_DOUBLE = 1e20; |
---|
59 | |
---|
60 | extern unsigned g_uTreeSplitNode1; |
---|
61 | extern unsigned g_uTreeSplitNode2; |
---|
62 | |
---|
63 | // Number of elements in array a[] |
---|
64 | #define countof(a) (sizeof(a)/sizeof(a[0])) |
---|
65 | |
---|
66 | // Maximum of two of any type |
---|
67 | #define Max2(a, b) ((a) > (b) ? (a) : (b)) |
---|
68 | |
---|
69 | // Maximum of three of any type |
---|
70 | #define Max3(a, b, c) Max2(Max2(a, b), c) |
---|
71 | |
---|
72 | // Minimum of two of any type |
---|
73 | #define Min2(a, b) ((a) < (b) ? (a) : (b)) |
---|
74 | |
---|
75 | // Maximum of four of any type |
---|
76 | #define Max4(a, b, c, d) Max2(Max2(a, b), Max2(c, d)) |
---|
77 | |
---|
78 | const double VERY_NEGATIVE_DOUBLE = -9e29; |
---|
79 | const float VERY_NEGATIVE_FLOAT = (float) -9e29; |
---|
80 | |
---|
81 | const double BLOSUM_DIST = 0.62; // todo settable |
---|
82 | |
---|
83 | // insane value for uninitialized variables |
---|
84 | const unsigned uInsane = 8888888; |
---|
85 | const int iInsane = 8888888; |
---|
86 | const SCORE scoreInsane = 8888888; |
---|
87 | const char cInsane = (char) 0xcd; // int 3 instruction, used e.g. for unint. memory |
---|
88 | const double dInsane = VERY_NEGATIVE_DOUBLE; |
---|
89 | const float fInsane = VERY_NEGATIVE_FLOAT; |
---|
90 | const char INVALID_STATE = '*'; |
---|
91 | const BASETYPE BTInsane = (BASETYPE) dInsane; |
---|
92 | const WEIGHT wInsane = BTInsane; |
---|
93 | |
---|
94 | extern double g_dNAN; |
---|
95 | |
---|
96 | extern unsigned long g_tStart; |
---|
97 | |
---|
98 | void Quit(const char szFormat[], ...); |
---|
99 | void Warning(const char szFormat[], ...); |
---|
100 | void TrimBlanks(char szStr[]); |
---|
101 | void TrimLeadingBlanks(char szStr[]); |
---|
102 | void TrimTrailingBlanks(char szStr[]); |
---|
103 | void Log(const char szFormat[], ...); |
---|
104 | bool Verbose(); |
---|
105 | const char *ScoreToStr(SCORE Score); |
---|
106 | const char *ScoreToStrL(SCORE Score); |
---|
107 | SCORE StrToScore(const char *pszStr); |
---|
108 | void Break(); |
---|
109 | |
---|
110 | double VecSum(const double v[], unsigned n); |
---|
111 | bool IsValidInteger(const char *Str); |
---|
112 | bool IsValidSignedInteger(const char *Str); |
---|
113 | bool IsValidIdentifier(const char *Str); |
---|
114 | bool IsValidFloatChar(char c); |
---|
115 | bool isident(char c); |
---|
116 | bool isidentf(char c); |
---|
117 | |
---|
118 | void TreeFromSeqVect(const SeqVect &c, Tree &tree, CLUSTER Cluster, |
---|
119 | DISTANCE Distance, ROOT Root, const char *SaveFileName = 0); |
---|
120 | void TreeFromMSA(const MSA &msa, Tree &tree, CLUSTER Cluster, |
---|
121 | DISTANCE Distance, ROOT Root, const char *SaveFileName = 0); |
---|
122 | |
---|
123 | void StripGaps(char szStr[]); |
---|
124 | void StripWhitespace(char szStr[]); |
---|
125 | const char *GetTimeAsStr(); |
---|
126 | unsigned CalcBLOSUMWeights(MSA &Aln, ClusterTree &BlosumCluster); |
---|
127 | void CalcGSCWeights(MSA &Aln, const ClusterTree &BlosumCluster); |
---|
128 | void AssertNormalized(const PROB p[]); |
---|
129 | void AssertNormalizedOrZero(const PROB p[]); |
---|
130 | void AssertNormalized(const double p[]); |
---|
131 | bool VectorIsZero(const double dValues[], unsigned n); |
---|
132 | void VectorSet(double dValues[], unsigned n, double d); |
---|
133 | bool VectorIsZero(const float dValues[], unsigned n); |
---|
134 | void VectorSet(float dValues[], unsigned n, float d); |
---|
135 | |
---|
136 | // @@TODO should be "not linux" |
---|
137 | #if _WIN32 |
---|
138 | double log2(double x); // Defined in <math.h> on Linux |
---|
139 | #endif |
---|
140 | |
---|
141 | double pow2(double x); |
---|
142 | double lnTolog2(double ln); |
---|
143 | |
---|
144 | double lp2(double x); |
---|
145 | SCORE SumLog(SCORE x, SCORE y); |
---|
146 | SCORE SumLog(SCORE x, SCORE y, SCORE z); |
---|
147 | SCORE SumLog(SCORE w, SCORE x, SCORE y, SCORE z); |
---|
148 | |
---|
149 | double lp2Fast(double x); |
---|
150 | double SumLogFast(double x, double y); |
---|
151 | double SumLogFast(double x, double y, double z); |
---|
152 | double SumLogFast(double w, double x, double y, double z); |
---|
153 | |
---|
154 | void chkmem(const char szMsg[] = ""); |
---|
155 | |
---|
156 | void Normalize(PROB p[], unsigned n); |
---|
157 | void Normalize(PROB p[], unsigned n, double dRequiredTotal); |
---|
158 | void NormalizeUnlessZero(PROB p[], unsigned n); |
---|
159 | |
---|
160 | void DebugPrintf(const char szFormat[], ...); |
---|
161 | void SetListFileName(const char *ptrListFileName, bool bAppend); |
---|
162 | void ModelFromAlign(const char *strInputFileName, const char *strModelFileName, |
---|
163 | double dMaxNIC); |
---|
164 | double GetMemUseMB(); |
---|
165 | double GetRAMSizeMB(); |
---|
166 | double GetPeakMemUseMB(); |
---|
167 | void CheckMemUse(); |
---|
168 | const char *ElapsedTimeAsString(); |
---|
169 | char *SecsToHHMMSS(long lSecs, char szStr[]); |
---|
170 | double GetCPUGHz(); |
---|
171 | SCORE GetBlosum62(unsigned uLetterA, unsigned uLetterB); |
---|
172 | SCORE GetBlosum62d(unsigned uLetterA, unsigned uLetterB); |
---|
173 | SCORE GetBlosum50(unsigned uLetterA, unsigned uLetterB); |
---|
174 | void AssertNormalizedDist(const PROB p[], unsigned N); |
---|
175 | void CmdLineError(const char *Format, ...); |
---|
176 | void Fatal(const char *Format, ...); |
---|
177 | void InitCmd(); |
---|
178 | void ExecCommandLine(int argc, char *argv[]); |
---|
179 | void DoCmd(); |
---|
180 | void SetLogFile(); |
---|
181 | void NameFromPath(const char szPath[], char szName[], unsigned uBytes); |
---|
182 | char *strsave(const char *s); |
---|
183 | void DistKmer20_3(const SeqVect &v, DistFunc &DF); |
---|
184 | void DistKbit20_3(const SeqVect &v, DistFunc &DF); |
---|
185 | void DistKmer6_6(const SeqVect &v, DistFunc &DF); |
---|
186 | void DistKmer4_6(const SeqVect &v, DistFunc &DF); |
---|
187 | void DistPWKimura(const SeqVect &v, DistFunc &DF); |
---|
188 | void FastDistKmer(const SeqVect &v, DistFunc &DF); |
---|
189 | void DistUnaligned(const SeqVect &v, DISTANCE DistMethod, DistFunc &DF); |
---|
190 | double PctIdToMAFFTDist(double dPctId); |
---|
191 | double KimuraDist(double dPctId); |
---|
192 | void SetFastParams(); |
---|
193 | void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, |
---|
194 | unsigned uLengthB); |
---|
195 | void ValidateMuscleIds(const MSA &msa); |
---|
196 | void ValidateMuscleIds(const Tree &tree); |
---|
197 | void TraceBackToPath(int **TraceBack, unsigned uLengthA, |
---|
198 | unsigned uLengthB, PWPath &Path); |
---|
199 | void BitTraceBack(char **TraceBack, unsigned uLengthA, unsigned uLengthB, |
---|
200 | char LastEdge, PWPath &Path); |
---|
201 | SCORE AlignTwoMSAs(const MSA &msa1, const MSA &msa2, MSA &msaOut, PWPath &Path, |
---|
202 | bool bLockLeft = false, bool bLockRight = false); |
---|
203 | SCORE AlignTwoProfs( |
---|
204 | const ProfPos *PA, unsigned uLengthA, WEIGHT wA, |
---|
205 | const ProfPos *PB, unsigned uLengthB, WEIGHT wB, |
---|
206 | PWPath &Path, ProfPos **ptrPout, unsigned *ptruLengthOut); |
---|
207 | void AlignTwoProfsGivenPath(const PWPath &Path, |
---|
208 | const ProfPos *PA, unsigned uLengthA, WEIGHT wA, |
---|
209 | const ProfPos *PB, unsigned uLengthB, WEIGHT wB, |
---|
210 | ProfPos **ptrPOut, unsigned *ptruLengthOut); |
---|
211 | void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB, |
---|
212 | MSA &msaCombined); |
---|
213 | void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB, |
---|
214 | MSA &msaCombined); |
---|
215 | SCORE FastScorePath2(const ProfPos *PA, unsigned uLengthA, |
---|
216 | const ProfPos *PB, unsigned uLengthB, const PWPath &Path); |
---|
217 | SCORE GlobalAlignDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, |
---|
218 | unsigned uLengthB, PWPath &Path); |
---|
219 | SCORE GlobalAlignSimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, |
---|
220 | unsigned uLengthB, PWPath &Path); |
---|
221 | SCORE GlobalAlignSP(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, |
---|
222 | unsigned uLengthB, PWPath &Path); |
---|
223 | SCORE GlobalAlignSPN(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, |
---|
224 | unsigned uLengthB, PWPath &Path); |
---|
225 | SCORE GlobalAlignLE(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, |
---|
226 | unsigned uLengthB, PWPath &Path); |
---|
227 | void CalcThreeWayWeights(const Tree &tree, unsigned uNode1, unsigned uNode2, |
---|
228 | WEIGHT *Weights); |
---|
229 | SCORE GlobalAlignSS(const Seq &seqA, const Seq &seqB, PWPath &Path); |
---|
230 | bool RefineHoriz(MSA &msaIn, const Tree &tree, unsigned uIters, bool bLockLeft, bool bLockRight); |
---|
231 | bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters); |
---|
232 | SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, |
---|
233 | unsigned uLengthB, PWPath &Path); |
---|
234 | |
---|
235 | void SetInputFileName(const char *pstrFileName); |
---|
236 | void SetIter(unsigned uIter); |
---|
237 | void IncIter(); |
---|
238 | void SetMaxIters(unsigned uMaxIters); |
---|
239 | void Progress(unsigned uStep, unsigned uTotalSteps); |
---|
240 | void Progress(const char *szFormat, ...); |
---|
241 | void SetStartTime(); |
---|
242 | void ProgressStepsDone(); |
---|
243 | void SetProgressDesc(const char szDesc[]); |
---|
244 | void SetSeqStats(unsigned uSeqCount, unsigned uMaxL, unsigned uAvgL); |
---|
245 | |
---|
246 | void SetNewHandler(); |
---|
247 | void SaveCurrentAlignment(); |
---|
248 | void SetCurrentAlignment(MSA &msa); |
---|
249 | void SetOutputFileName(const char *out); |
---|
250 | |
---|
251 | #if DEBUG |
---|
252 | void SetMuscleSeqVect(SeqVect &v); |
---|
253 | void SetMuscleInputMSA(MSA &msa); |
---|
254 | void ValidateMuscleIds(const MSA &msa); |
---|
255 | void ValidateMuscleIds(const Tree &tree); |
---|
256 | #else |
---|
257 | #define SetMuscleSeqVect(x) /* empty */ |
---|
258 | #define SetMuscleInputMSA(x) /* empty */ |
---|
259 | #define ValidateMuscleIds(x) /* empty */ |
---|
260 | #endif |
---|
261 | |
---|
262 | void ProcessArgVect(int argc, char *argv[]); |
---|
263 | void ProcessArgStr(const char *Str); |
---|
264 | void Usage(); |
---|
265 | void SetParams(); |
---|
266 | |
---|
267 | void SortCounts(const FCOUNT fcCounts[], unsigned SortOrder[]); |
---|
268 | unsigned ResidueGroupFromFCounts(const FCOUNT fcCounts[]); |
---|
269 | FCOUNT SumCounts(const FCOUNT Counts[]); |
---|
270 | |
---|
271 | bool FlagOpt(const char *Name); |
---|
272 | const char *ValueOpt(const char *Name); |
---|
273 | void DoMuscle(); |
---|
274 | void ProfDB(); |
---|
275 | void DoSP(); |
---|
276 | void ProgAlignSubFams(); |
---|
277 | void Run(); |
---|
278 | void ListParams(); |
---|
279 | void OnException(); |
---|
280 | void SetSeqWeightMethod(SEQWEIGHT Method); |
---|
281 | SEQWEIGHT GetSeqWeightMethod(); |
---|
282 | WEIGHT GetMuscleSeqWeightById(unsigned uId); |
---|
283 | void ListDiagSavings(); |
---|
284 | void CheckMaxTime(); |
---|
285 | const char *MaxSecsToStr(); |
---|
286 | unsigned long GetStartTime(); |
---|
287 | |
---|
288 | void ProgressiveAlign(const SeqVect &v, const Tree &GuideTree, MSA &a); |
---|
289 | ProgNode *ProgressiveAlignE(const SeqVect &v, const Tree &GuideTree, MSA &a); |
---|
290 | |
---|
291 | void CalcDistRangeKmer6_6(const MSA &msa, unsigned uRow, float Dist[]); |
---|
292 | void CalcDistRangeKmer20_3(const MSA &msa, unsigned uRow, float Dist[]); |
---|
293 | void CalcDistRangeKmer20_4(const MSA &msa, unsigned uRow, float Dist[]); |
---|
294 | void CalcDistRangePctIdKimura(const MSA &msa, unsigned uRow, float Dist[]); |
---|
295 | void CalcDistRangePctIdLog(const MSA &msa, unsigned uRow, float Dist[]); |
---|
296 | |
---|
297 | void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[], MSA &a); |
---|
298 | void MakeRootMSABrenner(SeqVect &v, const Tree &GuideTree, ProgNode Nodes[], MSA &a); |
---|
299 | |
---|
300 | void Refine(); |
---|
301 | void Local(); |
---|
302 | void Profile(); |
---|
303 | void PPScore(); |
---|
304 | void UPGMA2(const DistCalc &DC, Tree &tree, LINKAGE Linkage); |
---|
305 | |
---|
306 | char *GetFastaSeq(FILE *f, unsigned *ptrSeqLength, char **ptrLabel, |
---|
307 | bool DeleteGaps = true); |
---|
308 | SCORE SW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, |
---|
309 | unsigned uLengthB, PWPath &Path); |
---|
310 | void TraceBackSW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, |
---|
311 | unsigned uLengthB, const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_, |
---|
312 | unsigned uPrefixLengthAMax, unsigned uPrefixLengthBMax, PWPath &Path); |
---|
313 | void DiffPaths(const PWPath &p1, const PWPath &p2, unsigned Edges1[], |
---|
314 | unsigned *ptruDiffCount1, unsigned Edges2[], unsigned *ptruDiffCount2); |
---|
315 | void SetPPScore(bool bRespectFlagOpts = true); |
---|
316 | void SetPPScore(PPSCORE p); |
---|
317 | SCORE GlobalAlignDimer(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, |
---|
318 | unsigned uLengthB, PWPath &Path); |
---|
319 | bool MissingCommand(); |
---|
320 | void Credits(); |
---|
321 | void ProfileProfile(MSA &msa1, MSA &msa2, MSA &msaOut); |
---|
322 | void MHackStart(SeqVect &v); |
---|
323 | void MHackEnd(MSA &msa); |
---|
324 | void WriteScoreFile(const MSA &msa); |
---|
325 | char ConsensusChar(const ProfPos &PP); |
---|
326 | void Stabilize(const MSA &msa, MSA &msaStable); |
---|
327 | void MuscleOutput(MSA &msa); |
---|
328 | PTR_SCOREMATRIX ReadMx(TextFile &File); |
---|
329 | void MemPlus(size_t Bytes, char *Where); |
---|
330 | void MemMinus(size_t Bytes, char *Where); |
---|