| 1 | #include "muscle.h" |
|---|
| 2 | #include <math.h> |
|---|
| 3 | |
|---|
| 4 | PROB ScoreToProb(SCORE Score) |
|---|
| 5 | { |
|---|
| 6 | if (MINUS_INFINITY >= Score) |
|---|
| 7 | return 0.0; |
|---|
| 8 | return (PROB) pow(2.0, (double) Score/INTSCALE); |
|---|
| 9 | } |
|---|
| 10 | |
|---|
| 11 | //#if 0 |
|---|
| 12 | //static const double log2e = log2(exp(1.0)); |
|---|
| 13 | // |
|---|
| 14 | //double lnTolog2(double ln) |
|---|
| 15 | // { |
|---|
| 16 | // return ln*log2e; |
|---|
| 17 | // } |
|---|
| 18 | // |
|---|
| 19 | //double log2(double x) |
|---|
| 20 | // { |
|---|
| 21 | // if (0 == x) |
|---|
| 22 | // return MINUS_INFINITY; |
|---|
| 23 | // |
|---|
| 24 | // static const double dInvLn2 = 1.0/log(2.0); |
|---|
| 25 | //// Multiply by inverse of log(2) just in case multiplication |
|---|
| 26 | //// is faster than division. |
|---|
| 27 | // return log(x)*dInvLn2; |
|---|
| 28 | // } |
|---|
| 29 | //#endif |
|---|
| 30 | |
|---|
| 31 | //SCORE ProbToScore(PROB Prob) |
|---|
| 32 | // { |
|---|
| 33 | // if (0.0 == Prob) |
|---|
| 34 | // return MINUS_INFINITY; |
|---|
| 35 | //// return (SCORE) floor(INTSCALE*log2(Prob)); |
|---|
| 36 | // return (SCORE) log2(Prob); |
|---|
| 37 | // } |
|---|
| 38 | |
|---|
| 39 | WEIGHT DoubleToWeight(double d) |
|---|
| 40 | { |
|---|
| 41 | assert(d >= 0); |
|---|
| 42 | return (WEIGHT) (INTSCALE*d); |
|---|
| 43 | } |
|---|
| 44 | |
|---|
| 45 | double WeightToDouble(WEIGHT w) |
|---|
| 46 | { |
|---|
| 47 | return (double) w / (double) INTSCALE; |
|---|
| 48 | } |
|---|
| 49 | |
|---|
| 50 | SCORE DoubleToScore(double d) |
|---|
| 51 | { |
|---|
| 52 | return (SCORE)(d*(double) INTSCALE); |
|---|
| 53 | } |
|---|
| 54 | |
|---|
| 55 | bool ScoreEq(SCORE s1, SCORE s2) |
|---|
| 56 | { |
|---|
| 57 | return BTEq(s1, s2); |
|---|
| 58 | } |
|---|
| 59 | |
|---|
| 60 | static bool BTEq2(BASETYPE b1, BASETYPE b2) |
|---|
| 61 | { |
|---|
| 62 | double diff = fabs(b1 - b2); |
|---|
| 63 | if (diff < 0.0001) |
|---|
| 64 | return true; |
|---|
| 65 | double sum = fabs(b1) + fabs(b2); |
|---|
| 66 | return diff/sum < 0.005; |
|---|
| 67 | } |
|---|
| 68 | |
|---|
| 69 | bool BTEq(double b1, double b2) |
|---|
| 70 | { |
|---|
| 71 | return BTEq2((BASETYPE) b1, (BASETYPE) b2); |
|---|
| 72 | } |
|---|
| 73 | |
|---|
| 74 | //const double dLn2 = log(2.0); |
|---|
| 75 | |
|---|
| 76 | //// pow2(x)=2^x |
|---|
| 77 | //double pow2(double x) |
|---|
| 78 | // { |
|---|
| 79 | // if (MINUS_INFINITY == x) |
|---|
| 80 | // return 0; |
|---|
| 81 | // return exp(x*dLn2); |
|---|
| 82 | // } |
|---|
| 83 | |
|---|
| 84 | //// lp2(x) = log2(1 + 2^-x), x >= 0 |
|---|
| 85 | //double lp2(double x) |
|---|
| 86 | // { |
|---|
| 87 | // return log2(1 + pow2(-x)); |
|---|
| 88 | // } |
|---|
| 89 | |
|---|
| 90 | // SumLog(x, y) = log2(2^x + 2^y) |
|---|
| 91 | //SCORE SumLog(SCORE x, SCORE y) |
|---|
| 92 | // { |
|---|
| 93 | // return (SCORE) log2(pow2(x) + pow2(y)); |
|---|
| 94 | // } |
|---|
| 95 | // |
|---|
| 96 | //// SumLog(x, y, z) = log2(2^x + 2^y + 2^z) |
|---|
| 97 | //SCORE SumLog(SCORE x, SCORE y, SCORE z) |
|---|
| 98 | // { |
|---|
| 99 | // return (SCORE) log2(pow2(x) + pow2(y) + pow2(z)); |
|---|
| 100 | // } |
|---|
| 101 | // |
|---|
| 102 | //// SumLog(w, x, y, z) = log2(2^w + 2^x + 2^y + 2^z) |
|---|
| 103 | //SCORE SumLog(SCORE w, SCORE x, SCORE y, SCORE z) |
|---|
| 104 | // { |
|---|
| 105 | // return (SCORE) log2(pow2(w) + pow2(x) + pow2(y) + pow2(z)); |
|---|
| 106 | // } |
|---|
| 107 | |
|---|
| 108 | //SCORE lp2Fast(SCORE x) |
|---|
| 109 | // { |
|---|
| 110 | // assert(x >= 0); |
|---|
| 111 | // const int iTableSize = 1000; |
|---|
| 112 | // const double dRange = 20.0; |
|---|
| 113 | // const double dScale = dRange/iTableSize; |
|---|
| 114 | // static SCORE dValue[iTableSize]; |
|---|
| 115 | // static bool bInit = false; |
|---|
| 116 | // if (!bInit) |
|---|
| 117 | // { |
|---|
| 118 | // for (int i = 0; i < iTableSize; ++i) |
|---|
| 119 | // dValue[i] = (SCORE) lp2(i*dScale); |
|---|
| 120 | // bInit = true; |
|---|
| 121 | // } |
|---|
| 122 | // if (x >= dRange) |
|---|
| 123 | // return 0.0; |
|---|
| 124 | // int i = (int) (x/dScale); |
|---|
| 125 | // assert(i >= 0 && i < iTableSize); |
|---|
| 126 | // SCORE dResult = dValue[i]; |
|---|
| 127 | // assert(BTEq(dResult, lp2(x))); |
|---|
| 128 | // return dResult; |
|---|
| 129 | // } |
|---|
| 130 | // |
|---|
| 131 | //// SumLog(x, y) = log2(2^x + 2^y) |
|---|
| 132 | //SCORE SumLogFast(SCORE x, SCORE y) |
|---|
| 133 | // { |
|---|
| 134 | // if (MINUS_INFINITY == x) |
|---|
| 135 | // { |
|---|
| 136 | // if (MINUS_INFINITY == y) |
|---|
| 137 | // return MINUS_INFINITY; |
|---|
| 138 | // return y; |
|---|
| 139 | // } |
|---|
| 140 | // else if (MINUS_INFINITY == y) |
|---|
| 141 | // return x; |
|---|
| 142 | // |
|---|
| 143 | // SCORE dResult; |
|---|
| 144 | // if (x > y) |
|---|
| 145 | // dResult = x + lp2Fast(x-y); |
|---|
| 146 | // else |
|---|
| 147 | // dResult = y + lp2Fast(y-x); |
|---|
| 148 | // assert(SumLog(x, y) == dResult); |
|---|
| 149 | // return dResult; |
|---|
| 150 | // } |
|---|
| 151 | // |
|---|
| 152 | //SCORE SumLogFast(SCORE x, SCORE y, SCORE z) |
|---|
| 153 | // { |
|---|
| 154 | // SCORE dResult = SumLogFast(x, SumLogFast(y, z)); |
|---|
| 155 | // assert(SumLog(x, y, z) == dResult); |
|---|
| 156 | // return dResult; |
|---|
| 157 | // } |
|---|
| 158 | |
|---|
| 159 | //SCORE SumLogFast(SCORE w, SCORE x, SCORE y, SCORE z) |
|---|
| 160 | // { |
|---|
| 161 | // SCORE dResult = SumLogFast(SumLogFast(w, x), SumLogFast(y, z)); |
|---|
| 162 | // assert(SumLog(w, x, y, z) == dResult); |
|---|
| 163 | // return dResult; |
|---|
| 164 | // } |
|---|
| 165 | |
|---|
| 166 | double VecSum(const double v[], unsigned n) |
|---|
| 167 | { |
|---|
| 168 | double dSum = 0.0; |
|---|
| 169 | for (unsigned i = 0; i < n; ++i) |
|---|
| 170 | dSum += v[i]; |
|---|
| 171 | return dSum; |
|---|
| 172 | } |
|---|
| 173 | |
|---|
| 174 | void Normalize(PROB p[], unsigned n) |
|---|
| 175 | { |
|---|
| 176 | unsigned i; |
|---|
| 177 | PROB dSum = 0.0; |
|---|
| 178 | for (i = 0; i < n; ++i) |
|---|
| 179 | dSum += p[i]; |
|---|
| 180 | if (0.0 == dSum) |
|---|
| 181 | Quit("Normalize, sum=0"); |
|---|
| 182 | for (i = 0; i < n; ++i) |
|---|
| 183 | p[i] /= dSum; |
|---|
| 184 | } |
|---|
| 185 | |
|---|
| 186 | void NormalizeUnlessZero(PROB p[], unsigned n) |
|---|
| 187 | { |
|---|
| 188 | unsigned i; |
|---|
| 189 | PROB dSum = 0.0; |
|---|
| 190 | for (i = 0; i < n; ++i) |
|---|
| 191 | dSum += p[i]; |
|---|
| 192 | if (0.0 == dSum) |
|---|
| 193 | return; |
|---|
| 194 | for (i = 0; i < n; ++i) |
|---|
| 195 | p[i] /= dSum; |
|---|
| 196 | } |
|---|
| 197 | |
|---|
| 198 | void Normalize(PROB p[], unsigned n, double dRequiredTotal) |
|---|
| 199 | { |
|---|
| 200 | unsigned i; |
|---|
| 201 | double dSum = 0.0; |
|---|
| 202 | for (i = 0; i < n; ++i) |
|---|
| 203 | dSum += p[i]; |
|---|
| 204 | if (0.0 == dSum) |
|---|
| 205 | Quit("Normalize, sum=0"); |
|---|
| 206 | double dFactor = dRequiredTotal / dSum; |
|---|
| 207 | for (i = 0; i < n; ++i) |
|---|
| 208 | p[i] *= (PROB) dFactor; |
|---|
| 209 | } |
|---|
| 210 | |
|---|
| 211 | bool VectorIsZero(const double dValues[], unsigned n) |
|---|
| 212 | { |
|---|
| 213 | for (unsigned i = 0; i < n; ++i) |
|---|
| 214 | if (dValues[i] != 0.0) |
|---|
| 215 | return false; |
|---|
| 216 | return true; |
|---|
| 217 | } |
|---|
| 218 | |
|---|
| 219 | void VectorSet(double dValues[], unsigned n, double d) |
|---|
| 220 | { |
|---|
| 221 | for (unsigned i = 0; i < n; ++i) |
|---|
| 222 | dValues[i] = d; |
|---|
| 223 | } |
|---|
| 224 | |
|---|
| 225 | bool VectorIsZero(const float dValues[], unsigned n) |
|---|
| 226 | { |
|---|
| 227 | for (unsigned i = 0; i < n; ++i) |
|---|
| 228 | if (dValues[i] != 0.0) |
|---|
| 229 | return false; |
|---|
| 230 | return true; |
|---|
| 231 | } |
|---|
| 232 | |
|---|
| 233 | void VectorSet(float dValues[], unsigned n, float d) |
|---|
| 234 | { |
|---|
| 235 | for (unsigned i = 0; i < n; ++i) |
|---|
| 236 | dValues[i] = d; |
|---|
| 237 | } |
|---|
| 238 | |
|---|
| 239 | double Correl(const double P[], const double Q[], unsigned uCount) |
|---|
| 240 | { |
|---|
| 241 | double dSumP = 0.0; |
|---|
| 242 | double dSumQ = 0.0; |
|---|
| 243 | for (unsigned n = 0; n < uCount; ++n) |
|---|
| 244 | { |
|---|
| 245 | dSumP += P[n]; |
|---|
| 246 | dSumQ += Q[n]; |
|---|
| 247 | } |
|---|
| 248 | const double dMeanP = dSumP/uCount; |
|---|
| 249 | const double dMeanQ = dSumQ/uCount; |
|---|
| 250 | |
|---|
| 251 | double dSum1 = 0.0; |
|---|
| 252 | double dSum2 = 0.0; |
|---|
| 253 | double dSum3 = 0.0; |
|---|
| 254 | for (unsigned n = 0; n < uCount; ++n) |
|---|
| 255 | { |
|---|
| 256 | const double dDiffP = P[n] - dMeanP; |
|---|
| 257 | const double dDiffQ = Q[n] - dMeanQ; |
|---|
| 258 | dSum1 += dDiffP*dDiffQ; |
|---|
| 259 | dSum2 += dDiffP*dDiffP; |
|---|
| 260 | dSum3 += dDiffQ*dDiffQ; |
|---|
| 261 | } |
|---|
| 262 | if (0 == dSum1) |
|---|
| 263 | return 0; |
|---|
| 264 | const double dCorrel = dSum1 / sqrt(dSum2*dSum3); |
|---|
| 265 | return dCorrel; |
|---|
| 266 | } |
|---|
| 267 | |
|---|
| 268 | float Correl(const float P[], const float Q[], unsigned uCount) |
|---|
| 269 | { |
|---|
| 270 | float dSumP = 0.0; |
|---|
| 271 | float dSumQ = 0.0; |
|---|
| 272 | for (unsigned n = 0; n < uCount; ++n) |
|---|
| 273 | { |
|---|
| 274 | dSumP += P[n]; |
|---|
| 275 | dSumQ += Q[n]; |
|---|
| 276 | } |
|---|
| 277 | const float dMeanP = dSumP/uCount; |
|---|
| 278 | const float dMeanQ = dSumQ/uCount; |
|---|
| 279 | |
|---|
| 280 | float dSum1 = 0.0; |
|---|
| 281 | float dSum2 = 0.0; |
|---|
| 282 | float dSum3 = 0.0; |
|---|
| 283 | for (unsigned n = 0; n < uCount; ++n) |
|---|
| 284 | { |
|---|
| 285 | const float dDiffP = P[n] - dMeanP; |
|---|
| 286 | const float dDiffQ = Q[n] - dMeanQ; |
|---|
| 287 | dSum1 += dDiffP*dDiffQ; |
|---|
| 288 | dSum2 += dDiffP*dDiffP; |
|---|
| 289 | dSum3 += dDiffQ*dDiffQ; |
|---|
| 290 | } |
|---|
| 291 | if (0 == dSum1) |
|---|
| 292 | return 0; |
|---|
| 293 | const float dCorrel = dSum1 / (float) sqrt(dSum2*dSum3); |
|---|
| 294 | return dCorrel; |
|---|
| 295 | } |
|---|
| 296 | |
|---|
| 297 | // Simple (but slow) function to compute Pearson ranks |
|---|
| 298 | // that allows for ties. Correctness and simplicity |
|---|
| 299 | // are priorities over speed here. |
|---|
| 300 | void Rank(const float P[], float Ranks[], unsigned uCount) |
|---|
| 301 | { |
|---|
| 302 | for (unsigned n = 0; n < uCount; ++n) |
|---|
| 303 | { |
|---|
| 304 | unsigned uNumberGreater = 0; |
|---|
| 305 | unsigned uNumberEqual = 0; |
|---|
| 306 | unsigned uNumberLess = 0; |
|---|
| 307 | double dValue = P[n]; |
|---|
| 308 | for (unsigned i = 0; i < uCount; ++i) |
|---|
| 309 | { |
|---|
| 310 | double v = P[i]; |
|---|
| 311 | if (v == dValue) |
|---|
| 312 | ++uNumberEqual; |
|---|
| 313 | else if (v < dValue) |
|---|
| 314 | ++uNumberLess; |
|---|
| 315 | else |
|---|
| 316 | ++uNumberGreater; |
|---|
| 317 | } |
|---|
| 318 | assert(uNumberEqual >= 1); |
|---|
| 319 | assert(uNumberEqual + uNumberLess + uNumberGreater == uCount); |
|---|
| 320 | Ranks[n] = (float) (1 + uNumberLess + (uNumberEqual - 1)/2.0); |
|---|
| 321 | } |
|---|
| 322 | } |
|---|
| 323 | |
|---|
| 324 | void Rank(const double P[], double Ranks[], unsigned uCount) |
|---|
| 325 | { |
|---|
| 326 | for (unsigned n = 0; n < uCount; ++n) |
|---|
| 327 | { |
|---|
| 328 | unsigned uNumberGreater = 0; |
|---|
| 329 | unsigned uNumberEqual = 0; |
|---|
| 330 | unsigned uNumberLess = 0; |
|---|
| 331 | double dValue = P[n]; |
|---|
| 332 | for (unsigned i = 0; i < uCount; ++i) |
|---|
| 333 | { |
|---|
| 334 | double v = P[i]; |
|---|
| 335 | if (v == dValue) |
|---|
| 336 | ++uNumberEqual; |
|---|
| 337 | else if (v < dValue) |
|---|
| 338 | ++uNumberLess; |
|---|
| 339 | else |
|---|
| 340 | ++uNumberGreater; |
|---|
| 341 | } |
|---|
| 342 | assert(uNumberEqual >= 1); |
|---|
| 343 | assert(uNumberEqual + uNumberLess + uNumberGreater == uCount); |
|---|
| 344 | Ranks[n] = (double) (1 + uNumberLess + (uNumberEqual - 1)/2.0); |
|---|
| 345 | } |
|---|
| 346 | } |
|---|
| 347 | |
|---|
| 348 | FCOUNT SumCounts(const FCOUNT Counts[]) |
|---|
| 349 | { |
|---|
| 350 | FCOUNT Sum = 0; |
|---|
| 351 | for (int i = 0; i < 20; ++i) |
|---|
| 352 | Sum += Counts[i]; |
|---|
| 353 | return Sum; |
|---|
| 354 | } |
|---|