| 1 | #include "Globaldp.hpp" |
|---|
| 2 | |
|---|
| 3 | namespace MXSCARNA { |
|---|
| 4 | |
|---|
| 5 | double Globaldp::ribosum_matrix[16][16] |
|---|
| 6 | = { { -2.49, -7.04, -8.24, -4.32, -8.84, -14.37, -4.68, -12.64, -6.86, -5.03, -8.39, -5.84, -4.01, -11.32, -6.16, -9.05 }, |
|---|
| 7 | { -7.04, -2.11, -8.89, -2.04, -9.37, -9.08, -5.86, -10.45, -9.73, -3.81, -11.05, -4.72, -5.33, -8.67, -6.93, -7.83 }, |
|---|
| 8 | { -8.24, -8.89, -0.80, -5.13, -10.41, -14.53, -4.57, -10.14, -8.61, -5.77, -5.38, -6.60, -5.43, -8.87, -5.94, -11.07 }, |
|---|
| 9 | { -4.32, -2.04, -5.13, 4.49, -5.56, -6.71, 1.67, -5.17, -5.33, 2.70, -5.61, 0.59, 1.61, -4.81, -0.51, -2.98 }, |
|---|
| 10 | { -8.84, -9.37, -10.41, -5.56, -5.13, -10.45, -3.57, -8.49, -7.98, -5.95, -11.36, -7.93, -2.42, -7.08, -5.63, -8.39 }, |
|---|
| 11 | { -14.37, -9.08, -14.53, -6.71, -10.45, -3.59, -5.71, -5.77, -12.43, -3.70, -12.58, -7.88, -6.88, -7.40, -8.41, -5.41 }, |
|---|
| 12 | { -4.68, -5.86, -4.57, 1.67, -3.57, -5.71, 5.36, -4.96, -6.00, 2.11, -4.66, -0.27, 2.75, -4.91, 1.32, -3.67 }, |
|---|
| 13 | { -12.64, -10.45, -10.14, -5.17, -8.49, -5.77, -4.96, -2.28, -7.71, -5.84, -13.69, -5.61, -4.72, -3.83, -7.36, -5.21 }, |
|---|
| 14 | { -6.86, -9.73, -8.61, -5.33, -7.98, -12.43, -6.00, -7.71, -1.05, -4.88, -8.67, -6.10, -5.85, -6.63, -7.55, -11.54 }, |
|---|
| 15 | { -5.03, -3.81, -5.77, 2.70, -5.95, -3.70, 2.11, -5.84, -4.88, 5.62, -4.13, 1.21, 1.60, -4.49, -0.08, -3.90 }, |
|---|
| 16 | { -8.39, -11.05, -5.38, -5.61, -11.36, -12.58, -4.66, -13.69, -8.67, -4.13, -1.98, -5.77, -5.75, -12.01, -4.27, -10.79 }, |
|---|
| 17 | { -5.84, -4.72, -6.60, 0.59, -7.93, -7.88, -0.27, -5.61, -6.10, 1.21, -5.77, 3.47, -0.57, -5.30, -2.09, -4.45 }, |
|---|
| 18 | { -4.01, -5.33, -5.43, 1.61, -2.42, -6.88, 2.75, -4.72, -5.85, 1.60, -5.75, -0.57, 4.97, -2.98, 1.14, -3.39 }, |
|---|
| 19 | { -11.32, -8.67, -8.87, -4.81, -7.08, -7.40, -4.91, -3.83, -6.63, -4.49, -12.01, -5.30, -2.98, -3.21, -4.76, -5.97 }, |
|---|
| 20 | { -6.16, -6.93, -5.94, -0.51, -5.63, -8.41, 1.32, -7.36, -7.55, -0.08, -4.27, -2.09, 1.14, -4.76, 3.36, -4.28 }, |
|---|
| 21 | { -9.05, -7.83, -11.07, -2.98, -8.39, -5.41, -3.67, -5.21, -11.54, -3.90, -10.79, -4.45, -3.39, -5.97, -4.28, -0.02 } |
|---|
| 22 | }; |
|---|
| 23 | |
|---|
| 24 | |
|---|
| 25 | Trimat<float>* |
|---|
| 26 | Globaldp:: |
|---|
| 27 | makeProfileBPPMatrix(const MultiSequence *Sequences) |
|---|
| 28 | { |
|---|
| 29 | int length = Sequences->GetSequence(0)->GetLength(); |
|---|
| 30 | float thr = THR; |
|---|
| 31 | Trimat<float> *consBppMat = new Trimat<float>(length + 1); |
|---|
| 32 | fill(consBppMat->begin(), consBppMat->end(), 0); |
|---|
| 33 | |
|---|
| 34 | int number = Sequences->GetNumSequences(); |
|---|
| 35 | for(int seqNum = 0; seqNum < number; seqNum++) { |
|---|
| 36 | SafeVector<int> *tmpMap = Sequences->GetSequence(seqNum)->GetMappingNumber(); |
|---|
| 37 | int label = Sequences->GetSequence(seqNum)->GetLabel(); |
|---|
| 38 | BPPMatrix *tmpBppMatrix = BPPMatrices[label]; |
|---|
| 39 | |
|---|
| 40 | for(int i = 1; i <= length ; i++) { |
|---|
| 41 | int originI = tmpMap->at(i); |
|---|
| 42 | for(int j = i; j <= length; j++) { |
|---|
| 43 | int originJ = tmpMap->at(j); |
|---|
| 44 | if(originI != 0 && originJ != 0) { |
|---|
| 45 | float tmpProb = tmpBppMatrix->GetProb(originI, originJ); |
|---|
| 46 | |
|---|
| 47 | if(tmpProb >= thr) { |
|---|
| 48 | consBppMat->ref(i, j) += tmpProb; |
|---|
| 49 | } |
|---|
| 50 | } |
|---|
| 51 | } |
|---|
| 52 | } |
|---|
| 53 | } |
|---|
| 54 | |
|---|
| 55 | /* compute the mean of base pairing probability */ |
|---|
| 56 | for(int i = 1; i <= length; i++) { |
|---|
| 57 | for(int j = i; j <= length; j++) { |
|---|
| 58 | consBppMat->ref(i,j) = consBppMat->ref(i,j)/(float)number; |
|---|
| 59 | } |
|---|
| 60 | } |
|---|
| 61 | |
|---|
| 62 | return consBppMat; |
|---|
| 63 | } |
|---|
| 64 | |
|---|
| 65 | float |
|---|
| 66 | Globaldp:: |
|---|
| 67 | incrementalScorePSCPair(const StemCandidate &sc1, const StemCandidate &sc2) |
|---|
| 68 | { |
|---|
| 69 | int wordLength = WORDLENGTH; |
|---|
| 70 | |
|---|
| 71 | int pos1, rvpos1; |
|---|
| 72 | if (sc1.GetDistance() > 0) { |
|---|
| 73 | pos1 = sc1.GetPosition(); |
|---|
| 74 | rvpos1 = sc1.GetRvposition(); |
|---|
| 75 | } |
|---|
| 76 | else { |
|---|
| 77 | pos1 = sc1.GetRvposition(); |
|---|
| 78 | rvpos1 = sc1.GetPosition(); |
|---|
| 79 | } |
|---|
| 80 | |
|---|
| 81 | int pos2, rvpos2; |
|---|
| 82 | if (sc2.GetDistance() > 0) { |
|---|
| 83 | pos2 = sc2.GetPosition(); |
|---|
| 84 | rvpos2 = sc2.GetRvposition(); |
|---|
| 85 | } |
|---|
| 86 | else { |
|---|
| 87 | pos2 = sc2.GetRvposition(); |
|---|
| 88 | rvpos2 = sc2.GetPosition(); |
|---|
| 89 | } |
|---|
| 90 | /* |
|---|
| 91 | cout << "1:" << i << " " << j << " " << sc1.GetDistance() << " " << sc2.GetDistance() << endl; |
|---|
| 92 | cout << sc1.GetPosition() << " " << sc1.GetRvposition() << " " << sc2.GetPosition() << " " << sc2.GetRvposition() << endl; |
|---|
| 93 | */ |
|---|
| 94 | float score = |
|---|
| 95 | posterior[sc1.GetPosition() + wordLength - 1][sc2.GetPosition() + wordLength - 1] |
|---|
| 96 | // * sc1.GetBaseScore(wordLength - 1) |
|---|
| 97 | * profileBPPMat1->ref(pos1 + wordLength - 1, rvpos1) |
|---|
| 98 | * posterior[sc1.GetRvposition()][sc2.GetRvposition()] |
|---|
| 99 | // * sc2.GetBaseScore(wordLength - 1); |
|---|
| 100 | * profileBPPMat2->ref(pos2 + wordLength - 1, rvpos2); |
|---|
| 101 | |
|---|
| 102 | |
|---|
| 103 | /* |
|---|
| 104 | incrementalWordSimilarity(sc1, sc2, i, j) |
|---|
| 105 | + incrementalProbabilityScore(sc1, sc2) * multiDeltaScore |
|---|
| 106 | + incrementalStackingScore(sc1, sc2) * multiDeltaStacking; |
|---|
| 107 | */ |
|---|
| 108 | |
|---|
| 109 | return score*sc1.GetNumSeq()*sc2.GetNumSeq(); |
|---|
| 110 | } |
|---|
| 111 | |
|---|
| 112 | float |
|---|
| 113 | Globaldp:: |
|---|
| 114 | incrementalProbabilityScore(const StemCandidate &sc1, const StemCandidate &sc2) |
|---|
| 115 | { |
|---|
| 116 | int wordLength = WORDLENGTH; |
|---|
| 117 | return sc1.GetBaseScore(wordLength-1) + sc2.GetBaseScore(wordLength-1); |
|---|
| 118 | } |
|---|
| 119 | |
|---|
| 120 | float |
|---|
| 121 | Globaldp:: |
|---|
| 122 | incrementalStackingScore(const StemCandidate &sc1, const StemCandidate &sc2) |
|---|
| 123 | { |
|---|
| 124 | return - (sc1.GetStacking() + sc2.GetStacking()); |
|---|
| 125 | } |
|---|
| 126 | |
|---|
| 127 | float |
|---|
| 128 | Globaldp:: |
|---|
| 129 | incrementalWordSimilarity(const StemCandidate &sc1, const StemCandidate &sc2, int i, int j) |
|---|
| 130 | { |
|---|
| 131 | int numSeq1 = sc1.GetNumSeq(); |
|---|
| 132 | int numSeq2 = sc2.GetNumSeq(); |
|---|
| 133 | |
|---|
| 134 | float score = 0; |
|---|
| 135 | |
|---|
| 136 | for(int k = 0; k < 16; k++) { |
|---|
| 137 | for(int l = 0; l < 16; l++) { |
|---|
| 138 | score += countContConp1[k][i]*countContConp2[l][j]*ribosum_matrix[k][l]; |
|---|
| 139 | } |
|---|
| 140 | } |
|---|
| 141 | |
|---|
| 142 | return score/(numSeq1*numSeq2); |
|---|
| 143 | } |
|---|
| 144 | |
|---|
| 145 | float |
|---|
| 146 | Globaldp:: |
|---|
| 147 | scorePSCPair(const StemCandidate &sc1, const StemCandidate &sc2) |
|---|
| 148 | { |
|---|
| 149 | |
|---|
| 150 | |
|---|
| 151 | int wordLength = WORDLENGTH; |
|---|
| 152 | float score = 0; |
|---|
| 153 | |
|---|
| 154 | int pos1, rvpos1; |
|---|
| 155 | if (sc1.GetDistance() > 0) { |
|---|
| 156 | pos1 = sc1.GetPosition(); |
|---|
| 157 | rvpos1 = sc1.GetRvposition(); |
|---|
| 158 | } |
|---|
| 159 | else { |
|---|
| 160 | pos1 = sc1.GetRvposition(); |
|---|
| 161 | rvpos1 = sc1.GetPosition(); |
|---|
| 162 | } |
|---|
| 163 | |
|---|
| 164 | int pos2, rvpos2; |
|---|
| 165 | if (sc2.GetDistance() > 0) { |
|---|
| 166 | pos2 = sc2.GetPosition(); |
|---|
| 167 | rvpos2 = sc2.GetRvposition(); |
|---|
| 168 | } |
|---|
| 169 | else { |
|---|
| 170 | pos2 = sc2.GetRvposition(); |
|---|
| 171 | rvpos2 = sc2.GetPosition(); |
|---|
| 172 | } |
|---|
| 173 | |
|---|
| 174 | for (int k = 0; k < wordLength; k++) { |
|---|
| 175 | score += |
|---|
| 176 | posterior[sc1.GetPosition() + k][sc2.GetPosition() + k] |
|---|
| 177 | * profileBPPMat1->ref(pos1 + k, rvpos1 + wordLength - 1 - k) |
|---|
| 178 | // * sc1.GetBaseScore(k) |
|---|
| 179 | * posterior[sc1.GetRvposition() + wordLength - 1 - k][sc2.GetRvposition() + wordLength - 1 - k] |
|---|
| 180 | // * sc2.GetBaseScore(k); |
|---|
| 181 | * profileBPPMat2->ref(pos2 + k, rvpos2 + wordLength - 1 - k); |
|---|
| 182 | } |
|---|
| 183 | // validation code |
|---|
| 184 | /* |
|---|
| 185 | if (profileBPPMat1->ref(pos1 , rvpos1 + wordLength - 1) != sc1.GetBaseScore(0)) { |
|---|
| 186 | cout << "1 " << profileBPPMat1->ref(pos1, rvpos1 + wordLength - 1) << " " << sc1.GetBaseScore(0) << endl; |
|---|
| 187 | } |
|---|
| 188 | if (profileBPPMat2->ref(pos2, rvpos2 + wordLength - 1) != sc2.GetBaseScore(0)) { |
|---|
| 189 | cout << profileBPPMat2->ref(pos2, rvpos2 + wordLength - 1) << " " << sc2.GetBaseScore(0) << endl; |
|---|
| 190 | } |
|---|
| 191 | if (profileBPPMat1->ref(pos1 + 1, rvpos1 + wordLength - 1 - 1) != sc1.GetBaseScore(1)) { |
|---|
| 192 | cout << "1 " << profileBPPMat1->ref(pos1 + 1, rvpos1 + wordLength - 1 - 1) << " " << sc1.GetBaseScore(1) << endl; |
|---|
| 193 | } |
|---|
| 194 | if (profileBPPMat2->ref(pos2 + 1, rvpos2 + wordLength - 1 - 1) != sc2.GetBaseScore(1)) { |
|---|
| 195 | cout << profileBPPMat2->ref(pos2 + 1, rvpos2 + wordLength - 1 - 1) << " " << sc2.GetBaseScore(1) << endl; |
|---|
| 196 | }*/ |
|---|
| 197 | |
|---|
| 198 | // cout << sc1.GetPosition() << " " << sc1.GetRvposition() << " " << sc1.GetDistance() << endl; |
|---|
| 199 | // cout << sc2.GetPosition() << " " << sc2.GetRvposition() << " " << sc2.GetDistance() << endl; |
|---|
| 200 | /* |
|---|
| 201 | wordSimilarity(sc1, sc2, i, j) |
|---|
| 202 | + probabilityScore(sc1, sc2) * multiScore |
|---|
| 203 | + stackingScore(sc1, sc2) * multiStacking |
|---|
| 204 | |
|---|
| 205 | */ |
|---|
| 206 | /* |
|---|
| 207 | if (sc1.GetDistance() < 0 && sc2.GetDistance() < 0) { |
|---|
| 208 | cout << "2:" << i << " " << j << " " << sc1.GetDistance() << " " << sc2.GetDistance() << endl; |
|---|
| 209 | cout << sc1.GetPosition() << " " << sc1.GetRvposition() << " " << sc2.GetPosition() << " " << sc2.GetRvposition() << endl; |
|---|
| 210 | cout << "2:score" << score << endl; |
|---|
| 211 | |
|---|
| 212 | } |
|---|
| 213 | */ |
|---|
| 214 | return score*sc1.GetNumSeq()*sc2.GetNumSeq(); |
|---|
| 215 | } |
|---|
| 216 | |
|---|
| 217 | float |
|---|
| 218 | Globaldp:: |
|---|
| 219 | wordSimilarity(const StemCandidate &sc1, const StemCandidate &sc2, int i, int j) |
|---|
| 220 | { |
|---|
| 221 | int wordLength = WORDLENGTH; |
|---|
| 222 | |
|---|
| 223 | int numSeq1 = sc1.GetNumSeq(); |
|---|
| 224 | int numSeq2 = sc2.GetNumSeq(); |
|---|
| 225 | |
|---|
| 226 | float score = 0; |
|---|
| 227 | |
|---|
| 228 | for(int k = 0; k < 16; k++) { |
|---|
| 229 | for(int l = 0; l < 16; l++) { |
|---|
| 230 | for(int iter = 0; iter < wordLength; iter++) { |
|---|
| 231 | score += countConp1[iter][k][i]*countConp2[iter][l][j]*ribosum_matrix[k][l]; |
|---|
| 232 | } |
|---|
| 233 | } |
|---|
| 234 | } |
|---|
| 235 | |
|---|
| 236 | return score/(numSeq1*numSeq2); |
|---|
| 237 | } |
|---|
| 238 | |
|---|
| 239 | int |
|---|
| 240 | Globaldp:: |
|---|
| 241 | returnBasePairType(char s, char r) |
|---|
| 242 | { |
|---|
| 243 | if (s == 'A') { |
|---|
| 244 | if (r == 'A') return 0; |
|---|
| 245 | else if (r == 'C') return 1; |
|---|
| 246 | else if (r == 'G') return 2; |
|---|
| 247 | else if (r == 'U') return 3; |
|---|
| 248 | } |
|---|
| 249 | else if (s == 'C') { |
|---|
| 250 | if (r == 'A') return 4; |
|---|
| 251 | else if (r == 'C') return 5; |
|---|
| 252 | else if (r == 'G') return 6; |
|---|
| 253 | else if (r == 'U') return 7; |
|---|
| 254 | } |
|---|
| 255 | else if (s == 'G') { |
|---|
| 256 | if (r == 'A') return 8; |
|---|
| 257 | else if (r == 'C') return 9; |
|---|
| 258 | else if (r == 'G') return 10; |
|---|
| 259 | else if (r == 'U') return 11; |
|---|
| 260 | } |
|---|
| 261 | else if (s == 'U') { |
|---|
| 262 | if (r == 'A') return 12; |
|---|
| 263 | else if (r == 'C') return 13; |
|---|
| 264 | else if (r == 'G') return 14; |
|---|
| 265 | else if (r == 'U') return 15; |
|---|
| 266 | } |
|---|
| 267 | |
|---|
| 268 | return 16; |
|---|
| 269 | } |
|---|
| 270 | |
|---|
| 271 | float |
|---|
| 272 | Globaldp:: |
|---|
| 273 | probabilityScore(const StemCandidate &sc1, const StemCandidate &sc2) |
|---|
| 274 | { |
|---|
| 275 | return sc1.GetScore() + sc2.GetScore(); |
|---|
| 276 | } |
|---|
| 277 | |
|---|
| 278 | float |
|---|
| 279 | Globaldp:: |
|---|
| 280 | stackingScore(const StemCandidate &sc1, const StemCandidate &sc2) |
|---|
| 281 | { |
|---|
| 282 | return - (sc1.GetStemStacking() + sc2.GetStemStacking()); |
|---|
| 283 | } |
|---|
| 284 | |
|---|
| 285 | float |
|---|
| 286 | Globaldp:: |
|---|
| 287 | distancePenalty(const StemCandidate &sc1, const StemCandidate &sc2) |
|---|
| 288 | { |
|---|
| 289 | return std::sqrt((float)std::abs(sc1.GetDistance() - sc2.GetDistance())); |
|---|
| 290 | } |
|---|
| 291 | |
|---|
| 292 | float |
|---|
| 293 | Globaldp:: |
|---|
| 294 | Run() |
|---|
| 295 | { |
|---|
| 296 | Initialize(); |
|---|
| 297 | DP(); |
|---|
| 298 | float score = traceBack(); |
|---|
| 299 | |
|---|
| 300 | // printMatch(); |
|---|
| 301 | //cout << "score=" << score << endl; |
|---|
| 302 | return score; |
|---|
| 303 | } |
|---|
| 304 | |
|---|
| 305 | void |
|---|
| 306 | Globaldp:: |
|---|
| 307 | Initialize() |
|---|
| 308 | { |
|---|
| 309 | int nPscs1 = pscs1->size(); |
|---|
| 310 | int nPscs2 = pscs2->size(); |
|---|
| 311 | |
|---|
| 312 | |
|---|
| 313 | for(int i = 0; i < nPscs1; i++) { |
|---|
| 314 | for(int j = 0; j < nPscs2; j++) { |
|---|
| 315 | VM[i][j] = 0; |
|---|
| 316 | VG[i][j] = 0; |
|---|
| 317 | } |
|---|
| 318 | } |
|---|
| 319 | |
|---|
| 320 | VM[0][0] = 0; |
|---|
| 321 | VG[0][0] = IMPOSSIBLE; |
|---|
| 322 | |
|---|
| 323 | for (int i = 1; i < nPscs1; i++) { |
|---|
| 324 | VM[i][0] = IMPOSSIBLE; VG[i][0] = 0; |
|---|
| 325 | } |
|---|
| 326 | for (int j = 1; j < nPscs2; j++) { |
|---|
| 327 | VM[0][j] = IMPOSSIBLE; VG[0][j] = 0; |
|---|
| 328 | } |
|---|
| 329 | |
|---|
| 330 | for (int i = 0; i < nPscs1; i++) { |
|---|
| 331 | for (int j = 0; j < nPscs2; j++) { |
|---|
| 332 | traceMI[i][j] = 0; traceMJ[i][j] = 0; |
|---|
| 333 | traceGI[i][j] = 0; traceGJ[i][j] = 0; |
|---|
| 334 | } |
|---|
| 335 | } |
|---|
| 336 | |
|---|
| 337 | int wordLength = WORDLENGTH; |
|---|
| 338 | int size1 = pscs1->size(); |
|---|
| 339 | int size2 = pscs2->size(); |
|---|
| 340 | |
|---|
| 341 | for(int i = 0; i < wordLength; i++) { |
|---|
| 342 | for(int j = 0; j < 17; j++) { |
|---|
| 343 | for(int k = 0; k < (int)pscs1->size(); k++) { |
|---|
| 344 | countConp1[i][j][k] = 0; |
|---|
| 345 | } |
|---|
| 346 | } |
|---|
| 347 | } |
|---|
| 348 | for(int i = 0; i < wordLength; i++) { |
|---|
| 349 | for(int j = 0; j < 17; j++) { |
|---|
| 350 | for(int k = 0; k < (int)pscs2->size(); k++) { |
|---|
| 351 | countConp2[i][j][k] = 0; |
|---|
| 352 | } |
|---|
| 353 | } |
|---|
| 354 | } |
|---|
| 355 | for(int i = 0; i < 17; i++) { |
|---|
| 356 | for(int j = 0; j < (int)pscs1->size()+1; j++) { |
|---|
| 357 | countContConp1[i][j] = 0; |
|---|
| 358 | } |
|---|
| 359 | } |
|---|
| 360 | for(int i = 0; i < 17; i++) { |
|---|
| 361 | for(int j = 0; j < (int)pscs2->size()+1; j++) { |
|---|
| 362 | countContConp2[i][j] = 0; |
|---|
| 363 | } |
|---|
| 364 | } |
|---|
| 365 | |
|---|
| 366 | for(int iter = 1; iter < size1; iter++) { |
|---|
| 367 | |
|---|
| 368 | const StemCandidate &sc1 = pscs1->at(iter); |
|---|
| 369 | int numSeq1 = sc1.GetNumSeq(); |
|---|
| 370 | for (int i = 0; i < wordLength; i++) { |
|---|
| 371 | |
|---|
| 372 | for(int k = 0; k < numSeq1; k++) { |
|---|
| 373 | // const Sequence *seq = seqs1->GetSequence(k); |
|---|
| 374 | string substr = sc1.GetSubstr(k); |
|---|
| 375 | string rvstr = sc1.GetRvstr(k); |
|---|
| 376 | |
|---|
| 377 | char sChar = substr[i]; |
|---|
| 378 | char rChar = rvstr[wordLength - 1 -i]; |
|---|
| 379 | |
|---|
| 380 | int type = returnBasePairType(sChar, rChar); |
|---|
| 381 | ++countConp1[i][type][iter]; |
|---|
| 382 | } |
|---|
| 383 | } |
|---|
| 384 | for(int k = 0; k < numSeq1; k++) { |
|---|
| 385 | // const Sequence *seq = seqs1->GetSequence(k); |
|---|
| 386 | string substr = sc1.GetSubstr(k); |
|---|
| 387 | string rvstr = sc1.GetRvstr(k); |
|---|
| 388 | |
|---|
| 389 | char sChar = substr[wordLength-1]; |
|---|
| 390 | char rChar = rvstr[0]; |
|---|
| 391 | |
|---|
| 392 | int type = returnBasePairType(sChar, rChar); |
|---|
| 393 | ++countContConp1[type][iter]; |
|---|
| 394 | |
|---|
| 395 | } |
|---|
| 396 | } |
|---|
| 397 | for(int iter = 1; iter < size2; iter++) { |
|---|
| 398 | const StemCandidate &sc2 = pscs2->at(iter); |
|---|
| 399 | int numSeq2 = sc2.GetNumSeq(); |
|---|
| 400 | for (int i = 0; i < wordLength; i++) { |
|---|
| 401 | |
|---|
| 402 | for(int k = 0; k < numSeq2; k++) { |
|---|
| 403 | // const Sequence *seq = seqs2->GetSequence(k); |
|---|
| 404 | string substr = sc2.GetSubstr(k); |
|---|
| 405 | string rvstr = sc2.GetRvstr(k); |
|---|
| 406 | |
|---|
| 407 | char sChar = substr[i]; |
|---|
| 408 | char rChar = rvstr[wordLength - 1 -i]; |
|---|
| 409 | |
|---|
| 410 | int type = returnBasePairType(sChar, rChar); |
|---|
| 411 | ++countConp2[i][type][iter]; |
|---|
| 412 | } |
|---|
| 413 | } |
|---|
| 414 | for(int k = 0; k < numSeq2; k++) { |
|---|
| 415 | // const Sequence *seq = seqs2->GetSequence(k); |
|---|
| 416 | string substr = sc2.GetSubstr(k); |
|---|
| 417 | string rvstr = sc2.GetRvstr(k); |
|---|
| 418 | |
|---|
| 419 | char sChar = substr[wordLength-1]; |
|---|
| 420 | char rChar = rvstr[0]; |
|---|
| 421 | |
|---|
| 422 | int type = returnBasePairType(sChar, rChar); |
|---|
| 423 | ++countContConp2[type][iter]; |
|---|
| 424 | |
|---|
| 425 | } |
|---|
| 426 | } |
|---|
| 427 | profileBPPMat1 = makeProfileBPPMatrix(seqs1); |
|---|
| 428 | profileBPPMat2 = makeProfileBPPMatrix(seqs2); |
|---|
| 429 | } |
|---|
| 430 | |
|---|
| 431 | void |
|---|
| 432 | Globaldp:: |
|---|
| 433 | DP() |
|---|
| 434 | { |
|---|
| 435 | int nPscs1 = pscs1->size(); |
|---|
| 436 | int nPscs2 = pscs2->size(); |
|---|
| 437 | |
|---|
| 438 | for(int i = 1; i < nPscs1; i++) { |
|---|
| 439 | const StemCandidate &sc1 = pscs1->at(i); |
|---|
| 440 | |
|---|
| 441 | for(int j = 1; j < nPscs2; j++) { |
|---|
| 442 | const StemCandidate &sc2 = pscs2->at(j); |
|---|
| 443 | |
|---|
| 444 | float max = IMPOSSIBLE; |
|---|
| 445 | int traceI = 0; |
|---|
| 446 | int traceJ = 0; |
|---|
| 447 | int alpha = sc1.GetContPos(), beta = sc2.GetContPos(); |
|---|
| 448 | if( (alpha > 0) && (beta > 0) ) { |
|---|
| 449 | max = VM[alpha][beta] + incrementalScorePSCPair(sc1, sc2); |
|---|
| 450 | traceI = alpha; |
|---|
| 451 | traceJ = beta; |
|---|
| 452 | } |
|---|
| 453 | |
|---|
| 454 | float similarity = scorePSCPair(sc1, sc2); |
|---|
| 455 | int p = sc1.GetBeforePos(), q = sc2.GetBeforePos(); |
|---|
| 456 | float tmpScore = VM[p][q] + similarity; |
|---|
| 457 | // cout << i << " " << j << " " << p << " " << q << " " << tmpScore << " " << VM[p][q] << " " << similarity << " " << endl; |
|---|
| 458 | |
|---|
| 459 | if(max <= tmpScore) { |
|---|
| 460 | max = tmpScore; |
|---|
| 461 | traceI = p; |
|---|
| 462 | traceJ = q; |
|---|
| 463 | } |
|---|
| 464 | |
|---|
| 465 | tmpScore = VG[p][q] + similarity; |
|---|
| 466 | if(max <= tmpScore) { |
|---|
| 467 | max = tmpScore; |
|---|
| 468 | traceI = traceGI[p][q]; |
|---|
| 469 | traceJ = traceGJ[p][q]; |
|---|
| 470 | } |
|---|
| 471 | |
|---|
| 472 | VM[i][j] = max; |
|---|
| 473 | traceMI[i][j] = traceI; |
|---|
| 474 | traceMJ[i][j] = traceJ; |
|---|
| 475 | |
|---|
| 476 | max = VM[i][j-1]; |
|---|
| 477 | traceI = i; |
|---|
| 478 | traceJ = j-1; |
|---|
| 479 | |
|---|
| 480 | tmpScore = VM[i-1][j]; |
|---|
| 481 | if(max <= tmpScore) { |
|---|
| 482 | max = tmpScore; |
|---|
| 483 | traceI = i-1; |
|---|
| 484 | traceJ = j; |
|---|
| 485 | } |
|---|
| 486 | |
|---|
| 487 | tmpScore = VG[i-1][j]; |
|---|
| 488 | if(max <= tmpScore) { |
|---|
| 489 | max = tmpScore; |
|---|
| 490 | traceI = traceGI[i-1][j]; |
|---|
| 491 | traceJ = traceGJ[i-1][j]; |
|---|
| 492 | } |
|---|
| 493 | |
|---|
| 494 | tmpScore = VG[i][j-1]; |
|---|
| 495 | if(max <= tmpScore) { |
|---|
| 496 | max = tmpScore; |
|---|
| 497 | traceI = traceGI[i][j-1]; |
|---|
| 498 | traceJ = traceGJ[i][j-1]; |
|---|
| 499 | } |
|---|
| 500 | |
|---|
| 501 | VG[i][j] = max; |
|---|
| 502 | traceGI[i][j] = traceI; |
|---|
| 503 | traceGJ[i][j] = traceJ; |
|---|
| 504 | } |
|---|
| 505 | } |
|---|
| 506 | } |
|---|
| 507 | |
|---|
| 508 | float |
|---|
| 509 | Globaldp:: |
|---|
| 510 | traceBack() |
|---|
| 511 | { |
|---|
| 512 | int nPscs1 = pscs1->size(); |
|---|
| 513 | int nPscs2 = pscs2->size(); |
|---|
| 514 | |
|---|
| 515 | float max = IMPOSSIBLE; |
|---|
| 516 | int traceI, traceJ; |
|---|
| 517 | for (int i = 1; i < nPscs1; i++) { |
|---|
| 518 | for (int j = 1; j < nPscs2; j++) { |
|---|
| 519 | if(max < VM[i][j]) { |
|---|
| 520 | max = VM[i][j]; |
|---|
| 521 | traceI = i; |
|---|
| 522 | traceJ = j; |
|---|
| 523 | } |
|---|
| 524 | } |
|---|
| 525 | } |
|---|
| 526 | |
|---|
| 527 | while ( (traceI != 0) && (traceJ != 0) ) { |
|---|
| 528 | matchPSCS1->push_back(traceI); matchPSCS2->push_back(traceJ); |
|---|
| 529 | int tmpI = traceMI[traceI][traceJ]; |
|---|
| 530 | int tmpJ = traceMJ[traceI][traceJ]; |
|---|
| 531 | traceI = tmpI; traceJ = tmpJ; |
|---|
| 532 | } |
|---|
| 533 | |
|---|
| 534 | if(traceI != 0 && traceJ != 0) { |
|---|
| 535 | std::cout << "error in trace back of pscs:" << traceI << " " << traceJ << endl; |
|---|
| 536 | } |
|---|
| 537 | |
|---|
| 538 | return max; |
|---|
| 539 | } |
|---|
| 540 | |
|---|
| 541 | void |
|---|
| 542 | Globaldp:: |
|---|
| 543 | printMatch() |
|---|
| 544 | { |
|---|
| 545 | int size = matchPSCS1->size(); |
|---|
| 546 | for(int iter = 0; iter < size; iter++) { |
|---|
| 547 | int i = matchPSCS1->at(iter); |
|---|
| 548 | int j = matchPSCS2->at(iter); |
|---|
| 549 | |
|---|
| 550 | const StemCandidate &sc1 = pscs1->at(i); |
|---|
| 551 | const StemCandidate &sc2 = pscs2->at(j); |
|---|
| 552 | |
|---|
| 553 | std::cout << iter << "\t" << sc1.GetPosition() << "\t" << sc1.GetRvposition() << "\t" << sc2.GetPosition() << "\t" << sc2.GetRvposition() << endl; |
|---|
| 554 | } |
|---|
| 555 | |
|---|
| 556 | } |
|---|
| 557 | } |
|---|