| 1 | |
|---|
| 2 | #include "phylip.h" |
|---|
| 3 | #include "seq.h" |
|---|
| 4 | |
|---|
| 5 | /* version 3.6. (c) Copyright 1994-2002 by the University of Washington. |
|---|
| 6 | Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. |
|---|
| 7 | Permission is granted to copy and use this program provided no fee is |
|---|
| 8 | charged for it and provided that this copyright notice is not removed. */ |
|---|
| 9 | |
|---|
| 10 | #define initialv 0.1 /* starting value of branch length */ |
|---|
| 11 | #define iterationsr 20 /* how many Newton iterations per distance */ |
|---|
| 12 | |
|---|
| 13 | extern sequence y; |
|---|
| 14 | |
|---|
| 15 | #ifndef OLDC |
|---|
| 16 | /* function prototypes */ |
|---|
| 17 | void restdist_inputnumbers(void); |
|---|
| 18 | void getoptions(void); |
|---|
| 19 | void allocrest(void); |
|---|
| 20 | void doinit(void); |
|---|
| 21 | void inputoptions(void); |
|---|
| 22 | void restdist_inputdata(void); |
|---|
| 23 | void restdist_sitesort(void); |
|---|
| 24 | void restdist_sitecombine(void); |
|---|
| 25 | void makeweights(void); |
|---|
| 26 | void makev(long, long, double *); |
|---|
| 27 | void makedists(void); |
|---|
| 28 | void writedists(void); |
|---|
| 29 | void getinput(void); |
|---|
| 30 | /* function prototypes */ |
|---|
| 31 | #endif |
|---|
| 32 | |
|---|
| 33 | |
|---|
| 34 | Char infilename[FNMLNGTH], outfilename[FNMLNGTH]; |
|---|
| 35 | long sites, weightsum, datasets, ith; |
|---|
| 36 | boolean restsites, neili, gama, weights, lower, |
|---|
| 37 | progress, mulsets, firstset; |
|---|
| 38 | double ttratio, fracchange, cvi, sitelength, xi, xv; |
|---|
| 39 | double **d; |
|---|
| 40 | steptr aliasweight; |
|---|
| 41 | char *progname; |
|---|
| 42 | Char ch; |
|---|
| 43 | |
|---|
| 44 | void restdist_inputnumbers() |
|---|
| 45 | { |
|---|
| 46 | /* read and print out numbers of species and sites */ |
|---|
| 47 | fscanf(infile, "%ld%ld", &spp, &sites); |
|---|
| 48 | } /* restdist_inputnumbers */ |
|---|
| 49 | |
|---|
| 50 | |
|---|
| 51 | void getoptions() |
|---|
| 52 | { |
|---|
| 53 | /* interactively set options */ |
|---|
| 54 | long loopcount, loopcount2; |
|---|
| 55 | Char ch; |
|---|
| 56 | |
|---|
| 57 | putchar('\n'); |
|---|
| 58 | sitelength = 6.0; |
|---|
| 59 | neili = false; |
|---|
| 60 | gama = false; |
|---|
| 61 | cvi = 0.0; |
|---|
| 62 | weights = false; |
|---|
| 63 | lower = false; |
|---|
| 64 | printdata = false; |
|---|
| 65 | progress = true; |
|---|
| 66 | restsites = true; |
|---|
| 67 | interleaved = true; |
|---|
| 68 | ttratio = 2.0; |
|---|
| 69 | loopcount = 0; |
|---|
| 70 | for (;;) { |
|---|
| 71 | cleerhome(); |
|---|
| 72 | printf("\nRestriction site or fragment distances, "); |
|---|
| 73 | printf("version %s\n\n",VERSION); |
|---|
| 74 | printf("Settings for this run:\n"); |
|---|
| 75 | printf(" R Restriction sites or fragments? %s\n", |
|---|
| 76 | (restsites ? "Sites" : "Fragments")); |
|---|
| 77 | if (!restsites) |
|---|
| 78 | printf(" N Original or modified Nei/Li model? %s\n", |
|---|
| 79 | (neili ? "Original" : "Modified")); |
|---|
| 80 | if (restsites || !neili) { |
|---|
| 81 | printf(" G Gamma distribution of rates among sites?"); |
|---|
| 82 | if (!gama) |
|---|
| 83 | printf(" No\n"); |
|---|
| 84 | else |
|---|
| 85 | printf(" Yes\n"); |
|---|
| 86 | printf(" T Transition/transversion ratio? %f\n", ttratio); |
|---|
| 87 | } |
|---|
| 88 | printf(" S Site length? %4.1f\n",sitelength); |
|---|
| 89 | printf(" L Form of distance matrix? %s\n", |
|---|
| 90 | (lower ? "Lower-triangular" : "Square")); |
|---|
| 91 | printf(" M Analyze multiple data sets?"); |
|---|
| 92 | if (mulsets) |
|---|
| 93 | printf(" Yes, %2ld sets\n", datasets); |
|---|
| 94 | else |
|---|
| 95 | printf(" No\n"); |
|---|
| 96 | printf(" I Input sequences interleaved? %s\n", |
|---|
| 97 | (interleaved ? "Yes" : "No, sequential")); |
|---|
| 98 | printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n", |
|---|
| 99 | ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)"); |
|---|
| 100 | printf(" 1 Print out the data at start of run? %s\n", |
|---|
| 101 | (printdata ? "Yes" : "No")); |
|---|
| 102 | printf(" 2 Print indications of progress of run? %s\n", |
|---|
| 103 | (progress ? "Yes" : "No")); |
|---|
| 104 | printf("\n Y to accept these or type the letter for one to change\n"); |
|---|
| 105 | scanf("%c%*[^\n]", &ch); |
|---|
| 106 | getchar(); |
|---|
| 107 | if (ch == '\n') |
|---|
| 108 | ch = ' '; |
|---|
| 109 | uppercase(&ch); |
|---|
| 110 | if (ch == 'Y') |
|---|
| 111 | break; |
|---|
| 112 | if (strchr("RDNGTSLMI012",ch) != NULL){ |
|---|
| 113 | switch (ch) { |
|---|
| 114 | |
|---|
| 115 | case 'R': |
|---|
| 116 | restsites = !restsites; |
|---|
| 117 | break; |
|---|
| 118 | |
|---|
| 119 | case 'G': |
|---|
| 120 | if (restsites || !neili) |
|---|
| 121 | gama = !gama; |
|---|
| 122 | break; |
|---|
| 123 | |
|---|
| 124 | case 'N': |
|---|
| 125 | if (!restsites) |
|---|
| 126 | neili = !neili; |
|---|
| 127 | break; |
|---|
| 128 | |
|---|
| 129 | case 'T': |
|---|
| 130 | if (restsites || !neili) |
|---|
| 131 | initratio(&ttratio); |
|---|
| 132 | break; |
|---|
| 133 | |
|---|
| 134 | case 'S': |
|---|
| 135 | loopcount2 = 0; |
|---|
| 136 | do { |
|---|
| 137 | printf("New Sitelength?\n"); |
|---|
| 138 | scanf("%lf%*[^\n]", &sitelength); |
|---|
| 139 | getchar(); |
|---|
| 140 | if (sitelength < 1.0) |
|---|
| 141 | printf("BAD RESTRICTION SITE LENGTH: %f\n", sitelength); |
|---|
| 142 | countup(&loopcount2, 10); |
|---|
| 143 | } while (sitelength < 1.0); |
|---|
| 144 | break; |
|---|
| 145 | |
|---|
| 146 | case 'L': |
|---|
| 147 | lower = !lower; |
|---|
| 148 | break; |
|---|
| 149 | |
|---|
| 150 | case 'M': |
|---|
| 151 | mulsets = !mulsets; |
|---|
| 152 | if (mulsets) |
|---|
| 153 | initdatasets(&datasets); |
|---|
| 154 | break; |
|---|
| 155 | |
|---|
| 156 | case 'I': |
|---|
| 157 | interleaved = !interleaved; |
|---|
| 158 | break; |
|---|
| 159 | |
|---|
| 160 | case '0': |
|---|
| 161 | initterminal(&ibmpc, &ansi); |
|---|
| 162 | break; |
|---|
| 163 | |
|---|
| 164 | case '1': |
|---|
| 165 | printdata = !printdata; |
|---|
| 166 | break; |
|---|
| 167 | |
|---|
| 168 | case '2': |
|---|
| 169 | progress = !progress; |
|---|
| 170 | break; |
|---|
| 171 | |
|---|
| 172 | } |
|---|
| 173 | } else |
|---|
| 174 | printf("Not a possible option!\n"); |
|---|
| 175 | countup(&loopcount, 100); |
|---|
| 176 | } |
|---|
| 177 | if (gama) { |
|---|
| 178 | loopcount = 0; |
|---|
| 179 | do { |
|---|
| 180 | printf( |
|---|
| 181 | "\nCoefficient of variation of substitution rate among sites (must be positive)?\n"); |
|---|
| 182 | scanf("%lf%*[^\n]", &cvi); |
|---|
| 183 | getchar(); |
|---|
| 184 | countup(&loopcount, 100); |
|---|
| 185 | } while (cvi <= 0.0); |
|---|
| 186 | cvi = 1.0 / (cvi * cvi); |
|---|
| 187 | printf("\n"); |
|---|
| 188 | } |
|---|
| 189 | xi = (ttratio - 0.5)/(ttratio + 0.5); |
|---|
| 190 | xv = 1.0 - xi; |
|---|
| 191 | fracchange = xi*0.5 + xv*0.75; |
|---|
| 192 | } /* getoptions */ |
|---|
| 193 | |
|---|
| 194 | |
|---|
| 195 | void allocrest() |
|---|
| 196 | { |
|---|
| 197 | long i; |
|---|
| 198 | |
|---|
| 199 | y = (Char **)Malloc(spp*sizeof(Char *)); |
|---|
| 200 | for (i = 0; i < spp; i++) |
|---|
| 201 | y[i] = (Char *)Malloc(sites*sizeof(Char)); |
|---|
| 202 | nayme = (naym *)Malloc(spp*sizeof(naym)); |
|---|
| 203 | weight = (steptr)Malloc((sites+1)*sizeof(long)); |
|---|
| 204 | alias = (steptr)Malloc((sites+1)*sizeof(long)); |
|---|
| 205 | aliasweight = (steptr)Malloc((sites+1)*sizeof(long)); |
|---|
| 206 | d = (double **)Malloc(spp*sizeof(double *)); |
|---|
| 207 | for (i = 0; i < spp; i++) |
|---|
| 208 | d[i] = (double*)Malloc(spp*sizeof(double)); |
|---|
| 209 | } /* allocrest */ |
|---|
| 210 | |
|---|
| 211 | |
|---|
| 212 | void doinit() |
|---|
| 213 | { |
|---|
| 214 | /* initializes variables */ |
|---|
| 215 | restdist_inputnumbers(); |
|---|
| 216 | getoptions(); |
|---|
| 217 | if (printdata) |
|---|
| 218 | fprintf(outfile, "\n %4ld Species, %4ld Sites\n", spp, sites); |
|---|
| 219 | allocrest(); |
|---|
| 220 | } /* doinit */ |
|---|
| 221 | |
|---|
| 222 | |
|---|
| 223 | void inputoptions() |
|---|
| 224 | { |
|---|
| 225 | /* read the options information */ |
|---|
| 226 | Char ch; |
|---|
| 227 | long i, extranum, cursp, curst; |
|---|
| 228 | |
|---|
| 229 | if (!firstset) { |
|---|
| 230 | if (eoln(infile)) |
|---|
| 231 | scan_eoln(infile); |
|---|
| 232 | fscanf(infile, "%ld%ld", &cursp, &curst); |
|---|
| 233 | if (cursp != spp) { |
|---|
| 234 | printf("\nERROR: INCONSISTENT NUMBER OF SPECIES IN DATA SET %4ld\n", |
|---|
| 235 | ith); |
|---|
| 236 | exxit(-1); |
|---|
| 237 | } |
|---|
| 238 | sites = curst; |
|---|
| 239 | } |
|---|
| 240 | for (i = 1; i <= sites; i++) |
|---|
| 241 | weight[i] = 1; |
|---|
| 242 | weightsum = sites; |
|---|
| 243 | extranum = 0; |
|---|
| 244 | fscanf(infile, "%*[ 0-9]"); |
|---|
| 245 | readoptions(&extranum, "W"); |
|---|
| 246 | for (i = 1; i <= extranum; i++) { |
|---|
| 247 | matchoptions(&ch, "W"); |
|---|
| 248 | inputweights2(1, sites+1, &weightsum, weight, &weights, "RESTDIST"); |
|---|
| 249 | } |
|---|
| 250 | } /* inputoptions */ |
|---|
| 251 | |
|---|
| 252 | |
|---|
| 253 | void restdist_inputdata() |
|---|
| 254 | { |
|---|
| 255 | /* read the species and sites data */ |
|---|
| 256 | long i, j, k, l, sitesread = 0, sitesnew = 0; |
|---|
| 257 | Char ch; |
|---|
| 258 | boolean allread, done; |
|---|
| 259 | |
|---|
| 260 | if (printdata) |
|---|
| 261 | putc('\n', outfile); |
|---|
| 262 | j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; |
|---|
| 263 | if (j < nmlngth - 1) |
|---|
| 264 | j = nmlngth - 1; |
|---|
| 265 | if (j > 39) |
|---|
| 266 | j = 39; |
|---|
| 267 | if (printdata) { |
|---|
| 268 | fprintf(outfile, "Name"); |
|---|
| 269 | for (i = 1; i <= j; i++) |
|---|
| 270 | putc(' ', outfile); |
|---|
| 271 | fprintf(outfile, "Sites\n"); |
|---|
| 272 | fprintf(outfile, "----"); |
|---|
| 273 | for (i = 1; i <= j; i++) |
|---|
| 274 | putc(' ', outfile); |
|---|
| 275 | fprintf(outfile, "-----\n\n"); |
|---|
| 276 | } |
|---|
| 277 | sitesread = 0; |
|---|
| 278 | allread = false; |
|---|
| 279 | while (!(allread)) { |
|---|
| 280 | allread = true; |
|---|
| 281 | if (eoln(infile)) |
|---|
| 282 | scan_eoln(infile); |
|---|
| 283 | i = 1; |
|---|
| 284 | while (i <= spp ) { |
|---|
| 285 | if ((interleaved && sitesread == 0) || !interleaved) |
|---|
| 286 | initname(i - 1); |
|---|
| 287 | if (interleaved) |
|---|
| 288 | j = sitesread; |
|---|
| 289 | else |
|---|
| 290 | j = 0; |
|---|
| 291 | done = false; |
|---|
| 292 | while (!done && !eoff(infile)) { |
|---|
| 293 | if (interleaved) |
|---|
| 294 | done = true; |
|---|
| 295 | while (j < sites && !(eoln(infile) || eoff(infile))) { |
|---|
| 296 | ch = gettc(infile); |
|---|
| 297 | if (ch == '\n') |
|---|
| 298 | ch = ' '; |
|---|
| 299 | if (ch == ' ') |
|---|
| 300 | continue; |
|---|
| 301 | uppercase(&ch); |
|---|
| 302 | if (ch != '1' && ch != '0' && ch != '+' && ch != '-' && ch != '?') { |
|---|
| 303 | printf(" ERROR -- Bad symbol %c",ch); |
|---|
| 304 | printf(" at position %ld of species %ld\n", j+1, i); |
|---|
| 305 | exxit(-1); |
|---|
| 306 | } |
|---|
| 307 | if (ch == '1') |
|---|
| 308 | ch = '+'; |
|---|
| 309 | if (ch == '0') |
|---|
| 310 | ch = '-'; |
|---|
| 311 | j++; |
|---|
| 312 | y[i - 1][j - 1] = ch; |
|---|
| 313 | } |
|---|
| 314 | if (interleaved) |
|---|
| 315 | continue; |
|---|
| 316 | if (j < sites) |
|---|
| 317 | scan_eoln(infile); |
|---|
| 318 | else if (j == sites) |
|---|
| 319 | done = true; |
|---|
| 320 | } |
|---|
| 321 | if (interleaved && i == 1) |
|---|
| 322 | sitesnew = j; |
|---|
| 323 | scan_eoln(infile); |
|---|
| 324 | if ((interleaved && j != sitesnew ) || ((!interleaved) && j != sites)){ |
|---|
| 325 | printf("ERROR: SEQUENCES OUT OF ALIGNMENT\n"); |
|---|
| 326 | exxit(-1);} |
|---|
| 327 | i++; |
|---|
| 328 | } |
|---|
| 329 | if (interleaved) { |
|---|
| 330 | sitesread = sitesnew; |
|---|
| 331 | allread = (sitesread == sites); |
|---|
| 332 | } else |
|---|
| 333 | allread = (i > spp); |
|---|
| 334 | } |
|---|
| 335 | if (printdata) { |
|---|
| 336 | for (i = 1; i <= ((sites - 1) / 60 + 1); i++) { |
|---|
| 337 | for (j = 0; j < spp; j++) { |
|---|
| 338 | for (k = 0; k < nmlngth; k++) |
|---|
| 339 | putc(nayme[j][k], outfile); |
|---|
| 340 | fprintf(outfile, " "); |
|---|
| 341 | l = i * 60; |
|---|
| 342 | if (l > sites) |
|---|
| 343 | l = sites; |
|---|
| 344 | for (k = (i - 1) * 60 + 1; k <= l; k++) { |
|---|
| 345 | putc(y[j][k - 1], outfile); |
|---|
| 346 | if (k % 10 == 0 && k % 60 != 0) |
|---|
| 347 | putc(' ', outfile); |
|---|
| 348 | } |
|---|
| 349 | putc('\n', outfile); |
|---|
| 350 | } |
|---|
| 351 | putc('\n', outfile); |
|---|
| 352 | } |
|---|
| 353 | putc('\n', outfile); |
|---|
| 354 | } |
|---|
| 355 | } /* restdist_inputdata */ |
|---|
| 356 | |
|---|
| 357 | |
|---|
| 358 | void restdist_sitesort() |
|---|
| 359 | { |
|---|
| 360 | /* Shell sort keeping alias, aliasweight in same order */ |
|---|
| 361 | long gap, i, j, jj, jg, k, itemp; |
|---|
| 362 | boolean flip, tied; |
|---|
| 363 | |
|---|
| 364 | gap = sites / 2; |
|---|
| 365 | while (gap > 0) { |
|---|
| 366 | for (i = gap + 1; i <= sites; i++) { |
|---|
| 367 | j = i - gap; |
|---|
| 368 | flip = true; |
|---|
| 369 | while (j > 0 && flip) { |
|---|
| 370 | jj = alias[j]; |
|---|
| 371 | jg = alias[j + gap]; |
|---|
| 372 | flip = false; |
|---|
| 373 | tied = true; |
|---|
| 374 | k = 1; |
|---|
| 375 | while (k <= spp && tied) { |
|---|
| 376 | flip = (y[k - 1][jj - 1] > y[k - 1][jg - 1]); |
|---|
| 377 | tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]); |
|---|
| 378 | k++; |
|---|
| 379 | } |
|---|
| 380 | if (tied) { |
|---|
| 381 | aliasweight[j] += aliasweight[j + gap]; |
|---|
| 382 | aliasweight[j + gap] = 0; |
|---|
| 383 | } |
|---|
| 384 | if (!flip) |
|---|
| 385 | break; |
|---|
| 386 | itemp = alias[j]; |
|---|
| 387 | alias[j] = alias[j + gap]; |
|---|
| 388 | alias[j + gap] = itemp; |
|---|
| 389 | itemp = aliasweight[j]; |
|---|
| 390 | aliasweight[j] = aliasweight[j + gap]; |
|---|
| 391 | aliasweight[j + gap] = itemp; |
|---|
| 392 | j -= gap; |
|---|
| 393 | } |
|---|
| 394 | } |
|---|
| 395 | gap /= 2; |
|---|
| 396 | } |
|---|
| 397 | } /* restdist_sitesort */ |
|---|
| 398 | |
|---|
| 399 | |
|---|
| 400 | void restdist_sitecombine() |
|---|
| 401 | { |
|---|
| 402 | /* combine sites that have identical patterns */ |
|---|
| 403 | long i, j, k; |
|---|
| 404 | boolean tied; |
|---|
| 405 | |
|---|
| 406 | i = 1; |
|---|
| 407 | while (i < sites) { |
|---|
| 408 | j = i + 1; |
|---|
| 409 | tied = true; |
|---|
| 410 | while (j <= sites && tied) { |
|---|
| 411 | k = 1; |
|---|
| 412 | while (k <= spp && tied) { |
|---|
| 413 | tied = (tied && y[k - 1][alias[i] - 1] == y[k - 1][alias[j] - 1]); |
|---|
| 414 | k++; |
|---|
| 415 | } |
|---|
| 416 | if (tied && aliasweight[j] > 0) { |
|---|
| 417 | aliasweight[i] += aliasweight[j]; |
|---|
| 418 | aliasweight[j] = 0; |
|---|
| 419 | alias[j] = alias[i]; |
|---|
| 420 | } |
|---|
| 421 | j++; |
|---|
| 422 | } |
|---|
| 423 | i = j - 1; |
|---|
| 424 | } |
|---|
| 425 | } /* restdist_sitecombine */ |
|---|
| 426 | |
|---|
| 427 | |
|---|
| 428 | void makeweights() |
|---|
| 429 | { |
|---|
| 430 | /* make up weights vector to avoid duplicate computations */ |
|---|
| 431 | long i; |
|---|
| 432 | |
|---|
| 433 | for (i = 1; i <= sites; i++) { |
|---|
| 434 | alias[i] = i; |
|---|
| 435 | aliasweight[i] = weight[i]; |
|---|
| 436 | } |
|---|
| 437 | restdist_sitesort(); |
|---|
| 438 | restdist_sitecombine(); |
|---|
| 439 | sitescrunch2(sites + 1, 2, 3, aliasweight); |
|---|
| 440 | for (i = 1; i <= sites; i++) { |
|---|
| 441 | weight[i] = aliasweight[i]; |
|---|
| 442 | if (weight[i] > 0) |
|---|
| 443 | endsite = i; |
|---|
| 444 | } |
|---|
| 445 | weight[0] = 1; |
|---|
| 446 | } /* makeweights */ |
|---|
| 447 | |
|---|
| 448 | |
|---|
| 449 | void makev(long m, long n, double *v) |
|---|
| 450 | { |
|---|
| 451 | /* compute one distance */ |
|---|
| 452 | long i, ii, it, numerator, denominator; |
|---|
| 453 | double f, g=0, h, p1, p2, p3, q1, pp, tt, delta, vv, slope; |
|---|
| 454 | |
|---|
| 455 | numerator = 0; |
|---|
| 456 | denominator = 0; |
|---|
| 457 | for (i = 0; i < endsite; i++) { |
|---|
| 458 | ii = alias[i]; |
|---|
| 459 | if ((y[m-1][ii-1] == '+') || (y[n-1][ii-1] == '+')) { |
|---|
| 460 | denominator += weight[i]; |
|---|
| 461 | if ((y[m-1][ii-1] == '+') && (y[n-1][ii-1] == '+')) { |
|---|
| 462 | numerator += weight[i]; |
|---|
| 463 | } |
|---|
| 464 | } |
|---|
| 465 | } |
|---|
| 466 | f = 2*numerator/(double)(denominator+numerator); |
|---|
| 467 | if (restsites) { |
|---|
| 468 | if (exp(-sitelength*1.38629436) > f) { |
|---|
| 469 | printf("\nERROR: Infinite distance between "); |
|---|
| 470 | printf(" species %3ld and %3ld\n", m, n); |
|---|
| 471 | exxit(-1); |
|---|
| 472 | } |
|---|
| 473 | } |
|---|
| 474 | if (!restsites) { |
|---|
| 475 | if (!neili) { |
|---|
| 476 | f = (sqrt(f*(f+8.0))-f)/2.0; |
|---|
| 477 | } |
|---|
| 478 | else { |
|---|
| 479 | g = initialv; |
|---|
| 480 | h = g; |
|---|
| 481 | delta = g; |
|---|
| 482 | it = 0; |
|---|
| 483 | while (fabs(delta) > 0.00002 && it < iterationsr) { |
|---|
| 484 | it++; |
|---|
| 485 | h = g; |
|---|
| 486 | g = exp(0.25*log(f * (3-2*g))); |
|---|
| 487 | delta = g - h; |
|---|
| 488 | } |
|---|
| 489 | } |
|---|
| 490 | } |
|---|
| 491 | if ((!restsites) && neili) |
|---|
| 492 | vv = - (2.0/sitelength) * log(g); |
|---|
| 493 | else { |
|---|
| 494 | pp = exp(log(f)/sitelength); |
|---|
| 495 | delta = initialv; |
|---|
| 496 | tt = delta; |
|---|
| 497 | it = 0; |
|---|
| 498 | while (fabs(delta) > 0.00002 && it < iterationsr) { |
|---|
| 499 | it++; |
|---|
| 500 | if (gama) { |
|---|
| 501 | p1 = exp(-cvi * log(1 + tt / cvi)); |
|---|
| 502 | p2 = exp(-cvi * log(1 + xv * tt / cvi)) |
|---|
| 503 | - exp(-cvi * log(1 + tt / cvi)); |
|---|
| 504 | p3 = 1.0 - exp(-cvi * log(1 + xv * tt / cvi)); |
|---|
| 505 | } else { |
|---|
| 506 | p1 = exp(-tt); |
|---|
| 507 | p2 = exp(-xv * tt) - exp(-tt); |
|---|
| 508 | p3 = 1.0 - exp(-xv * tt); |
|---|
| 509 | } |
|---|
| 510 | q1 = p1 + p2 / 2.0 + p3 / 4.0; |
|---|
| 511 | g = q1 - pp; |
|---|
| 512 | if (gama) |
|---|
| 513 | slope = - 0.5 * (1 / (1 + tt / cvi)) * exp(-cvi * log(1 + tt / cvi)) |
|---|
| 514 | - 0.25 * (xv / (1 + xv * tt / cvi)) * |
|---|
| 515 | exp(-cvi * log(1 + xv * tt / cvi)); |
|---|
| 516 | else |
|---|
| 517 | slope = - 0.5*exp(-tt) - 0.25*exp(-xv*tt); |
|---|
| 518 | if (g < 0.0) |
|---|
| 519 | delta = fabs(delta) / -2.0; |
|---|
| 520 | else |
|---|
| 521 | delta = fabs(delta); |
|---|
| 522 | tt += delta; |
|---|
| 523 | } |
|---|
| 524 | vv = fracchange * tt; |
|---|
| 525 | } |
|---|
| 526 | *v = vv; |
|---|
| 527 | } /* makev */ |
|---|
| 528 | |
|---|
| 529 | |
|---|
| 530 | void makedists() |
|---|
| 531 | { |
|---|
| 532 | /* compute distance matrix */ |
|---|
| 533 | long i, j; |
|---|
| 534 | double v; |
|---|
| 535 | |
|---|
| 536 | if (progress) |
|---|
| 537 | printf("Distances calculated for species\n"); |
|---|
| 538 | for (i = 0; i < spp; i++) |
|---|
| 539 | d[i][i] = 0.0; |
|---|
| 540 | for (i = 1; i < spp; i++) { |
|---|
| 541 | if (progress) { |
|---|
| 542 | printf(" "); |
|---|
| 543 | for (j = 0; j < nmlngth; j++) |
|---|
| 544 | putchar(nayme[i - 1][j]); |
|---|
| 545 | printf(" "); |
|---|
| 546 | } |
|---|
| 547 | for (j = i + 1; j <= spp; j++) { |
|---|
| 548 | makev(i, j, &v); |
|---|
| 549 | d[i - 1][j - 1] = v; |
|---|
| 550 | d[j - 1][i - 1] = v; |
|---|
| 551 | if (progress) |
|---|
| 552 | putchar('.'); |
|---|
| 553 | } |
|---|
| 554 | if (progress) |
|---|
| 555 | putchar('\n'); |
|---|
| 556 | } |
|---|
| 557 | if (progress) { |
|---|
| 558 | printf(" "); |
|---|
| 559 | for (j = 0; j < nmlngth; j++) |
|---|
| 560 | putchar(nayme[spp - 1][j]); |
|---|
| 561 | putchar('\n'); |
|---|
| 562 | } |
|---|
| 563 | } /* makedists */ |
|---|
| 564 | |
|---|
| 565 | |
|---|
| 566 | void writedists() |
|---|
| 567 | { |
|---|
| 568 | /* write out distances */ |
|---|
| 569 | long i, j, k; |
|---|
| 570 | |
|---|
| 571 | if (!printdata) |
|---|
| 572 | fprintf(outfile, "%5ld\n", spp); |
|---|
| 573 | for (i = 0; i < spp; i++) { |
|---|
| 574 | for (j = 0; j < nmlngth; j++) |
|---|
| 575 | putc(nayme[i][j], outfile); |
|---|
| 576 | if (lower) |
|---|
| 577 | k = i; |
|---|
| 578 | else |
|---|
| 579 | k = spp; |
|---|
| 580 | for (j = 1; j <= k; j++) { |
|---|
| 581 | fprintf(outfile, "%8.4f", d[i][j - 1]); |
|---|
| 582 | if ((j + 1) % 9 == 0 && j < k) |
|---|
| 583 | putc('\n', outfile); |
|---|
| 584 | } |
|---|
| 585 | putc('\n', outfile); |
|---|
| 586 | } |
|---|
| 587 | if (progress) |
|---|
| 588 | printf("\nDistances written to file \"%s\"\n\n", outfilename); |
|---|
| 589 | } /* writedists */ |
|---|
| 590 | |
|---|
| 591 | |
|---|
| 592 | void getinput() |
|---|
| 593 | { |
|---|
| 594 | /* reads the input data */ |
|---|
| 595 | inputoptions(); |
|---|
| 596 | restdist_inputdata(); |
|---|
| 597 | makeweights(); |
|---|
| 598 | } /* getinput */ |
|---|
| 599 | |
|---|
| 600 | |
|---|
| 601 | int main(int argc, Char *argv[]) |
|---|
| 602 | { /* distances from restriction sites or fragments */ |
|---|
| 603 | #ifdef MAC |
|---|
| 604 | argc = 1; /* macsetup("Restdist",""); */ |
|---|
| 605 | argv[0] = "Restdist"; |
|---|
| 606 | #endif |
|---|
| 607 | init(argc,argv); |
|---|
| 608 | progname = argv[0]; |
|---|
| 609 | openfile(&infile,INFILE,"input data file","r",argv[0],infilename); |
|---|
| 610 | openfile(&outfile,OUTFILE,"output file","w",argv[0],outfilename); |
|---|
| 611 | ibmpc = IBMCRT; |
|---|
| 612 | ansi = ANSICRT; |
|---|
| 613 | mulsets = false; |
|---|
| 614 | datasets = 1; |
|---|
| 615 | firstset = true; |
|---|
| 616 | doinit(); |
|---|
| 617 | for (ith = 1; ith <= datasets; ith++) { |
|---|
| 618 | getinput(); |
|---|
| 619 | if (ith == 1) |
|---|
| 620 | firstset = false; |
|---|
| 621 | if (datasets > 1 && progress) |
|---|
| 622 | printf("\nData set # %ld:\n\n",ith); |
|---|
| 623 | makedists(); |
|---|
| 624 | writedists(); |
|---|
| 625 | } |
|---|
| 626 | FClose(infile); |
|---|
| 627 | FClose(outfile); |
|---|
| 628 | #ifdef MAC |
|---|
| 629 | fixmacfile(outfilename); |
|---|
| 630 | #endif |
|---|
| 631 | printf("Done.\n\n"); |
|---|
| 632 | return 0; |
|---|
| 633 | } /* distances from restriction sites or fragments */ |
|---|