| 1 | /* |
|---|
| 2 | |
|---|
| 3 | PHYML : a program that computes maximum likelihood phylogenies from |
|---|
| 4 | DNA or AA homologous sequences |
|---|
| 5 | |
|---|
| 6 | Copyright (C) Stephane Guindon. Oct 2003 onward |
|---|
| 7 | |
|---|
| 8 | All parts of the source except where indicated are distributed under |
|---|
| 9 | the GNU public licence. See http://www.opensource.org for details. |
|---|
| 10 | |
|---|
| 11 | */ |
|---|
| 12 | |
|---|
| 13 | #include "utilities.h" |
|---|
| 14 | #include "lk.h" |
|---|
| 15 | #include "optimiz.h" |
|---|
| 16 | #include "bionj.h" |
|---|
| 17 | #include "models.h" |
|---|
| 18 | #include "free.h" |
|---|
| 19 | #include "options.h" |
|---|
| 20 | #include "simu.h" |
|---|
| 21 | |
|---|
| 22 | |
|---|
| 23 | #ifdef PHYML |
|---|
| 24 | |
|---|
| 25 | /* int T_MAX_FILE; */ |
|---|
| 26 | |
|---|
| 27 | int main(int argc, char **argv) |
|---|
| 28 | { |
|---|
| 29 | seq **data; |
|---|
| 30 | allseq *alldata; |
|---|
| 31 | option *input; |
|---|
| 32 | char *s_tree, *s_any; |
|---|
| 33 | FILE *fp_phyml_tree,*fp_phyml_stats,*fp_phyml_lk,*fp_best_tree,*fp_best_tree_stats; |
|---|
| 34 | arbre *tree; |
|---|
| 35 | int n_otu, n_data_sets; |
|---|
| 36 | matrix *mat; |
|---|
| 37 | model *mod; |
|---|
| 38 | time_t t_beg,t_end; |
|---|
| 39 | div_t hour,min; |
|---|
| 40 | int num_tree,tree_line_number; |
|---|
| 41 | double best_loglk; |
|---|
| 42 | |
|---|
| 43 | |
|---|
| 44 | srand(time(NULL)); |
|---|
| 45 | |
|---|
| 46 | tree = NULL; |
|---|
| 47 | mod = NULL; |
|---|
| 48 | |
|---|
| 49 | /* Init_Constant(); */ |
|---|
| 50 | |
|---|
| 51 | s_any = (char *)mCalloc(T_MAX_FILE,sizeof(char)); |
|---|
| 52 | |
|---|
| 53 | fflush(NULL); |
|---|
| 54 | |
|---|
| 55 | input = (option *)Get_Input(argc,argv); |
|---|
| 56 | |
|---|
| 57 | Make_Model_Complete(input->mod); |
|---|
| 58 | |
|---|
| 59 | mod = input->mod; |
|---|
| 60 | |
|---|
| 61 | fp_phyml_stats = Openfile(input->phyml_stat_file,input->phyml_stat_file_open_mode); |
|---|
| 62 | |
|---|
| 63 | fprintf(fp_phyml_stats,"\n- PHYML %s -\n\n", VERSION); |
|---|
| 64 | |
|---|
| 65 | fp_phyml_tree = Openfile(input->phyml_tree_file,input->phyml_tree_file_open_mode); |
|---|
| 66 | |
|---|
| 67 | n_data_sets = 0; |
|---|
| 68 | |
|---|
| 69 | fp_phyml_lk = fopen(input->phyml_lk_file,"w"); |
|---|
| 70 | |
|---|
| 71 | if(input->inputtree) Test_Multiple_Data_Set_Format(input); |
|---|
| 72 | else input->n_trees = 1; |
|---|
| 73 | |
|---|
| 74 | /* if(input->n_data_sets > 1) input->n_trees = 1; */ |
|---|
| 75 | |
|---|
| 76 | best_loglk = UNLIKELY; |
|---|
| 77 | tree_line_number = 0; |
|---|
| 78 | |
|---|
| 79 | do |
|---|
| 80 | { |
|---|
| 81 | |
|---|
| 82 | n_data_sets++; |
|---|
| 83 | |
|---|
| 84 | time(&t_beg); |
|---|
| 85 | |
|---|
| 86 | n_otu = 0; |
|---|
| 87 | |
|---|
| 88 | if(n_data_sets > input->n_data_sets) |
|---|
| 89 | { |
|---|
| 90 | data = NULL; |
|---|
| 91 | } |
|---|
| 92 | else |
|---|
| 93 | { |
|---|
| 94 | data = Get_Seq(input,0); |
|---|
| 95 | } |
|---|
| 96 | |
|---|
| 97 | if(data) |
|---|
| 98 | { |
|---|
| 99 | if(n_data_sets > 1) printf("\n. Data set [#%d]\n",n_data_sets); |
|---|
| 100 | |
|---|
| 101 | printf("\n. Compressing sequences...\n"); |
|---|
| 102 | |
|---|
| 103 | alldata = Compact_Seq(data,input); |
|---|
| 104 | |
|---|
| 105 | Free_Seq(data,alldata->n_otu); |
|---|
| 106 | |
|---|
| 107 | Init_Model(alldata,mod); |
|---|
| 108 | |
|---|
| 109 | Check_Ambiguities(alldata,input->mod->datatype,input->mod->stepsize); |
|---|
| 110 | |
|---|
| 111 | |
|---|
| 112 | For(num_tree,input->n_trees) |
|---|
| 113 | { |
|---|
| 114 | if(!input->inputtree) |
|---|
| 115 | { |
|---|
| 116 | printf("\n. Computing pairwise distances...\n"); |
|---|
| 117 | |
|---|
| 118 | mat = ML_Dist(alldata,mod); |
|---|
| 119 | |
|---|
| 120 | printf("\n. Building BIONJ tree...\n"); |
|---|
| 121 | |
|---|
| 122 | mat->tree = Make_Tree(alldata); |
|---|
| 123 | |
|---|
| 124 | Bionj(mat); |
|---|
| 125 | |
|---|
| 126 | tree = mat->tree; |
|---|
| 127 | |
|---|
| 128 | Free_Mat(mat); |
|---|
| 129 | } |
|---|
| 130 | else |
|---|
| 131 | { |
|---|
| 132 | if(input->n_trees > 1) printf("\n. Reading user tree [#%d]\n",tree_line_number+1); |
|---|
| 133 | else printf("\n. Reading user tree...\n"); |
|---|
| 134 | |
|---|
| 135 | if(input->n_trees == 1) |
|---|
| 136 | { |
|---|
| 137 | rewind(input->fp_input_tree); |
|---|
| 138 | tree_line_number = 0; |
|---|
| 139 | } |
|---|
| 140 | |
|---|
| 141 | tree = Read_Tree_File(input->fp_input_tree); |
|---|
| 142 | |
|---|
| 143 | tree_line_number++; |
|---|
| 144 | |
|---|
| 145 | if(!tree) |
|---|
| 146 | { |
|---|
| 147 | printf("\n. Missing tree for data set #%d\n",n_data_sets); |
|---|
| 148 | printf(" This data set is not analyzed.\n"); |
|---|
| 149 | data = NULL; |
|---|
| 150 | } |
|---|
| 151 | |
|---|
| 152 | if(!tree->has_branch_lengths) |
|---|
| 153 | { |
|---|
| 154 | printf("\n. Computing branch length estimates...\n"); |
|---|
| 155 | |
|---|
| 156 | Order_Tree_CSeq(tree,alldata); |
|---|
| 157 | |
|---|
| 158 | mat = ML_Dist(alldata,mod); |
|---|
| 159 | |
|---|
| 160 | mat->tree = tree; |
|---|
| 161 | |
|---|
| 162 | mat->method = 0; |
|---|
| 163 | |
|---|
| 164 | Bionj_Br_Length(mat); |
|---|
| 165 | |
|---|
| 166 | Free_Mat(mat); |
|---|
| 167 | } |
|---|
| 168 | } |
|---|
| 169 | |
|---|
| 170 | if(!tree) continue; |
|---|
| 171 | |
|---|
| 172 | |
|---|
| 173 | tree->mod = mod; |
|---|
| 174 | tree->input = input; |
|---|
| 175 | tree->data = alldata; |
|---|
| 176 | tree->both_sides = 1; |
|---|
| 177 | tree->n_pattern = tree->data->crunch_len/tree->mod->stepsize; |
|---|
| 178 | |
|---|
| 179 | Order_Tree_CSeq(tree,alldata); |
|---|
| 180 | |
|---|
| 181 | Make_Tree_4_Lk(tree,alldata,alldata->init_len); |
|---|
| 182 | |
|---|
| 183 | if(tree->mod->s_opt->opt_topo) |
|---|
| 184 | Simu(tree,1000); |
|---|
| 185 | else |
|---|
| 186 | { |
|---|
| 187 | if(tree->mod->s_opt->opt_free_param) |
|---|
| 188 | Round_Optimize(tree,tree->data); |
|---|
| 189 | else |
|---|
| 190 | { |
|---|
| 191 | Lk(tree,tree->data); |
|---|
| 192 | printf("\n. Log(lk) : * -> %15.6f ",tree->tot_loglk); |
|---|
| 193 | } |
|---|
| 194 | } |
|---|
| 195 | |
|---|
| 196 | if(tree->mod->bootstrap) Bootstrap(tree); |
|---|
| 197 | |
|---|
| 198 | Update_BrLen_Invar(tree); |
|---|
| 199 | |
|---|
| 200 | s_tree = Write_Tree(tree); |
|---|
| 201 | |
|---|
| 202 | fprintf(fp_phyml_tree,"%s\n",s_tree); |
|---|
| 203 | |
|---|
| 204 | Free(s_tree); |
|---|
| 205 | |
|---|
| 206 | Unconstraint_Lk(tree); |
|---|
| 207 | |
|---|
| 208 | time(&t_end); |
|---|
| 209 | |
|---|
| 210 | hour = div(t_end-t_beg,3600); |
|---|
| 211 | min = div(t_end-t_beg,60 ); |
|---|
| 212 | |
|---|
| 213 | min.quot -= hour.quot*60; |
|---|
| 214 | |
|---|
| 215 | if (input->n_data_sets==1) |
|---|
| 216 | Print_Fp_Out(fp_phyml_stats, t_beg, t_end, tree, input, n_data_sets); |
|---|
| 217 | else |
|---|
| 218 | Print_Fp_Out_Lines(fp_phyml_stats, t_beg, t_end, tree, input, n_data_sets); |
|---|
| 219 | |
|---|
| 220 | printf("\n\n. Time used %dh%dm%ds\n", hour.quot,min.quot,(int)(t_end-t_beg)%60); |
|---|
| 221 | printf("\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n\n"); |
|---|
| 222 | |
|---|
| 223 | Print_Site_Lk(tree,fp_phyml_lk); |
|---|
| 224 | /* fprintf(fp_phyml_lk,"%f\n",tree->tot_loglk); */ |
|---|
| 225 | |
|---|
| 226 | |
|---|
| 227 | if((input->n_data_sets == 1) && |
|---|
| 228 | (input->n_trees > 1) && |
|---|
| 229 | (tree->tot_loglk > best_loglk)) |
|---|
| 230 | { |
|---|
| 231 | |
|---|
| 232 | best_loglk = tree->tot_loglk; |
|---|
| 233 | strcpy(s_any,input->seqfile); |
|---|
| 234 | fp_best_tree = fopen(s_any = strcat(s_any,"_phyml_best_tree.txt"),"w"); |
|---|
| 235 | s_tree = Write_Tree(tree); |
|---|
| 236 | fprintf(fp_best_tree,"%s\n",s_tree); |
|---|
| 237 | Free(s_tree); |
|---|
| 238 | strcpy(s_any,input->seqfile); |
|---|
| 239 | fp_best_tree_stats = fopen(s_any = strcat(s_any,"_phyml_best_stat.txt"),"w"); |
|---|
| 240 | Print_Fp_Out(fp_best_tree_stats, |
|---|
| 241 | t_beg, |
|---|
| 242 | t_end, |
|---|
| 243 | tree, |
|---|
| 244 | input, |
|---|
| 245 | n_data_sets); |
|---|
| 246 | |
|---|
| 247 | fclose(fp_best_tree); |
|---|
| 248 | fclose(fp_best_tree_stats); |
|---|
| 249 | } |
|---|
| 250 | |
|---|
| 251 | Free_Tree_Lk(tree); |
|---|
| 252 | |
|---|
| 253 | Free_Tree(tree); |
|---|
| 254 | |
|---|
| 255 | if(input->n_data_sets > 1) |
|---|
| 256 | { |
|---|
| 257 | break; |
|---|
| 258 | } |
|---|
| 259 | } |
|---|
| 260 | Free_Cseq(alldata); |
|---|
| 261 | } |
|---|
| 262 | }while(data); |
|---|
| 263 | |
|---|
| 264 | Free_Model(mod); |
|---|
| 265 | |
|---|
| 266 | if(input->fp_seq ) fclose(input->fp_seq ); |
|---|
| 267 | if(input->fp_input_tree) fclose(input->fp_input_tree); |
|---|
| 268 | |
|---|
| 269 | fclose(fp_phyml_lk); |
|---|
| 270 | |
|---|
| 271 | fclose(fp_phyml_tree); |
|---|
| 272 | |
|---|
| 273 | fclose(fp_phyml_stats); |
|---|
| 274 | |
|---|
| 275 | Free_Input(input); |
|---|
| 276 | |
|---|
| 277 | Free(s_any); |
|---|
| 278 | |
|---|
| 279 | return 0; |
|---|
| 280 | } |
|---|
| 281 | |
|---|
| 282 | #endif |
|---|
| 283 | /*********************************************************/ |
|---|
| 284 | |
|---|