source: branches/species/PARSIMONY/PARS_main.cxx

Last change on this file was 19654, checked in by westram, 5 weeks ago
  • reintegrates 'macros' into 'trunk'
    • improves program termination (#867)
      • introduces MacroExitor classes
        • handles confirmation (to quit)
        • waits for macros to finish, then exits w/o confirmation
        • provides specialized termination for different programs via derived classes
          • has been implemented for "normal" arb and merge-tool.
      • introduces ARB_disconnect_from_db
        • generalizes code to terminate all interconnections between GUI, database and macro-ability
        • allow to install atdisconnect-callbacks
          • usable by modules operating on a database; allow to inform module that database will vanish.
        • now used by all arb applications to disconnect from all their database(s), except the properties.
    • fixes some broken behavior
      • merge-tool
        • crashed when quitting via macro
        • wrong restarts, if originally started with arguments,
      • importer
        • failed to record/playback macros
        • crashed in modules operating on the temporary import database
      • database browser
        • crashed on disappearing database
  • adds: log:branches/macros@19620:19653
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 125.5 KB
Line 
1// =============================================================== //
2//                                                                 //
3//   File      : PARS_main.cxx                                     //
4//   Purpose   : arb parsimony main class                          //
5//                                                                 //
6//   Institute of Microbiology (Technical University Munich)       //
7//   http://www.arb-home.de/                                       //
8//                                                                 //
9// =============================================================== //
10
11#include "PerfMeter.h"
12#include "pars_main.hxx"
13#include "pars_klprops.hxx"
14#include "pars_awars.h"
15#include "ap_tree_nlen.hxx"
16#include "ap_main.hxx"
17
18#include <ColumnStat.hxx>
19#include <gui_aliview.hxx>
20#include <macros.hxx>
21#include <nds.h>
22#include <TreeCallbacks.hxx>
23
24#include <aw_awars.hxx>
25#include <aw_preset.hxx>
26#include <aw_msg.hxx>
27#include <aw_root.hxx>
28#include <aw_question.hxx>
29
30#include <app.hxx>
31#include <sel_boxes.hxx>
32#include <ali_filter.hxx>
33#include <config_manager.hxx>
34
35#include <arb_progress.h>
36#include <arb_misc.h>
37#include <arb_defs.h>
38#include <arb_global_defs.h>
39
40#include <ad_cb.h>
41
42#include <list>
43#include <map>
44#include <mod_rlimit.h>
45
46#if defined(DEBUG)
47# define TESTMENU
48#endif // DEBUG
49
50using namespace std;
51
52AW_HEADER_MAIN
53
54#define AWAR_COLUMNSTAT_BASE "tmp/pars/colstat"
55#define AWAR_COLUMNSTAT_NAME AWAR_COLUMNSTAT_BASE "/name"
56
57#define AWT_TREE_PARS(ntw) DOWNCAST(AWT_graphic_parsimony*, (ntw)->gfx)
58
59static ArbParsimony *GLOBAL_PARS = NULp;
60
61inline AWT_graphic_parsimony *global_tree() { return GLOBAL_PARS->get_tree(); }
62inline AP_pars_root *global_tree_root() { return global_tree()->get_tree_root(); }
63
64// waaah more globals :(
65AP_main *ap_main; // @@@ move into ArbParsimony? or eliminate ArbParsimony
66
67void ArbParsimony::set_tree(AWT_graphic_parsimony *tree_) {
68    ap_assert(!tree); // only call once
69    tree = tree_;
70    ap_main->set_tree_root(tree);
71}
72
73static void set_keep_ghostnodes() {
74    // avoid that saving tree to DB does delete removed nodes
75    // (hack to fix #528)
76    // see ../ARBDB/adtree.cxx@keep_ghostnodes
77    GBDATA         *gb_tree = ap_main->get_tree_root()->get_gb_tree();
78    GB_transaction  ta(gb_tree);
79    GBDATA         *gb_keep = GB_searchOrCreate_int(gb_tree, "keep_ghostnodes", 1);
80    ASSERT_NO_ERROR(GB_set_temporary(gb_keep));
81}
82static void delete_kept_ghostnodes() {
83    if (ap_main->get_graphic_tree()) {
84        GBDATA         *gb_tree = ap_main->get_tree_root()->get_gb_tree();
85        GB_transaction  ta(gb_tree);
86
87        GBDATA *gb_keep = GB_entry(gb_tree, "keep_ghostnodes");
88        if (gb_keep) { // e.g. wrong for quick-add species
89            GB_ERROR error = GB_delete(gb_keep);
90            if (!error) {
91                if (ap_main->get_tree_root()->was_saved()) {
92                    // if tree was saved, DB may contain ghostnodes
93                    // -> save again to delete them
94                    error = global_tree()->save_to_DB(GB_get_root(gb_tree), NULp);
95                }
96            }
97            if (error) aw_message(error);
98        }
99    }
100}
101
102
103void AP_main::disconnect_from_db(AW_root *aw_root) {
104    ARB_disconnect_from_db(aw_root, gb_main);
105}
106
107__ATTR__NORETURN static void pars_exit(AW_window *aww) {
108    ap_main->accept_all();
109    delete_kept_ghostnodes();
110
111    ap_main->disconnect_from_db(aww->get_root()); // closes DB
112    delete ap_main; ap_main = NULp;
113    exit(EXIT_SUCCESS);
114}
115
116static void AP_user_push_cb(AW_window *aww) {
117    ap_main->remember_user_state();
118    aww->get_root()->awar(AWAR_STACKPOINTER)->write_int(ap_main->get_user_push_counter());
119}
120
121static void AP_user_pop_cb(AW_window *aww, TREE_canvas *ntw) {
122    if (ap_main->get_user_push_counter()<=0) {
123        aw_message("No tree on stack.");
124        return;
125    }
126
127    AWT_auto_refresh allowed_on(ntw);
128    ap_main->revert_user_state();
129    ntw->request_save();
130
131    aww->get_root()->awar(AWAR_STACKPOINTER)->write_int(ap_main->get_user_push_counter());
132    if (ap_main->get_user_push_counter() <= 0) { // last tree was popped => push again
133        AP_user_push_cb(aww);
134    }
135}
136
137class InsertData {
138    bool         abort_flag;
139    arb_progress progress;
140
141public:
142
143    bool quick_add_flag;
144    InsertData(bool quick, long spec_count)
145        : abort_flag(false),
146          progress(GBS_global_string("Inserting %li species", spec_count), spec_count),
147          quick_add_flag(quick)
148    {}
149
150    bool aborted() const { return abort_flag; }
151    void set_aborted(bool aborted_) { abort_flag = aborted_; }
152
153    void inc() {
154        progress.inc();
155        abort_flag = progress.aborted();
156    }
157
158    arb_progress& get_progress() { return progress; }
159};
160
161
162static int sort_sequences_by_length(const char*, long leaf0_ptr, const char*, long leaf1_ptr) { // @@@ any chance to make this typesafe?
163    AP_tree_nlen *leaf0 = (AP_tree_nlen*)leaf0_ptr;
164    AP_tree_nlen *leaf1 = (AP_tree_nlen*)leaf1_ptr;
165
166    AP_FLOAT len0 = leaf0->get_seq()->weighted_base_count();
167    AP_FLOAT len1 = leaf1->get_seq()->weighted_base_count();
168
169    // longest sequence first
170    if (len0<len1) return 1;
171    if (len0>len1) return -1;
172
173    // if length equal -> determine order by species name (just to have a defined order!)
174    int cmp = strcmp(leaf1->name, leaf0->name);
175    ap_assert(cmp != 0);
176    return cmp;
177}
178
179static long transform_gbd_to_leaf(const char *key, long val, void *) {
180    if (!val) return val;
181
182    // @@@ instead implement create_linked_leaf(), then use that?
183
184    GBDATA       *gb_node = (GBDATA *)val;
185    AP_pars_root *troot   = ap_main->get_tree_root();
186    AP_tree_nlen *leaf    = DOWNCAST(AP_tree_nlen*, troot->makeNode());
187
188    leaf->forget_origin(); // new leaf is not part of tree yet
189
190    leaf->gb_node = gb_node;
191    leaf->name    = ARB_strdup(key);
192    leaf->markAsLeaf();
193
194    leaf->set_seq(troot->get_seqTemplate()->dup());
195    GB_ERROR error = leaf->get_seq()->bind_to_species(gb_node);
196    if (!error) {
197        if (leaf->get_seq()->weighted_base_count() < MIN_SEQUENCE_LENGTH) {
198            error = GBS_global_string("Species %s has too short sequence (%f, minimum is %i)",
199                                      key,
200                                      leaf->get_seq()->weighted_base_count(),
201                                      MIN_SEQUENCE_LENGTH);
202        }
203    }
204    if (error) {
205        GBT_message(gb_node, error);
206        destroy(leaf, troot); leaf = NULp;
207    }
208    return (long)leaf;
209}
210
211typedef vector<AP_tree_nlen*> InsertedSpecies;
212
213static long toInserted(const char *, long val, void *cd_toInsert) {
214    InsertedSpecies *toInsert = (InsertedSpecies*)cd_toInsert;
215    AP_tree_nlen    *node     = (AP_tree_nlen*)val;
216
217    toInsert->push_back(node);
218    return 0;
219}
220
221inline int maxAllowedInsertions(int inTree) {
222    // max. species allowed to insert (in one pass) into a tree with 'inTree' leafs
223    return inTree/2;
224}
225inline int calcInsertNow(int toInsert, int inTree) {
226    // calculate number of species added in next pass
227    return std::min(toInsert, maxAllowedInsertions(inTree));
228}
229
230static long calc_steps(int toInsert, int inTree) {
231    ap_assert((toInsert+inTree) >= 2);
232
233    if (!toInsert) return 0;
234    if (!inTree) return 1 + calc_steps(toInsert-2, 2);
235
236    int edges     = leafs_2_edges(inTree, UNROOTED);
237    int insertNow = calcInsertNow(toInsert, inTree);
238
239    return (long)(edges+1)*insertNow + calc_steps(toInsert-insertNow, inTree+insertNow); // +1 for final step (=actual insertion of species)
240}
241
242class AP_subtree { // defines a subtree
243    AP_tree_nlen *subNode;
244    AP_tree_nlen *upNode;
245
246    bool valid() const { return subNode && upNode; }
247
248public:
249    AP_subtree() : subNode(NULp), upNode(NULp) {}
250    AP_subtree(AP_tree_edge *e, AP_tree_nlen *sub_node) :
251        subNode(sub_node),
252        upNode(e->otherNode(subNode))
253    {}
254
255    AP_tree_edge *edgeToSubtree() const { ap_assert(valid()); return upNode->edgeTo(subNode); }
256    AP_tree_nlen *subtreeRoot() const { return subNode; }
257
258    void setSubtreeRoot(AP_tree_nlen *new_subtree) {
259        ap_assert(upNode->edgeTo(new_subtree));
260        subNode = new_subtree;
261    }
262};
263
264struct EdgeBetween : private AP_subtree {
265    // semantically same as AP_tree_edge, but survives tree-modifications which modify edges (like insert+moveNextTo/moveTo)
266
267    EdgeBetween() {}
268    EdgeBetween(AP_tree_edge *e) : AP_subtree(e, e->sonNode()) {}
269    AP_tree_edge *find() const { return edgeToSubtree(); }
270};
271
272struct BestEdge {
273    Mutations   pars;
274    EdgeBetween between; // need to store pair of AP_tree_nlen here
275                         // (using AP_tree_edge is not stable; may move elsewhere by calls insert() or moveNextTo()!)
276
277    BestEdge() : pars(-1) {}
278    BestEdge(const EdgeBetween& betw, Mutations p) : pars(p), between(betw) {}
279
280    AP_tree_edge *edge() const { return between.find(); }
281};
282
283struct NodeInsertOrder {
284    bool operator() (AP_tree_nlen *i, AP_tree_nlen *j) { return strcmp(i->name, j->name)<0; }
285};
286
287typedef InsertedSpecies::const_iterator InsertSpeciesIterator;
288
289static void insert_species_into_tree(const InsertSpeciesIterator begin, const InsertSpeciesIterator end, arb_progress& progress) {
290    typedef map<AP_tree_nlen*, BestEdge> BestEdge4Node;
291    BestEdge4Node bestpos;
292
293    ap_assert(begin != end);
294
295    {
296        ap_main->remember();
297
298        EdgeChain chain(rootEdge(), ANY_EDGE, false);
299        ap_assert(chain.size()>0);
300
301        bool speciesInserted = false;
302
303        while (chain) {
304            AP_tree_edge *edge = *chain; ++chain;
305            edge->set_root();
306
307            EdgeBetween betweenNodes(edge);
308
309            InsertSpeciesIterator  curr    = begin;
310            AP_tree_nlen          *species = *curr++;
311
312            if (speciesInserted) {
313                species->moveTo(edge);
314            }
315            else {
316                species->insert(edge->sonNode()); // edge is root-edge -> son does not matter
317                speciesInserted = true;
318            }
319
320            species->set_root(); // => only needs one combine when exchanging species
321
322            Mutations pars = rootNode()->costs();
323            BestEdge4Node::iterator found = bestpos.find(species);
324            if (found == bestpos.end() || pars<found->second.pars) {
325                bestpos[species] = BestEdge(betweenNodes, pars);
326            }
327            ++progress;
328
329            AP_tree_nlen *rot_node = rootNode()->get_leftson(); // rot=rest of tree
330            if (rot_node == species) {
331                rot_node = rot_node->get_brother();
332            }
333            ap_assert(rot_node->get_brother() == species);
334
335            AP_combinableSeq *rot_seq   = rot_node->get_seq();
336            Mutations         rot_costs = rot_node->stored_costs();
337
338            ap_assert(species->stored_costs() == 0); // leaf has no mutations
339
340            while (1) {
341                if (curr == end) break;
342
343                AP_tree_nlen     *nextSpec = *curr++;
344                AP_combinableSeq *nextSeq  = nextSpec->get_seq();
345
346                pars  = nextSeq->mutations_if_combined_with(rot_seq) + rot_costs;
347                found = bestpos.find(nextSpec);
348                if (found == bestpos.end() || pars<found->second.pars) {
349                    bestpos[nextSpec] = BestEdge(betweenNodes, pars);
350                }
351                ++progress;
352            }
353        }
354
355        ap_main->revert();
356    }
357
358    // create insert lists for each used insert position:
359    typedef list<AP_tree_nlen*>          NodeList;
360    typedef map<AP_tree_edge*, NodeList> NodesAtEdge;
361
362    NodesAtEdge atEdge;
363    for (InsertSpeciesIterator s = begin; s != end; ++s) {
364        const BestEdge&  best = bestpos[*s];
365        AP_tree_edge    *edge = best.edge();
366
367        ap_assert(edge != NULp);
368
369        NodesAtEdge::iterator at = atEdge.find(edge);
370        if (at == atEdge.end()) {
371            atEdge[edge] = NodeList(1, *s);
372        }
373        else {
374            at->second.push_back(*s);
375        }
376    }
377
378#if defined(DEVEL_RALF)
379    // testcode: test whether all found edges are members of the tree
380    // (got some problem with insert/REMOVE while root is next to inserted/removed node)
381
382    set<AP_tree_edge*> edgeInTree;
383    {
384        EdgeChain chain(rootEdge(), ANY_EDGE, false);
385        while (chain) {
386            AP_tree_edge *edge = *chain; ++chain;
387            edgeInTree.insert(edge);
388        }
389
390        for (BestEdge4Node::iterator b = bestpos.begin(); b != bestpos.end(); ++b) {
391            AP_tree_edge *e = b->second.edge();
392
393            if (edgeInTree.find(e) == edgeInTree.end()) {
394                GBK_terminate("remembered edge has been removed from tree");
395            }
396        }
397    }
398#endif
399
400    // build list of edges where insert takes place (value=iterator into 'atEdge')
401    // => insert in determined order
402    typedef list<NodesAtEdge::iterator> InsertOrder;
403    InsertOrder insertOrder;
404    {
405        EdgeChain chain(rootEdge(), ANY_EDGE, false);
406        while (chain) {
407            AP_tree_edge *edge = *chain; ++chain;
408
409            NodesAtEdge::iterator at = atEdge.find(edge);
410            if (at != atEdge.end()) {
411                insertOrder.push_back(at);
412            }
413        }
414    }
415
416    typedef list<AP_subtree> OptiList;
417    OptiList                  optiPos;
418
419    // insert species to tree according to insert-lists:
420    for (InsertOrder::iterator o = insertOrder.begin(); o != insertOrder.end(); ++o) {
421        NodesAtEdge::iterator  e     = *o;
422        AP_tree_edge          *edge  = e->first;
423        NodeList&              nodes = e->second;
424
425        edge->set_root();
426
427        AP_tree_nlen *brother    = edge->sonNode();
428        size_t        nodes_size = nodes.size();
429
430#if defined(ASSERTION_USED)
431        ap_assert(bestpos[nodes.front()].edge() == edge);
432#endif
433
434        if (nodes_size == 1) {
435            nodes.front()->insert(brother);
436            ASSERT_VALID_TREE(rootNode());
437        }
438        else {
439            bool atLeaf = brother->is_leaf();
440            if (!atLeaf && edge->is_leaf_edge()) { // at leaf edge -> make sure brother points to leaf node
441                brother = edge->notSonNode();
442                ap_assert(brother->is_leaf());
443                atLeaf = true;
444            }
445
446#if defined(UNIT_TESTS)
447            if (RUNNING_TEST()) {
448                // use a determined order to insert multiple species at one position.
449                // Does not produce "better" topologies, just makes result independent from insert order.
450                typedef vector<AP_tree_nlen*> NodeVector;
451
452                NodeVector toSort(nodes.begin(), nodes.end());
453                sort(toSort.begin(), toSort.end(), NodeInsertOrder());
454                nodes = NodeList(toSort.begin(), toSort.end());
455            }
456#endif
457
458            AP_tree_nlen *at = brother;
459            for (NodeList::iterator n = nodes.begin(); n != nodes.end(); ++n) {
460                (*n)->insert(at);
461                at = *n; // only insert 1st node at 'brother', insert following nodes next to previously added nodes
462            }
463
464            ASSERT_VALID_TREE(rootNode());
465
466            AP_tree_nlen *ourFather    = brother->get_father();
467            AP_tree_nlen *addedSubtree = brother->get_brother(); // contains all added species
468            ap_assert(addedSubtree->is_ancestor_of(at));
469
470            if (atLeaf) {
471                // if inserted at leaf edge -> perform NNI at parent edge (i.e. including the leaf)
472                AP_tree_edge *toRest = ourFather->nextEdge();
473                for (int i = 0; i<2; ++i) {
474                    AP_tree_nlen *rest = toRest->otherNode(ourFather);
475                    if (rest != brother && rest != addedSubtree) {
476                        break;
477                    }
478                    toRest = ourFather->nextEdge(toRest);
479                }
480
481                optiPos.push_back(AP_subtree(toRest, ourFather));
482                ap_assert(optiPos.back().subtreeRoot() == ourFather);
483            }
484            else {
485                if (nodes_size>2) { // if inserted at internal edge && only 2 species inserted -> NNI makes no sense
486                    // Store (directed) edge to brother (for later optimization of subtree):
487                    AP_tree_edge *subEdge = ourFather->edgeTo(addedSubtree);
488                    optiPos.push_back(AP_subtree(subEdge, addedSubtree));
489                    ap_assert(optiPos.back().subtreeRoot() == addedSubtree);
490                }
491            }
492        }
493        progress.inc_by(nodes_size);
494    }
495
496    // Optimize all inserts of multiple species at one position:
497    {
498        arb_suppress_progress suppress_child; // suppress implicit progress count caused by nni_rec
499
500        AP_FLOAT curr_pars = rootNode()->costs();
501        AP_FLOAT prev_pars = curr_pars;
502
503        int loop = 0;
504
505        do {
506            ++loop;
507            prev_pars = curr_pars;
508            for (OptiList::iterator op = optiPos.begin(); op != optiPos.end(); ++op) {
509                AP_tree_edge *subtreeEdge = op->edgeToSubtree();
510                AP_tree_nlen *subtreeRoot = op->subtreeRoot();
511
512                subtreeEdge->set_root();
513                ap_assert(subtreeEdge->isConnectedTo(subtreeRoot));
514                AP_tree_nlen *father = subtreeEdge->otherNode(subtreeRoot);
515
516                AP_FLOAT this_pars;
517                while (1) {
518                    ap_assert(subtreeEdge->isConnectedTo(father)); // otherwise block fails
519                    this_pars = subtreeEdge->nni_rec(SKIP_LEAF_EDGES, AP_BL_NNI_ONLY, father, false);
520                    if (!(this_pars<curr_pars)) {
521                        ap_assert(!(this_pars>curr_pars));
522                        break;
523                    }
524                    curr_pars = this_pars;
525                }
526
527                ap_assert(subtreeEdge->isConnectedTo(father)); // otherwise next command fails
528                AP_tree_nlen *newSubtreeRoot = subtreeEdge->otherNode(father);
529                if (newSubtreeRoot != subtreeRoot) {
530                    op->setSubtreeRoot(newSubtreeRoot);
531                }
532            }
533        }
534        while (curr_pars<prev_pars);
535    }
536}
537
538static void insert_all_species_into_tree(GB_HASH*& hash) {
539    // inserts all species (from hash) into tree
540
541    AP_tree_nlen *tree = rootNode();
542
543    int inTree   = tree ? tree->count_leafs() : 0;
544    int toInsert = GBS_hash_elements(hash);
545
546    ap_assert(toInsert);
547
548    long steps = calc_steps(toInsert, inTree);
549    arb_progress progress(steps);
550
551    // move species to insert to a stack
552    InsertedSpecies speciesToInsert;
553    speciesToInsert.reserve(toInsert);
554
555    if (maxAllowedInsertions(inTree)<toInsert) {
556        // insert longest sequences first
557        GBS_hash_do_sorted_loop(hash, toInserted, sort_sequences_by_length, &speciesToInsert);
558    }
559    else {
560        // insert all sequences (order should not matter)
561        GBS_hash_do_loop(hash, toInserted, &speciesToInsert);
562    }
563    GBS_free_hash(hash);
564    hash = NULp;
565
566    ap_assert(toInsert != 2); // @@@ need to test this case
567
568    InsertSpeciesIterator curr = speciesToInsert.begin();
569    InsertSpeciesIterator end  = speciesToInsert.end();
570
571    AP_tree_edge *oldRootEdge = NULp;
572    if (!tree) { // create initial tree
573        AP_pars_root *troot = ap_main->get_tree_root();
574
575        AP_tree_nlen *s1 = *curr++;
576        AP_tree_nlen *s2 = *curr++;
577
578        s1->initial_insert(s2, troot);
579
580        inTree    = 2;
581        toInsert -= 2;
582
583        ++progress;
584    }
585    else {
586        oldRootEdge = rootEdge();
587    }
588
589    ASSERT_VALID_TREE(rootNode());
590
591    while (1) {
592        int insertNow = calcInsertNow(toInsert, inTree);
593        ap_assert(insertNow<=toInsert);
594        if (insertNow == toInsert) break;
595
596        {
597            InsertSpeciesIterator partEnd = curr;
598            advance(partEnd, insertNow);
599
600            insert_species_into_tree(curr, partEnd, progress);
601            curr = partEnd;
602        }
603
604        toInsert -= insertNow;
605        inTree   += insertNow;
606    }
607
608    insert_species_into_tree(curr, end, progress);
609
610    if (oldRootEdge) oldRootEdge->set_root(); // set root back to old position
611}
612
613enum AddWhat {
614    NT_ADD_MARKED,
615    NT_ADD_SELECTED,
616};
617
618static void nt_add(AWT_graphic_parsimony *agt, AddWhat what, bool quick) {
619    GB_ERROR  error = NULp;
620
621    AP_tree *oldrootleft  = NULp;
622    AP_tree *oldrootright = NULp;
623    {
624        AP_tree_nlen *root = rootNode();
625        if (root) {
626            root->reset_subtree_layout();
627            oldrootleft  = root->get_leftson();
628            oldrootright = root->get_rightson();
629        }
630    }
631
632    GB_HASH *hash    = NULp;
633    GBDATA  *gb_main = agt->get_gbmain();
634    {
635        GB_transaction ta(gb_main);
636        switch (what) {
637            case NT_ADD_SELECTED: {
638                char *name = GBT_readOrCreate_string(gb_main, AWAR_SPECIES_NAME, "");
639                if (name && strlen(name)) {
640                    GBDATA *gb_species = GBT_find_species(gb_main, name);
641                    if (gb_species) {
642                        hash = GBS_create_hash(1, GB_MIND_CASE);
643                        GBS_write_hash(hash, name, (long)gb_species);
644                    }
645                    else error = GBS_global_string("Selected Species (%s) not found", name);
646                }
647                else error = "Please select a species";
648                free(name);
649                break;
650            }
651            case NT_ADD_MARKED: {
652                hash = GBT_create_marked_species_hash(gb_main);
653                break;
654            }
655        }
656    }
657
658    if (!error) {
659        ap_assert(hash);
660
661        arb_progress progress(quick ? "Quick add" : "Add + NNI");
662
663        NT_remove_species_in_tree_from_hash(rootNode(), hash);
664
665        size_t          species_count = GBS_hash_elements(hash);
666        InsertPerfMeter insertPerf("(quick-)add", species_count);
667
668        {
669            GB_transaction ta(gb_main);
670            GBS_hash_do_loop(hash, transform_gbd_to_leaf, NULp);
671        }
672        {
673            size_t skipped = species_count - GBS_hash_elements(hash);
674            if (skipped) {
675                GBT_message(gb_main, GBS_global_string("Skipped %zu species (no data?)", skipped));
676            }
677        }
678        if (GBS_hash_elements(hash)) {
679            insert_all_species_into_tree(hash);
680        }
681        else {
682            GBT_message(gb_main, "No species (left) to insert");
683        }
684
685        if (rootNode()) {
686            if (oldrootleft) {
687                if (oldrootleft->father == oldrootright) oldrootleft->set_root();
688                else                                     oldrootright->set_root();
689            }
690            else {
691                ARB_edge innermost = rootNode()->get_tree_root()->find_innermost_edge();
692                innermost.set_root();
693            }
694
695            if (!quick) {
696                arb_suppress_progress quiet;
697
698                Mutations pars_prev = rootNode()->costs();
699                rootNode()->compute_tree(); // see AP_tree_edge.cxx@flags_broken_by_moveNextTo
700                progress.subtitle("local optimize (repeated NNI)");
701                while (1) {
702                    rootEdge()->nni_rec(EdgeSpec(SKIP_UNMARKED_EDGES|SKIP_LEAF_EDGES), AP_BL_NNI_ONLY, NULp, true);
703                    Mutations pars_curr = rootNode()->costs();
704                    if (pars_curr == pars_prev) break;
705                    ap_assert(pars_curr<pars_prev);
706                    pars_prev          = pars_curr;
707                }
708            }
709
710            {
711                arb_suppress_progress ignore;
712                rootEdge()->calc_branchlengths();
713            }
714
715            ASSERT_VALID_TREE(rootNode());
716            rootNode()->compute_tree();
717        }
718        else {
719            error = "Tree lost (no leafs left)";
720        }
721
722        insertPerf.dump(stdout);
723    }
724
725    if (hash) GBS_free_hash(hash);
726    if (error) aw_message(error);
727
728    // @@@ quick-add w/o NNI should sort according to original tree
729    agt->reorderTree(BIG_BRANCHES_TO_TOP);
730}
731
732// ------------------------------------------
733//      Adding partial sequences to tree
734
735class PartialSequence {
736    GBDATA               *gb_species;
737    mutable AP_tree_nlen *self;                     // self converted to leaf (ready for insertion)
738    const AP_tree_nlen   *best_full_match;          // full sequence position which matched best
739    long                  overlap;                  // size of overlapping region
740    long                  penalty;                  // weighted mismatches
741    bool                  released;
742    bool                  multi_match;
743    string                multi_list;               // list of equal-rated insertion-points (not containing self)
744
745    AP_tree_nlen *get_self() const {
746        if (!self) {
747            ap_assert(!released); // request not possible, because leaf has already been released!
748
749            self = (AP_tree_nlen*)transform_gbd_to_leaf(GBT_get_name_or_description(gb_species), (long)gb_species, NULp);
750            ap_assert(self);
751        }
752        return self;
753    }
754
755public:
756    PartialSequence(GBDATA *gb_species_) :
757        gb_species(gb_species_),
758        self(NULp),
759        best_full_match(NULp),
760        overlap(0),
761        penalty(LONG_MAX),
762        released(false),
763        multi_match(false)
764    {}
765    PartialSequence(const PartialSequence& other)
766        : gb_species(other.gb_species),
767          self(other.self),
768          best_full_match(other.best_full_match),
769          overlap(other.overlap),
770          penalty(other.penalty),
771          released(other.released),
772          multi_match(other.multi_match),
773          multi_list(other.multi_list)
774    {
775        ap_assert(!self); // copying self not implemented
776    }
777    DECLARE_ASSIGNMENT_OPERATOR(PartialSequence);
778    ~PartialSequence() { ap_assert(!self); }
779
780    GBDATA *get_species() const { return gb_species; }
781    const AP_tree_nlen *get_best_match() const { return best_full_match; }
782    AP_FLOAT get_branchlength() const { return AP_FLOAT(penalty)/overlap; }
783    void test_match(const AP_tree_nlen *leaf_full);
784    bool is_multi_match() const { return multi_match; }
785
786    const char *get_name() const {
787        const char *name = get_self()->name;
788        ap_assert(name);
789        return name;
790    }
791
792    string get_multilist() const {
793        ap_assert(is_multi_match());
794        return string(best_full_match->name)+multi_list;
795    }
796
797    AP_tree_nlen *release() {
798        AP_tree_nlen *s = self;
799        self            = NULp;
800        released        = true;
801        return s;
802    }
803
804    void dump(const char *whichMatch) const {
805        ap_assert(best_full_match);
806        printf("%s match for '%s' is '%s' (overlap=%li penalty=%li)\n",
807               whichMatch, get_name(), best_full_match->name,
808               overlap, penalty);
809    }
810
811};
812
813void PartialSequence::test_match(const AP_tree_nlen *leaf_full) {
814    long curr_overlap;
815    long curr_penalty;
816
817    leaf_full->get_seq()->partial_match(get_self()->get_seq(), &curr_overlap, &curr_penalty);
818
819    bool better = false;
820
821    if (curr_overlap > overlap) {
822        better = true;
823    }
824    else if (curr_overlap == overlap) {
825        if (curr_penalty<penalty) {
826            better = true;
827        }
828        else if (curr_penalty == penalty) {
829            // found two equal-rated insertion points -> store data for warning
830#if defined(DEBUG)
831            if (!multi_match) dump("better");
832            printf("Another equal match is against '%s' (overlap=%li penalty=%li)\n", leaf_full->name, curr_overlap, curr_penalty);
833#endif // DEBUG
834
835            multi_match  = true;
836            multi_list.append(1, '/');
837            multi_list.append(leaf_full->name);
838        }
839    }
840
841    if (better) {
842        overlap         = curr_overlap;
843        penalty         = curr_penalty;
844        best_full_match = leaf_full;
845        multi_match     = false;
846        multi_list      = "";
847
848#if defined(DEBUG)
849        dump("better");
850#endif
851    }
852#if defined(DEBUG)
853    else if (!multi_match) {
854        printf("Worse match against '%s' (overlap=%li penalty=%li)\n", leaf_full->name, curr_overlap, curr_penalty);
855    }
856#endif
857}
858
859static GB_ERROR nt_best_partial_match_rec(list<PartialSequence>& partial, const AP_tree_nlen *tree) {
860    GB_ERROR error = NULp;
861
862    if (tree) {
863        if (tree->is_leaf() && tree->name) {
864            if (tree->gb_node) {
865                int is_partial = GBT_is_partial(tree->gb_node, 0, true); // marks undef as 'full sequence'
866                if (is_partial == 0) { // do not consider other partial sequences
867                    list<PartialSequence>::iterator i = partial.begin();
868                    list<PartialSequence>::iterator e = partial.end();
869                    for (;  i != e; ++i) {
870                        i->test_match(tree);
871                    }
872                }
873                else if (is_partial == -1) {
874                    error = GB_await_error();
875                }
876            }
877        }
878        else {
879            error             = nt_best_partial_match_rec(partial, tree->get_leftson());
880            if (!error) error = nt_best_partial_match_rec(partial, tree->get_rightson());
881        }
882    }
883    return error;
884}
885
886static void count_partial_and_full(const AP_tree_nlen *at, int *partial, int *full, int *zombies, int default_value, bool define_if_undef) {
887    if (at->is_leaf()) {
888        if (at->gb_node) {
889            int is_partial = GBT_is_partial(at->gb_node, default_value, define_if_undef);
890            if (is_partial) ++(*partial);
891            else ++(*full);
892        }
893        else {
894            ++(*zombies);
895        }
896    }
897    else {
898        count_partial_and_full(at->get_leftson(),  partial, full, zombies, default_value, define_if_undef);
899        count_partial_and_full(at->get_rightson(), partial, full, zombies, default_value, define_if_undef);
900    }
901}
902
903static const AP_tree_nlen *find_least_deep_leaf(const AP_tree_nlen *at, int depth, int *min_depth) {
904    if (depth >= *min_depth) {
905        return NULp; // already found better or equal
906    }
907
908    if (at->is_leaf()) {
909        if (at->gb_node) {
910            *min_depth = depth;
911            return at;
912        }
913        return NULp;
914    }
915
916    const AP_tree_nlen *left  = find_least_deep_leaf(at->get_leftson(), depth+1, min_depth);
917    const AP_tree_nlen *right = find_least_deep_leaf(at->get_rightson(), depth+1, min_depth);
918
919    return right ? right : left;
920}
921inline AP_tree_nlen *find_least_deep_leaf(AP_tree_nlen *at, int depth, int *min_depth) {
922    return const_cast<AP_tree_nlen*>(find_least_deep_leaf(const_cast<const AP_tree_nlen*>(at), depth, min_depth));
923}
924
925static void push_partial(const char *, long val, void *cd_partial) {
926    list<PartialSequence> *partial = reinterpret_cast<list<PartialSequence> *>(cd_partial);
927    partial->push_back(PartialSequence((GBDATA*)val));
928}
929
930// -------------------------------
931//      Add Partial sequences
932
933static void nt_add_partial(AWT_graphic_parsimony *agt) {
934    GB_ERROR  error   = NULp;
935    GBDATA   *gb_main = agt->get_gbmain();
936
937    GB_begin_transaction(gb_main);
938
939    int full_marked_sequences = 0;
940
941    arb_progress part_add_progress("Adding partial sequences");
942
943    {
944        list<PartialSequence> partial;
945        {
946            GB_HASH *partial_hash = GBS_create_hash(GBT_get_species_count(gb_main), GB_MIND_CASE);
947
948            int marked_found             = 0;
949            int partial_marked_sequences = 0;
950            int no_data                  = 0;      // no data in alignment
951
952            for (GBDATA *gb_marked = GBT_first_marked_species(gb_main);
953                 !error && gb_marked;
954                 gb_marked = GBT_next_marked_species(gb_marked))
955            {
956                ++marked_found;
957
958                if (GBT_find_sequence(gb_marked, ap_main->get_aliname())) { // species has sequence in alignment
959                    const char *name = GBT_get_name_or_description(gb_marked);
960
961                    switch (GBT_is_partial(gb_marked, 1, true)) { // marks undef as 'partial sequence'
962                        case 0: { // full sequences
963                            GBT_message(gb_main, GBS_global_string("'%s' is a full sequence (cannot add partial)", name));
964                            ++full_marked_sequences;
965                            break;
966                        }
967                        case 1:     // partial sequences
968                            ++partial_marked_sequences;
969                            GBS_write_hash(partial_hash, name, (long)gb_marked);
970                            break;
971                        case -1:    // error
972                            error = GB_await_error();
973                            break;
974                        default:
975                            ap_assert(0);
976                            break;
977                    }
978                }
979                else {
980                    no_data++;
981                }
982            }
983
984            if (!error && !marked_found) error = "There are no marked species";
985
986            if (!error) {
987                NT_remove_species_in_tree_from_hash(rootNode(), partial_hash); // skip all species which are in tree
988                GBS_hash_do_const_loop(partial_hash, push_partial, &partial);  // build partial list from hash
989
990                int partials_already_in_tree = partial_marked_sequences - partial.size();
991
992                if (no_data>0)                  GBT_message(gb_main, GBS_global_string("%i marked species have no data in '%s'",        no_data, ap_main->get_aliname()));
993                if (full_marked_sequences>0)    GBT_message(gb_main, GBS_global_string("%i marked species are declared full sequences", full_marked_sequences));
994                if (partials_already_in_tree>0) GBT_message(gb_main, GBS_global_string("%i marked species are already in tree",         partials_already_in_tree));
995
996                if (partial.empty()) error = "No species left to add";
997            }
998
999            GBS_free_hash(partial_hash);
1000        }
1001
1002        if (!error) error = GBT_add_new_species_changekey(gb_main, "ARB_partial", GB_INT);
1003
1004        if (!error) {
1005            rootNode()->reset_subtree_layout();
1006
1007            // find best matching full sequence for each partial sequence
1008            error = nt_best_partial_match_rec(partial, rootNode());
1009
1010            list<PartialSequence>::iterator i = partial.begin();
1011            list<PartialSequence>::iterator e = partial.end();
1012
1013            arb_progress part_insert_progress(partial.size());
1014
1015#if defined(DEBUG)
1016            // show results :
1017            for (; i != e; ++i) i->dump("best");
1018            i = partial.begin();
1019#endif // DEBUG
1020
1021            for (; i != e && !error; ++i) {
1022                const char *name = i->get_name();
1023
1024                if (i->is_multi_match()) {
1025                    GBT_message(gb_main, GBS_global_string("Insertion of '%s' is ambiguous.\n"
1026                                                                  "(took first of equal scored insertion points: %s)",
1027                                                                  name, i->get_multilist().c_str()));
1028                }
1029
1030                AP_tree_nlen *part_leaf  = i->release();
1031                AP_tree_nlen *full_seq   = const_cast<AP_tree_nlen*>(i->get_best_match());
1032                AP_tree_nlen *brother    = full_seq->get_brother();
1033                int           is_partial = 0;
1034                AP_tree_nlen *target     = NULp;
1035
1036                if (brother->is_leaf()) {
1037                    if (brother->gb_node) {
1038                        is_partial = GBT_is_partial(brother->gb_node, 0, true);
1039
1040                        if (is_partial) { // brother is partial sequence
1041                            target = brother; // insert as brother of brother
1042                        }
1043                        else {
1044                            target = full_seq; // insert as brother of full_seq
1045                        }
1046                    }
1047                    else {
1048                        error = "There are zombies in your tree - please remove them";
1049                    }
1050                }
1051                else {
1052                    int partial_count = 0;
1053                    int full_count    = 0;
1054                    int zombie_count  = 0;
1055
1056                    count_partial_and_full(brother, &partial_count, &full_count, &zombie_count, 0, true);
1057
1058                    if (zombie_count) {
1059                        error = "There are zombies in your tree - please remove them";
1060                    }
1061                    else if (full_count) {
1062                        // brother is a subtree containing full sequences
1063                        // -> add new brother to full_seq found above
1064                        target = full_seq;
1065                    }
1066                    else {      // brother subtree only contains partial sequences
1067                        // find one of the least-deep leafs
1068                        int depth  = INT_MAX;
1069                        target     = find_least_deep_leaf(brother, 0, &depth);
1070                        is_partial = 1;
1071                    }
1072                }
1073
1074
1075                if (!error) {
1076#if defined(DEBUG)
1077                    printf("inserting '%s'\n", name);
1078#endif // DEBUG
1079                    part_leaf->insert(target);
1080
1081                    // we need to create the sequence of the father node!
1082                    AP_tree_nlen *father = part_leaf->get_father();
1083                    father->costs();
1084
1085                    // ensure full-sequence is always on top
1086                    if (father->rightson == target) {
1087                        father->swap_sons();
1088                    }
1089
1090                    if (!error) { // now correct the branch lengths modified by insert()
1091                        // calc the original branchlen (of target leaf branch)
1092                        GBT_LEN orglen = father->get_branchlength()+target->get_branchlength();
1093
1094                        if (is_partial) { // we have a subtree of partial sequences
1095                            target->set_branchlength(orglen); // restore original branchlength
1096                            father->set_branchlength(0); // all father branches are zero length
1097                        }
1098                        else { // we have a subtree of one full+one partial sequence
1099                            ap_assert(full_seq->get_father() == father);
1100
1101                            father->set_branchlength(orglen); // father branch represents original length (w/o partial seq)
1102                            full_seq->set_branchlength(0);    // full seq has no sub-branch length
1103                        }
1104                        part_leaf->set_branchlength(i->get_branchlength());
1105                        printf("Adding with branchlength=%f\n", i->get_branchlength());
1106                    }
1107                }
1108                else {
1109                    destroy(part_leaf);
1110                }
1111
1112                part_insert_progress.inc_and_check_user_abort(error);
1113            }
1114        }
1115    }
1116
1117    if (full_marked_sequences) {
1118        GBT_message(gb_main, GBS_global_string("%i marked full sequences were not added", full_marked_sequences));
1119    }
1120
1121    if (error) {
1122        GBT_message(gb_main, error);
1123        GB_abort_transaction(gb_main);
1124    }
1125    else {
1126        GB_commit_transaction(gb_main);
1127        agt->exports.request_save();
1128    }
1129}
1130
1131static void NT_add_partial_and_update(UNFIXED, TREE_canvas *ntw) {
1132    AWT_auto_refresh allowed_on(ntw);
1133    nt_add_partial(AWT_TREE_PARS(ntw));
1134}
1135
1136// -------------------------------
1137//      add marked / selected
1138
1139static void nt_add_and_update(AWT_canvas *ntw, AddWhat what, bool quick) {
1140    AWT_auto_refresh allowed_on(ntw);
1141    nt_add(AWT_TREE_PARS(ntw), what, quick);
1142}
1143
1144static void NT_add_and_NNI(UNFIXED, TREE_canvas *ntw, AddWhat what) { nt_add_and_update(ntw, what, false); }
1145static void NT_add_quick  (UNFIXED, TREE_canvas *ntw, AddWhat what) { nt_add_and_update(ntw, what, true);  }
1146
1147// ------------------------------------------
1148//      remove and add marked / selected
1149
1150static void nt_reAdd(AWT_graphic_parsimony *agt, AddWhat what, bool quick) {
1151    if (agt->get_root_node()) {
1152        ap_assert(what == NT_ADD_MARKED); // code below will misbehave for NT_ADD_SELECTED
1153        agt->get_tree_root()->remove_leafs(AWT_REMOVE_MARKED);
1154        nt_add(agt, what, quick);
1155    }
1156}
1157
1158static void nt_reAdd_and_update(AWT_canvas *ntw, AddWhat what, bool quick) {
1159    AWT_auto_refresh allowed_on(ntw);
1160    nt_reAdd(AWT_TREE_PARS(ntw), what, quick);
1161}
1162
1163static void NT_reAdd_and_NNI(UNFIXED, TREE_canvas *ntw, AddWhat what) { nt_reAdd_and_update(ntw, what, false); }
1164static void NT_reAdd_quick  (UNFIXED, TREE_canvas *ntw, AddWhat what) { nt_reAdd_and_update(ntw, what, true);  }
1165
1166// --------------------------------------------------------------------------------
1167
1168static void calc_branchlengths_and_reorder(AWT_graphic_parsimony *agt) {
1169    arb_progress progress("Calculating branchlengths");
1170    rootEdge()->calc_branchlengths();
1171    agt->reorderTree(BIG_BRANCHES_TO_TOP);
1172}
1173
1174static void NT_calc_branchlengths_reorder_and_update(AW_window *, TREE_canvas *ntw) {
1175    AWT_auto_refresh allowed_on(ntw);
1176    calc_branchlengths_and_reorder(AWT_TREE_PARS(ntw));
1177}
1178
1179static void NT_bootstrap(AW_window *, TREE_canvas *ntw, bool limit_only) {
1180    arb_progress     progress("Calculating bootstrap limit");
1181    AWT_auto_refresh allowed_on(ntw);
1182    AP_BL_MODE       mode = AP_BL_MODE((limit_only ? AP_BL_BOOTSTRAP_LIMIT : AP_BL_BOOTSTRAP_ESTIMATE)|AP_BL_BL_ONLY);
1183
1184    rootEdge()->nni_rec(ANY_EDGE, mode, NULp, true);
1185    AWT_graphic_tree *agt = AWT_TREE(ntw);
1186    agt->reorderTree(BIG_BRANCHES_TO_TOP);
1187    agt->set_logical_root_to(agt->get_root_node());
1188}
1189
1190static void optimizeTree(AWT_graphic_parsimony *agt, const KL_Settings& settings) {
1191    arb_progress progress("Optimizing tree");
1192    agt->get_parsimony().optimize_tree(rootNode(), settings, progress);
1193    ASSERT_VALID_TREE(rootNode());
1194    calc_branchlengths_and_reorder(agt);
1195}
1196static void NT_optimize(AW_window *, TREE_canvas *ntw) {
1197    AWT_auto_refresh allowed_on(ntw);
1198    optimizeTree(AWT_TREE_PARS(ntw), KL_Settings(ntw->awr));
1199}
1200
1201static void recursiveNNI(AWT_graphic_parsimony *agt, EdgeSpec whichEdges) {
1202    arb_progress progress("Recursive NNI");
1203    Mutations    orgPars  = rootNode()->costs();
1204    Mutations    prevPars = orgPars;
1205    progress.subtitle(GBS_global_string("best=%li", orgPars));
1206
1207    {
1208        arb_suppress_progress quiet;
1209
1210        while (!progress.aborted()) {
1211            Mutations currPars = rootEdge()->nni_rec(whichEdges, AP_BL_NNI_ONLY, NULp, true);
1212            if (currPars == prevPars) break; // no improvement -> abort
1213            progress.subtitle(GBS_global_string("best=%li (gain=%li)", currPars, orgPars-currPars));
1214            prevPars          = currPars;
1215        }
1216        calc_branchlengths_and_reorder(agt);
1217    }
1218}
1219
1220static void NT_recursiveNNI(AW_window *, TREE_canvas *ntw) {
1221    AWT_auto_refresh allowed_on(ntw);
1222    EdgeSpec whichEdges = KL_Settings(ntw->awr).whichEdges;
1223    recursiveNNI(AWT_TREE_PARS(ntw), whichEdges);
1224}
1225
1226static int calculate_default_random_repeat(long leafs) {
1227    double balanced_depth = log10(leafs) / log10(2);
1228    int    repeat         = int(balanced_depth*2.0 + .5);
1229    if (repeat<1) repeat  = 1;
1230    return repeat;
1231}
1232
1233static void update_random_repeat(AW_root *awr, AWT_graphic_parsimony *agt) {
1234    long leafs  = agt->get_root_node()->count_leafs();
1235    int  repeat = calculate_default_random_repeat(leafs);
1236    awr->awar(AWAR_RAND_REPEAT)->write_int(repeat);
1237}
1238
1239static void mixtree_and_calclengths(AWT_graphic_parsimony *agt, int repeat, int percent, EdgeSpec whichEdges) {
1240    double       allBranchProbability = double(repeat)*percent/100.0; // = ~1.0 if each branch is mixed once
1241    double       phase1_weight        = 1.0 - 1.0/(30.0 * allBranchProbability);
1242    arb_progress progress(WEIGHTED, "Randomizing tree", phase1_weight);
1243
1244    progress.subtitle("mixing");
1245    rootEdge()->mixTree(repeat, percent, whichEdges);
1246    ++progress;
1247
1248    progress.subtitle("calculating branchlengths");
1249    rootEdge()->calc_branchlengths();
1250    ++progress;
1251
1252    agt->exports.request_save();
1253}
1254
1255static void randomMixTree(AW_window *aww, TREE_canvas *ntw) {
1256    AWT_auto_refresh  allowed_on(ntw);
1257    AW_root          *awr = aww->get_root();
1258
1259    mixtree_and_calclengths(AWT_TREE_PARS(ntw), awr->awar(AWAR_RAND_REPEAT)->read_int(), awr->awar(AWAR_RAND_PERCENT)->read_int(), KL_Settings(awr).whichEdges);
1260    {
1261        ARB_edge newRootEdge = rootNode()->get_tree_root()->find_innermost_edge();
1262        newRootEdge.son()->set_root();
1263    }
1264    AWT_TREE_PARS(ntw)->reorderTree(BIG_BRANCHES_TO_TOP);
1265}
1266
1267
1268static AWT_config_mapping_def optimizer_config_mapping[] = {
1269    { AWAR_OPTI_MARKED_ONLY, "marked_only" },
1270    { AWAR_OPTI_SKIP_FOLDED, "skip_folded" },
1271
1272    // { AWAR_RAND_REPEAT,     "rand_repeat" }, // do not store (use treesize-dependent default)
1273    { AWAR_RAND_PERCENT, "rand_percent" },
1274
1275    { AWAR_KL_MAXDEPTH, "maxdepth" },
1276    { AWAR_KL_INCDEPTH, "incdepth" },
1277
1278    { AWAR_KL_STATIC_ENABLED, "static" },
1279    { AWAR_KL_STATIC_DEPTH1,  "s_depth1" },
1280    { AWAR_KL_STATIC_DEPTH2,  "s_depth2" },
1281    { AWAR_KL_STATIC_DEPTH3,  "s_depth3" },
1282    { AWAR_KL_STATIC_DEPTH4,  "s_depth4" },
1283    { AWAR_KL_STATIC_DEPTH5,  "s_depth5" },
1284
1285    { AWAR_KL_DYNAMIC_ENABLED, "dynamic" },
1286    { AWAR_KL_DYNAMIC_START,   "start" },
1287    { AWAR_KL_DYNAMIC_MAXX,    "maxx" },
1288    { AWAR_KL_DYNAMIC_MAXY,    "maxy" },
1289
1290    { NULp, NULp }
1291};
1292
1293static AWT_predefined_config optimizer_predefined_configs[] = {
1294    {
1295        "*minimum_static_reduction",
1296        "Sets paths allowed by static reduction to maximum\n(causing the minimal reduction)",
1297        "s_depth1='8';s_depth2='6';s_depth3='6';s_depth4='6';s_depth5='6';static='1'" // only defines/affects settings related to static path reduction
1298    },
1299    {
1300        "*whole_tree_level8",
1301        "Level-8-optimization of whole tree\n(no path reduction)",
1302        "dynamic='0';incdepth='0';marked_only='0';maxdepth='8';skip_folded='0';static='0'"
1303    },
1304    { NULp, NULp, NULp }
1305};
1306
1307static AW_window *createOptimizeWindow(AW_root *aw_root, TREE_canvas *ntw) {
1308    AW_window_simple *aws = new AW_window_simple;
1309    aws->init(aw_root, "TREE_OPTIMIZE", "Tree optimization");
1310    aws->load_xfig("pars/tree_opti.fig");
1311
1312    aws->at("close");
1313    aws->callback(AW_POPDOWN);
1314    aws->create_button("CLOSE", "CLOSE", "C");
1315
1316    aws->at("help");
1317    aws->callback(makeHelpCallback("pa_optimizer.hlp"));
1318    aws->create_button("HELP", "HELP", "H");
1319
1320    aws->at("marked");
1321    aws->label("Only subtrees containing marked species");
1322    aws->create_toggle(AWAR_OPTI_MARKED_ONLY);
1323
1324    aws->at("folded");
1325    aws->label("Do not modify folded subtrees");
1326    aws->create_toggle(AWAR_OPTI_SKIP_FOLDED);
1327
1328    aws->button_length(18);
1329
1330    aws->at("rec_nni");
1331    aws->callback(makeWindowCallback(NT_recursiveNNI, ntw));
1332    aws->create_button("REC_NNI", "Recursive NNI", "N");
1333
1334    aws->at("heuristic");
1335    aws->callback(makeWindowCallback(NT_optimize, ntw));
1336    aws->create_button("HEURISTIC", "Heuristic\noptimizer", "H");
1337
1338    aws->at("config");
1339    AWT_insert_config_manager(aws, AW_ROOT_DEFAULT, "treeopti", optimizer_config_mapping, NULp, optimizer_predefined_configs);
1340
1341    aws->at("settings");
1342    aws->callback(makeCreateWindowCallback(create_kernighan_properties_window));
1343    aws->create_button("SETTINGS", "Settings", "S");
1344
1345    aws->at("randomize");
1346    aws->callback(makeWindowCallback(randomMixTree, ntw));
1347    aws->create_button("RANDOMIZE", "Randomize tree", "R");
1348
1349    aws->button_length(5);
1350
1351    aws->at("repeat");   aws->create_input_field(AWAR_RAND_REPEAT);
1352    aws->at("percent");  aws->create_input_field(AWAR_RAND_PERCENT);
1353
1354    return aws;
1355}
1356
1357// -----------------------
1358//      test functions
1359
1360#if defined(TESTMENU)
1361static void refreshTree(AWT_canvas *ntw) {
1362    GB_transaction ta(ntw->gb_main);
1363    AWT_auto_refresh allowed_on(ntw);
1364    ntw->request_save_and_zoom_reset();
1365}
1366
1367static void setBranchlens(AP_tree_nlen *node, double newLen) {
1368    node->setBranchlen(newLen, newLen);
1369
1370    if (!node->is_leaf()) {
1371        setBranchlens(node->get_leftson(), newLen);
1372        setBranchlens(node->get_rightson(), newLen);
1373    }
1374}
1375
1376static void TESTMENU_setBranchlen(AW_window *, AWT_canvas *ntw) {
1377    AP_tree_nlen *root = rootNode();
1378
1379    setBranchlens(root, 1.0);
1380    refreshTree(ntw);
1381}
1382
1383static void TESTMENU_treeStats(AW_window *) {
1384    ARB_tree_info tinfo;
1385    AP_tree_nlen *root = rootNode();
1386
1387    if (root) {
1388        {
1389            GB_transaction ta(root->get_tree_root()->get_gb_main());
1390            root->calcTreeInfo(tinfo);
1391        }
1392
1393        puts("Tree stats:");
1394
1395        printf("nodes      =%6zu\n", tinfo.nodes());
1396        printf(" inner     =%6zu\n", tinfo.innerNodes);
1397        printf("  groups   =%6zu\n", tinfo.groups);
1398        printf(" leafs     =%6zu\n", tinfo.leafs);
1399        printf("  unlinked =%6zu (zombies?)\n", tinfo.unlinked);
1400        printf("  linked   =%6zu\n", tinfo.linked());
1401        printf("   marked  =%6zu\n", tinfo.marked);
1402    }
1403    else {
1404        puts("No tree");
1405    }
1406}
1407
1408static void TESTMENU_sortTreeByName(AW_window *, AWT_canvas *ntw) {
1409    AP_tree_nlen *root = rootNode();
1410
1411    root->sortByName();
1412    refreshTree(ntw);
1413}
1414
1415static void init_TEST_menu(AW_window_menu_modes *awm, AWT_canvas *ntw) {
1416    awm->create_menu("Test[debug]", "g", AWM_ALL);
1417
1418    awm->insert_menu_topic("treestat",        "Tree statistics",    "s", "", AWM_ALL, TESTMENU_treeStats);
1419    awm->insert_menu_topic("setlens",         "Set branchlens",     "b", "", AWM_ALL, makeWindowCallback(TESTMENU_setBranchlen, ntw));
1420    awm->insert_menu_topic("sorttreebyname",  "Sort tree by name",  "o", "", AWM_ALL, makeWindowCallback(TESTMENU_sortTreeByName, ntw));
1421}
1422#endif // TESTMENU
1423
1424static GB_ERROR pars_check_size(AW_root *awr, GB_ERROR& warning, const adfiltercbstruct *filterDef) {
1425    GB_ERROR error = NULp;
1426    warning        = NULp;
1427
1428    char *tree_name = awr->awar(AWAR_TREE)->read_string();
1429    char *filter    = awr->awar(filterDef->def_filter)->read_string();
1430    long  ali_len   = 0;
1431
1432    if (strlen(filter)) {
1433        int i;
1434        for (i=0; filter[i]; i++) {
1435            if (filter[i] != '0') ali_len++;
1436        }
1437    }
1438    else {
1439        char *ali_name = awr->awar(AWAR_ALIGNMENT)->read_string();
1440        ali_len        = GBT_get_alignment_len(ap_main->get_gb_main(), ali_name);
1441        if (ali_len<=0) {
1442            error = "Please select a valid alignment";
1443            GB_clear_error();
1444        }
1445        free(ali_name);
1446    }
1447
1448    if (!error) {
1449        long tree_size = GBT_size_of_tree(ap_main->get_gb_main(), tree_name);
1450        if (tree_size == -1) {
1451            error = "Please select an existing tree";
1452        }
1453        else {
1454            size_t expected_memuse = (ali_len * tree_size * 4 / 1024);
1455            if (expected_memuse > GB_get_usable_memory()) {
1456                warning = GBS_global_string("Estimated memory usage (%s) exceeds physical memory (will swap)\n"
1457                                            "(did you specify a filter?)",
1458                                            GBS_readable_size(expected_memuse, "b"));
1459            }
1460        }
1461    }
1462
1463    free(filter);
1464    free(tree_name);
1465
1466    ap_assert(!GB_have_error());
1467    return error;
1468}
1469
1470static void pars_reset_optimal_parsimony(AW_window *aww) {
1471    AW_root *awr = aww->get_root();
1472    awr->awar(AWAR_BEST_PARSIMONY)->write_int(awr->awar(AWAR_PARSIMONY)->read_int());
1473}
1474
1475class LowDataCheck {
1476    int leafs; // counts leafs with insufficiant data
1477    int inner; // same for inner nodes
1478
1479public:
1480    LowDataCheck() : leafs(0), inner(0) {}
1481
1482    void count(AP_tree_nlen *node);
1483
1484    int get_leafs() const { return leafs; }
1485    int get_inner() const { return inner; }
1486};
1487
1488void LowDataCheck::count(AP_tree_nlen *node) {
1489    const AP_combinableSeq *seq   = node->get_seq();
1490    AP_FLOAT                bases = seq->weighted_base_count();
1491
1492    if (node->is_leaf()) {
1493        if (bases<MIN_SEQUENCE_LENGTH) ++leafs;
1494    }
1495    else {
1496        if (bases<MIN_SEQUENCE_LENGTH) ++inner;
1497
1498        count(node->get_leftson());
1499        count(node->get_rightson());
1500    }
1501}
1502
1503static void PARS_infomode_cb(UNFIXED, TREE_canvas *canvas, AWT_COMMAND_MODE mode) {
1504    AWT_trigger_remote_action(NULp, canvas->gb_main, "ARB_NT:species_info");
1505    nt_mode_event(NULp, canvas, mode);
1506}
1507
1508static void pars_start_cb(AW_window *aw_parent, WeightedFilter *wfilt, const PARS_commands *cmds) {
1509    ModRLimit increase_stacksize(RLIMIT_STACK, TREEDISP_STACKSIZE);
1510
1511    AW_root *awr     = aw_parent->get_root();
1512    GBDATA  *gb_main = ap_main->get_gb_main();
1513    GB_begin_transaction(gb_main);
1514    {
1515        GB_ERROR warning;
1516        GB_ERROR error = pars_check_size(awr, warning, wfilt->get_adfiltercbstruct());
1517
1518        if (warning && !error) {
1519            char *question = GBS_global_string_copy("%s\nDo you want to continue?", warning);
1520            bool  cont     = aw_ask_sure("swap_warning", question);
1521            free(question);
1522
1523            if (!cont) error = "User abort";
1524
1525        }
1526
1527        if (!error) {
1528            // freeze value of GBT_get_default_alignment to 'ali_name':
1529            const char *ali_name = awr->awar(AWAR_ALIGNMENT)->read_char_pntr();
1530            error = GBT_set_startup_alignment(ap_main->get_gb_main(), ali_name);
1531        }
1532
1533        if (error) {
1534            aw_message(error);
1535            GB_commit_transaction(gb_main);
1536            return;
1537        }
1538    }
1539
1540
1541    AW_window_menu_modes *awm = new AW_window_menu_modes;
1542    awm->init(awr, "ARB_PARSIMONY", "ARB_PARSIMONY", 400, 200);
1543
1544    GLOBAL_PARS->generate_tree(wfilt);
1545
1546    TREE_canvas *ntw;
1547    {
1548        AP_tree_display_style  prev_style = global_tree()->get_tree_style();
1549        global_tree()->set_tree_style(AP_LIST_SIMPLE, NULp); // avoid NDS warnings during startup
1550        ntw = new TREE_canvas(gb_main, awm, awm->get_window_id(), global_tree(), awr->awar(AWAR_TREE));
1551        global_tree()->set_tree_style(prev_style, ntw);
1552    }
1553
1554    {
1555        GB_ERROR error = NULp;
1556        arb_progress progress("loading tree");
1557        NT_reload_tree_event(awr, ntw, false);             // load tree (but do not expose - first zombies need to be removed)
1558        if (!global_tree()->get_root_node()) {
1559            error = "Failed to load the selected tree";
1560        }
1561        else {
1562            AP_tree_edge::initialize(rootNode());   // builds edges
1563            long removed = global_tree_root()->remove_leafs(AWT_REMOVE_ZOMBIES);
1564
1565            PARS_tree_init(global_tree());
1566            removed += global_tree_root()->remove_leafs(AWT_RemoveType(AWT_REMOVE_ZOMBIES | AWT_REMOVE_NO_SEQUENCE));
1567
1568            if (!global_tree()->get_root_node()) {
1569                const char *aliname = global_tree_root()->get_aliview()->get_aliname();
1570                error               = GBS_global_string("Less than 2 species contain data in '%s'\n"
1571                                                        "Tree vanished", aliname);
1572            }
1573            else if (removed) {
1574                aw_message(GBS_global_string("Removed %li leafs (zombies or species w/o data in alignment)", removed));
1575            }
1576
1577            error = GB_end_transaction(ntw->gb_main, error);
1578            if (!error) {
1579                progress.subtitle("Calculating inner nodes");
1580                GLOBAL_PARS->get_root_node()->costs();
1581
1582                progress.subtitle("Checking amount of data");
1583                LowDataCheck lowData;
1584                lowData.count(GLOBAL_PARS->get_root_node());
1585
1586                bool warned = false;
1587                if (lowData.get_inner()>0) {
1588                    aw_message(GBS_global_string("Inner nodes with insufficient data: %i", lowData.get_inner()));
1589                    warned = true;
1590                }
1591                if (lowData.get_leafs()>0) {
1592                    aw_message(GBS_global_string("Species with insufficient data: %i", lowData.get_leafs()));
1593                    warned = true;
1594                }
1595                if (warned) {
1596                    aw_message("Warning: low sequence data (<" stringize_pscan(MIN_SEQUENCE_LENGTH) " bp) detected! (filter too restrictive?)");
1597                }
1598            }
1599        }
1600        if (error) aw_popup_exit(error);
1601    }
1602
1603    if (cmds->add_marked)           NT_add_quick(NULp, ntw, NT_ADD_MARKED);
1604    if (cmds->add_selected)         NT_add_quick(NULp, ntw, NT_ADD_SELECTED);
1605    if (cmds->calc_branch_lengths)  NT_calc_branchlengths_reorder_and_update(awm, ntw);
1606    if (cmds->calc_bootstrap)       NT_bootstrap(awm, ntw, 0);
1607    if (cmds->quit)                 pars_exit(awm);
1608
1609    GB_transaction ta(ntw->gb_main);
1610
1611#if defined(DEBUG)
1612    AWT_create_debug_menu(awm);
1613#endif // DEBUG
1614
1615    awm->create_menu("File", "F", AWM_ALL);
1616    {
1617        insert_macro_menu_entry(awm, false);
1618        awm->insert_menu_topic("print_tree", "Print Tree ...", "P", "tree2prt.hlp", AWM_ALL, makeWindowCallback(AWT_popup_print_window, static_cast<AWT_canvas*>(ntw)));
1619        awm->insert_menu_topic("quit",       "Quit",           "Q", "quit.hlp",     AWM_ALL, pars_exit);
1620    }
1621
1622    awm->create_menu("Species", "S", AWM_ALL);
1623    {
1624        NT_insert_mark_submenus(awm, ntw, 0);
1625
1626    }
1627    awm->create_menu("Tree", "T", AWM_ALL);
1628    {
1629
1630        awm->insert_menu_topic("nds",       "NDS (Node Display Setup) ...",      "N", "props_nds.hlp",   AWM_ALL, makeCreateWindowCallback(NDS_create_window, ntw->gb_main));
1631
1632        awm->sep______________();
1633        awm->insert_menu_topic("tree_print",  "Print tree ...",          "P", "tree2prt.hlp",  AWM_ALL, makeWindowCallback(AWT_popup_print_window,       static_cast<AWT_canvas*>(ntw)));
1634        awm->insert_menu_topic("tree_2_xfig", "Export tree to XFIG ...", "F", "tree2file.hlp", AWM_ALL, makeWindowCallback(AWT_popup_tree_export_window, static_cast<AWT_canvas*>(ntw)));
1635        awm->sep______________();
1636        NT_insert_collapse_submenu(awm, ntw);
1637        awm->sep______________();
1638        awm->insert_sub_menu("Remove Species from Tree",     "R");
1639        {
1640            awm->insert_menu_topic("tree_remove_deleted", "Remove Zombies", "Z", "trm_del.hlp",    AWM_ALL, makeWindowCallback(NT_remove_leafs, ntw, AWT_REMOVE_ZOMBIES));
1641            awm->insert_menu_topic("tree_remove_marked",  "Remove Marked",  "M", "trm_mrkd.hlp",   AWM_ALL, makeWindowCallback(NT_remove_leafs, ntw, AWT_REMOVE_MARKED));
1642            awm->insert_menu_topic("tree_keep_marked",    "Keep Marked",    "K", "tkeep_mrkd.hlp", AWM_ALL, makeWindowCallback(NT_remove_leafs, ntw, AWT_KEEP_MARKED));
1643        }
1644        awm->close_sub_menu();
1645        awm->insert_sub_menu("Add Species to Tree",      "A");
1646        {
1647            awm->insert_menu_topic("add_marked",         "Add Marked Species",                              "M", "pa_quick.hlp",   AWM_ALL, makeWindowCallback(NT_add_quick,     ntw, NT_ADD_MARKED));
1648            awm->insert_menu_topic("add_marked_nni",     "Add Marked Species + Local Optimization (NNI)",   "N", "pa_add.hlp",     AWM_ALL, makeWindowCallback(NT_add_and_NNI,   ntw, NT_ADD_MARKED));
1649            awm->insert_menu_topic("rm_add_marked",      "Remove & Add Marked Species",                     "R", "pa_quick.hlp",   AWM_ALL, makeWindowCallback(NT_reAdd_quick,   ntw, NT_ADD_MARKED));
1650            awm->insert_menu_topic("rm_add_marked_nni|", "Remove & Add Marked + Local Optimization (NNI)",  "L", "pa_add.hlp",     AWM_ALL, makeWindowCallback(NT_reAdd_and_NNI, ntw, NT_ADD_MARKED));
1651            awm->sep______________();
1652            awm->insert_menu_topic("add_marked_partial", "Add Marked Partial Species",                      "P", "pa_partial.hlp", AWM_ALL, makeWindowCallback(NT_add_partial_and_update, ntw));
1653            awm->sep______________();
1654            awm->insert_menu_topic("add_selected",       "Add Selected Species",                            "S", "pa_quick.hlp",   AWM_ALL, makeWindowCallback(NT_add_quick,     ntw, NT_ADD_SELECTED));
1655            awm->insert_menu_topic("add_selected_nni",   "Add Selected Species + Local Optimization (NNI)", "O", "pa_add.hlp",     AWM_ALL, makeWindowCallback(NT_add_and_NNI,   ntw, NT_ADD_SELECTED));
1656        }
1657        awm->close_sub_menu();
1658        awm->sep______________();
1659        awm->insert_menu_topic("optimize", "Tree Optimization ...",   "O", "pa_optimizer.hlp", AWM_ALL, makeCreateWindowCallback(createOptimizeWindow, ntw));
1660        awm->insert_menu_topic("reset",    "Reset optimal parsimony", "s", "pa_reset.hlp",     AWM_ALL, pars_reset_optimal_parsimony);
1661        awm->sep______________();
1662        awm->insert_menu_topic("beautify_tree",       "Beautify Tree",            "B", "resorttree.hlp",       AWM_ALL, makeWindowCallback(NT_resort_tree_cb, ntw, BIG_BRANCHES_TO_TOP));
1663        awm->insert_menu_topic("calc_branch_lengths", "Calculate Branch Lengths", "L", "pa_branchlengths.hlp", AWM_ALL, makeWindowCallback(NT_calc_branchlengths_reorder_and_update, ntw));
1664        awm->sep______________();
1665        awm->insert_menu_topic("calc_upper_bootstrap_indep", "Calculate Upper Bootstrap Limit (dependent NNI)",   "U", "pa_bootstrap.hlp", AWM_ALL, makeWindowCallback(NT_bootstrap,        ntw, false));
1666        awm->insert_menu_topic("calc_upper_bootstrap_dep",   "Calculate Upper Bootstrap Limit (independent NNI)", "i", "pa_bootstrap.hlp", AWM_ALL, makeWindowCallback(NT_bootstrap,        ntw, true));
1667        awm->insert_menu_topic("tree_remove_remark",         "Remove bootstrap values",                           "v", "trm_boot.hlp",     AWM_ALL, makeWindowCallback(NT_remove_bootstrap, ntw));
1668    }
1669
1670#if defined(TESTMENU)
1671    init_TEST_menu(awm, ntw);
1672#endif // TESTMENU
1673
1674    awm->create_menu("Reset", "R", AWM_ALL);
1675    {
1676        awm->insert_menu_topic("reset_logical_zoom",  "Logical Zoom",  "L", "rst_log_zoom.hlp",  AWM_ALL, makeWindowCallback(NT_reset_lzoom_cb, ntw));
1677        awm->insert_menu_topic("reset_physical_zoom", "Physical Zoom", "P", "rst_phys_zoom.hlp", AWM_ALL, makeWindowCallback(NT_reset_pzoom_cb, ntw));
1678    }
1679
1680    awm->create_menu("Properties", "P", AWM_ALL);
1681    {
1682        awm->insert_menu_topic("props_menu",  "Frame settings ...",      "F", "props_frame.hlp",      AWM_ALL, AW_preset_window);
1683        awm->insert_menu_topic("props_tree2", "Tree options",            "o", "nt_tree_settings.hlp", AWM_ALL, TREE_create_settings_window);
1684        awm->insert_menu_topic("props_tree",  "Tree colors & fonts",     "c", "color_props.hlp",      AWM_ALL, makeCreateWindowCallback(AW_create_gc_window, ntw->gc_manager));
1685        awm->insert_menu_topic("props_kl",    "Optimizer settings (KL)", "K", "kernlin.hlp",          AWM_ALL, makeCreateWindowCallback(create_kernighan_properties_window));
1686        awm->sep______________();
1687        AW_insert_common_property_menu_entries(awm);
1688        awm->sep______________();
1689        awm->insert_menu_topic("save_props", "Save Defaults (pars.arb)", "D", "savedef.hlp", AWM_ALL, AW_save_properties);
1690    }
1691    awm->button_length(5);
1692
1693    awm->insert_help_topic("ARB_PARSIMONY help", "P", "arb_pars.hlp", AWM_ALL, makeHelpCallback("arb_pars.hlp"));
1694
1695    // ----------------------
1696    //      mode buttons
1697    //
1698    // keep them synchronized as far as possible with those in ARB_PARSIMONY
1699    // see ../NTREE/NT_extern.cxx@keepModesSynchronized
1700
1701    awm->create_mode("mode_select.xpm", "mode_select.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_SELECT));
1702    awm->create_mode("mode_mark.xpm",   "mode_mark.hlp",   AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_MARK));
1703    awm->create_mode("mode_group.xpm",  "mode_group.hlp",  AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_GROUP));
1704    awm->create_mode("mode_zoom.xpm",   "mode_pzoom.hlp",  AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_ZOOM));
1705    awm->create_mode("mode_lzoom.xpm",  "mode_lzoom.hlp",  AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_LZOOM));
1706
1707    awm->create_mode("mode_info.xpm",   "mode_info.hlp",   AWM_ALL, makeWindowCallback(PARS_infomode_cb, ntw, AWT_MODE_INFO));
1708    // reserve mode-locations (to put the modes below at the same position as in ARB_NT)
1709    awm->create_mode("mode_empty.xpm", "mode.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_EMPTY));
1710
1711    // topology-modification-modes
1712    awm->create_mode("mode_setroot.xpm", "mode_setroot.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_SETROOT));
1713    awm->create_mode("mode_swap.xpm",    "mode_swap.hlp",    AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_SWAP));
1714    awm->create_mode("mode_move.xpm",    "mode_move.hlp",    AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_MOVE));
1715
1716    awm->create_mode("mode_nni.xpm",      "mode_nni.hlp",      AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_NNI));
1717    awm->create_mode("mode_kernlin.xpm",  "mode_kernlin.hlp",  AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_KERNINGHAN));
1718    awm->create_mode("mode_optimize.xpm", "mode_optimize.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_OPTIMIZE));
1719
1720    awm->at(5, 2);
1721    awm->auto_space(0, -2);
1722    awm->shadow_width(1);
1723
1724
1725    int db_treex, db_treey;
1726    awm->get_at_position(&db_treex, &db_treey);
1727    awm->callback(makeHelpCallback("nt_tree_select.hlp"));
1728    awm->button_length(16);
1729    awm->help_text("nt_tree_select.hlp");
1730    awm->create_button(NULp, AWAR_TREE);
1731
1732
1733    int db_stackx, db_stacky;
1734    awm->label_length(8);
1735    awm->label("Stored");
1736    awm->get_at_position(&db_stackx, &db_stacky);
1737    awm->button_length(6);
1738    awm->callback(makeHelpCallback("ap_stack.hlp"));
1739    awm->help_text("ap_stack.hlp");
1740    awm->create_button(NULp, AWAR_STACKPOINTER);
1741
1742    int db_parsx, db_parsy;
1743    awm->label_length(14);
1744    awm->label("Current Pars:");
1745    awm->get_at_position(&db_parsx, &db_parsy);
1746
1747    awm->button_length(10);
1748    awm->create_button(NULp, AWAR_PARSIMONY, NULp, "+");
1749
1750    awm->button_length(0);
1751
1752    awm->callback(makeWindowCallback(NT_jump_cb, ntw, AP_JUMP_BY_BUTTON));
1753    awm->help_text("tr_jump.hlp");
1754    awm->create_button("JUMP", "Jump");
1755
1756    awm->callback(makeHelpCallback("arb_pars.hlp"));
1757    awm->help_text("help.hlp");
1758    awm->create_button("HELP", "HELP", "H");
1759
1760    awm->at_newline();
1761
1762    awm->button_length(8);
1763    awm->at_x(db_stackx);
1764    awm->callback(makeWindowCallback(AP_user_pop_cb, ntw));
1765    awm->help_text("ap_stack.hlp");
1766    awm->create_button("POP", "RESTORE");
1767
1768    awm->button_length(6);
1769    awm->callback(AP_user_push_cb);
1770    awm->help_text("ap_stack.hlp");
1771    awm->create_button("PUSH", "STORE");
1772
1773    awm->at_x(db_parsx);
1774    awm->label_length(14);
1775    awm->label("Optimal Pars:");
1776
1777    awm->button_length(10);
1778    awm->create_button(NULp, AWAR_BEST_PARSIMONY, NULp, "+");
1779
1780    awm->button_length(0);
1781    awm->auto_space(0, -2);
1782
1783    awm->at_x(db_treex);
1784    awm->callback(makeWindowCallback(NT_set_tree_style, ntw, AP_TREE_RADIAL));
1785    awm->help_text("tr_type_radial.hlp");
1786    awm->create_button("RADIAL_TREE", "#radial.xpm");
1787
1788    awm->callback(makeWindowCallback(NT_set_tree_style, ntw, AP_TREE_NORMAL));
1789    awm->help_text("tr_type_dendro.hlp");
1790    awm->create_button("LIST_TREE", "#dendro.xpm");
1791
1792    awm->at_newline();
1793    awm->at(db_treex, awm->get_at_yposition());
1794
1795    {
1796        SmartPtr<AW_at_storage> maxSize(AW_at_storage::make(awm, AW_AT_MAXSIZE));
1797
1798        awm->button_length(AWAR_FOOTER_MAX_LEN);
1799        awm->create_button(NULp, AWAR_FOOTER);
1800        awm->at_newline();
1801        awm->restore_at_from(*maxSize);
1802    }
1803
1804    awm->get_at_position(&db_treex, &db_treey);
1805    awm->set_info_area_height(db_treey);
1806
1807    awm->set_bottom_area_height(0);
1808
1809    aw_parent->hide(); // hide parent
1810    awm->show();
1811
1812    TREE_install_update_callbacks(ntw);
1813
1814    update_random_repeat(awr, AWT_TREE_PARS(ntw));
1815    AP_user_push_cb(aw_parent); // push initial tree
1816    set_keep_ghostnodes(); // make sure no stacked nodes get deleted
1817}
1818
1819static AW_window *create_pars_init_window(AW_root *awr, const PARS_commands *cmds) {
1820    AW_window_simple *aws = new AW_window_simple;
1821    aws->init(awr, "PARS_PROPS", "SET PARSIMONY OPTIONS");
1822    aws->load_xfig("pars/init.fig");
1823
1824    aws->button_length(10);
1825    aws->label_length(10);
1826
1827    aws->callback(pars_exit);
1828    aws->at("close");
1829    aws->create_button("ABORT", "ABORT", "A");
1830
1831    aws->callback(makeHelpCallback("arb_pars_init.hlp"));
1832    aws->at("help");
1833    aws->create_button("HELP", "HELP", "H");
1834
1835    GBDATA *gb_main                 = ap_main->get_gb_main();
1836    WeightedFilter *weighted_filter = // do NOT free (bound to callbacks)
1837        new WeightedFilter(gb_main, aws->get_root(), AWAR_FILTER_NAME, AWAR_COLUMNSTAT_NAME, aws->get_root()->awar_string(AWAR_ALIGNMENT));
1838
1839    aws->at("filter");
1840    aws->callback(makeCreateWindowCallback(awt_create_select_filter_win, weighted_filter->get_adfiltercbstruct()));
1841    aws->create_button("SELECT_FILTER", AWAR_FILTER_NAME);
1842
1843    aws->at("weights");
1844    aws->callback(makeCreateWindowCallback(COLSTAT_create_selection_window, weighted_filter->get_column_stat()));
1845    aws->sens_mask(AWM_EXP);
1846    aws->create_button("SELECT_CSP", AWAR_COLUMNSTAT_NAME);
1847    aws->sens_mask(AWM_ALL);
1848
1849    aws->at("alignment");
1850    awt_create_ALI_selection_list(gb_main, aws, AWAR_ALIGNMENT, "*=");
1851
1852    aws->at("tree");
1853    awt_create_TREE_selection_list(gb_main, aws, AWAR_TREE);
1854
1855    aws->callback(makeWindowCallback(pars_start_cb, weighted_filter, cmds));
1856    aws->at("go");
1857    aws->create_button("GO", "GO", "G");
1858
1859    return aws;
1860}
1861
1862KL_Settings::KL_Settings(AW_root *aw_root) {
1863    maxdepth = aw_root->awar(AWAR_KL_MAXDEPTH)->read_int();
1864    incdepth = aw_root->awar(AWAR_KL_INCDEPTH)->read_int();
1865
1866    Static.enabled  = aw_root->awar(AWAR_KL_STATIC_ENABLED)->read_int();
1867    Static.depth[0] = 2; // always test both possibilities at starting edge
1868    Static.depth[1] = aw_root->awar(AWAR_KL_STATIC_DEPTH1)->read_int();
1869    Static.depth[2] = aw_root->awar(AWAR_KL_STATIC_DEPTH2)->read_int();
1870    Static.depth[3] = aw_root->awar(AWAR_KL_STATIC_DEPTH3)->read_int();
1871    Static.depth[4] = aw_root->awar(AWAR_KL_STATIC_DEPTH4)->read_int();
1872    Static.depth[5] = aw_root->awar(AWAR_KL_STATIC_DEPTH5)->read_int();
1873
1874    Dynamic.enabled = aw_root->awar(AWAR_KL_DYNAMIC_ENABLED)->read_int();
1875    Dynamic.start   = aw_root->awar(AWAR_KL_DYNAMIC_START)->read_int();
1876    Dynamic.maxx    = aw_root->awar(AWAR_KL_DYNAMIC_MAXX)->read_int();
1877    Dynamic.maxy    = aw_root->awar(AWAR_KL_DYNAMIC_MAXY)->read_int();
1878    Dynamic.type    = (KL_DYNAMIC_THRESHOLD_TYPE)aw_root->awar(AWAR_KL_FUNCTION_TYPE)->read_int();
1879
1880    whichEdges = ANY_EDGE;
1881    if (aw_root->awar(AWAR_OPTI_MARKED_ONLY)->read_int()) whichEdges = EdgeSpec(whichEdges|SKIP_UNMARKED_EDGES);
1882    if (aw_root->awar(AWAR_OPTI_SKIP_FOLDED)->read_int()) whichEdges = EdgeSpec(whichEdges|SKIP_FOLDED_EDGES);
1883}
1884#if defined(UNIT_TESTS)
1885KL_Settings::KL_Settings() {
1886    // set default values
1887    maxdepth = 15;
1888
1889    Static.enabled  = true;
1890    Static.depth[0] = 2; // always test both possibilities at starting edge
1891    Static.depth[1] = 8;
1892    Static.depth[2] = 6;
1893    Static.depth[3] = 6;
1894    Static.depth[4] = 6;
1895    Static.depth[5] = 6;
1896
1897    Dynamic.enabled = true;
1898    Dynamic.start   = 100;
1899    Dynamic.maxy    = 150;
1900    Dynamic.maxx    = 6;
1901
1902    // these values do not seem to have any effect (i.e. are not covered by unit-tests):
1903    incdepth = 4;
1904
1905    // const setting (not configurable)
1906    Dynamic.type = AP_QUADRAT_START;
1907    whichEdges   = EdgeSpec(SKIP_UNMARKED_EDGES|SKIP_FOLDED_EDGES);
1908}
1909#endif
1910
1911static void create_optimize_vars(AW_root *aw_root, AW_default props) {
1912    // kernighan
1913
1914    aw_root->awar_int(AWAR_OPTI_MARKED_ONLY, 1, props);
1915    aw_root->awar_int(AWAR_OPTI_SKIP_FOLDED, 1, props);
1916
1917    aw_root->awar_int(AWAR_KL_MAXDEPTH, 15, props);
1918    aw_root->awar_int(AWAR_KL_INCDEPTH, 4,  props);
1919
1920    aw_root->awar_int(AWAR_KL_STATIC_ENABLED, 1, props);
1921    aw_root->awar_int(AWAR_KL_STATIC_DEPTH1,  5, props)->set_minmax(1, 8);
1922    aw_root->awar_int(AWAR_KL_STATIC_DEPTH2,  3, props)->set_minmax(1, 6);
1923    aw_root->awar_int(AWAR_KL_STATIC_DEPTH3,  2, props)->set_minmax(1, 6);
1924    aw_root->awar_int(AWAR_KL_STATIC_DEPTH4,  2, props)->set_minmax(1, 6);
1925    aw_root->awar_int(AWAR_KL_STATIC_DEPTH5,  1, props)->set_minmax(1, 6);
1926
1927    aw_root->awar_int(AWAR_KL_DYNAMIC_ENABLED, 1,   props);
1928    aw_root->awar_int(AWAR_KL_DYNAMIC_START,   100, props);
1929    aw_root->awar_int(AWAR_KL_DYNAMIC_MAXX,    6,   props);
1930    aw_root->awar_int(AWAR_KL_DYNAMIC_MAXY,    150, props);
1931
1932    aw_root->awar_int(AWAR_KL_FUNCTION_TYPE, AP_QUADRAT_START, props);
1933}
1934
1935static void pars_create_all_awars(AW_root *awr, AW_default aw_def, GBDATA *gb_main) {
1936    awr->awar_string(AWAR_SPECIES_NAME, "",     gb_main);
1937    awr->awar_string(AWAR_FOOTER,       "",     aw_def);
1938
1939    // copy currently selected alignment to awar:
1940    {
1941        GB_transaction ta(gb_main);
1942
1943        char *dali = GBT_get_default_alignment(gb_main);
1944        if (!dali) {
1945            GB_clear_error();
1946            dali = ARB_strdup("no default alignment");
1947        }
1948        awr->awar_string(AWAR_ALIGNMENT, dali, gb_main)->write_string(dali);
1949        free(dali);
1950    }
1951
1952    awt_create_filter_awars(awr, aw_def, AWAR_FILTER_NAME, AWAR_ALIGNMENT);
1953    awt_set_awar_to_valid_filter_good_for_tree_methods(gb_main, awr, AWAR_FILTER_NAME);
1954
1955    awr->awar_int(AWAR_PARS_TYPE, PARS_WAGNER, gb_main);
1956
1957    {
1958        GB_transaction  ta(gb_main);
1959        GBDATA         *gb_tree_name = GB_search(gb_main, AWAR_TREE, GB_STRING);
1960        char           *tree_name    = GB_read_string(gb_tree_name);
1961
1962        awr->awar_string(AWAR_TREE, "", aw_def)->write_string(tree_name);
1963        free(tree_name);
1964    }
1965
1966    awr->awar_int(AWAR_PARSIMONY,      0, aw_def);
1967    awr->awar_int(AWAR_BEST_PARSIMONY, 0, aw_def);
1968    awr->awar_int(AWAR_STACKPOINTER,   0, aw_def);
1969
1970    awr->awar_int(AWAR_RAND_REPEAT,  1,  aw_def)->set_minmax(1, 1000000); // default value is overwritten by update_random_repeat()
1971    awr->awar_int(AWAR_RAND_PERCENT, 50, aw_def)->set_minmax(1, 100);
1972
1973    create_optimize_vars(awr, aw_def);
1974    NDS_create_vars(awr, aw_def, gb_main);
1975
1976    TREE_create_awars(awr, gb_main);
1977
1978#if defined(DEBUG)
1979    AWT_create_db_browser_awars(awr, aw_def);
1980#endif // DEBUG
1981
1982    GB_ERROR error = ARB_init_global_awars(awr, aw_def, gb_main);
1983    if (error) aw_message(error);
1984}
1985
1986static AW_root *AD_map_viewer_aw_root = NULp;
1987
1988void PARS_map_viewer(GBDATA *gb_species, AD_MAP_VIEWER_TYPE vtype) {
1989    // Note: sync with ../NTREE/ad_spec.cxx@launch_MapViewer_cb
1990
1991    if (AD_map_viewer_aw_root &&
1992        gb_species            &&
1993        (vtype == ADMVT_SELECT || vtype == ADMVT_INFO))
1994    {
1995        AD_map_viewer_aw_root->awar(AWAR_SPECIES_NAME)->write_string(null2empty(GBT_get_name(gb_species)));
1996    }
1997}
1998
1999int ARB_main(int argc, char *argv[]) {
2000    aw_initstatus();
2001
2002    GB_shell shell;
2003    AW_root *aw_root      = AWT_create_root("pars.arb", "ARB_PARS", need_macro_ability());
2004    AD_map_viewer_aw_root = aw_root;
2005
2006    ap_main     = new AP_main;
2007    GLOBAL_PARS = new ArbParsimony();
2008
2009    const char *db_server = ":";
2010
2011    PARS_commands cmds;
2012
2013    while (argc>=2 && argv[1][0] == '-') {
2014        argc--;
2015        argv++;
2016        if (!strcmp(argv[0], "-quit"))                   cmds.quit = 1;
2017        else if (!strcmp(argv[0], "-add_marked"))        cmds.add_marked = 1;
2018        else if (!strcmp(argv[0], "-add_selected"))      cmds.add_selected = 1;
2019        else if (!strcmp(argv[0], "-calc_branchlengths")) cmds.calc_branch_lengths = 1;
2020        else if (!strcmp(argv[0], "-calc_bootstrap"))    cmds.calc_bootstrap = 1;
2021        else {
2022            fprintf(stderr, "Unknown option '%s'\n", argv[0]);
2023
2024            printf("    Options:                Meaning:\n"
2025                   "\n"
2026                   "    -add_marked             add marked species   (without changing topology)\n"
2027                   "    -add_selected           add selected species (without changing topology)\n"
2028                   "    -calc_branchlengths     calculate branch lengths only\n"
2029                   "    -calc_bootstrap         estimate bootstrap values\n"
2030                   "    -quit                   quit after performing operations\n"
2031                   );
2032
2033            exit(EXIT_FAILURE);
2034        }
2035    }
2036
2037
2038    if (argc==2) db_server = argv[1];
2039
2040    GB_ERROR error = ap_main->open(db_server);
2041    if (!error) {
2042        GBDATA *gb_main = ap_main->get_gb_main();
2043        error           = configure_macro_recording(aw_root, "ARB_PARS", gb_main);
2044
2045        if (!error) {
2046#if defined(DEBUG)
2047            AWT_announce_db_to_browser(gb_main, GBS_global_string("ARB-database (%s)", db_server));
2048#endif // DEBUG
2049
2050            pars_create_all_awars(aw_root, AW_ROOT_DEFAULT, gb_main);
2051
2052            AW_window *aww = create_pars_init_window(aw_root, &cmds);
2053            aww->show();
2054
2055            AWT_install_cb_guards();
2056            aw_root->main_loop();
2057        }
2058    }
2059
2060    if (error) aw_popup_exit(error);
2061    return EXIT_SUCCESS;
2062}
2063
2064
2065// --------------------------------------------------------------------------------
2066
2067#ifdef UNIT_TESTS
2068#include <arb_file.h>
2069#include <arb_diff.h>
2070#include <test_unit.h>
2071#include <AP_seq_dna.hxx>
2072#include <AP_seq_protein.hxx>
2073#include "test_env.h"
2074
2075// #define AUTO_UPDATE_IF_CHANGED // uncomment to auto update expected results
2076
2077static arb_test::match_expectation topologyEquals(AP_tree_nlen *root_node, const char *treefile_base) {
2078    using namespace   arb_test;
2079    expectation_group fulfilled;
2080
2081    char *outfile  = GBS_global_string_copy("pars/%s.tree", treefile_base);
2082    char *expected = GBS_global_string_copy("%s.expected", outfile);
2083    bool  update   = false;
2084
2085    {
2086        FILE *out    = fopen(outfile, "wt");
2087        fulfilled.add(that(out).does_differ_from_NULL());
2088        char *newick = GBT_tree_2_newick(root_node, NewickFormat(nLENGTH|nWRAP), false);
2089        fputs(newick, out);
2090        free(newick);
2091        fclose(out);
2092    }
2093
2094    if (GB_is_regularfile(expected)) {
2095        bool match_exp_topo = textfiles_have_difflines(outfile,expected,0);
2096#if defined(AUTO_UPDATE_IF_CHANGED)
2097        if (!match_exp_topo) update = true;
2098#endif
2099        if (!update) fulfilled.add(that(match_exp_topo).is_equal_to(true));
2100    }
2101    else {
2102        update = true;
2103    }
2104
2105    if (update) TEST_COPY_FILE(outfile, expected);
2106    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(outfile));
2107
2108    free(expected);
2109    free(outfile);
2110
2111    return all().ofgroup(fulfilled);
2112}
2113
2114template<class ENV>
2115arb_test::match_expectation calcCostsCausesCombines(ENV& env, AP_FLOAT exp_pars, long exp_combines) {
2116    using namespace   arb_test;
2117    expectation_group fulfilled;
2118
2119    long combines_b4_costCalc = env.combines_performed();
2120    fulfilled.add(that(combines_b4_costCalc).is_equal_to(0));
2121
2122    AP_FLOAT new_pars             = env.root_node()->costs();
2123    long     combines_by_costCalc = env.combines_performed();
2124
2125    fulfilled.add(that(new_pars).fulfills(epsilon_similar(0.001), exp_pars));
2126    fulfilled.add(that(combines_by_costCalc).is_equal_to(exp_combines));
2127
2128    return all().ofgroup(fulfilled);
2129}
2130
2131#define TEST_EXPECT_SAVED_TOPOLOGY(env,exp_topo)                  TEST_EXPECTATION(topologyEquals(env.root_node(), exp_topo))
2132#define TEST_EXPECT_SAVED_TOPOLOGY__BROKEN(env,exp_topo,got_topo) TEST_EXPECTATION__BROKEN(topologyEquals(env.root_node(), exp_topo), topologyEquals(env.root_node(), got_topo))
2133
2134#define TEST_EXPECT_PARSVAL(env,exp_pars)                            TEST_EXPECT_EQUAL(env.root_node()->costs(), exp_pars);
2135#define TEST_EXPECT_ONLY_PARSVAL_COMBINES(env,exp_pars,exp_combines) TEST_EXPECTATION(calcCostsCausesCombines(env, exp_pars, exp_combines))
2136// use TEST_EXPECT_ONLY_PARSVAL_COMBINES when
2137// - no combines occurred (or combines were just tested using TEST_EXPECT_COMBINES_PERFORMED) and
2138// - topology was modified, so that calculation of costs causes new combines.
2139#define TEST_EXPECT_KNOWN_PARSVAL(env,exp_pars) TEST_EXPECT_ONLY_PARSVAL_COMBINES(env,exp_pars,0)
2140
2141enum TopoMod {
2142    MOD_REMOVE_MARKED,
2143
2144    MOD_QUICK_READD,
2145    MOD_QUICK_ADD,
2146    MOD_READD_NNI,
2147
2148    MOD_ADD_PARTIAL,
2149
2150    MOD_CALC_LENS,
2151    MOD_OPTI_NNI,
2152    MOD_OPTI_GLOBAL,
2153
2154    MOD_MIX_TREE,
2155};
2156
2157template <typename SEQ>
2158static void modifyTopology(PARSIMONY_testenv<SEQ>& env, TopoMod mod) {
2159    switch (mod) {
2160        case MOD_REMOVE_MARKED:
2161            env.graphic_tree()->get_tree_root()->remove_leafs(AWT_REMOVE_MARKED);
2162            break;
2163
2164        case MOD_QUICK_READD:
2165            nt_reAdd(env.graphic_tree(), NT_ADD_MARKED, true);
2166            break;
2167
2168        case MOD_QUICK_ADD:
2169            nt_add(env.graphic_tree(), NT_ADD_MARKED, true);
2170            break;
2171
2172        case MOD_READD_NNI:
2173            nt_reAdd(env.graphic_tree(), NT_ADD_MARKED, false);
2174            break;
2175
2176        case MOD_ADD_PARTIAL:
2177            nt_add_partial(env.graphic_tree());
2178            break;
2179
2180        case MOD_CALC_LENS:
2181            calc_branchlengths_and_reorder(env.graphic_tree());
2182            break;
2183
2184        case MOD_OPTI_NNI: // only marked/unfolded
2185            recursiveNNI(env.graphic_tree(), env.get_KL_settings().whichEdges);
2186            break;
2187
2188        case MOD_OPTI_GLOBAL:
2189            optimizeTree(env.graphic_tree(), env.get_KL_settings());
2190            break;
2191
2192        case MOD_MIX_TREE: {
2193            long leafs = rootNode()->count_leafs();
2194            mixtree_and_calclengths(env.graphic_tree(), calculate_default_random_repeat(leafs), 100, ANY_EDGE);
2195            break;
2196        }
2197    }
2198}
2199
2200template <typename SEQ>
2201static arb_test::match_expectation modifyingTopoResultsIn(TopoMod mod, const char *topo, long pars_expected, PARSIMONY_testenv<SEQ>& env, bool restore) {
2202    using namespace   arb_test;
2203    expectation_group fulfilled;
2204
2205    TEST_EXPECT_VALID_TREE(env.root_node());
2206
2207    Level upc = env.get_user_push_counter();
2208    Level fl  = env.get_frame_level();
2209
2210    if (restore) {
2211        env.push();
2212        TEST_EXPECT_VALID_TREE(env.root_node());
2213    }
2214
2215    AWT_graphic_exports& exports = env.graphic_tree()->exports;
2216    exports.clear_save_request();
2217    modifyTopology(env, mod);
2218    if (topo) {
2219        fulfilled.add(topologyEquals(env.root_node(), topo));
2220        if (mod != MOD_REMOVE_MARKED) { // remove_leafs doesn't request save
2221            fulfilled.add(that(exports.needs_save()).is_equal_to(true));
2222        }
2223    }
2224
2225    fulfilled.add(that(allBranchlengthsAreDefined(env.root_node())).is_equal_to(true));
2226
2227    if (pars_expected != -1) {
2228        fulfilled.add(that(env.root_node()->costs()).is_equal_to(pars_expected));
2229    }
2230
2231    if (restore) {
2232        TEST_EXPECT_VALID_TREE(env.root_node());
2233        TEST_VALIDITY(env.pop_will_produce_valid_tree());
2234        env.pop();
2235        bool blen_def_after_pop = allBranchlengthsAreDefined(env.root_node());
2236        fulfilled.add(that(blen_def_after_pop).is_equal_to(true));
2237    }
2238
2239    TEST_EXPECT_EQUAL(fl, env.get_frame_level());
2240    TEST_EXPECT_EQUAL(upc, env.get_user_push_counter());
2241
2242    TEST_EXPECT_VALID_TREE(env.root_node());
2243
2244    return all().ofgroup(fulfilled);
2245}
2246
2247static arb_test::match_expectation movingRootDoesntAffectCosts(long pars_expected) {
2248    using namespace   arb_test;
2249    expectation_group fulfilled;
2250
2251    long pars_min = LONG_MAX;
2252    long pars_max = LONG_MIN;
2253
2254    for (int depth_first = 0; depth_first<=1; ++depth_first) {
2255        for (int push_local = 0; push_local<=1; ++push_local) {
2256            EdgeChain chain(rootEdge(), ANY_EDGE, depth_first);
2257
2258            if (!push_local) ap_main->remember();
2259            while (chain) {
2260                AP_tree_edge *edge = *chain; ++chain;
2261
2262                if (push_local) ap_main->remember();
2263                edge->set_root();
2264                long pars = rootNode()->costs();
2265                pars_min  = std::min(pars, pars_min);
2266                pars_max  = std::max(pars, pars_max);
2267                if (push_local) ap_main->revert();
2268            }
2269            if (!push_local) ap_main->revert();
2270        }
2271    }
2272
2273    fulfilled.add(that(pars_min).is_equal_to(pars_expected));
2274    fulfilled.add(that(pars_max).is_equal_to(pars_expected));
2275
2276    return all().ofgroup(fulfilled);
2277}
2278
2279static GBDATA *copy_to(GBDATA *gb_species, const char *newShortname) {
2280    GBDATA *gb_species_data = GB_get_father(gb_species);
2281    GBDATA *gb_new_species  = GB_create_container(gb_species_data, "species");
2282
2283    GB_ERROR error = NULp;
2284    if (!gb_new_species) {
2285        error = GB_await_error();
2286    }
2287    else {
2288        error = GB_copy_dropProtectMarksAndTempstate(gb_new_species, gb_species);
2289        if (!error) error = GBT_write_string(gb_new_species, "name", newShortname);
2290    }
2291
2292    ap_assert(contradicted(gb_new_species, error));
2293    return gb_new_species;
2294}
2295
2296inline void mark_only(GBDATA *gb_species) {
2297    GBDATA         *gb_main = GB_get_root(gb_species);
2298    GB_transaction  ta(gb_main);
2299    GBT_mark_all(gb_main, 0);
2300    GB_write_flag(gb_species, 1);
2301}
2302inline void mark(GBDATA *gb_species) {
2303    GBDATA         *gb_main = GB_get_root(gb_species);
2304    GB_transaction  ta(gb_main);
2305    GB_write_flag(gb_species, 1);
2306}
2307inline void mark_all(GBDATA *gb_main) {
2308    GB_transaction  ta(gb_main);
2309    GBT_mark_all(gb_main, 1);
2310}
2311
2312inline int is_partial(GBDATA *gb_species) {
2313    GB_transaction ta(gb_species);
2314    return GBT_is_partial(gb_species, -1, false);
2315}
2316
2317template <typename SEQ>
2318static arb_test::match_expectation addedAsBrotherOf(const char *name, const char *allowedBrothers, PARSIMONY_testenv<SEQ>& env) {
2319    using namespace   arb_test;
2320    expectation_group fulfilled;
2321
2322    AP_tree_nlen *node_in_tree = env.root_node()->findLeafNamed(name);
2323    ap_assert(node_in_tree);
2324    fulfilled.add(that(node_in_tree).does_differ_from_NULL());
2325
2326    const char *brother = node_in_tree->get_brother()->name;
2327    ap_assert(brother);
2328    fulfilled.add(that(allowedBrothers).does_contain(brother));
2329
2330    return all().ofgroup(fulfilled);
2331}
2332
2333template <typename SEQ>
2334static arb_test::match_expectation addingPartialResultsIn(GBDATA *gb_added_species, const char *allowedBrothers, const char *topo, int pars_expected, PARSIMONY_testenv<SEQ>& env) {
2335    using namespace   arb_test;
2336    expectation_group fulfilled;
2337
2338    mark_only(gb_added_species);
2339    env.compute_tree(); // species marks affect order of node-chain (used in nni_rec)
2340    fulfilled.add(modifyingTopoResultsIn(MOD_ADD_PARTIAL, topo, pars_expected, env, false));
2341    fulfilled.add(that(is_partial(gb_added_species)).is_equal_to(1));
2342
2343    const char *name = GBT_get_name_or_description(gb_added_species);
2344    fulfilled.add(addedAsBrotherOf(name, allowedBrothers, env));
2345
2346    return all().ofgroup(fulfilled);
2347}
2348
2349static int seqDiff(GBDATA *gb_main, const char *aliname, const char *species1, const char *species2, int startPos, int endPos) {
2350    GB_transaction ta(gb_main);
2351
2352    GBDATA *gb_species1 = GBT_expect_species(gb_main, species1);
2353    GBDATA *gb_species2 = GBT_expect_species(gb_main, species2);
2354    int     diffs       = -1;
2355
2356    if (gb_species1 && gb_species2) {
2357        GBDATA *gb_seq1 = GBT_find_sequence(gb_species1, aliname);
2358        GBDATA *gb_seq2 = GBT_find_sequence(gb_species2, aliname);
2359
2360        if (gb_seq1 && gb_seq2) {
2361            char *seq1 = GB_read_string(gb_seq1);
2362            char *seq2 = GB_read_string(gb_seq2);
2363
2364            int maxPos1 = strlen(seq1)-1;
2365#if defined(ASSERTION_USED)
2366            int maxPos2 = strlen(seq2)-1;
2367#endif
2368            ap_assert(maxPos1 == maxPos2);
2369
2370            if (endPos>maxPos1) endPos = maxPos1;
2371
2372            diffs = 0;
2373            for (int p = startPos; p<=endPos; ++p) { // LOOP_ VECTORIZED[!<9.1] // @@@ fails in RELEASE code! // IRRELEVANT_LOOP
2374                diffs += seq1[p] != seq2[p];
2375            }
2376
2377            free(seq2);
2378            free(seq1);
2379        }
2380    }
2381
2382    return diffs;
2383}
2384
2385static GBDATA *createPartialSeqFrom(GBDATA *gb_main, const char *aliname, const char *dest_species, const char *source_species, int startPos, int endPos) {
2386    GB_transaction ta(gb_main);
2387
2388    GBDATA *gb_result         = NULp;
2389    GBDATA *gb_source_species = GBT_expect_species(gb_main, source_species);
2390
2391    if (gb_source_species) {
2392        GBDATA *gb_dest_species = copy_to(gb_source_species, dest_species);
2393        GBDATA *gb_dest_seq     = GBT_find_sequence(gb_dest_species, aliname); // =same as source seq
2394        char   *seq             = GB_read_string(gb_dest_seq);
2395
2396        if (seq) {
2397            int maxPos = strlen(seq)-1;
2398
2399            startPos = std::min(startPos, maxPos);
2400            endPos   = std::min(endPos, maxPos);
2401
2402            if (startPos>0) memset(seq, '.', startPos);
2403            if (endPos<maxPos) memset(seq+endPos+1, '.', maxPos-endPos);
2404
2405            GB_ERROR error     = GB_write_string(gb_dest_seq, seq);
2406            if (error) GB_export_error(error);
2407            else {
2408                gb_result = gb_dest_species; // success
2409#if defined(DEBUG)
2410                fprintf(stderr, "created partial '%s' from '%s' (seq='%s')\n", dest_species, source_species, seq);
2411#endif
2412            }
2413
2414            free(seq);
2415        }
2416    }
2417
2418    return gb_result;
2419}
2420
2421static GB_ERROR modifyOneBase(GBDATA *gb_species, const char *aliname, char cOld, char cNew) {
2422    GB_transaction ta(gb_species);
2423    GB_ERROR       error = "failed to modifyOneBase";
2424
2425    GBDATA *gb_seq = GBT_find_sequence(gb_species, aliname);
2426    if (gb_seq) {
2427        char *seq = GB_read_string(gb_seq);
2428        if (seq) {
2429            char *B = strchr(seq, cOld);
2430            if (!B) {
2431                error = "does not contain base in modifyOneBase";
2432            }
2433            else {
2434                B[0]  = cNew;
2435                error = GB_write_string(gb_seq, seq);
2436            }
2437            free(seq);
2438        }
2439    }
2440
2441    return error;
2442}
2443
2444static long unmark_unwanted(const char *, long cd_gbd, void*) {
2445    GBDATA *gbd = (GBDATA*)cd_gbd;
2446    GB_write_flag(gbd, 0);
2447    return 0;
2448}
2449
2450void TEST_SLOW_nucl_tree_modifications() {
2451    const char *aliname = "ali_5s";
2452
2453    PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb", aliname);
2454    TEST_EXPECT_NO_ERROR(env.load_tree("tree_test"));
2455    TEST_EXPECT_SAVED_TOPOLOGY(env, "nucl-initial");
2456
2457    const int PARSIMONY_ORG = 302;
2458    TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 14);
2459
2460    // [NUCOPTI] opposed to protein tests below the initial tree here is NOT optimized! compare .@PROTOPTI
2461    // -> removing and adding species produces a better tree (for add+NNI)
2462    //
2463    // diff initial->removed  : http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/nucl-removed.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fnucl-initial.tree.expected
2464    // diff initial->add-quick: http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/nucl-add-quick.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fnucl-initial.tree.expected
2465    // diff initial->add-NNI:   http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/nucl-add-NNI.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fnucl-initial.tree.expected
2466
2467    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, "nucl-removed",   PARSIMONY_ORG-94, env, true)); // test remove-marked only (same code as part of nt_reAdd)
2468    TEST_EXPECT_COMBINES_PERFORMED(env, 3);
2469
2470    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD,   "nucl-add-quick", PARSIMONY_ORG-18, env, true)); // test quick-add
2471    TEST_EXPECT_COMBINES_PERFORMED(env, 400);
2472
2473    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_READD_NNI,     "nucl-add-NNI",   PARSIMONY_ORG-20, env, true)); // test add + NNI
2474    TEST_EXPECT_COMBINES_PERFORMED(env, 503);
2475
2476    // test partial-add
2477    {
2478        GBDATA *gb_main = env.gbmain();
2479
2480        // create 2 non-overlapping partial species
2481        const int  SPLIT    = 55;
2482        GBDATA    *CorGlutP = createPartialSeqFrom(gb_main, aliname, "CorGlutP", "CorGluta", 0,       SPLIT);
2483        GBDATA    *CloButyP = createPartialSeqFrom(gb_main, aliname, "CloButyP", "CloButyr", SPLIT+1, INT_MAX);
2484        GBDATA    *CloButyM = createPartialSeqFrom(gb_main, aliname, "CloButyM", "CloButyr", SPLIT+1, INT_MAX);
2485        TEST_EXPECT_NO_ERROR(modifyOneBase(CloButyM, aliname, 'G', 'C')); // change first 'G' into 'C'
2486
2487        TEST_VALIDITY(env.all_available_pops_will_produce_valid_trees()); // no push yet (does nothing)
2488
2489        // test partials differ from full:
2490        TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CorGlutP", "CorGluta", 0, INT_MAX));
2491        TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CloButyP", "CloButyr", 0, INT_MAX));
2492        TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CloButyM", "CloButyr", 0, INT_MAX));
2493        // test partials created from CloButyr differ in partial range:
2494        TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CloButyM", "CloButyP", SPLIT+1, INT_MAX));
2495
2496        // test condition that "CloButyr and CloButy2 do NOT differ in seq-range of partial" (otherwise test below makes no sense!)
2497        TEST_EXPECT_ZERO(seqDiff(gb_main, aliname, "CloButyr", "CloButy2", SPLIT+1, INT_MAX));
2498
2499        // test that "CloButyr and CloButy2 DO differ in whole seq-range" (otherwise inserting into tree is non-deterministic)
2500        TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CloButyr", "CloButy2", 0, INT_MAX));
2501
2502        // add CloButyP (and undo)
2503        {
2504            env.push();
2505
2506            // CloButyr and CloButy2 do not differ in seq-range of partial -> any of both may be chosen as brother.
2507            // behavior should be changed with #605
2508            TEST_EXPECTATION(addingPartialResultsIn(CloButyP, "CloButyr;CloButy2", "nucl-addPart-CloButyP", PARSIMONY_ORG, env));
2509            TEST_EXPECT_COMBINES_PERFORMED(env, 6);
2510            env.pop();
2511        }
2512
2513        {
2514            env.push();
2515            TEST_EXPECTATION(addingPartialResultsIn(CorGlutP, "CorGluta",          "nucl-addPart-CorGlutP",          PARSIMONY_ORG, env)); // add CorGlutP
2516            TEST_EXPECT_COMBINES_PERFORMED(env, 5); // @@@ partial-add should not perform combines at all (maybe caused by cost-recalc?)
2517            TEST_EXPECTATION(addingPartialResultsIn(CloButyP, "CloButyr;CloButy2", "nucl-addPart-CorGlutP-CloButyP", PARSIMONY_ORG, env)); // also add CloButyP
2518            TEST_EXPECT_COMBINES_PERFORMED(env, 6);
2519            env.pop();
2520        }
2521
2522        // now add CorGlutP as full, then CloButyP and CloButyM as partials
2523        {
2524            env.push();
2525
2526            mark_only(CorGlutP);
2527            env.compute_tree(); // species marks affect order of node-chain (used in nni_rec)
2528            {
2529                GB_transaction  ta(gb_main);
2530                TEST_EXPECT_NO_ERROR(GBT_write_int(CorGlutP, "ARB_partial", 0)); // revert species to "full"
2531            }
2532
2533            const int PARSIMONY_ADDED = PARSIMONY_ORG; // value after adding CorGlutP (as full-length sequence)
2534
2535            TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "nucl-addPartialAsFull-CorGlutP", PARSIMONY_ADDED, env, false));
2536            TEST_EXPECT_COMBINES_PERFORMED(env, 230);
2537            TEST_EXPECT_EQUAL(is_partial(CorGlutP), 0); // check CorGlutP was added as full sequence
2538            TEST_EXPECTATION(addedAsBrotherOf("CorGlutP", "CorGluta", env)); // partial created from CorGluta gets inserted next to CorGluta
2539
2540            // add CloButyP as partial.
2541            // as expected it is placed next to matching full sequences (does not differ in partial range)
2542            TEST_EXPECTATION(addingPartialResultsIn(CloButyP, "CloButyr;CloButy2", NULp, PARSIMONY_ADDED, env));
2543            TEST_EXPECT_COMBINES_PERFORMED(env, 6);
2544
2545            // CloButyM differs slightly in overlap with CloButyr/CloButy2, but has no overlap with CorGlutP
2546            // shows bug described in #609 is fixed:
2547            TEST_EXPECTATION(addingPartialResultsIn(CloButyM, "CloButyP", "nucl-addPart-bug609",
2548                                                    PARSIMONY_ADDED+1, // @@@ known bug - partial should not affect parsimony value; possibly related to ../HELP_SOURCE/source/pa_partial.hlp@WARNINGS
2549                                                    env));
2550            TEST_EXPECT_COMBINES_PERFORMED(env, 7);
2551            env.pop();
2552        }
2553    }
2554
2555    TEST_EXPECT_SAVED_TOPOLOGY(env, "nucl-initial");
2556
2557    const int PARSIMONY_NNI_MARKED   = PARSIMONY_ORG-18;
2558    const int PARSIMONY_NNI_ALL      = PARSIMONY_ORG-18;
2559    const int PARSIMONY_OPTI_MARKED  = PARSIMONY_ORG-25;
2560    const int PARSIMONY_OPTI_VISIBLE = PARSIMONY_ORG-26;
2561    const int PARSIMONY_OPTI_ALL     = PARSIMONY_ORG-36;
2562
2563    {
2564        env.push();
2565        TEST_EXPECTATION(movingRootDoesntAffectCosts(PARSIMONY_ORG));
2566        TEST_EXPECT_COMBINES_PERFORMED(env, 342);
2567        env.pop();
2568    }
2569
2570    // ------------------------------
2571    //      test optimize (some)
2572
2573    // mark initially marked species
2574    {
2575        GB_transaction ta(env.gbmain());
2576        GBT_restore_marked_species(env.gbmain(), "CorAquat;CorGluta;CurCitre;CloButyr;CloButy2;CytAquat");
2577        env.compute_tree(); // species marks affect order of node-chain (used in nni_rec)
2578    }
2579
2580    TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_ORG);
2581
2582    // test branchlength calculation
2583    // (optimizations below implicitely recalculates branchlengths)
2584    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_CALC_LENS, "nucl-calclength", PARSIMONY_ORG, env, false));
2585    TEST_EXPECT_COMBINES_PERFORMED(env, 120);
2586
2587    // test whether branchlength calculation depends on root-position
2588    {
2589        AP_tree_edge *orgRootEdge = rootEdge();
2590
2591        env.push();
2592
2593        const char *tested_roots[] = {
2594            "CloButyr",
2595            "CloTyro4",
2596            "CloTyrob",
2597            "CloInnoc",
2598        };
2599
2600        for (size_t r = 0; r<ARRAY_ELEMS(tested_roots); ++r) {
2601            const char *leafName = tested_roots[r];
2602            env.root_node()->findLeafNamed(leafName)->set_root();
2603            calc_branchlengths_and_reorder(env.graphic_tree());
2604            orgRootEdge->set_root();
2605            env.graphic_tree()->reorderTree(BIG_BRANCHES_TO_TOP);
2606
2607            TEST_EXPECT_SAVED_TOPOLOGY(env, "nucl-calclength");
2608        }
2609        TEST_EXPECT_COMBINES_PERFORMED(env, 517);
2610
2611        env.pop();
2612    }
2613
2614    // test optimize (some)
2615    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_NNI, "nucl-opti-NNI", PARSIMONY_NNI_MARKED, env, true)); // test recursive NNI
2616    TEST_EXPECT_COMBINES_PERFORMED(env, 208);
2617
2618    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-marked-global", PARSIMONY_OPTI_MARKED, env, true)); // test recursive NNI+KL
2619    TEST_EXPECT_COMBINES_PERFORMED(env, 18518);
2620
2621    {
2622        KL_Settings& KL = env.get_KL_settings();
2623        LocallyModify<EdgeSpec> target(KL.whichEdges, EdgeSpec(KL.whichEdges&~SKIP_UNMARKED_EDGES)); // ignore marks; skip folded
2624
2625        TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-visible-global", PARSIMONY_OPTI_VISIBLE, env, true)); // same result as if all species marked (see below)
2626        TEST_EXPECT_COMBINES_PERFORMED(env, 34925);
2627
2628        KL.whichEdges = EdgeSpec(KL.whichEdges&~SKIP_FOLDED_EDGES); // ignore marks and folding
2629
2630        TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-global", PARSIMONY_OPTI_ALL, env, true)); // same result as if all species marked and all groups unfolded (see below)
2631        TEST_EXPECT_COMBINES_PERFORMED(env, 124811);
2632    }
2633
2634    // -----------------------------
2635    //      test optimize (all)
2636
2637    // mark all species
2638    mark_all(env.gbmain());
2639    // unmark species not in tree
2640    {
2641        GB_transaction  ta(env.gbmain());
2642        GB_HASH        *markedNotInTree = GBT_create_marked_species_hash(env.gbmain());
2643        NT_remove_species_in_tree_from_hash(env.root_node(), markedNotInTree);
2644        GBS_hash_do_loop(markedNotInTree, unmark_unwanted, NULp);
2645        GBS_free_hash(markedNotInTree);
2646    }
2647    env.compute_tree(); // species marks affect order of node-chain (used in nni_rec)
2648    TEST_EXPECT_EQUAL(GBT_count_marked_species(env.gbmain()), 15);
2649
2650    TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_ORG);
2651
2652    // test branchlength calculation
2653    // (optimizations below implicitely recalculates branchlengths)
2654    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_CALC_LENS, "nucl-calclength", PARSIMONY_ORG, env, false));
2655    TEST_EXPECT_COMBINES_PERFORMED(env, 120);
2656
2657    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_NNI, "nucl-opti-all-NNI", PARSIMONY_NNI_ALL, env, true)); // test recursive NNI
2658    TEST_EXPECT_COMBINES_PERFORMED(env, 242);
2659
2660    {
2661        env.push();
2662        TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-visible-global", PARSIMONY_OPTI_VISIBLE, env, false)); // test recursive NNI+KL
2663        TEST_EXPECT_COMBINES_PERFORMED(env, 34925);
2664
2665        TEST_EXPECTATION(movingRootDoesntAffectCosts(PARSIMONY_OPTI_VISIBLE));
2666        TEST_EXPECT_COMBINES_PERFORMED(env, 336);
2667        env.pop();
2668    }
2669
2670    // unfold groups
2671    {
2672        AP_tree_nlen *CloTyrob = env.root_node()->findLeafNamed("CloTyrob");
2673        AP_tree_nlen *group    = CloTyrob->get_father();
2674        ap_assert(group->gr.grouped);
2675        group->gr.grouped      = false; // unfold the only folded group
2676
2677        TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-global", PARSIMONY_OPTI_ALL, env, true)); // test recursive NNI+KL
2678        TEST_EXPECT_COMBINES_PERFORMED(env, 124811);
2679    }
2680
2681    // test re-add all (i.e. test "create tree from scratch")
2682    // Note: trees generated below are NO LONGER better than optimized trees! (see also r13651)
2683
2684     // quick add:
2685    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "nucl-readdall-quick",       PARSIMONY_ORG-7, env, true));
2686    TEST_EXPECT_COMBINES_PERFORMED(env, 439);
2687
2688    // quick add + NNI:
2689    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_READD_NNI,   "nucl-readdall-NNI",         PARSIMONY_ORG-8, env, true));
2690    TEST_EXPECT_COMBINES_PERFORMED(env, 613);
2691
2692    // test adding a too short sequence
2693    // (has to be last test, because it modifies seq data)                    << ------------ !!!!!
2694    {
2695        env.push();
2696
2697        AP_tree_nlen *CloTyrob = env.root_node()->findLeafNamed("CloTyrob");
2698        mark_only(CloTyrob->gb_node);
2699        env.compute_tree(); // species marks affect order of node-chain (used in nni_rec)
2700
2701        // modify sequence of CloTyrob (keep only some bases)
2702        {
2703            GB_transaction  ta(env.gbmain());
2704            GBDATA         *gb_seq = GBT_find_sequence(CloTyrob->gb_node, aliname);
2705
2706            char *seq        = GB_read_string(gb_seq);
2707            int   keep_bases = MIN_SEQUENCE_LENGTH-1;
2708
2709            for (int i = 0; seq[i]; ++i) {
2710                if (!GAP::is_std_gap(seq[i])) {
2711                    if (keep_bases) --keep_bases;
2712                    else seq[i] = '.';
2713                }
2714            }
2715
2716            GB_topSecurityLevel unsecured(gb_seq);
2717            TEST_EXPECT_NO_ERROR(GB_write_string(gb_seq, seq));
2718            free(seq);
2719        }
2720
2721        // remove CloTyrob
2722        TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, NULp, PARSIMONY_ORG-1, env, false));
2723        TEST_EXPECT_COMBINES_PERFORMED(env, 4);
2724        TEST_EXPECT_EQUAL(env.root_node()->count_leafs(), 14);
2725
2726        // attempt to add CloTyrob (should fail because sequence too short) and CorGluta (should stay, because already in tree)
2727        TEST_REJECT_NULL(env.root_node()->findLeafNamed("CorGluta")); // has to be in tree
2728        {
2729            GB_transaction ta(env.gbmain());
2730            GBT_restore_marked_species(env.gbmain(), "CloTyrob;CorGluta");
2731            env.compute_tree(); // species marks affect order of node-chain (used in nni_rec)
2732        }
2733
2734        TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_ADD, NULp, PARSIMONY_ORG-1, env, false));
2735        TEST_EXPECT_COMBINES_PERFORMED(env, 110); // @@@ why does this perform combines at all?
2736        TEST_EXPECT_EQUAL(env.root_node()->count_leafs(), 14); // ok, CloTyrob was not added
2737        TEST_REJECT_NULL(env.root_node()->findLeafNamed("CorGluta")); // has to be in tree
2738
2739        env.pop();
2740    }
2741}
2742
2743void TEST_SLOW_prot_tree_modifications() {
2744    const char *aliname = "ali_tuf_pro";
2745
2746    PARSIMONY_testenv<AP_sequence_protein> env("TEST_prot.arb", aliname);
2747    TEST_EXPECT_NO_ERROR(env.load_tree("tree_prot_opti"));
2748    TEST_EXPECT_SAVED_TOPOLOGY(env, "prot-initial");
2749
2750    const int PARSIMONY_ORG = 1081;
2751    TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 10);
2752
2753    // [PROTOPTI] opposed to nucleid tests above the initial tree here is already optimized! compare .@NUCOPTI
2754    // -> adding species approximately reproduces initial topology
2755    //
2756    // diff initial->add-quick: http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/prot-add-quick.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fprot-initial.tree.expected
2757    // diff initial->add-NNI:   http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/prot-add-NNI.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fprot-initial.tree.expected
2758    //
2759    // Note: comparing these two diffs also demonstrates why quick-adding w/o NNI suffers
2760
2761    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, "prot-removed",   PARSIMONY_ORG-146, env, true)); // test remove-marked only (same code as part of nt_reAdd)
2762    TEST_EXPECT_COMBINES_PERFORMED(env, 5);
2763
2764    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD,   "prot-add-quick", PARSIMONY_ORG, env, true)); // test quick-add
2765    TEST_EXPECT_COMBINES_PERFORMED(env, 213);
2766
2767    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_READD_NNI,     "prot-add-NNI",   PARSIMONY_ORG, env, true)); // test add + NNI
2768    TEST_EXPECT_COMBINES_PERFORMED(env, 262);
2769
2770    // test partial-add
2771    {
2772        GBDATA *gb_main = env.gbmain();
2773
2774        // create 2 non-overlapping partial species
2775        GBDATA    *MucRaceP = createPartialSeqFrom(gb_main, aliname, "MucRaceP", "MucRacem", 0, 60+4); // (+4 = dots inserted into DB at left end)
2776        GBDATA    *StrCoelP = createPartialSeqFrom(gb_main, aliname, "StrCoelP", "StrCoel9", 66-1+4, 184-1+4);
2777        GBDATA    *StrCoelM = createPartialSeqFrom(gb_main, aliname, "StrCoelM", "StrCoel9", 66-1+4, 184-1+4);
2778        TEST_EXPECT_NO_ERROR(modifyOneBase(StrCoelM, aliname, 'Y', 'H')); // change first 'Y' into 'H'
2779
2780        // add StrCoelP (and undo)
2781        {
2782            env.push();
2783            // StrCoel9 and StrRamo3 do not differ in seq-range of partial -> any of both may be chosen as brother.
2784            // behavior should be changed with #605
2785            TEST_EXPECTATION(addingPartialResultsIn(StrCoelP, "StrCoel9;StrRamo3", "prot-addPart-StrCoelP", PARSIMONY_ORG,     env));
2786            TEST_EXPECT_COMBINES_PERFORMED(env, 4);
2787            env.pop();
2788        }
2789
2790        {
2791            env.push();
2792            TEST_EXPECTATION(addingPartialResultsIn(MucRaceP, "MucRacem",          "prot-addPart-MucRaceP",          PARSIMONY_ORG,    env)); // add MucRaceP
2793            TEST_EXPECT_COMBINES_PERFORMED(env, 6);
2794            TEST_EXPECTATION(addingPartialResultsIn(StrCoelP, "StrCoel9;StrRamo3", "prot-addPart-MucRaceP-StrCoelP", PARSIMONY_ORG,    env)); // also add StrCoelP
2795            TEST_EXPECT_COMBINES_PERFORMED(env, 4);
2796            env.pop();
2797        }
2798
2799        // now add MucRaceP as full, then StrCoelP and StrCoelM as partials
2800        {
2801            env.push();
2802
2803            mark_only(MucRaceP);
2804            env.compute_tree(); // species marks affect order of node-chain (used in nni_rec)
2805            {
2806                GB_transaction  ta(gb_main);
2807                TEST_EXPECT_NO_ERROR(GBT_write_int(MucRaceP, "ARB_partial", 0)); // revert species to "full"
2808            }
2809
2810            TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "prot-addPartialAsFull-MucRaceP", PARSIMONY_ORG,   env, false));
2811            TEST_EXPECT_COMBINES_PERFORMED(env, 156);
2812            TEST_EXPECT_EQUAL(is_partial(MucRaceP), 0); // check MucRaceP was added as full sequence
2813            TEST_EXPECTATION(addedAsBrotherOf("MucRaceP", "Eukarya EF-Tu", env)); // partial created from MucRacem gets inserted next to this group
2814            // Note: looks ok. group contains MucRacem, AbdGlauc and 4 other species
2815
2816            // add StrCoelP as partial.
2817            // as expected it is placed next to matching full sequences (does not differ in partial range)
2818            TEST_EXPECTATION(addingPartialResultsIn(StrCoelP, "StrCoel9;StrRamo3", NULp, PARSIMONY_ORG, env));
2819            TEST_EXPECT_COMBINES_PERFORMED(env, 4);
2820
2821            // StrCoelM differs slightly in overlap with StrCoel9/StrRamo3, but has no overlap with MucRaceP
2822            // shows bug described in #609 is fixed:
2823            TEST_EXPECTATION(addingPartialResultsIn(StrCoelM, "StrCoelP", "prot-addPart-bug609",
2824                                                    PARSIMONY_ORG+1,  // @@@ known bug - partial should not affect parsimony value; possibly related to ../HELP_SOURCE/source/pa_partial.hlp@WARNINGS
2825                                                    env));
2826            TEST_EXPECT_COMBINES_PERFORMED(env, 5);
2827            env.pop();
2828        }
2829    }
2830
2831    TEST_EXPECT_SAVED_TOPOLOGY(env, "prot-initial");
2832
2833    const unsigned mixseed = 8164724;
2834
2835    const long PARSIMONY_MIXED       = PARSIMONY_ORG + 1519;
2836    const long PARSIMONY_NNI_MARKED  = PARSIMONY_ORG + 1053;
2837    const long PARSIMONY_NNI_ALL     = PARSIMONY_ORG;
2838    const long PARSIMONY_OPTI_MARKED = PARSIMONY_ORG;
2839    const long PARSIMONY_OPTI_ALL    = PARSIMONY_ORG; // no gain (initial tree already is optimized)
2840
2841    // ------------------------------------------------------
2842    //      mix tree (original tree already is optimized)
2843
2844    GB_random_seed(mixseed);
2845    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_MIX_TREE, "prot-mixed", PARSIMONY_MIXED, env, false));
2846    TEST_EXPECT_COMBINES_PERFORMED(env, 89);
2847
2848    {
2849        env.push();
2850        TEST_EXPECTATION(movingRootDoesntAffectCosts(PARSIMONY_MIXED));
2851        TEST_EXPECT_COMBINES_PERFORMED(env, 234);
2852        env.pop();
2853    }
2854
2855    // ------------------------------
2856    //      test optimize (some)
2857
2858    // mark initially marked species
2859    {
2860        GB_transaction ta(env.gbmain());
2861
2862        GBT_restore_marked_species(env.gbmain(), "CytLyti6;StrRamo3;MucRace2;SacCere5");
2863        env.compute_tree(); // species marks affect order of node-chain (used in nni_rec)
2864    }
2865
2866    TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_MIXED);
2867
2868    // test branchlength calculation
2869    // (optimizations below implicitely recalculates branchlengths)
2870    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_CALC_LENS, "prot-calclength", PARSIMONY_MIXED, env, false));
2871    TEST_EXPECT_COMBINES_PERFORMED(env, 79);
2872
2873    // test whether branchlength calculation depends on root-position
2874    {
2875        AP_tree_edge *orgRootEdge = rootEdge();
2876
2877        env.push();
2878
2879        const char *tested_roots[] = {
2880            // "CytLyti6", // no effect on branchlengths
2881            "TaxOcell",
2882            "MucRace3",
2883            "StrCoel9",
2884        };
2885
2886        for (size_t r = 0; r<ARRAY_ELEMS(tested_roots); ++r) {
2887            TEST_ANNOTATE(tested_roots[r]);
2888            const char *leafName = tested_roots[r];
2889            env.root_node()->findLeafNamed(leafName)->set_root();
2890            calc_branchlengths_and_reorder(env.graphic_tree());
2891            orgRootEdge->set_root();
2892            env.graphic_tree()->reorderTree(BIG_BRANCHES_TO_TOP);
2893
2894            TEST_EXPECT_SAVED_TOPOLOGY(env, "prot-calclength");
2895        }
2896        TEST_EXPECT_COMBINES_PERFORMED(env, 265);
2897
2898        env.pop();
2899    }
2900
2901    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_NNI, "prot-opti-NNI", PARSIMONY_NNI_MARKED, env, true)); // test recursive NNI
2902    TEST_EXPECT_COMBINES_PERFORMED(env, 246);
2903
2904    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "prot-opti-marked-global", PARSIMONY_OPTI_MARKED, env, true)); // test recursive NNI+KL
2905    TEST_EXPECT_COMBINES_PERFORMED(env, 2810);
2906
2907    // -----------------------------
2908    //      test optimize (all)
2909
2910    // mark all species
2911    mark_all(env.gbmain());
2912    env.compute_tree(); // species marks affect order of node-chain (used in nni_rec)
2913    TEST_EXPECT_EQUAL(GBT_count_marked_species(env.gbmain()), 14);
2914
2915    TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_MIXED);
2916
2917    // test branchlength calculation
2918    // (optimizations below implicitely recalculates branchlengths)
2919    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_CALC_LENS, "prot-calclength", PARSIMONY_MIXED, env, false));
2920    TEST_EXPECT_COMBINES_PERFORMED(env, 79);
2921
2922    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_NNI, "prot-opti-all-NNI", PARSIMONY_NNI_ALL, env, true)); // test recursive NNI
2923    TEST_EXPECT_COMBINES_PERFORMED(env, 359);
2924
2925    {
2926        env.push();
2927        TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "prot-opti-global", PARSIMONY_OPTI_ALL, env, false)); // test recursive NNI+KL
2928        TEST_EXPECT_COMBINES_PERFORMED(env, 1690);
2929
2930        TEST_EXPECTATION(movingRootDoesntAffectCosts(PARSIMONY_OPTI_ALL));
2931        TEST_EXPECT_COMBINES_PERFORMED(env, 215);
2932        env.pop();
2933    }
2934}
2935
2936void TEST_node_stack() {
2937    // test was used to fix #620
2938
2939    const char *aliname = "ali_5s";
2940    PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb", aliname);
2941    TEST_EXPECT_NO_ERROR(env.load_tree("tree_test"));
2942    TEST_EXPECT_SAVED_TOPOLOGY(env, "nucl-initial");
2943
2944    const int PARSIMONY_ORG = 302;
2945    TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 14);
2946
2947    TEST_VALIDITY(env.root_node()->sequence_state_valid());
2948    TEST_EXPECT(env.root_node()->has_valid_root_remarks());
2949
2950    // test set root to CytAquat + pop (works)
2951    {
2952        env.push();
2953        env.root_node()->findLeafNamed("CytAquat")->set_root();
2954        TEST_VALIDITY(env.root_node()->sequence_state_valid());
2955        env.pop();
2956        TEST_VALIDITY(env.root_node()->sequence_state_valid());
2957    }
2958
2959    TEST_EXPECT(env.root_node()->has_valid_root_remarks());
2960
2961    // test set root to CloButyr + pop (works)
2962    {
2963        env.push();
2964        env.root_node()->findLeafNamed("CloButyr")->set_root();
2965        TEST_VALIDITY(env.root_node()->sequence_state_valid());
2966        env.pop();
2967        TEST_VALIDITY(env.root_node()->sequence_state_valid());
2968    }
2969
2970    TEST_EXPECT(env.root_node()->has_valid_root_remarks());
2971
2972    // test set root to CloBifer + set root to CloTyrob + pop (works)
2973    // Note: both species are in same subtree (of root)
2974    {
2975        env.push();
2976
2977        env.root_node()->findLeafNamed("CloBifer")->set_root();
2978        env.root_node()->findLeafNamed("CloTyrob")->set_root();
2979
2980        TEST_VALIDITY(env.root_node()->sequence_state_valid());
2981        env.pop();
2982        TEST_VALIDITY(env.root_node()->sequence_state_valid());
2983    }
2984
2985    TEST_EXPECT(env.root_node()->has_valid_root_remarks());
2986
2987    // test set root to CytAquat + set root to CloButyr + pop (failed, fixed by [13138])
2988    TEST_EXPECT_COMBINES_PERFORMED(env, 0);
2989    for (int calcCostsBetween = 0; calcCostsBetween<2; ++calcCostsBetween) {
2990        TEST_ANNOTATE(GBS_global_string("calcCostsBetween=%i", calcCostsBetween));
2991
2992        TEST_EXPECT_PARSVAL(env, PARSIMONY_ORG);
2993
2994        env.push();
2995
2996        env.root_node()->findLeafNamed("CytAquat")->set_root();
2997
2998        if (calcCostsBetween) {
2999            TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 2);
3000        }
3001
3002        env.root_node()->findLeafNamed("CloButyr")->set_root();
3003
3004        TEST_VALIDITY(env.root_node()->sequence_state_valid());
3005        TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 6);
3006
3007        env.pop();
3008
3009        TEST_VALIDITY(env.root_node()->sequence_state_valid());
3010        TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_ORG);
3011        TEST_EXPECT_VALID_TREE(env.root_node());
3012    }
3013
3014    {
3015        env.push();
3016        {
3017            env.push();
3018
3019            env.root_node()->findLeafNamed("CloInnoc")->moveNextTo(env.root_node()->findLeafNamed("CytAquat"), 0.5);
3020            TEST_EXPECT_VALID_TREE(env.root_node());
3021            env.root_node()->findLeafNamed("CloInnoc")->set_root();
3022            TEST_EXPECT_VALID_TREE(env.root_node());
3023            env.root_node()->findLeafNamed("CytAquat")->moveNextTo(env.root_node()->findLeafNamed("CloPaste"), 0.5);
3024            TEST_EXPECT_VALID_TREE(env.root_node());
3025            env.root_node()->findLeafNamed("CloPaste")->set_root();
3026            TEST_EXPECT_VALID_TREE(env.root_node());
3027            env.root_node()->findLeafNamed("CloPaste")->moveNextTo(env.root_node()->findLeafNamed("CloInnoc"), 0.5);
3028            TEST_EXPECT_VALID_TREE(env.root_node());
3029
3030            {
3031                AP_tree_nlen *son_of_brother;
3032                AP_tree_nlen *brother_of_father;
3033
3034                // COVER1: son of root -> grandson of root
3035                {
3036                    AP_tree_nlen *son_of_root = env.root_node()->get_leftson();
3037                    ap_assert(son_of_root);
3038
3039                    son_of_brother = son_of_root->get_brother()->get_leftson();
3040                    son_of_root->moveNextTo(son_of_brother, 0.5);
3041                    TEST_EXPECT_VALID_TREE(env.root_node());
3042                }
3043
3044                // COVER2: grandson of root -> son of brother
3045                {
3046                    AP_tree_nlen *son_of_root      = env.root_node()->get_leftson();
3047                    AP_tree_nlen *grandson_of_root = son_of_root->get_brother()->get_rightson();
3048                    ap_assert(grandson_of_root);
3049
3050                    son_of_brother = grandson_of_root->get_brother()->get_leftson();
3051                    grandson_of_root->moveNextTo(son_of_brother, 0.5);
3052                    TEST_EXPECT_VALID_TREE(env.root_node());
3053                }
3054
3055                AP_tree_nlen *some_leaf = env.root_node()->findLeafNamed("CloBifer");
3056                ap_assert(some_leaf);
3057
3058                // COVER3: some leaf -> son of brother
3059                son_of_brother = some_leaf->get_brother()->get_leftson();
3060                some_leaf->moveNextTo(son_of_brother, 0.5);
3061                TEST_EXPECT_VALID_TREE(env.root_node());
3062
3063                // COVER4: some leaf -> son of brother
3064                brother_of_father = some_leaf->get_father()->get_brother();
3065                some_leaf->moveNextTo(brother_of_father, 0.5);
3066                TEST_EXPECT_VALID_TREE(env.root_node());
3067
3068                // test forbidden moves:
3069                TEST_EXPECT_ERROR_CONTAINS(some_leaf->cantMoveNextTo(some_leaf->get_father()),  "Already there");
3070                TEST_EXPECT_ERROR_CONTAINS(some_leaf->cantMoveNextTo(some_leaf->get_brother()), "Already there");
3071            }
3072
3073            TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG+5, 6);
3074
3075            env.pop();
3076            TEST_EXPECT_VALID_TREE(env.root_node());
3077        }
3078        env.pop();
3079
3080        TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_ORG);
3081        TEST_EXPECT_VALID_TREE(env.root_node());
3082    }
3083
3084    // remove + quick add marked + pop() both works
3085    TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD,     "nucl-add-quick", PARSIMONY_ORG-18, env, true)); // test quick-add
3086    TEST_EXPECT_COMBINES_PERFORMED(env, 400);
3087
3088    TEST_EXPECT(env.root_node()->has_valid_root_remarks());
3089
3090    // remove + quick-add marked + pop() quick-add -> corrupts tree
3091    // (root-edge is lost)
3092    {
3093        env.push();
3094        TEST_EXPECT_VALID_TREE(env.root_node());
3095        TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, NULp, -1, env, false)); // test remove-marked only (same code as part of nt_reAdd)
3096        TEST_EXPECT_COMBINES_PERFORMED(env, 0);
3097        TEST_EXPECT_VALID_TREE(env.root_node());
3098
3099        TEST_VALIDITY(env.all_available_pops_will_produce_valid_trees());
3100
3101        env.push();
3102        TEST_EXPECT_VALID_TREE(env.root_node());
3103        TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, NULp, -1, env, false)); // test quick-add (same code as part of nt_reAdd)
3104        TEST_EXPECT_COMBINES_PERFORMED(env, 400);
3105        TEST_EXPECT_VALID_TREE(env.root_node());
3106        TEST_VALIDITY(env.all_available_pops_will_produce_valid_trees());
3107        env.pop();
3108
3109        TEST_VALIDITY(env.root_node()->has_valid_edges());
3110
3111        env.pop();
3112        TEST_EXPECT_VALID_TREE(env.root_node());
3113        TEST_EXPECT_COMBINES_PERFORMED(env, 0);
3114    }
3115
3116    // same as above, but with only 1 species marked
3117    const char *testSingle[] = {
3118        "CytAquat",  // CytAquat is the only grandson of root (CytAquat located in lower subtree)
3119        "CloBifer",  // two father nodes between CloBifer and root (CloBifer located in upper subtree)
3120        "CloPaste",  // two father nodes between CloPaste and root (CloPaste located in upper subtree)
3121        "CorGluta",  // three father nodes between CorGluta and root (CorGluta located in lower subtree)
3122        "CelBiazo",  // two father nodes between CelBiazo and root
3123        NULp
3124    };
3125
3126    for (int i = 0; testSingle[i]; ++i) {
3127        for (int swapped = 0; swapped<2; ++swapped) {
3128            TEST_ANNOTATE(GBS_global_string("single=%s swapped=%i", testSingle[i], swapped));
3129
3130            env.push();
3131            TEST_EXPECT_VALID_TREE(env.root_node());
3132            {
3133                AP_tree_nlen *old_rightson = env.root_node()->get_rightson();
3134                env.root_node()->get_leftson()->get_rightson()->set_root();
3135                old_rightson->get_leftson()->set_root();
3136                old_rightson->set_root();
3137
3138                ap_assert(env.root_node()->get_rightson() == old_rightson);
3139            }
3140            TEST_EXPECT_VALID_TREE(env.root_node());
3141
3142            mark_only(env.root_node()->findLeafNamed(testSingle[i])->gb_node);
3143            env.compute_tree(); // species marks affect order of node-chain (used in nni_rec)
3144
3145            env.push();
3146            if (swapped) {
3147                env.root_node()->swap_sons();
3148            }
3149
3150            TEST_EXPECT_VALID_TREE(env.root_node());
3151            TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, NULp, -1, env, false)); // test remove-marked only (same code as part of nt_reAdd)
3152            TEST_EXPECT_VALID_TREE(env.root_node());
3153
3154            env.push();
3155            TEST_EXPECT_VALID_TREE(env.root_node());
3156            TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, NULp, -1, env, false)); // test quick-add (same code as part of nt_reAdd)
3157            TEST_EXPECT_VALID_TREE(env.root_node());
3158            env.pop();
3159
3160            TEST_EXPECT_VALID_TREE(env.root_node());
3161            env.pop();
3162            TEST_EXPECT_VALID_TREE(env.root_node());
3163            env.pop();
3164            TEST_EXPECT_VALID_TREE(env.root_node());
3165        }
3166    }
3167    TEST_EXPECT_COMBINES_PERFORMED(env, 2120);
3168
3169    // similar to above (remove+add a grandson of root; here grandson is a subtree with 4 species)
3170
3171    for (int remove_from_lower_subtree = 0; remove_from_lower_subtree<2; ++remove_from_lower_subtree) {
3172        TEST_ANNOTATE(GBS_global_string("remove_from_lower_subtree=%i", remove_from_lower_subtree));
3173
3174        // mark a complete subtree (which - as a whole - forms a grandson of root). subtree is located in upper part of the tree
3175        mark_only(env.root_node()->findLeafNamed("CloButy2")->gb_node);
3176        mark(env.root_node()->findLeafNamed("CloButyr")->gb_node);
3177        mark(env.root_node()->findLeafNamed("CloCarni")->gb_node);
3178        mark(env.root_node()->findLeafNamed("CloPaste")->gb_node);
3179        env.compute_tree(); // species marks affect order of node-chain (used in nni_rec)
3180
3181        env.push();
3182        if (remove_from_lower_subtree) {
3183            env.root_node()->swap_sons();
3184        }
3185        TEST_EXPECT_VALID_TREE(env.root_node());
3186        TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, NULp, -1, env, false)); // test remove-marked only (same code as part of nt_reAdd)
3187        TEST_EXPECT_VALID_TREE(env.root_node());
3188
3189        env.push();
3190        TEST_EXPECT_VALID_TREE(env.root_node());
3191        TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, NULp, -1, env, false)); // test quick-add (same code as part of nt_reAdd)
3192        TEST_EXPECT_VALID_TREE(env.root_node());
3193        env.pop();
3194
3195        TEST_VALIDITY(env.root_node()->has_valid_edges()); // now always valid
3196
3197        env.pop();
3198        TEST_EXPECT_VALID_TREE(env.root_node());
3199    }
3200    TEST_EXPECT_COMBINES_PERFORMED(env, 584);
3201}
3202
3203void TEST_node_edge_resources() {
3204    const char *aliname = "ali_5s";
3205
3206    // document memsize of nodes and edges:
3207
3208#define STATE_STACK_SIZE sizeof(StateStack) // 8 (Cxx11) or 16 (older C++); maybe 4/8 in 32bit
3209
3210#if defined(ARB_64)
3211    TEST_EXPECT_EQUAL(sizeof(AP_tree_nlen), 184 + STATE_STACK_SIZE);
3212    TEST_EXPECT_EQUAL(sizeof(AP_tree), 136);
3213    TEST_EXPECT_EQUAL(sizeof(ARB_seqtree), 88);
3214    TEST_EXPECT_EQUAL(sizeof(TreeNode), 80);
3215#else // !defined(ARB_64)
3216    TEST_EXPECT_EQUAL(sizeof(AP_tree_nlen), 112 + STATE_STACK_SIZE);
3217    TEST_EXPECT_EQUAL(sizeof(AP_tree), 84);
3218    TEST_EXPECT_EQUAL(sizeof(ARB_seqtree), 48);
3219    TEST_EXPECT_EQUAL(sizeof(TreeNode), 44);
3220#endif
3221
3222
3223#if defined(ARB_64)
3224    TEST_EXPECT_EQUAL(sizeof(AP_tree_edge), 64);
3225#else // !defined(ARB_64)
3226    TEST_EXPECT_EQUAL(sizeof(AP_tree_edge), 32);
3227#endif
3228
3229    PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb", aliname);
3230    TEST_EXPECT_NO_ERROR(env.load_tree("tree_test"));
3231
3232    const int PARSIMONY_ORG = 302;
3233    TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 14);
3234
3235    AP_tree_nlen *CloButyr = env.root_node()->findLeafNamed("CloButyr");
3236    AP_tree_nlen *CloButy2 = env.root_node()->findLeafNamed("CloButy2");
3237    TEST_EXPECT_EQUAL(CloButyr->get_brother()->name, CloButy2->name); // test they are brothers
3238
3239    AP_tree_nlen *CorAquat = env.root_node()->findLeafNamed("CorAquat");
3240    AP_tree_nlen *CurCitre = env.root_node()->findLeafNamed("CurCitre");
3241    TEST_EXPECT_EQUAL(CorAquat->get_brother()->name, CurCitre->name); // test they are brothers
3242
3243    CorAquat->REMOVE();
3244
3245    for (int test = 1; test<=8; ++test) {
3246        // test == 1 -> provokes common nodes+edges in revert+accept
3247        // test == 2 -> provokes common nodes+edges in revert+accept
3248        // test == 3 -> provokes common nodes+edges in revert+accept
3249        // tests 4-7 do not provoke common nodes or edges
3250
3251        for (int mode = 0; mode<=3; ++mode) {
3252            bool accept_outer = mode&2;
3253            bool accept_inner = mode&1;
3254
3255            TEST_ANNOTATE(GBS_global_string("accept_outer=%i accept_inner=%i (mode=%i, test=%i)", accept_outer, accept_inner, mode, test));
3256
3257            TEST_EXPECT_NULL(CorAquat->get_father());
3258            TEST_EXPECT(CloButyr->get_brother() == CloButy2);
3259            TEST_EXPECT_VALID_TREE(env.root_node());
3260
3261            env.push();
3262
3263            switch (test) {
3264                case 1: CorAquat->insert(CurCitre); break;
3265                case 2: CorAquat->insert(CurCitre); break;
3266                case 3: break;
3267                case 4: CloButyr->REMOVE(); break;
3268                case 5: CloButyr->REMOVE(); break;
3269                case 6: break;
3270                case 7: CloButyr->moveNextTo(CurCitre, 0.5); break;
3271                case 8: break;
3272                default: ap_assert(0); break;
3273            }
3274            TEST_EXPECT_VALID_TREE(env.root_node());
3275
3276            {
3277                env.push();
3278
3279                switch (test) {
3280                    case 1: CorAquat->REMOVE(); break;
3281                    case 2: break;
3282                    case 3: CorAquat->insert(CurCitre); break;
3283                    case 4: CloButyr->insert(CloButy2); break;
3284                    case 5: break;
3285                    case 6: CloButyr->REMOVE(); break;
3286                    case 7: CloButyr->moveNextTo(CloButy2, 0.5); break;
3287                    case 8: CorAquat->insert(CurCitre); CorAquat->REMOVE(); break;
3288                    default: ap_assert(0); break;
3289                }
3290                TEST_EXPECT_VALID_TREE(env.root_node());
3291
3292                env.accept_if(accept_inner);
3293            }
3294
3295            switch (test) {
3296                case 1: break;
3297                case 2: CorAquat->REMOVE(); break;
3298                case 3: if (CorAquat->father) CorAquat->REMOVE(); break;
3299                case 4: break;
3300                case 5: CloButyr->insert(CloButy2); break;
3301                case 6: if (!CloButyr->father) CloButyr->insert(CloButy2); break;
3302                case 7: CloButyr->REMOVE(); break;
3303                case 8: break;
3304                default: ap_assert(0); break;
3305            }
3306            TEST_EXPECT_VALID_TREE(env.root_node());
3307
3308            env.accept_if(accept_outer);
3309
3310            // manually revert changes (outside any stack frame)
3311            if (CorAquat->father) CorAquat->REMOVE();
3312            if (!CloButyr->father) CloButyr->insert(CloButy2);
3313        }
3314    }
3315
3316    CorAquat->insert(CurCitre);
3317
3318    TEST_EXPECT_PARSVAL(env, PARSIMONY_ORG);
3319    env.combines_performed(); // accept any no of combines
3320}
3321
3322#endif // UNIT_TESTS
3323
3324// --------------------------------------------------------------------------------
3325
Note: See TracBrowser for help on using the repository browser.