source: branches/profile/GDE/RAxML8/arb_raxml8.sh

Last change on this file was 12532, checked in by epruesse, 10 years ago

remove double press-any-key

File size: 5.6 KB
Line 
1#!/bin/bash
2set -e
3
4BASES_PER_THREAD=300
5
6# always wait on exit
7trap on_exit EXIT
8on_exit() {
9    wait_and_exit
10}
11
12# return true if argument is file in path executable by user
13can_run() {
14    which "$1" && test -x `which "$1"`
15}
16
17wait_and_exit() {
18    read -p "Press key to close window"
19    exit
20}
21
22# show error in ARB and exit
23report_error() {
24    SELF=`basename "$0"`
25    echo "ARB ERROR: $*"
26    arb_message "$SELF failed with: $*"
27    wait_and_exit
28}
29
30# create named temporary directory
31prepare_tmp_dir() {
32    ARB_TMP="$HOME/.arb_tmp"
33    NAME="$1"
34    DATE=`date +%Y-%m-%d--%H-%M-%S`
35    for N in "" ".2" ".3" ".4"; do
36        DIR="$ARB_TMP/$NAME--$DATE$N"
37        if [ -e "$DIR" ]; then
38            continue;
39        else
40            mkdir -p "$DIR"
41            echo "$DIR"
42            return
43        fi
44    done
45    report_error "Unable to create temporary directory"
46}
47
48cpu_has_feature() {
49    case `uname` in
50        Darwin)
51            SHOW="sysctl machdep.cpu.features"
52            ;;
53        Linux)
54            SHOW="grep flags /proc/cpuinfo"
55            ;;
56    esac
57    $SHOW | grep -qi "$1"
58}
59
60cpu_get_cores() {
61    case `uname` in
62        Darwin)
63            sysctl -n hw.ncpu
64            ;;
65        Linux)
66            grep -c "^processor" /proc/cpuinfo
67            ;;
68    esac
69}
70
71# this is the "thorough" protocol.
72# 1. do $NTREE searches for best ML tree
73# 2. run $BOOTSTRAP BS searches
74# 3. combine into ML tree with support values
75# 4. calculate consensus tree
76# 5. import trees into ARB
77dna_tree_thorough() {
78    # try N ML searches
79    $RAXML -p "$SEED" -s "$SEQFILE" -m $MODEL \
80        -N "$NTREES" \
81        -n TREE_INFERENCE &
82
83    # wait for raxml to complete if we have not enough
84    # cores to run bootstrap search at the same time
85    if [ $(($THREADS * 2)) -lt $CORES ]; then
86        wait
87    fi
88
89    # run bootstraps
90    $RAXML -p "$SEED" -b "$SEED" -s "$SEQFILE" -m $MODEL \
91        -N "$BOOTSTRAPS" \
92        -n BOOTSTRAP &
93    wait
94
95    # draw bipartition information
96    $RAXML -f b -m $MODEL \
97        -t RAxML_bestTree.TREE_INFERENCE \
98        -z RAxML_bootstrap.BOOTSTRAP \
99        -n ML_TREE_WITH_SUPPORT
100    # import
101    arb_read_tree tree_raxml RAxML_bipartitions.ML_TREE_WITH_SUPPORT
102
103    # compute extended majority rule consensus
104    $RAXML -J MRE -m $MODEL -z RAxML_bootstrap.BOOTSTRAP -n BOOTSTRAP_CONSENSUS
105   
106    # import
107    arb_read_tree tree_raxml_mre RAxML_MajorityRuleExtendedConsensusTree.BOOTSTRAP_CONSENSUS
108}
109
110# this is the fast protocol
111# 1. run fast bootstraps
112# 2. calculate consensus
113# 3. import into ARB
114dna_tree_quick() {
115    # run fast bootstraps
116    $RAXML -f a -m $MODEL -p "$SEED" -x "$SEED" -s "$SEQFILE" -N "$BOOTSTRAPS" -n FAST_BS
117    # create consensus tree
118    $RAXML -J MRE -m $MODEL -z RAxML_bootstrap.FAST_BS -n FAST_BS_MAJORITY
119    # import
120    arb_read_tree tree_raxml RAxML_bipartitions.FAST_BS
121    arb_read_tree tree_raxml_mre RAxML_MajorityRuleExtendedConsensusTree.FAST_BS_MAJORITY
122}   
123
124calc_mem_size() {
125    # FIXME
126    echo
127}
128
129aa_tree() {
130    # FIXME
131    echo
132    # -m PROTGAMMALG4X
133}
134
135###### main #####
136
137
138while [ -n "$1" ]; do 
139  case "$1" in
140      -p) # protocol
141          PROT="$2"
142          shift
143          ;;
144      -m) # subst model
145          MODEL="$2"
146          shift
147          ;;
148      -s) # random seed
149          SEED="$2"
150          shift
151          ;;
152      -b) # bootstraps
153          BOOTSTRAPS="$2"
154          shift
155          ;;
156      -r) # number of tree searches
157          NTREES="$2"
158          shift
159          ;;
160      -nt) # seqtype dna
161          SEQTYPE=N
162          ;;
163      -aa) # seqtype proto
164          SEQTYPE=A
165          ;;
166      -f) # input file
167          FILE="$2"
168          shift
169          ;;
170      -t) # threads
171          THREADS="$2"
172          shift
173          ;;
174      *)
175          report_error argument not understood: "$1"
176          ;;
177  esac
178  shift
179done
180
181
182# set up environment
183if [ -n "$LD_LIBRARY_PATH" ]; then
184    LD_LIBRARY_PATH="$ARBHOME/lib"
185else
186    LD_LIBRARY_PATH="$ARBHOME/lib:$LD_LIBRARY_PATH"
187fi
188export LD_LIBRARY_PATH
189
190
191# prepare data in tempdir
192DIR="`prepare_tmp_dir raxml`"
193SEQFILE="$DIR/dna.phy"
194
195arb_convert_aln --arb-notify -GenBank "$FILE" -phylip "$SEQFILE" 2>&1 |\
196  grep -v "^WARNING(14): LOCUS" || true # remove spurious warning
197rm $FILE
198
199cd "$DIR"
200
201# select RAxML binary
202if cpu_has_feature avx && can_run raxmlHPC8-AVX.PTHREADS; then
203    RAXML=raxmlHPC8-AVX.PTHREADS
204elif cpu_has_feature sse3 && can_run raxmlHPC8-SSE3.PTHREADS; then
205    RAXML=raxmlHPC8-SSE3.PTHREADS
206elif can_run raxmlHPC8-PTHREADS; then
207    RAXML=raxmlHPC8-PTHREADS
208else
209    report_error Could not find working RAxML binary.
210fi
211
212# get some numbers
213CORES=`cpu_get_cores`
214BP=`head -n 1 $SEQFILE | cut -c 16-`
215NSEQS=`head -n 1 $SEQFILE | cut -c 1-16`
216
217# warn if model does not match sequence number
218if [ "$MODEL" == "GTRRAMMA" -a $NSEQS -gt 10000 ]; then
219    arb_message "Using the GTRGAMMA model on more than 10,000 sequences." \
220        "This is not considered good practice. Please refer to " \
221        "the RAxML manual for details."
222fi
223if [ "$MODEL" == "GTRCAT" -a -$NSEQS -lt 150 ]; then
224    arb_message "Using the GTRCAT model on less than 150 sequences." \
225        "This is not considered good practice. Please refer to " \
226        "the RAxML manual for details."
227fi
228
229# calculate number of threads (if not passed)
230if [ -z "$THREADS" ]; then
231    THREADS=$(( $BP / $BASES_PER_THREAD + 2))
232    # +1 is for master thread,
233    # another +1 for the first $BASES_PER_THREAD (bash truncates)
234
235    if [ $THREADS -gt $CORES ]; then
236        THREADS=$CORES
237    fi
238fi
239RAXML="$RAXML -T $THREADS" 
240
241
242case "${SEQTYPE}.${PROT}" in
243    N.quick)
244        dna_tree_quick
245        ;;
246    N.thorough)
247        dna_tree_thorough
248        ;;
249esac
250
251# FIXME: cleanup temp dir
252
Note: See TracBrowser for help on using the repository browser.