| 1 | #!/bin/bash |
|---|
| 2 | # Copyright (c) 2010, Elmar Pruesse <epruesse@mpi-bremen.de> |
|---|
| 3 | # All rights reserved. |
|---|
| 4 | # |
|---|
| 5 | # Redistribution and use in source and binary forms, with or without |
|---|
| 6 | # modification, are permitted provided that the following conditions are met: |
|---|
| 7 | # * Redistributions of source code must retain the above copyright |
|---|
| 8 | # notice, this list of conditions and the following disclaimer. |
|---|
| 9 | # * Redistributions in binary form must reproduce the above copyright |
|---|
| 10 | # notice, this list of conditions and the following disclaimer in the |
|---|
| 11 | # documentation and/or other materials provided with the distribution. |
|---|
| 12 | # * Neither the name of the <organization> nor the |
|---|
| 13 | # names of its contributors may be used to endorse or promote products |
|---|
| 14 | # derived from this software without specific prior written permission. |
|---|
| 15 | # |
|---|
| 16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
|---|
| 17 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|---|
| 18 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|---|
| 19 | # DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY |
|---|
| 20 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
|---|
| 21 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
|---|
| 22 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|---|
| 23 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|---|
| 24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|---|
| 25 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 26 | |
|---|
# Abort on the first failing command and on any use of an unset variable.
set -eu

# Command line: <filename> [size] [max]
#   filename  multi-fasta file to split; must be readable
#   size      number of sequences per chunk (positive integer, default 1000)
#   max       upper bound on the number of chunks (integer, default 99999999)
# Exit codes: 1 = missing filename, 2 = unreadable input,
#             3 = invalid size,     4 = invalid max.
if [ -z "${1:-}" ]; then
  {
    echo "${0##*/} <filename> [size] [max]"
    echo " Splits multi-fasta file <filename> into chunks of [size] sequences."
    echo " If size is not specified, a default of 1000 is used."
    echo " If max is specified, at most [max] chunks are written."
  } >&2
  exit 1
fi

FILENAME="$1"
# -r alone is also true for a readable directory, which cannot be split.
if [ ! -r "$FILENAME" ] || [ -d "$FILENAME" ]; then
  echo "Cannot read from file '$FILENAME'!" >&2
  exit 2
fi

# Split the base name at its last dot: "reads.fasta" -> "reads" + ".fasta".
# The inner quotes make the prefix a literal string instead of a glob pattern.
FILEBASE=$(basename -- "$FILENAME")
PREFIX=${FILEBASE%.*}
SUFFIX=${FILEBASE#"$PREFIX"}

# The chunk size must consist of digits only and contain a non-zero digit.
# Rejecting everything else here keeps arbitrary text out of the awk step.
CHUNK=${2:-1000}
chunk_valid=no
case "$CHUNK" in
  *[!0-9]*) ;;
  *[1-9]*) chunk_valid=yes ;;
esac
if [ "$chunk_valid" != yes ]; then
  echo "Chunk size must be a positive integer!" >&2
  exit 3
fi
# Force base 10 so leading zeros ("010") are never read as an octal constant.
CHUNK=$((10#$CHUNK))

MAX=${3:-99999999}
case "$MAX" in
  *[!0-9]*)
    echo "Maximum number of chunks must be a non-negative integer!" >&2
    exit 4
    ;;
esac
MAX=$((10#$MAX))
|---|
| 59 | |
|---|
#######################################
# Split a multi-fasta file into numbered chunk files in the current directory.
# Chunk k is written to "<prefix>.<k><suffix>", with k starting at 1. Lines that
# appear before the first ">" header go to "<prefix>.0<suffix>".
# Arguments:
#   $1 - input file
#   $2 - sequences per chunk (positive integer)
#   $3 - maximum number of chunks; reading stops once the limit is exceeded
#   $4 - output name prefix
#   $5 - output name suffix
# Returns: exit status of awk (non-zero if the input cannot be opened).
#######################################
split_fasta() {
  local input=$1 chunk=$2 max=$3

  # The name parts travel through the environment: unlike -v assignments or
  # text pasted into the program, ENVIRON values are taken literally, so
  # quotes and backslashes in file names are safe. The input is redirected
  # rather than passed as an operand so that names containing "=" or starting
  # with "-" are not interpreted by awk.
  SPLIT_PREFIX=$4 SPLIT_SUFFIX=$5 awk -v chunk="$chunk" -v max="$max" '
    BEGIN {
      seqs = 0
      part = 0
      prefix = ENVIRON["SPLIT_PREFIX"]
      suffix = ENVIRON["SPLIT_SUFFIX"]
      out = prefix "." part suffix
    }

    # A header line starts a new sequence and, every chunk-th time, a new file.
    /^>/ {
      if (seqs % chunk == 0) {
        # Release the finished chunk so the number of open files stays at one.
        close(out)
        ++part
        out = prefix "." part suffix
      }
      ++seqs
      if (seqs > max * chunk) exit 0
    }

    {
      print > out
    }
  ' < "$input"
}

split_fasta "$FILENAME" "$CHUNK" "$MAX" "$PREFIX" "$SUFFIX"
|---|
| 76 | |
|---|