#!/bin/bash
# GIGI_0_parsing.sh

# Uncomment to print argument-parsing diagnostics (any non-empty value enables them).
#debug_parsing=true

command -v cut >/dev/null 2>&1 || { echo >&2 "cut is required but not installed or not in the path.  Aborting."; exit 1; }
command -v pwd >/dev/null 2>&1 || { echo >&2 "pwd is required but not installed or not in the path.  Aborting."; exit 1; }
command -v cat >/dev/null 2>&1 || { echo >&2 "cat is required but not installed or not in the path.  Aborting."; exit 1; }
command -v test >/dev/null 2>&1 || { echo >&2 "test is required but not installed or not in the path.  Aborting."; exit 1; }
9 10 11 12
command -v uname >/dev/null 2>&1 || { echo >&2 "uname is recommended but not installed or not in the path.  Continuing, but behavior is undetermined"; }
command -v hostname >/dev/null 2>&1 || { echo >&2 "hostname is recommended but not installed or not in the path.  Continuing, though some cosmetic information may be missing."; }
command -v hostid >/dev/null 2>&1 || { echo >&2 "hostid is recommended but not installed or not in the path.  Continuing, though some cosmetic information may be missing."; }
command -v uptime >/dev/null 2>&1 || { echo >&2 "uptime is recommended but not installed or not in the path.  Continuing, though some cosmetic information may be missing."; }
13 14 15
command -v lscpu >/dev/null 2>&1 || { echo >&2 "lscpu is required but not installed or not in the path.  Aborting."; exit 1; }
command -v grep >/dev/null 2>&1 || { echo >&2 "grep is required but not installed or not in the path.  Aborting."; exit 1; }
command -v tail >/dev/null 2>&1 || { echo >&2 "tail is required but not installed or not in the path.  Aborting."; exit 1; }
16
command -v free >/dev/null 2>&1 || { echo >&2 "free is required but not installed or not in the path.  Continuing, though some cosmetic information may be missing"; }
17 18 19
command -v rev >/dev/null 2>&1 || { echo >&2 "rev is required but not installed or not in the path.  Aborting."; exit 1; }
command -v seq >/dev/null 2>&1 || { echo >&2 "seq is required but not installed or not in the path.  Aborting."; exit 1; }
command -v tr >/dev/null 2>&1 || { echo >&2 "tr is required but not installed or not in the path.  Aborting."; exit 1; }
20
command -v sort >/dev/null 2>&1 || { echo >&2 "sort is recommended but not installed or not in the path.  Continuing, but behavior is undetermined"; }
21 22
command -v cgcreate >/dev/null 2>&1 || { echo >&2 "cgcreate from package cgroup-tools/libcgroups is recommended but not installed or not in the path.  Continuing, but memory limits may not be imposed.  See the README for more info"; }

23 24

echo
25 26 27 28 29
if [ ${debug_parsing} ]
then
  echo "base_path: " "$base_path"
  echo "parent_path: " "$parent_path"
  echo
30

31 32 33 34 35 36 37 38 39 40 41
  #Make sure that we are seeing all arguments
  index=1
  echo "Listing args with \"\$@\":"
  for arg in "$@"
  do
    echo "Arg #$index = $arg"
    let "index+=1"
  done             # $@ sees arguments as separate words.
  echo "Arg list seen as separate words."
  echo
fi
42 43 44

#DEFAULTS
output_folder="./"
45
export long="false"
46

47
#Argument Parsing
48
cd "${base_path}"
49 50 51 52 53
if [ ${debug_parsing} ]
then
  echo "First Argument: ${@:1:1}"
fi
if [ $# -ge 1 -a -f "${@:1:1}" ] || [ "${@:1:1}" == "-h" ] || [ "${@:1:1}" == "-v" ]
54
then
55 56 57 58 59
  if [ $# -ge 1 -a -f "${@:1:1}" ]
  then
    param_file="${@:1:1}"
    shift
  fi
60 61 62 63 64 65
  index=1
  for arg in "$@"
  do
    if [ "${arg:0:1}" == "-" ]
    then
    shift
66 67 68 69
      if [ ${debug_parsing} ]
      then
        echo "is a flag"
      fi
70
      flag_set=$(echo $arg | tr -d '[:space:]')
71 72 73 74
      if [ ${debug_parsing} ]
      then
        echo "Number of combined flags: " "$((${#arg} - 1))"
      fi
75 76
      for j in $(seq 1 1 "$((${#arg} - 1))")
      do
77 78 79 80
        if [ ${debug_parsing} ]
        then
          echo "${arg:${j}:1}"
        fi
81 82 83 84 85 86 87 88 89 90 91 92 93
        case "${arg:${j}:1}" in
          o )
            output_folder="${@:1:1}"
            shift
            if [ "${output_folder:0:1}" == "-" ]
            then
              echo "Flags shouldn't follow each other separately (e.g. -o -n ./out 4), but instead should be joined (e.g. -on ./out 4) or totally separate (e.g. -o ./out -n 4)"
              exit 1
            fi
            if [ "${output_folder}" ]
            then
              echo "Output folder set successfully: " "${output_folder}"
            else
94
              echo "-o flag given with no output folder, aborting."
95 96 97
              exit 1
            fi
          ;;
98
          n )
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
            run_name="${@:1:1}"
            shift
            if [ "${run_name:0:1}" == "-" ]
            then
              echo "Flags shouldn't follow each other separately (e.g. -o -n ./out 4), but instead should be joined (e.g. -on ./out 4) or totally separate (e.g. -o ./out -n 4)"
              exit 1
            fi
            if [ "${run_name}" ]
            then
              echo "Run name set successfully: " "${run_name}"
            else
              echo "-n flag given with no run name, aborting."
              exit 1
            fi
          ;;
          t )
            num_threads="${@:1:1}"
            shift
            if [ "${num_threads:0:1}" == "-" ]
            then
              echo "Flags shouldn't follow each other separately (e.g. -o -n ./out 4), but instead should be joined (e.g. -on ./out 4) or totally separate (e.g. -o ./out -n 4)"
              exit 1
            fi
            if [ "${num_threads}" ]
            then
              echo "Number of threads set successfully: " "${num_threads}"
            else
              echo "-t flag given with no number of threads, aborting."
              exit 1
            fi
          ;;
          m )
            memory="${@:1:1}"
            shift
            if [ "${memory:0:1}" == "-" ]
            then
              echo "Flags shouldn't follow each other separately (e.g. -o -n ./out 4), but instead should be joined (e.g. -on ./out 4) or totally separate (e.g. -o ./out -n 4)"
              exit 1
            fi
            if [ "${memory}" ]
            then
              echo "Memory set successfully: " "${memory}" "MB"
141
              export memory
142 143 144 145 146
            else
              echo "-m flag given with no amount of memory, aborting."
              exit 1
            fi
          ;;
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
          e )
            memory_estimate="${@:1:1}"
            shift
            if [ "${memory_estimate:0:1}" == "-" ]
            then
              echo "Flags shouldn't follow each other separately (e.g. -o -n ./out 4), but instead should be joined (e.g. -on ./out 4) or totally separate (e.g. -o ./out -n 4)"
              exit 1
            fi
            if [ "${memory_estimate}" ]
            then
              echo "Memory estimate set successfully: " "${memory_estimate}" "MB"
              export memory_estimate
            else
              echo "-e flag given with no amount of memory, aborting."
              exit 1
            fi
          ;;
164
          l )
165
            echo "long is true"
166 167
            export long="true"
          ;;
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
          q )
            queue_length="${@:1:1}"
            shift
            if [ "${queue_length:0:1}" == "-" ]
            then
              echo "Flags shouldn't follow each other separately (e.g. -o -n ./out 4), but instead should be joined (e.g. -on ./out 4) or totally separate (e.g. -o ./out -n 4)"
              exit 1
            fi
            if [ "${queue_length}" ]
            then
              echo "Queue set successfully: " "${queue_length}" "processes"
              export queue_length
            else
              echo "-q flag given with no queue length, aborting."
              exit 1
            fi
          ;;
Khalid Kunji's avatar
Khalid Kunji committed
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
          r )
            region_start="${@:1:1}"
            shift
            region_end="${@:1:1}"
            shift
            if [ "${queue_length:0:1}" == "-" ]
            then
              echo "Flags shouldn't follow each other separately (e.g. -o -n ./out 4), but instead should be joined (e.g. -on ./out 4) or totally separate (e.g. -o ./out -n 4)"
              exit 1
            fi
            if [[ "${region_start}" && "${region_end}" ]]
            then
              echo "Region set successfully: "
              echo "Start: " "${region_start}"
              echo "End: " "${region_end}"
              export region_start
              export region_end
            else
              echo "-r flag given without region start and end, aborting."
              exit 1
            fi
          ;;
207
          v )
208
            echo "GIGI-Quick version: 1.03"
209 210 211 212
            echo "If you intended to activate verbose output, that flag is now capitalized: '-V'"
            exit 0
          ;;
          V )
213 214 215
            echo "verbose is true"
            export verbose="true"
          ;;
216 217
          h )
            echo "Help: "
218
            echo "run_GIGI parameter_file -o [OUTPUT FOLDER] -n [RUN NAME] -t [THREADS] -m [MEMORY IN MB]  -q [QUEUE LENGTH] [-l] [-v] [-V] [-h]"
219 220 221 222 223
            echo "-o [OUTPUT FOLDER] : This is the path to use for the outputs from the run_GIGI scripts, including temporary files."
            echo "-n [RUN NAME] : This is a path relative to the [OUTPUT FOLDER] to use to keep the outputs from more than one run of run_GIGI separated."
            echo "-t [THREADS] : The number of threads to use for run_GIGI, and also the number of chunks to split the input into."
            echo "-m [MEMORY IN MB] : The amount of RAM that run_GIGI will restrict its use to, please set up the cgroup first"
            echo "-l : Specifies that the input is in the long format."
224 225
            echo "-v : Display the version of GIGI-Quick and exit."
            echo "-V : Verbose mode, output from run_GIGI is much quieter now, you can see much more of what it is doing and what variables are set to at various stages with -v. "
226
            echo "-h : Display this help text"
227 228 229 230 231
            echo "-r [START] [END]   : Run on only a selected region, starting at start and ending at end, this region will be selected before any further splitting."
            echo "-q [THREADS]       : Run in queued mode, this mode will run up to THREADS instances of GIGI at a time and will attempt to keep the total amount of memory being used less than "
            echo "   [MEMORY IN MB] using an estimate of the amount of memory GIGI may need.  If -m [MEMORY IN MB] wasn't given, then it will use the amount of memory available "
            echo "   as shown by 'free.'  For older kernels this isn't shown and we use an estimate that is no longer accurate for modern systems (amount free + amount of buff/cache). "
            echo "   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=34e431b0ae398fc54ea69ff85ec700722c9da773  Also, -t is ignored when -q is given."
232
            echo "-e [MEMORY IN MB]  : Manual estimate of how much memory GIGI will need for queued mode in case the calculated estimate is too inaccurate"
233 234 235
            echo "For further information please see the included readme file and or the same text online on the project page: https://cse-git.qcri.org/Imputation/Impute_Beaming"
            exit 0
          ;;
236 237 238 239 240 241 242
          * )
            echo "unknown flag: " "${arg:${j}:1}"
            exit 1
          ;;
        esac
      done
    else
243 244 245 246
      if [ ${debug_parsing} ]
      then
        echo "is an arg"
      fi
247 248 249 250 251
    fi
    let "index+=1"
  done
else
  echo "THE FIRST ARGUMENT ISN'T A PARAMETER FILE"
252
  echo "Usage: run_GIGI parameter_file -o [OUTPUT FOLDER] -n [RUN NAME] -t [THREADS] -m [MEMORY IN MB] [-l] [-v] [-V] [-h]"
253 254 255 256
  exit 1
fi
echo

257
#Path Handling
258 259 260 261 262 263
cd "$parent_path"
gigi="./GIGI/GIGI"
gigi_split="./SPLIT/gigisplit"
gigi_merge="./MERGE/gigimerge"

cd "${gigi%/*}"
264 265 266 267 268 269 270 271 272 273 274 275
if [ -x "$(pwd)"/GIGI ]
then
  export gigi="$(pwd)"/GIGI
  echo "Using user compiled GIGI"
elif [ $(getconf LONG_BIT) -eq 64 ]
then
  export gigi="$(pwd)"/GIGI-static-64
  echo "Using static 64 bit GIGI"
else
  export gigi="$(pwd)"/GIGI-static-32
  echo "Using static 32 bit GIGI"
fi
276 277 278
cd "$parent_path"

cd "${gigi_split%/*}"
279 280 281 282 283 284 285 286 287 288 289 290
if [ -x "$(pwd)"/gigisplit ]
then
  export gigi_split="$(pwd)"/gigisplit
  echo "Using user compiled gigisplit"
elif [ $(getconf LONG_BIT) -eq 64 ]
then
  export gigi_split="$(pwd)"/gigisplit-static-64
  echo "Using static 64 bit gigisplit"
else
  export gigi_split="$(pwd)"/gigisplit-static-32
  echo "Using static 32 bit gigisplit"
fi
291 292 293
cd "$parent_path"

cd "${gigi_merge%/*}"
294 295 296 297 298 299 300 301 302 303 304 305
if [ -x "$(pwd)"/gigimerge ]
then
  export gigi_merge="$(pwd)"/gigimerge
  echo "Using user compiled gigimerge"
elif [ $(getconf LONG_BIT) -eq 64 ]
then
  export gigi_merge="$(pwd)"/gigimerge-static-64
  echo "Using static 64 bit gigimerge"
else
  export gigi_merge="$(pwd)"/gigimerge-static-32
  echo "Using static 32 bit gigimerge"
fi
306

307 308 309 310 311 312 313
if [ ${verbose} ]
then
  echo "GIGI: " "$gigi"
  echo "GIGI SPLIT: " "$gigi_split"
  echo "GIGI MERGE: " "$gigi_merge"
  echo
fi
314 315 316 317 318 319 320

cd "$base_path"
mkdir -p "$output_folder"
cd "$output_folder"
export output_folder=$(pwd)
cd "$base_path"

321 322 323 324
if [ -d "${param_file%/*}" ]
then
  cd "${param_file%/*}"
fi
325 326 327 328 329
export param_file="$(pwd)"/"${param_file##*/}"
export input_folder="${param_file%/*}"
export gigi_split_chunks_folder="${output_folder}/${run_name}/split_output"
mkdir -p "$gigi_split_chunks_folder"

330 331 332 333 334 335
if [ ${verbose} ]
then
  echo "Parameter File: " "$param_file"
  echo "Input Folder: " "$input_folder"
  echo "Output Folder: " "$output_folder"
  echo "GIGI Split Chunks Folder: " "$gigi_split_chunks_folder"
336

337 338 339
  echo "Long input: " "${long}"
fi
#Run other scripts, preserving environment variables
340 341 342 343 344
cd "$parent_path"
. ./GIGI_1_setup.sh
. ./GIGI_2_split.sh
. ./GIGI_3_gigi.sh
. ./GIGI_4_merge.sh