Just a heads-up: we don't have a huge amount of space on this machine (~750 GB for the Git repos). We can include some data in the projects, but really big datasets will need to remain elsewhere. For anyone new to Git, this is a fairly good place to start: http://gitref.org/index.html. Documentation for GitLab is available here: http://doc.gitlab.com/

Commit d6492d02, authored by Khalid Kunji

Cleaned up a lot of output, managed memory via cgroups, and added a -v flag to show verbose output.

parent 115508e3
#!/bin/bash
#debug_parsing=true

#######################################
# Abort the script when a mandatory tool is missing.
# Arguments: $1 - command name to look up in the path
# Outputs:   an error message on stderr when the command is missing
# Returns:   0 when the command exists; otherwise exits the script with 1
#######################################
require_cmd() {
  command -v "$1" >/dev/null 2>&1 && return 0
  echo >&2 "$1 is required but not installed or not in the path. Aborting."
  exit 1
}

#######################################
# Warn, but keep going, when an optional tool is missing.
# Arguments: $1 - command name to look up in the path
#            $2 - sentence describing the consequence of continuing
# Outputs:   a warning on stderr when the command is missing
# Returns:   0 always
#######################################
recommend_cmd() {
  command -v "$1" >/dev/null 2>&1 && return 0
  echo >&2 "$1 is recommended but not installed or not in the path. $2"
}

# Each tool is checked exactly once, in the original order, so a missing tool
# produces a single message. uname and free stay fatal because the first of
# their two original checks already aborted the script.
require_cmd cut
require_cmd pwd
require_cmd cat
require_cmd test
require_cmd uname
recommend_cmd hostname "Continuing, though some cosmetic information may be missing."
recommend_cmd hostid "Continuing, though some cosmetic information may be missing."
recommend_cmd uptime "Continuing, though some cosmetic information may be missing."
require_cmd lscpu
require_cmd grep
require_cmd tail
require_cmd free
require_cmd rev
require_cmd seq
require_cmd tr
recommend_cmd prlimit "Continuing, but memory limits will be ignored"
recommend_cmd sort "Continuing, but behavior is undetermined"
# cgcreate keeps its own wording because the message also names the package.
command -v cgcreate >/dev/null 2>&1 || echo >&2 "cgcreate from package cgroup-tools/libcgroups is recommended but not installed or not in the path. Continuing, but memory limits will not be imposed. See the README for more info"
#######################################
# Debug aid: show the path variables and every argument the script received.
# Produces output only when debug_parsing is non-empty, and prints the listing
# once (it used to be printed unconditionally and then again under the guard).
# Globals:   debug_parsing, base_path, parent_path (read)
# Arguments: the argument list to display
# Outputs:   the diagnostic listing on stdout
# Returns:   0
#######################################
debug_dump_args() {
  [[ -n "${debug_parsing}" ]] || return 0
  local arg
  local index=1
  echo "base_path: " "$base_path"
  echo "parent_path: " "$parent_path"
  echo
  #Make sure that we are seeing all arguments
  echo "Listing args with \"\$@\":"
  for arg in "$@"; do
    echo "Arg #$index = $arg"
    index=$((index + 1))
  done # $@ sees arguments as separate words.
  echo "Arg list seen as separate words."
  echo
}
debug_dump_args "$@"
#DEFAULTS
# Output goes to the current directory unless the -o flag replaces this value.
output_folder="./"
# Exported so the step scripts can read it; the -l flag switches it to "true".
export long="false"
#Argument Parsing
# Move to base_path first: the check that follows tests the first argument as a file path.
# NOTE(review): base_path is not assigned in the visible part of this script — confirm it is set earlier.
cd "${base_path}"
if [ $# -ge 1 -a -f "${@:1:1}" ]
then
......@@ -50,12 +56,21 @@ shift
if [ "${arg:0:1}" == "-" ]
then
shift
echo "is a flag"
if [ ${debug_parsing} ]
then
echo "is a flag"
fi
flag_set=$(echo $arg | tr -d '[:space:]')
echo "Number of combined flags: " "$((${#arg} - 1))"
if [ ${debug_parsing} ]
then
echo "Number of combined flags: " "$((${#arg} - 1))"
fi
for j in $(seq 1 1 "$((${#arg} - 1))")
do
echo "${arg:${j}:1}"
if [ ${debug_parsing} ]
then
echo "${arg:${j}:1}"
fi
case "${arg:${j}:1}" in
o )
output_folder="${@:1:1}"
......@@ -69,11 +84,11 @@ shift
then
echo "Output folder set successfully: " "${output_folder}"
else
echo "-o flag given with no output folder, aborting."
echo "-o flag given with no output folder, aborting."
exit 1
fi
;;
n )
n )
run_name="${@:1:1}"
shift
if [ "${run_name:0:1}" == "-" ]
......@@ -116,14 +131,20 @@ shift
if [ "${memory}" ]
then
echo "Memory set successfully: " "${memory}" "MB"
export memory
else
echo "-m flag given with no amount of memory, aborting."
exit 1
fi
;;
l )
echo "long is true"
export long="true"
;;
v )
echo "verbose is true"
export verbose="true"
;;
* )
echo "unknown flag: " "${arg:${j}:1}"
exit 1
......@@ -131,7 +152,10 @@ shift
esac
done
else
echo "is an arg"
if [ ${debug_parsing} ]
then
echo "is an arg"
fi
fi
let "index+=1"
done
......@@ -142,7 +166,7 @@ else
fi
echo
#Path Handling
# The tool locations below are written relative to parent_path, so switch there first.
cd "$parent_path"
gigi="./GIGI/GIGI"
gigi_split="./SPLIT/gigisplit"
......@@ -159,11 +183,13 @@ cd "$parent_path"
# Turn gigi_merge into an absolute path: enter its directory, then rebuild the
# path from pwd plus the fixed file name. Exported for the merge step.
cd "${gigi_merge%/*}"
export gigi_merge="$(pwd)"/gigimerge
# NOTE(review): the three tool paths are printed here unconditionally and again
# in the verbose block below, so -v shows them twice — one copy looks left over.
echo "GIGI: " "$gigi"
echo "GIGI SPLIT: " "$gigi_split"
echo "GIGI MERGE: " "$gigi_merge"
echo
if [ ${verbose} ]
then
echo "GIGI: " "$gigi"
echo "GIGI SPLIT: " "$gigi_split"
echo "GIGI MERGE: " "$gigi_merge"
echo
fi
# Back to base_path before creating the output folder.
cd "$base_path"
mkdir -p "$output_folder"
......@@ -177,27 +203,18 @@ export input_folder="${param_file%/*}"
# Folder that receives the split chunks for this run; exported for the step scripts.
export gigi_split_chunks_folder="${output_folder}/${run_name}/split_output"
mkdir -p "$gigi_split_chunks_folder"
# NOTE(review): this summary is printed unconditionally and again in the verbose
# block below, so -v shows it twice — one copy looks left over.
echo "Parameter File: " "$param_file"
echo "Input Folder: " "$input_folder"
echo "Output Folder: " "$output_folder"
echo "GIGI Split Chunks Folder: " "$gigi_split_chunks_folder"
echo "Long input: " "${long}"
if [ ${verbose} ]
then
echo "Parameter File: " "$param_file"
echo "Input Folder: " "$input_folder"
echo "Output Folder: " "$output_folder"
echo "GIGI Split Chunks Folder: " "$gigi_split_chunks_folder"
echo "Long input: " "${long}"
fi
#Run other scripts, preserving environment variables
# The step scripts are sourced with "." (not executed), so they run in this shell
# and see every variable set above. They are looked up relative to parent_path.
cd "$parent_path"
. ./GIGI_1_setup.sh
. ./GIGI_2_split.sh
. ./GIGI_3_gigi.sh
. ./GIGI_4_merge.sh
######ACTUALLY FIRST ARG SHOULD BE A FILE....######
# to parse from a file argument
# echo "INPUT FROM FILE"
# piped_string=$(cat "$1")
# echo "File String: " "$piped_string"
# arg_1="$(echo "$piped_string" | cut -d" " -f1)"
# arg_2="$(echo "$piped_string" | cut -d" " -f2)"
# arg_3="$(echo "$piped_string" | cut -d" " -f3)"
##################################################
#read -a arg_list
......@@ -2,40 +2,54 @@
### GIGI Config Outputs ###
# Reports the tool locations (verbose only), derives the chunk file prefix and
# prints basic host information.
# application path settings
#echo
#echo "#application path settings#"
if [ ${verbose} ]
then
# application path settings
echo
echo "#application path settings#"
#echo "gigi location: " "${gigi}"
#echo "gigi split location: " "${gigi_split}"
#echo
echo "gigi location: " "${gigi}"
echo "gigi split location: " "${gigi_split}"
echo
# data path settings
#echo "#data path settings#"
echo
echo "#data path settings#"
echo
fi
# Prefix for the chunk files: <split chunks folder>/chunk. Exported for later steps.
export gigi_split_prefix="${gigi_split_chunks_folder}"/chunk
# NOTE(review): the prefix and host information are printed here unconditionally
# and again in the verbose block below, so -v shows them twice.
echo "gigi_split_prefix: " "${gigi_split_prefix}"
echo
# INFO
echo
echo "#Local Resources#"
# The command substitutions are unquoted, so their output is word-split and
# runs of whitespace collapse to single spaces.
echo "System Info: " $(uname)
echo "Hostname: " $(hostname)
echo "Host ID: " $(hostid)
echo "Uptime: " $(uptime)
if [ ${verbose} ]
then
echo "gigi_split_prefix: " "${gigi_split_prefix}"
echo
# INFO
echo
echo "#Local Resources#"
echo "System Info: " $(uname)
echo "Hostname: " $(hostname)
echo "Host ID: " $(hostid)
echo "Uptime: " $(uptime)
fi
#######################################
# Read one numeric value from lscpu's "label: value" report.
# Takes everything after the last colon of the first matching line, so values
# with more than one digit are read whole (the previous "tail -c 2" kept only
# the final digit, turning 16 cores into 6).
# Arguments: $1 - grep pattern selecting the lscpu line
# Outputs:   the value on stdout; 1 (plus a warning on stderr) when no whole
#            number can be read, so the arithmetic below never sees an empty
#            or non-numeric operand
#######################################
lscpu_count() {
  local line value
  line=$(lscpu | grep -m 1 "$1")
  value=${line##*:}
  value=${value//[[:space:]]/}
  if [[ "$value" =~ ^[0-9]+$ ]]; then
    echo "$value"
  else
    echo >&2 "Could not read \"$1\" from lscpu, assuming 1."
    echo 1
  fi
}

cores=$(lscpu_count "Core(s)")
sockets=$(lscpu_count "Socket(s)")
# Physical cores = cores per socket * sockets.
cores_total=$((cores * sockets))
threads_per_core=$(lscpu_count "Thread")
# Hardware threads = physical cores * threads per core.
threads_total=$((cores_total * threads_per_core))
# NOTE(review): the core/thread counts are printed here unconditionally and
# again in the verbose block below, so -v shows them twice.
echo "Number of Physical cores: " ${cores_total}
echo "Number of Threads: " ${threads_total}
#export num_chunks=$(($cores_total - 1))
if [ ${verbose} ]
then
echo "Number of Physical cores: " ${cores_total}
echo "Number of Threads: " ${threads_total}
fi
# Default chunk count: all hardware threads minus one core's worth of threads.
export num_chunks=$(($threads_total - $threads_per_core))
#Compare with num_threads
# When num_threads is empty or unset it defaults to num_chunks.
if [ "${num_threads}" ]
then
#Do Nothing
# The echo only fills the otherwise empty branch; it does print a blank line.
echo
else
num_threads="$num_chunks"
fi
#Compare with num_threads
if [ "$num_threads" -lt "$num_chunks" ]
then
export num_chunks="$num_threads"
......@@ -45,12 +59,56 @@ then
export num_chunks=1
echo "Trouble determining number of threads correctly, using 1 as a failsafe."
fi
# NOTE(review): the chunk count and the free(1) table are each printed
# unconditionally and again under the verbose guard, so -v shows them twice.
echo "Number of chunks to split into: " ${num_chunks}
echo
if [ ${verbose} ]
then
echo "Number of chunks to split into: " ${num_chunks}
echo
fi
#Compare with memory
# Memory
free
if [ ${verbose} ]
then
free
fi
# Last space-separated field of the free(1) line that contains "buffers/cache"
# (rev | cut -f1 | rev picks the final field).
# NOTE(review): if the installed free prints no such line, free_mem is empty — confirm on the target systems.
free_mem=$(free | grep "buffers/cache" | rev | cut -d" " -f1 | rev)
#Check that kernel supports cgroups
# Version-sort the running kernel release against the 2.6.24 minimum and keep
# the older of the two in kern: when that is the minimum itself, the kernel is
# new enough. The two versions are piped straight into sort, so the check works
# whether or not verbose is set and leaves no scratch file in the working
# directory (the old kern_check file was only written in verbose mode, which
# made every non-verbose run report an outdated kernel).
kernel_release=$(uname -r)
kern=$(printf '%s\n' "$kernel_release" "2.6.24" | sort -V | head -1)
if [[ -n "${verbose}" ]]; then
  echo "Kernel: " "$kernel_release"
  echo "Older kernel: " "$kern"
fi
if [ "$kern" != "2.6.24" ]; then
  echo "Your kernel is too old to support cgroups, if you do not update your kernel to a version >=2.6.24 then memory limits will not have any effect."
fi
echo
echo
#Handle memory
#######################################
# Convert a whole number of megabytes to bytes (1 MB = 1000000 bytes, the same
# scale the previous fixed "~2 MB" = 2000000 limit used).
# Arguments: $1 - megabytes, digits only (leading zeros are read as decimal)
# Outputs:   the byte count on stdout
#######################################
gigi_mb_to_bytes() {
  echo $((10#$1 * 1000000))
}

# Only act when the -m flag supplied a value (reported to the user in MB).
if [ "${memory}" ]
then
  if ! [[ "${memory}" =~ ^[0-9]+$ ]]; then
    echo "memory limits not enforced, expected a whole number of MB but got: ${memory}"
  else
    #Create user cgroup for memory limits, and/or cpu share limits
    #Here we depend on cgroup-tools
    cgcreate -g memory,cpu:user_cgroup/gigi
    cgstatus=$?
    echo "${cgstatus}"
    if [ "${cgstatus}" -ne 0 ]
    then
      echo "memory limits not enforced, could not create cgroup, error: ${cgstatus}"
    else
      # Apply the requested amount instead of a fixed 2000000-byte limit.
      # NOTE(review): memory.limit_in_bytes under /sys/fs/cgroup/memory is the
      # cgroup v1 layout — confirm the target hosts use it.
      # NOTE(review): the gigi step launches its jobs with cgexec in
      # test/test_limits, not user_cgroup/gigi — confirm which group is meant.
      if ! gigi_mb_to_bytes "${memory}" > /sys/fs/cgroup/memory/user_cgroup/gigi/memory.limit_in_bytes
      then
        echo "memory limits not enforced, could not write the limit for user_cgroup/gigi"
      fi
      #Limit to 200 KB
      #echo 200000 > /sys/fs/cgroup/memory/test/test_limits/memory.limit_in_bytes
      #Limit to ~10% of CPU (Actually 100/1024 'cpu shares')
      #echo 100 > /sys/fs/cgroup/cpu/test/test_limits/cpu.shares
    fi
  fi
fi
#!/bin/bash
# Split step preamble: locate the external time binary, create the log folder
# and move into the input folder.
# "which" is used on purpose: the external time binary is needed for the -o/-f
# options, and bash's "command -v time" would report the shell keyword instead.
export timecmd="$(which time)"
if [[ -n "${verbose}" ]]; then
  echo
  echo "INPUT FOLDER: " "${input_folder}"
fi
mkdir -p "${output_folder}/${run_name}/LOGS"
echo "Logs in: " "${output_folder}/${run_name}/LOGS"
# Quoted so a folder name containing spaces is passed as one argument and an
# empty value no longer sends a bare "cd" to $HOME.
cd "${input_folder}" || echo >&2 "Could not change to input folder: ${input_folder}"
# Lists any existing first chunk in the chunk folder.
find "${gigi_split_prefix%/*}" -name "chunk_0*"
if [[ -n $(find "${gigi_split_prefix%/*}" -name "chunk_0*") ]]
......@@ -9,9 +15,12 @@ then
echo "chunk_0.geno exists, file is most likely already split, if it is not, then remove the existing chunks from ${gigi_split_prefix%/*} and try again"
else
mkdir -p "${output_folder}/${run_name}/STATS"
echo "Long is: " "${long}"
$timecmd -o "${output_folder}/${run_name}/STATS/time${i}.log" -f'memory in kilobytes %M real %e user %U sys %S command %C' "${gigi_split}" "${param_file##*/}" "${num_chunks}" "${gigi_split_prefix}" "${long}"
echo "Split exit status: " "$?"
if [ ${verbose} ]
then
echo "Long is: " "${long}"
fi
$timecmd -o "${output_folder}/${run_name}/STATS/time${i}.log" -f'memory in kilobytes %M real %e user %U sys %S command %C' "${gigi_split}" "${param_file##*/}" "${num_chunks}" "${gigi_split_prefix}" "${long}" > "${output_folder}/${run_name}/LOGS/split.log"
echo "Split exit status: " "$?"
fi
cd "$parent_path"
echo
......
#!/bin/bash
cd "${input_folder}"
#cd "${gigi_split_chunks_folder}"
#i=0
pids=()
for file in "${gigi_split_chunks_folder}"/*.param
do
#echo "$file"
#echo "iteration: " "$i"
filename="${file##*/}"
echo "FILENAME: " "$filename"
echo "FILE: " "$file"
if [ ${verbose} ]
then
echo "FILENAME: " "$filename"
echo "FILE: " "$file"
fi
if [[ $filename =~ [0-9]+ ]] ; then
echo "Starting run on chunk: " "$BASH_REMATCH"
if [ ${verbose} ]
then
echo "Starting run on chunk: " "$BASH_REMATCH"
fi
else
echo "Failed to find chunk id int in filename"
exit 7
......@@ -24,17 +26,20 @@ do
echo "file ${output_folder}/${run_name}/gigi_output/${BASH_REMATCH}/impute.geno already exists."
else
mkdir -p "${output_folder}/${run_name}/gigi_output/${BASH_REMATCH}"
echo $(pwd)
echo "LONG IS: " "${long}"
if [ ${verbose} ]
then
echo $(pwd)
echo "LONG IS: " "${long}"
fi
if [[ "$long" == "true" ]]
then
$timecmd -o "${output_folder}/${run_name}/STATS/time${BASH_REMATCH}.log" -f'memory in kilobytes %M real %e user %U sys %S command %C' "${gigi}" "${file}" -outD="${output_folder}/${run_name}/gigi_output/${BASH_REMATCH}" -long & pids+=("$!")
cgexec -g memory,cpu:test/test_limits $timecmd -o "${output_folder}/${run_name}/STATS/time${BASH_REMATCH}.log" -f'memory in kilobytes %M real %e user %U sys %S command %C' "${gigi}" "${file}" -outD="${output_folder}/${run_name}/gigi_output/${BASH_REMATCH}" -long > "${output_folder}/${run_name}/LOGS/${BASH_REMATCH}.gigi.log" & pids+=("$!")
else
$timecmd -o "${output_folder}/${run_name}/STATS/time${BASH_REMATCH}.log" -f'memory in kilobytes %M real %e user %U sys %S command %C' "${gigi}" "${file}" -outD="${output_folder}/${run_name}/gigi_output/${BASH_REMATCH}" & pids+=("$!")
cgexec -g memory,cpu:test/test_limits $timecmd -o "${output_folder}/${run_name}/STATS/time${BASH_REMATCH}.log" -f'memory in kilobytes %M real %e user %U sys %S command %C' "${gigi}" "${file}" -outD="${output_folder}/${run_name}/gigi_output/${BASH_REMATCH}" > "${output_folder}/${run_name}/LOGS/${BASH_REMATCH}.gigi.log" & pids+=("$!")
fi
echo "$!"
last_pid="$!"
echo "gigi pid: " "$last_pid" " for part: " "${BASH_REMATCH}"
fi
#i=$(($i+1))
done
#Check exit status
......@@ -42,7 +47,7 @@ for pid in ${pids[*]}
do
if wait "${pid}"
then
echo "${pid}" "$?"
echo "gigi exit status for pid: ""${pid}" " is " "$?"
else
echo "FAILED TEST: " "${pid}" "$?"
exit 1
......
......@@ -3,12 +3,15 @@
#GIGIMerge <directory> <partition count>
#######################################
# Run gigimerge once over this run's gigi_output directory, timed by $timecmd.
# (The command used to be invoked twice in a row: once with its output on the
# terminal and once more with the output logged.)
# Globals:   timecmd, gigi_merge, output_folder, run_name, num_chunks, long,
#            verbose (read); merge_status (written)
# Outputs:   parameter summary on stdout when verbose is set; the tool's own
#            stdout goes to LOGS/merge.log, timing to STATS/time_merge.log;
#            the exit status line is always printed
#######################################
run_gigi_merge() {
  local run_dir="${output_folder}/${run_name}"
  if [[ -n "${verbose}" ]]; then
    echo "GIGI Merge Location: " "${gigi_merge}"
    echo "Directory: " "${run_dir}/gigi_output"
    echo "Num Chunks: " "${num_chunks}"
    echo "Long is: " "${long}"
  fi
  # timecmd stays unquoted, exactly as before, so it is still word-split.
  $timecmd -o "${run_dir}/STATS/time_merge.log" -f'memory in kilobytes %M real %e user %U sys %S command %C' "${gigi_merge}" "${run_dir}/gigi_output" "${num_chunks}" "${long}" > "${run_dir}/LOGS/merge.log"
  merge_status="$?"
  echo "Merge exit status: " "$merge_status"
}
run_gigi_merge
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment