diff --git a/README b/README
index 69e5f060891e6785d68e04a582e3c5323f3d3c2b..1d7c8e5249b8a945276308d1184fe480f36b2129 100644
--- a/README
+++ b/README
@@ -1,3 +1,21 @@
-1.) Make sure permissions are correct (aka user running the script has access to $project_folder directory)
-2.) Make a $project_folder and place the input fastq.gz files in it, or symlinks to them
-3.) Run ./Run_Pipeline
+1.) Make sure Oracle Java 1.7 is installed, directions here for Ubuntu 14.04 http://askubuntu.com/questions/521145/how-to-install-oracle-java-on-ubuntu-14-04 
+    This is needed for Mutect (Stage 11)
+2.) Make sure bwa and samtools are installed and in your path (or change the location to where they are in the Stage 1 file, on Ubuntu 14.04 they may be 
+    installed with apt-get:
+    sudo apt-get install bwa
+    sudo apt-get install samtools
+3.) Install the multitude of tools referred to in the Stage 1 file and replace those paths with the paths to those tools and files for things like
+    reference genomes etc...
+4.) Make sure permissions are correct (aka user running the script has access to $project_folder directory)
+5.) Make a $project_folder and place the input fastq.gz files in it, or symlinks to them
+6.) Run ./Run_Pipeline
+
+Installing VEP:
+  
+Full API Installation: http://asia.ensembl.org/info/docs/api/api_installation.html
+Some additional Perl modules may need to be installed for Bioperl, install anything it complains about missing using CPAN
+If you use screen, note that the Perl environment variables set as described to not persist into the screen session without a little more configuration.  
+Create a file in your home directory called "screenrc" with the conetents "shell -$SHELL" without the quotes.  Now screen will load your shell environment 
+when it starts.  
+In our case we don't want to have to read out online every time we run VEP, so we need to download some database/cache files as described here: 
+http://asia.ensembl.org/info/docs/tools/vep/script/vep_cache.html#cache
diff --git a/R_Code_to_generate_Gene_List.R b/R_Code_to_generate_Gene_List.R
new file mode 100755
index 0000000000000000000000000000000000000000..1ea504f6d28665d7516743e281182a9902562f5c
--- /dev/null
+++ b/R_Code_to_generate_Gene_List.R
@@ -0,0 +1,35 @@
+#GOAL: Get a gene list for use when calculating DepthOfCoverage from GATK
+#The gene list is described here: http://gatkforums.broadinstitute.org/gatk/discussion/40/performing-sequence-coverage-analysis
+#They describe how you can get a genelist here: http://gatkforums.broadinstitute.org/gatk/discussion/1329/where-can-i-get-a-gene-list-in-refseq-format
+#When using the gene list particularly for the DepthOfCoverage it does not need the special format described there (-[arg]:REFSEQ /path/to/refSeq), use it as shown on the 1st page linked
+#Make sure to leave the format as "all fields from selected table"
+#Unfortunately the processing isn't done, they say: To run with the GATK, contigs other than the standard 1-22,X,Y,MT must be removed, and the file sorted in karyotypic order.
+#That is what this R script does.  GATK used to provide a Perl script for this but it has not been maintained and is in a broken state.  
+refseq<- read.delim("/media/Storage/Work/Michelle/Exome_Pipeline/FullTableDumpRefSeq.refSeq",sep="\t",as.is=TRUE,header = FALSE)
+dim(refseq)
+head(refseq)
+refseq[,3] <- factor(refseq[,3], levels = c("chrM", "chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX", "chrY"))
+refseq<- refseq[order(refseq[,3],as.numeric(refseq[,5])),]
+unique(refseq[,3])
+paste0("chr",c(1:22,"X","Y","M"))
+refseq_f <- refseq[refseq[,3] %in% paste0("chr",c(1:22,"X","Y","M")),]
+unique(refseq_f[,3])
+#This file inludes genes that have no coding region (cdsStart == cdsEnd) and gives the warning below
+write.table(refseq_f,file="/media/Storage/Work/Michelle/Exome_Pipeline/RefSeq_hg19_Proper_Format.GeneList",sep="\t",quote=FALSE,col.names = FALSE,row.names=FALSE)
+
+#These are the genes that cause the warning: 
+#WARN  09:02:59,808 Utils - ******************************************************************************** 
+#WARN  09:02:59,809 Utils - * WARNING: 
+#WARN  09:02:59,810 Utils - * 
+#WARN  09:02:59,810 Utils - * RefSeq file contains transcripts with zero coding length. Such 
+#WARN  09:02:59,810 Utils - * transcripts will be ignored (this warning is printed only once) 
+#WARN  09:02:59,811 Utils - ******************************************************************************** 
+#They appear to be non-coding genes?
+#https://www.biostars.org/p/132742/
+#https://github.com/broadgsa/gatk/blob/master/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/codecs/refseq/RefSeqCodec.java
+#equalTest <- (abs(as.numeric(refseq_f[,7])-as.numeric(refseq_f[,8])) < 0.001)
+equalTest <- (abs(as.numeric(refseq_f[,7])-as.numeric(refseq_f[,8])) < 0.001)
+write.table(refseq_f[which(equalTest == TRUE),], file = "/media/Storage/Work/Michelle/Exome_Pipeline/RefSeq_hg19_transcripts_with_zero_coding_length.GeneList",sep="\t",quote=FALSE,col.names = FALSE,row.names=FALSE)
+
+#This file excludes the non-coding genes, since we are looking at exomes we use this
+write.table(refseq_f[which(equalTest == FALSE),], file = "/media/Storage/Work/Michelle/Exome_Pipeline/RefSeq_hg19_Proper_Format_Filtered.GeneList",sep="\t",quote=FALSE,col.names = FALSE,row.names=FALSE)