|
|
| (2 intermediate revisions by the same user not shown) |
| Line 1: |
Line 1: |
| = Structural variant calling =
| | For sequences from UMICH sequencing core. Example Makefile for converting fastq's for 1 sample to a single recalibrated deduped bam file: |
| Python script for scanning bam files to extract reads.
| | /net/twins/svrieze/vrieze-pipeline/demux/Pool_18158/intermediateFiles/S001/Makefile |
| <syntaxhighlight lang="python">
| |
| ## Short, hard-coded script to find the file that has the read mate
| |
| ## error as identified during the discovery process in genome
| |
| ## strip. All information is contained in this file, including the
| |
| ## original bam that gave the error. I don't replace this bam, as it
| |
| ## worked fine for variant calling with gotcloud, I simply generate a
| |
| ## new bam with fixed mate info and revise my bam index file for
| |
| ## genomestrip
| |
|
| |
|
| ## Scott Vrieze 5-23-2014
| | For first set of 300ish sequences from HudsonAlpha core. Example Makefile for converting bam to fastq back to bam. |
| | /net/twins/svrieze/BRIDGES-samples-bams/bwa-mem-conversion/1497-RMM-1540.makefile |
|
| |
|
| import os, subprocess, glob
| | For second set of 180ish sequences from HudsonAlpha core. Example Makefile for converting bam to fastq back to bam. |
| | | /net/twins/svrieze/BRIDGES-samples-bams/bwa-mem-conversion/1497-RMM-1540.makefile |
| ############################################################################
| |
| ### First time I encountered genomestrip error these reads were identified
| |
| ProblemReads = {'HWI-ST1235:116:C27LRACXX:1:1209:18833:64097' : ["1", "152185614"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1104:15091:29326' : ["1", "149348181"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:2114:6552:61741' : ["1", "28877651"],
| |
| 'HWI-ST1235:116:C27LRACXX:3:1116:16138:88445' : ["1", "97346071"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1101:16984:92469' : ["1", "196985444"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:2311:20887:98203' : ["1", "86631400"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1212:7489:45010' : ["1", "63708242"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1101:16984:92469' : ["1", "196985444"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1203:8503:27883' : ["1", "109573059"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1214:12166:55285' : ["1", "58743732"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1101:13049:69672' : ["1", "13539601"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1312:12924:75077' : ["1", "28558321"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1210:6203:5899' : ["1", "168025777"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1209:19210:28938' : ["1", "64958078"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1105:14368:54781' : ["1", "147501360"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1305:13399:83424' : ["1", "5624169"],
| |
| 'HWI-ST1235:116:C27LRACXX:2:2315:13218:30081' : ["1", "74321313"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1214:11024:90812' : ["1", "142853175"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1214:4043:49416' : ["1", "89475771"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1210:19619:95557' : ["1", "172420422"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1311:5174:62356' : ["1", "44943950"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1114:11387:66571' : ["1", "144894013"],
| |
| 'HWI-ST1235:116:C27LRACXX:2:2103:10249:62709' : ["1", "101854331"],
| |
| 'HWI-ST1235:116:C27LRACXX:2:2301:18136:47075' : ["1", "191916208"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1207:9056:54326' : ["1", "55095967"],
| |
| 'HWI-ST1235:116:C27LRACXX:3:1102:3157:49915' : ["1", "76255012"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:2301:13274:100251' : ["1", "117881944"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:2304:10850:92597' : ["1", "7369483"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1209:18833:64097' : ["1", "152185614"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1201:14927:11454' : ["1", "86406908"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1201:2968:35004' : ["1", "68007915"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1210:6203:5899' : ["1", "168025777"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1102:13060:92872' : ["1", "17223428"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1105:14764:19187' : ["1", "2055679"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1104:11370:77429' : ["1", "22300624"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:2111:3085:73279' : ["1", "80223029"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1103:19465:87910' : ["1", "1407393"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1203:13585:14610' : ["1", "103816935"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:2313:13071:60114' : ["1", "173348328"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:2106:8822:12192' : ["1", "119304549"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1305:7095:90839' : ["1", "179607151"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1105:5048:79898' : ["1", "32666418"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:2101:13901:77430' : ["1", "199109753"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1110:12962:41359' : ["1", "11101794"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1204:19295:76622' : ["1", "83127593"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1104:19785:96395' : ["1", "91561242"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1102:15252:80735' : ["1", "161413534"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1107:10413:71292' : ["1", "158867402"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1114:19425:76541' : ["1", "25161545"],
| |
| 'HWI-ST1235:116:C27LRACXX:2:1111:12794:88546' : ["1", "34356865"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1215:7064:78709' : ["1", "20932742"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1102:15252:80735' : ["1", "161413534"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:2204:15231:88401' : ["1", "201180171"],
| |
| 'HWI-ST1235:116:C27LRACXX:2:1203:7516:85091' : ["1", "177814302"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1206:5915:62183' : ["1", "184820785"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1108:13389:68730' : ["1", "112837688"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:2208:9685:43976' : ["1", "50383920"],
| |
| 'HWI-ST1235:116:C27LRACXX:1:1107:1714:34047' : ["1", "207293348"]}
| |
| | |
| | |
| | |
| problemReads = {'HWI-ST1235:89:C1EACACXX:4:1110:17337:34885' : ["1", "5434828"],
| |
| 'HWI-ST1128:112:D1M6MACXX:5:2310:8276:30225' : ["2", "62686397"],
| |
| 'HWI-ST1235:90:D1JBCACXX:6:2316:13678:73655' : ["1", "243992751"],
| |
| 'HWI-ST1235:90:D1JBCACXX:2:1207:19752:10062' : ["1", "33258942"],
| |
| 'HWI-D00196:27:C24JDACXX:5:2304:7990:99369' : ["1", "43519620"],
| |
| 'HWI-ST1128:112:D1M6MACXX:4:2303:14871:42579' :["1", "103611138"],
| |
| 'HWI-ST1235:99:D1UVYACXX:8:2101:4568:19065' : ["2", "96353717"],
| |
| 'HWI-ST1278:65:C1GMDACXX:2:2308:5263:39362' : ["1", "118404738"]
| |
| }
| |
| | |
| problemReads2 = {'HWI-ST1235:89:C1EACACXX:4:2316:1665:75803' : ['2', '201502302'],
| |
| 'D7DHSVN1:181:D1UFGACXX:1:1201:9709:12620' : ['2', '222133942'],
| |
| 'D98XHXP1:167:D1V31ACXX:4:1306:18790:39307' : ['1', '114132271'],
| |
| 'DN1JJN1:230:C1M0VACXX:4:1312:7101:8405' : ['2', '51845427'],
| |
| 'DGFC08P1:183:C236HACXX:4:1101:15397:89335' : ['2', '237524844'],
| |
| 'HWI-D00196:32:C2488ACXX:6:1115:1408:92777' : ['1', '39511328'],
| |
| 'HWI-ST1278:95:C24K8ACXX:2:1110:18590:51321' : ['1', '105104897'],
| |
| 'HWI-ST1235:100:C1WWAACXX:3:1213:15321:58621': ['3', '37997105'],
| |
| 'HWI-ST1235:89:C1EACACXX:4:2316:1665:75803' : ['2', '201502302'],
| |
| 'HWI-ST1235:89:C1EACACXX:3:2111:4969:23592' : ['2', '189558120'],
| |
| 'HWI-ST1235:99:D1UVYACXX:7:1210:13710:47000' : ['1', '183446624'],
| |
| 'HWI-ST1278:95:C24K8ACXX:2:1110:18590:51321' : ['1', '105104897'],
| |
| 'HWI-ST1278:65:C1GMDACXX:6:2211:14565:62417' : ['3', '46511845'],
| |
| 'HWI-ST1235:112:C246EACXX:6:2109:21177:54211': ['2', '108797670'],
| |
| 'D7DHSVN1:187:D25M0ACXX:7:1307:7383:14983' : ['1', '150668190'],
| |
| 'HWI-ST1278:66:D1JL1ACXX:5:1311:8882:80458' : ['2', '89181987'],
| |
| 'D7DHSVN1:187:D25M0ACXX:1:1315:16706:34327' : ['3', '65652910'],
| |
| 'D98XHXP1:184:C2442ACXX:3:1211:18889:64457' : ['2', '227318023'],
| |
| 'DGFC08P1:176:D208VACXX:2:2109:2661:62728' : ['2', '14577245'],
| |
| 'DGFC08P1:187:C24K9ACXX:8:1115:15740:92483' : ['2', '146589268'],
| |
| 'DGFC08P1:175:C1WWNACXX:1:1308:18078:66229' : ['2', '209041636'],
| |
| 'DGFC08P1:179:C1WG9ACXX:8:2207:16097:100436' : ['3', '95931986'],
| |
| 'HWI-ST1128:112:D1M6MACXX:3:1106:9609:82824' : ['2', '161141810'],
| |
| 'HWI-ST1235:90:D1JBCACXX:4:1316:14511:80355' : ['3', '80001169'],
| |
| 'DGFC08P1:183:C236HACXX:4:1101:15397:89335' : ['2', '237524844'],
| |
| 'HWI-ST1278:79:D1YLUACXX:1:1116:5017:17159' : ['2', '58364318'],
| |
| 'DN1JJN1:247:C1WWBACXX:1:1202:2575:42060' : ['3', '85663102'],
| |
| 'DGFC08P1:188:C23YGACXX:1:2211:1161:77886' : ['2', '181566932'],
| |
| 'HWI-ST1278:79:D1YLUACXX:1:1116:5017:17159' : ['2', '58364318'],
| |
| 'HWI-ST1128:112:D1M6MACXX:3:1106:9609:82824' : ['2', '161141810'],
| |
| 'HWI-ST1235:99:D1UVYACXX:7:1210:13710:47000' : ['1', '183446624'],
| |
| 'DN1JJN1:230:C1M0VACXX:4:1312:7101:8405' : ['2', '51845427'],
| |
| 'DGFC08P1:176:D208VACXX:2:2109:2661:62728' : ['2', '14577245'],
| |
| 'HWI-D00196:24:D25T7ACXX:5:2107:1368:70330' : ['2', '125149933'],
| |
| 'D98XHXP1:173:C1WW7ACXX:6:1308:15891:94317' : ['2', '32489586'],
| |
| 'D98XHXP1:173:C1WW7ACXX:6:1308:15891:94317' : ['2', '32489586'],
| |
| 'HWI-ST1278:65:C1GMDACXX:7:2102:5628:18232' : ['3', '8787234'],
| |
| 'HWI-D00196:24:D25T7ACXX:5:2107:1368:70330' : ['2', '125149933'],
| |
| 'HWI-ST1235:100:C1WWAACXX:3:1213:15321:58621': ['3', '37997105'],
| |
| 'HWI-ST1235:89:C1EACACXX:3:2111:4969:23592' : ['2', '189558120'],
| |
| 'DGFC08P1:175:C1WWNACXX:5:1306:15909:24801' : ['3', '100323704'],
| |
| 'HWI-ST1278:65:C1GMDACXX:6:2314:2041:32221' : ['2', '219887572'],
| |
| 'D7DHSVN1:181:D1UFGACXX:1:1201:9709:12620' : ['2', '222133942'],
| |
| 'HWI-ST1278:65:C1GMDACXX:6:2211:14565:62417' : ['3', '46511845'],
| |
| 'HWI-ST1278:66:D1JL1ACXX:2:2310:19324:87925' : ['3', '41926471'],
| |
| 'HWI-ST1278:66:D1JL1ACXX:6:2214:16514:46688' : ['1', '230920699'],
| |
| 'HWI-ST1278:66:D1JL1ACXX:6:2214:16514:46688' : ['1', '230920699'],
| |
| 'HWI-ST1235:89:C1EACACXX:7:2207:12879:48410' : ['3', '16425304']
| |
| }
| |
| | |
| THese had error "Read pair records have different read groups"
| |
| #HWI-ST1235:116:C27LRACXX:1:2108:5198:92412: UM65301_1:7,UM6532_1:23
| |
| #HWI-ST1235:116:C27LRACXX:4:1311:20639:88390: UM65301_4:7,UM6532_4:23
| |
| | |
| | |
| | |
| ### Find the bams that have the problematic reads
| |
| def findFiles(problemReads):
| |
| problemFiles = []
| |
| for read in problemReads.keys():
| |
| flag = False
| |
| chrom = problemReads[read][0]
| |
| pos = problemReads[read][1]
| |
| f = open('final.bam.index-high-contamination-samples-dropped.index', 'r')
| |
| for File in f:
| |
| fname = File.split()[1]
| |
| cmd = ['/usr/cluster/bin/samtools', 'view', fname, chrom + ":" + pos +"-" + pos]
| |
| rslt = subprocess.check_output(cmd).split('\n')
| |
| for line in rslt:
| |
| if read in line:
| |
| problemFiles.append(fname)
| |
| print fname, line
| |
| flag = True
| |
| if flag:
| |
| break
| |
| f.close()
| |
| return(problemFiles)
| |
| | |
| | |
| problemFiles = findFiles(ProblemReads)
| |
| pf = list(set(problemFiles))
| |
| | |
| wd = os.getcwd() + "/"
| |
| picard = "/net/twins/svrieze/vrieze-pipeline/pipeline-utilities/bin/picard-tools-1.91/FixMateInformation.jar"
| |
| os.system("mkdir BAMS_FixMateInformation")
| |
| shellFile=open('runFixMateInformation.sh', "w")
| |
| for File in pf:
| |
| shellFile.write("nohup mosbatch -E"+wd+" -j1,10,11,12,13 " +
| |
| "java -Xmx4g -Djava.io.tmpdir="+wd+"/tmp -jar " + picard +
| |
| " INPUT=" + File +
| |
| " OUTPUT="+wd+"BAMS_FixMateInformation/"+File.split('/')[-1][:-4]+".FixMateInformation.bam "+
| |
| " VALIDATION_STRINGENCY=SILENT TMP_DIR="+ wd + "/tmp &\n\n")
| |
| | |
| shellFile.write("wait \n\n")
| |
| for File in pf:
| |
| shellFile.write("/usr/cluster/bin/samtools index " + wd + "BAMS_FixMateInformation/" +
| |
| File.split('/')[-1][:-4] + ".FixMateInformation.bam & \n\n")
| |
| | |
| shellFile.close()
| |
| | |
| | |
| ## The following dictionary I just manually compiled from the bams directory
| |
| cmd = ['ls', '-lhrt', 'bams/']
| |
| files = subprocess.check_output(cmd).splitlines()
| |
| pf = list(set(problemFiles))
| |
| y=[i.split('/')[-1].strip('.bam') for i in pf]
| |
| count=1
| |
| for File in files:
| |
| if len(File.split()) < 3:
| |
| continue
| |
| symlink = File.split()[8]
| |
| File = File.split()[-1]
| |
| sampleID = '.'.join(File.split('/')[-1].split('.')[0:2])
| |
| if any([sampleID in i for i in y]):
| |
| if 'FixMateInformation' not in File:
| |
| os.chdir('/net/twins/svrieze/genome-strip/bams')
| |
| os.system('ln -sf ' '/net/twins/svrieze/genome-strip/BAMS_FixMateInformation/' + File.replace('.bam', '.FixMateInformation.bam').split('/')[-1] + " " + os.getcwd() + "/" + symlink)
| |
| os.chdir('/net/twins/svrieze/genome-strip')
| |
| print count
| |
| count += 1
| |
| </syntaxhighlight>
| |