|
9 | 9 |
|
10 | 10 | # TC Classes |
11 | 11 | from transcript import Transcript |
| 12 | +from transcript import check_seq_and_cigar_length |
12 | 13 | from spliceJunction import * |
13 | 14 | from intronBound import IntronBound |
14 | 15 | from optparse import OptionParser |
@@ -120,12 +121,13 @@ def getOptions(): |
120 | 121 | parser.add_option("--primaryOnly", dest ="primaryOnly", action='store_true', |
121 | 122 | help = "If this option is set, TranscriptClean will only \ |
122 | 123 | output primary mappings of transcripts (ie it will filter \ |
123 | | - out unmapped and multimapped lines from the SAM input.") |
| 124 | + out unmapped and multimapped lines from the SAM input.", |
| 125 | + default = False) |
124 | 126 | parser.add_option("--canonOnly", dest ="canonOnly", action='store_true', |
125 | 127 | help = ("If this option is set, TranscriptClean will " |
126 | 128 | "output only canonical transcripts and transcripts " |
127 | 129 | "containing annotated noncanonical junctions to the " |
128 | | - "clean SAM file at the end of the run.")) |
| 130 | + "clean SAM file at the end of the run."), default = False) |
129 | 131 | parser.add_option("--tmpDir", dest ="tmp_path", |
130 | 132 | help = ("If you would like the tmp files to be written " |
131 | 133 | "somewhere different than the final output, " |
@@ -411,7 +413,7 @@ def correct_transcript(transcript_line, options, refs): |
411 | 413 | options.maxLenIndel, upd_logInfo) |
412 | 414 | if ins_TE != "": |
413 | 415 | TE_entries += ins_TE |
414 | | - |
| 416 | + |
415 | 417 | # Deletion correction |
416 | 418 | del_TE = correctDeletions(upd_transcript, refs.genome, refs.deletions, |
417 | 419 | options.maxLenIndel, upd_logInfo) |
@@ -1212,6 +1214,7 @@ def update_post_ncsj_correction(transcript, splice_jn_num, genome, sjAnnot): |
1212 | 1214 | transcript.jM, transcript.jI = transcript.get_jM_jI_tags_from_sjs() |
1213 | 1215 | transcript.isCanonical = transcript.recheckCanonical() |
1214 | 1216 | transcript.allJnsAnnotated = transcript.recheckJnsAnnotated() |
| 1217 | + |
1215 | 1218 | return |
1216 | 1219 |
|
1217 | 1220 | def attempt_jn_correction(transcript, splice_jn_num, genome, ref_donors, |
@@ -1247,17 +1250,19 @@ def attempt_jn_correction(transcript, splice_jn_num, genome, ref_donors, |
1247 | 1250 | transcript.POS, splice_jn_num, |
1248 | 1251 | donor, ref_donor.dist, genome, |
1249 | 1252 | transcript.SEQ, transcript.CIGAR) |
1250 | | - |
| 1253 | + |
1251 | 1254 | # Attempt to fix the splice acceptor side |
1252 | 1255 | acceptor = junction.get_splice_acceptor() |
1253 | 1256 | transcript.SEQ, transcript.CIGAR = fix_one_side_of_junction(transcript.CHROM, |
1254 | 1257 | transcript.POS, splice_jn_num, |
1255 | 1258 | acceptor, ref_acceptor.dist, genome, |
1256 | 1259 | transcript.SEQ, transcript.CIGAR) |
| 1260 | + |
1257 | 1261 | # Now, perform updates: |
1258 | 1262 | update_post_ncsj_correction(transcript, splice_jn_num, genome, sjAnnot) |
1259 | 1263 |
|
1260 | | - except: |
| 1264 | + except Exception as e: |
| 1265 | + print(e) |
1261 | 1266 | return False, "Other", combined_dist |
1262 | 1267 |
|
1263 | 1268 | return True, "NA", combined_dist |
@@ -1384,6 +1389,9 @@ def fix_one_side_of_junction(chrom, transcript_start, jn_number, intronBound, d, |
1384 | 1389 | newCIGAR = newCIGAR + exonCIGARs[i] + str(intronCIGARs[i]) + "N" |
1385 | 1390 | newCIGAR = newCIGAR + exonCIGARs[-1] |
1386 | 1391 |
|
| 1392 | + if not check_seq_and_cigar_length(newSeq, newCIGAR): |
| 1393 | + raise RuntimeError("CIGAR string and sequence are not the same length") |
| 1394 | + |
1387 | 1395 | return newSeq, newCIGAR |
1388 | 1396 |
|
1389 | 1397 |
|
@@ -1580,6 +1588,7 @@ def dryRun(sam, options, outfiles): |
1580 | 1588 | write_to_transcript_log(logInfo, tL) |
1581 | 1589 | return |
1582 | 1590 |
|
| 1591 | + |
1583 | 1592 | if __name__ == '__main__': |
1584 | 1593 | #pr = cProfile.Profile() |
1585 | 1594 | #pr.enable() |
|
0 commit comments