88from .utils import check_protospacer_length , trim_protospacer
99
1010
11- def load_cas9_sgRNA_library (library_path , library_type , sep = '\t ' , index_col = 0 , protospacer_length = 19 , verbose = True ):
11+ def load_cas9_sgRNA_library (library_path , library_type , sep = '\t ' , index_col = 0 , protospacer_length = 19 , verbose = True , ** args ):
1212 '''Load Cas9 sgRNA library table for single or dual guide design.
1313 '''
1414 library = pd .read_csv (
1515 library_path ,
1616 sep = sep ,
1717 index_col = index_col ,
18+ ** args
1819 )
1920
2021 ## Evaluate library table and reformat columns for downstream analysis
2122 # If the table has a 'gene' column, it is renamed to 'target' below.
22- if 'gene' in library .columns :
23- # rename gene column to target
24- library = library .rename (columns = {'gene' : 'target' })
2523
2624 if library_type == "single_guide_design" :
27- eval_columns = ['target' , 'sgID' , 'protospacer' ]
25+ eval_columns = ['target' , 'sgID' , 'protospacer' , 'sequence' ]
26+
27+ # reformatting columns as needed
28+ if 'gene' in library .columns :
29+ # rename gene column to target
30+ library = library .rename (columns = {'gene' : 'target' })
31+ if 'sequence' in library .columns and 'protospacer' not in library .columns :
32+ library .rename (columns = {'sequence' : 'protospacer' }, inplace = True )
33+ if 'sgId' in library .columns :
34+ library .rename (columns = {'sgId' : 'sgID' }, inplace = True )
2835
2936 # Upper case protospacer sequences
3037 library ['protospacer' ] = library ['protospacer' ].str .upper ()
3138
39+ protospacer_col = 'protospacer'
40+ in_length = check_protospacer_length (library , 'protospacer' )
41+ if in_length == protospacer_length :
42+ pass
43+ elif in_length > protospacer_length :
44+ if verbose : print (f"Trimming protospacer sequences in '{ protospacer_col } ' column." )
45+ library = trim_protospacer (
46+ library , protospacer_col ,
47+ '5prime' ,
48+ in_length - protospacer_length
49+ )
50+
51+ elif in_length < protospacer_length :
52+ raise ValueError (
53+ f"Input protospacer length for '{ protospacer_col } ' is less than { protospacer_length } "
54+ )
55+
56+ # write `sequence` column as `protospacer` (after trimming)
57+ library ['sequence' ] = library ['protospacer' ]
58+
3259 for col in eval_columns :
3360 if col not in library .columns :
3461 raise ValueError (f"Column '{ col } ' not found in library table." )
@@ -43,6 +70,11 @@ def load_cas9_sgRNA_library(library_path, library_type, sep='\t', index_col=0, p
4370 'sequence'
4471 ]
4572
73+ # reformatting columns as needed
74+ if 'gene' in library .columns :
75+ # rename gene column to target
76+ library = library .rename (columns = {'gene' : 'target' })
77+
4678 # Upper case protospacer sequences
4779 library ['protospacer_A' ] = library ['protospacer_A' ].str .upper ()
4880 library ['protospacer_B' ] = library ['protospacer_B' ].str .upper ()
@@ -62,10 +94,10 @@ def load_cas9_sgRNA_library(library_path, library_type, sep='\t', index_col=0, p
6294
6395 elif in_length < protospacer_length :
6496 raise ValueError (
65- f"Input protospacer length for '{ protospacer_col } 'is less than { protospacer_length } "
97+ f"Input protospacer length for '{ protospacer_col } ' is less than { protospacer_length } "
6698 )
6799
68- # if ' sequence' not in library.columns:
100+ # write ` sequence` column as `protospacer_A;protospacer_B` (after trimming)
69101 library ['sequence' ] = library ['protospacer_A' ] + ';' + library ['protospacer_B' ]
70102
71103 for col in eval_columns :
@@ -74,6 +106,9 @@ def load_cas9_sgRNA_library(library_path, library_type, sep='\t', index_col=0, p
74106
75107 library = library [eval_columns ]
76108
109+ else :
110+ raise ValueError (f"Invalid library type: { library_type } . Please choose 'single_guide_design' or 'dual_guide_design'." )
111+
77112 if verbose : print ("Library table successfully loaded." )
78113
79114 return library
0 commit comments