-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmerchant_map.py
More file actions
103 lines (51 loc) · 3.06 KB
/
Copy pathmerchant_map.py
File metadata and controls
103 lines (51 loc) · 3.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import distance
def fnSplitString(strTransaction):
    """Return the "|"-separated parts of a transaction line's third field.

    The line is split on tab characters, the field at index 2 is selected,
    and that field is split on "|".

    Args:
        strTransaction: One tab-separated transaction record.

    Returns:
        A list of strings, one per "|"-separated part of the third field.

    Raises:
        IndexError: If the line has fewer than three tab-separated fields.
    """
    tab_fields = strTransaction.split("\t")
    third_field = tab_fields[2]
    return third_field.split("|")
def fnCompareMerchant(strCleanMerchantVal, strDirtyMerchantVal, IsTesting):
    """Print a comparison report for one dirty merchant string.

    In testing mode the dirty value is checked against a fixed table of
    expected letter counts (the table matches the spelling of "STARBUCKS"),
    and a line is printed for each letter whose count equals or exceeds the
    expected count. Otherwise the Hamming distance between the dirty and
    clean values is computed with ``distance.hamming`` and printed.

    Args:
        strCleanMerchantVal: Reference merchant name; only used outside
            testing mode.
        strDirtyMerchantVal: Merchant name taken from the transaction data.
        IsTesting: Evaluated by truthiness only, so any non-empty string
            (including "0") selects testing mode.

    Returns:
        None. All results are written to stdout.
    """
    print("Merchant to Check:", strDirtyMerchantVal)

    if not IsTesting:
        # NOTE(review): a comment at the call site says hamming needs
        # equal-length strings; this function does not check that itself.
        hamming_gap = distance.hamming(strDirtyMerchantVal, strCleanMerchantVal)
        print(
            strDirtyMerchantVal,
            " and ",
            strCleanMerchantVal,
            " have a hamming distance of: ",
            str(hamming_gap),
        )
        return

    expected_counts = {"S": 2, "T": 1, "A": 1, "R": 1, "B": 1, "U": 1, "C": 1, "K": 1}
    for letter, expected in expected_counts.items():
        print("Checking letter: ", letter)
        found = strDirtyMerchantVal.count(letter)
        if found == expected:
            print(letter, "is a match.")
        elif found > expected:
            # More occurrences than expected: reported as a weaker signal.
            print(letter, "is a possible match.")
# main
# Interactive entry point: prompts for a transaction file and a clean
# merchant list, then compares the merchant of every transaction line with
# every entry of the clean merchant list.
strFile = input( "Specify the file to be processed: " )
strMerchantList = input( "Specify Merchant List File: " )
# input() returns a string, and any non-empty string (including "0") is
# truthy, so the answer has to be compared explicitly to get a real boolean.
boolTesting = input( "Are we testing? 1 for Yes, 0 for No: ").strip() == "1"

# Read the clean merchant list once instead of reopening the file for every
# transaction line. Each entry keeps the name as written (used for the
# Jaccard comparison) and a compressed, upper-cased form (used for the
# same-length comparison).
listCleanMerchants = []
with open( strMerchantList, "r" ) as oCleanMerchantFile :
    for mlLine in oCleanMerchantFile :
        strStripMerchant = mlLine.strip( "\r\n" )
        if not strStripMerchant.strip() :
            # Blank entries carry no merchant name to compare against.
            continue
        strCleanMerchant = strStripMerchant.replace( " ", "" ).upper()
        listCleanMerchants.append( ( strStripMerchant, strCleanMerchant ) )

with open( strFile, "r" ) as oFiMerchantFile :
    for fmLine in oFiMerchantFile :
        strLine = fmLine.strip( "\r\n" )
        if not strLine :
            # An empty line has no third tab field and would make
            # fnSplitString raise IndexError.
            continue
        listTrans = fnSplitString( strLine )
        strOrigMerchantValue = str( listTrans[ 0 ] )
        # Strip out any spaces to create a compressed string.
        # NOTE(review): unlike the clean value, this is not upper-cased;
        # confirm the transaction data is already upper case.
        strDirtyMerchant = strOrigMerchantValue.replace( " ", "" )
        for strStripMerchant, strCleanMerchant in listCleanMerchants :
            # Hamming distance is only attempted for equal-length strings.
            if len( strDirtyMerchant ) == len( strCleanMerchant ) :
                fnCompareMerchant( strCleanMerchant, strDirtyMerchant, boolTesting )
            # Jaccard distance is computed on the unmodified values.
            d = distance.jaccard( strOrigMerchantValue, strStripMerchant )
            print( "Jaccard distance for ", strOrigMerchantValue, " and ", strStripMerchant, "is: ", str( d ) )