-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathPreprocessing.py
More file actions
84 lines (68 loc) · 3.11 KB
/
Preprocessing.py
File metadata and controls
84 lines (68 loc) · 3.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#importing Libraries
import math
import pandas as pd
import numpy as np
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import LabelEncoder
# Raise pandas' console display limits (row count, column count, total line
# width and per-cell width) in a single call using pattern/value pairs.
pd.set_option(
    'display.max_rows', 900,
    'display.max_columns', 15,
    'display.width', 8000,
    'display.max_colwidth', 8000,
)
def _fill_alternating(frame, column, first, second):
    """Overwrite every cell of *column* that is neither *first* nor *second*.

    Offending cells (missing values as well as any unexpected entry) are
    replaced in row order, alternating ``first, second, first, ...``.
    The frame is modified in place.
    """
    invalid = ~frame[column].isin([first, second])
    if not invalid.any():
        return
    replacements = [
        first if turn % 2 == 0 else second
        for turn in range(int(invalid.sum()))
    ]
    frame.loc[invalid, column] = replacements


def Preprocessing(csv_path="loan_data.csv"):
    """Load the loan data set and return it cleaned, encoded and scaled.

    Steps, in order:
      1. Read the CSV file.
      2. Fill "Gender", "Self_Employed" and "Credit_History" cells that do
         not hold one of their two accepted values, alternating between
         the two values.
      3. Fill missing "Married" with 'No', missing "Dependents" with 0
         (after mapping '3+' to 3), missing "LoanAmount" with the rounded-up
         column mean and missing "Loan_Amount_Term" with 330.
      4. Label-encode the categorical columns.
      5. Max-abs scale the columns at positions 6..9.

    Args:
        csv_path: Path of the CSV file to read. Defaults to
            "loan_data.csv", the file the function has always read.

    Returns:
        The preprocessed ``pandas.DataFrame``.
    """
    # Read the data from the file.
    loan_df = pd.read_csv(csv_path)

    # Fill invalid/empty "Gender" cells (once 'Male', once 'Female').
    _fill_alternating(loan_df, "Gender", 'Male', 'Female')

    # Fill empty "Married" cells with 'No'.  The result is assigned back to
    # the frame: calling fillna(inplace=True) on a selected column is
    # chained assignment and does not update the frame under Copy-on-Write.
    loan_df["Married"] = loan_df["Married"].fillna('No')

    # Normalise "Dependents": '3+' becomes 3, then the whole column is made
    # numeric so it no longer mixes strings and integers.  Entries that
    # cannot be parsed are treated like empty cells and become 0.
    dependents = loan_df["Dependents"].replace('3+', '3')
    loan_df["Dependents"] = pd.to_numeric(dependents, errors="coerce").fillna(0)

    # Fill invalid/empty "Self_Employed" cells (once 'Yes', once 'No').
    _fill_alternating(loan_df, "Self_Employed", 'Yes', 'No')

    # Fill empty "LoanAmount" cells with the column mean, rounded up.
    mean_amount = math.ceil(loan_df["LoanAmount"].mean())
    loan_df["LoanAmount"] = loan_df["LoanAmount"].fillna(mean_amount)

    # Fill empty "Loan_Amount_Term" cells with 330.  This is a hard-coded
    # constant (labelled "Average Term" by the original author); it is not
    # computed from the data.
    loan_df["Loan_Amount_Term"] = loan_df["Loan_Amount_Term"].fillna(330)

    # Fill invalid/empty "Credit_History" cells (once 1, once 0).
    _fill_alternating(loan_df, "Credit_History", 1, 0)

    # Label-encode the categorical columns.  Each column is assigned
    # directly on the frame, so it ends up with an integer dtype instead of
    # going through an intermediate slice that is copied back by position.
    categorical = ('Gender', 'Married', 'Education', 'Self_Employed',
                   'Property_Area')
    for name in categorical:
        loan_df[name] = LabelEncoder().fit_transform(loan_df[name].tolist())

    # Scale the columns at positions 6..9 by their maximum absolute value.
    # NOTE(review): presumably these are the income / loan amount / loan
    # term columns -- confirm against the CSV layout.  The columns are
    # replaced by name so float results never have to be written in place
    # into an integer column.
    scaled_names = loan_df.columns[6:10]
    scaled_values = MaxAbsScaler().fit_transform(
        loan_df[scaled_names].to_numpy()
    )
    loan_df[scaled_names] = scaled_values

    return loan_df