You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
114 lines
3.9 KiB
114 lines
3.9 KiB
from sklearn.svm import LinearSVC
|
|
import numpy as np
|
|
import pandas as pd
|
|
from pandas import DataFrame
|
|
import matplotlib.pyplot as plt
|
|
from pprint import pprint as prt
|
|
from datetime import datetime as dt
|
|
|
|
|
|
def eval_approval(sbss: int, yib: float) -> int:
    """
    Rule-based credit decision: returns 1 for approved, 0 for declined.

    Each SBSS band has its own minimum years-in-business (YIB)
    requirement, and that requirement relaxes as the score rises.
    Plotted, the approve/decline boundary is therefore a descending
    staircase rather than a straight line, which is also why a basic
    linear model struggles close to the band edges.

    Scores below 140 are always declined; scores of 280 and above are
    always approved.
    """
    # Below the minimum score nothing else matters.
    if sbss < 140:
        return 0

    # (exclusive upper SBSS bound of the band, minimum YIB for approval)
    yib_floor_by_band = (
        (180, 10),
        (200, 8),
        (220, 6),
        (240, 5),
        (260, 3),
        (280, 2),
    )
    for band_ceiling, yib_floor in yib_floor_by_band:
        if sbss < band_ceiling:
            return 0 if yib < yib_floor else 1

    # 280 or higher: approved regardless of years in business.
    return 1
|
|
|
|
|
|
def generate_sample_data(data_size: int, save_data: bool = False, save_name: str = None) -> DataFrame:
    """
    Build a random sample of businesses together with their rule-based approval.

    Args:
        data_size: Number of businesses (rows) to generate.
        save_data: When True, also write the sample to a CSV file.
        save_name: Target CSV path. When left as None, a name of the form
            ``basic_credit_sample_data_<YYYY-MM-DD>.csv`` built from
            today's date is used.

    Returns:
        A DataFrame indexed by ``BusinessID`` with the columns
        ``YearsInBusiness``, ``SBSS`` and ``Approved``.
    """
    # Small Business Scoring System: normal distribution, mean 200, std 50.
    # NOTE: the distribution is unbounded, so this can produce 'invalid'
    # scores (e.g. > 300).
    sbss = np.random.normal(loc=200, scale=50, size=data_size)
    # Years in Business: uniform between 0 and 15.
    yib = np.random.uniform(low=0, high=15, size=data_size)
    # Business Id: sequential 0 .. data_size - 1.
    bid = np.arange(data_size)

    df = DataFrame(
        {"BusinessID": bid, "YearsInBusiness": yib, "SBSS": sbss}
    ).set_index("BusinessID")

    # SBSS should be whole numbers (the cast drops the fractional part).
    df["SBSS"] = df["SBSS"].astype("int32")
    # Round YIB to 2 decimals (easy to look at).
    df["YearsInBusiness"] = df["YearsInBusiness"].round(2)

    # Label every row with the rule-based decision. Zipping the two columns
    # hands eval_approval plain int/float values and avoids the cost of
    # building a Series per row with DataFrame.apply(axis=1).
    decisions = [
        eval_approval(score, years)
        for score, years in zip(df["SBSS"].tolist(), df["YearsInBusiness"].tolist())
    ]
    df["Approved"] = np.array(decisions, dtype=np.int64)

    if save_data:
        if save_name is None:
            # %m is the month; %M would insert the current minute instead.
            today = dt.now().strftime("%Y-%m-%d")
            save_name = f"basic_credit_sample_data_{today}.csv"
        df.to_csv(save_name)
    return df
|
|
|
|
|
|
def predict_with_model(model):
    """
    Prompt for one business and compare the model's prediction with the rules.

    Args:
        model: A fitted classifier exposing ``predict``; it is called with a
            single ``[[years_in_business, sbss]]`` row.

    Returns:
        False when the user submits a blank answer to either prompt (the
        signal for the caller to stop asking), True otherwise. An entry
        that is not a valid number is reported and also returns True, so
        the caller can simply prompt again.
    """
    # The blank check must run on the raw text: float("") / int("") raise
    # ValueError, so a check made after conversion can never see a blank.
    yib_text = input("How many years in business?:\t").strip()
    if not yib_text:
        return False
    try:
        yib = float(yib_text)
    except ValueError:
        print(f"'{yib_text}' is not a valid number of years.\n")
        return True

    sbss_text = input("What is your SBSS?:\t\t").strip()
    if not sbss_text:
        return False
    try:
        sbss = int(sbss_text)
    except ValueError:
        print(f"'{sbss_text}' is not a valid whole-number SBSS.\n")
        return True

    # Feature order matches the order used to fit the model: (YIB, SBSS).
    model_pred = model.predict([[yib, sbss]])[0] == 1
    actual = eval_approval(sbss, yib) == 1
    print(f"The model predicts:\t\t{model_pred}\nThe rules say:\t\t\t{actual}.")
    print(f"The model was {'correct' if model_pred == actual else 'incorrect'}.\n")
    return True
|
|
|
|
|
|
def main():
    """
    Run the demo end to end.

    Generates (and saves) a 100,000-row sample, prints it with its approval
    rate, saves a scatter plot of the decisions, fits a linear SVM on
    (YearsInBusiness, SBSS) and then lets the user try inputs until a
    blank answer is given.
    """
    sample_data = generate_sample_data(100000, True, "default_cd_sample.csv")
    # To reuse a previously saved sample instead of regenerating it:
    # sample_data = pd.read_csv("default_cd_sample.csv", index_col="BusinessID")
    prt(sample_data)

    # Format as a percentage directly; multiplying a rounded float by 100
    # prints artefacts such as 56.120000000000005%.
    approval_rate = (sample_data["Approved"] == 1).mean()
    print(f"Approval Rate: {approval_rate:.2%}\n")

    #
    # Generate a scatter plot
    #
    # Declined deals (0) are red, approved deals are green.
    colors = ["r" if approved == 0 else "g" for approved in sample_data["Approved"]]
    plt.scatter(sample_data["YearsInBusiness"], sample_data["SBSS"], c=colors)
    plt.xlabel("# of Years in Business")
    plt.ylabel("SBSS")
    plt.title("Credit Decision")
    plt.savefig("cd_scatter1.jpg")

    # Combine the features into one (YIB, SBSS) row per business. A plain
    # array is used so the model is fitted without column names and can
    # later be queried with a bare [[yib, sbss]] list.
    features = sample_data[["YearsInBusiness", "SBSS"]].to_numpy()

    # Feed that data into the model
    classifier = LinearSVC(dual=False, random_state=0, penalty="l2", tol=1e-5, max_iter=1000000)
    model = classifier.fit(features, sample_data["Approved"])

    # Let people play and see how the model does at prediction;
    # predict_with_model returns False once the user wants to stop.
    while predict_with_model(model):
        pass


# Only run the demo when executed as a script, so importing this module
# does not generate data, write files or block waiting for input.
if __name__ == "__main__":
    main()