commit
a1d93803d3
@ -0,0 +1,4 @@ |
||||
*.jpg |
||||
*.png |
||||
*.csv |
||||
/venv |
||||
@ -0,0 +1,114 @@ |
||||
from sklearn.svm import LinearSVC |
||||
import numpy as np |
||||
import pandas as pd |
||||
from pandas import DataFrame |
||||
import matplotlib.pyplot as plt |
||||
from pprint import pprint as prt |
||||
from datetime import datetime as dt |
||||
|
||||
|
||||
def eval_approval(sbss: int, yib: float) -> int:
    """
    Rule-based credit approval decision.

    Every SBSS band carries its own minimum Years-in-Business (YIB)
    requirement, and that requirement drops as the score band rises.
    Plotted, the approve/decline boundary is therefore a descending
    staircase, which is why a plain linear model has trouble right
    around the steps.

    Returns 1 when the applicant is approved and 0 when declined.
    """
    # Below 140 nothing else matters: always declined.
    if sbss < 140:
        return 0

    # (exclusive SBSS upper bound of the band, minimum YIB to be approved),
    # listed in ascending score order so the first matching band wins.
    yib_floor_by_band = (
        (180, 10),
        (200, 8),
        (220, 6),
        (240, 5),
        (260, 3),
        (280, 2),
    )
    for sbss_ceiling, yib_floor in yib_floor_by_band:
        if sbss < sbss_ceiling:
            return 0 if yib < yib_floor else 1

    # Scores that fall in no band (280 and up) are approved outright.
    return 1
||||
|
||||
|
||||
def generate_sample_data(data_size: int, save_data: bool = False, save_name: str = None) -> DataFrame:
    """
    Build a synthetic credit-decision data set.

    data_size -- number of businesses (rows) to generate
    save_data -- when True, also write the frame to a CSV file
    save_name -- CSV path used when saving; if left as None a name of the
                 form 'basic_credit_sample_data_<YYYY-MM-DD>.csv' is used

    Returns a DataFrame indexed by "BusinessID" with the columns
    "YearsInBusiness", "SBSS" and "Approved" (the 0/1 result of
    eval_approval for that row).

    Values are drawn from numpy's global random state, so seed it with
    np.random.seed(...) beforehand if reproducible output is needed.
    """
    # Small Business Scoring System
    # using a normal dist with a mean of 200 and a std of 50
    # This can produce 'invalid' SBSS of > 300
    sbss = np.random.normal(loc=200, scale=50, size=data_size)
    # Years in Business
    # equal chances of 0->15 years
    yib = np.random.uniform(low=0, high=15, size=data_size)
    # Business Id: simply 0 .. data_size - 1
    bid = np.arange(data_size)

    data_set = {"BusinessID": bid, "YearsInBusiness": yib, "SBSS": sbss}
    df = DataFrame(data_set).set_index("BusinessID")
    # SBSS should be whole numbers (the cast drops the fractional part)
    df["SBSS"] = df["SBSS"].astype("int32")
    # Round YIB to 2 decimals (easy to look at)
    df["YearsInBusiness"] = df["YearsInBusiness"].round(2)
    # Add an approval column based on the approval function defined earlier.
    # Iterating the two columns in lockstep avoids building a Series per row
    # (as a row-wise DataFrame.apply does) and also works for an empty frame.
    df["Approved"] = np.array(
        [eval_approval(score, years) for score, years in zip(df["SBSS"], df["YearsInBusiness"])],
        dtype="int64",
    )

    if save_data:
        if save_name is None:
            # %m is the month; %M would be the minute of the hour.
            save_name = f"basic_credit_sample_data_{dt.now().strftime('%Y-%m-%d')}.csv"
        df.to_csv(save_name)
    return df
||||
|
||||
|
||||
def predict_with_model(model):
    """
    Ask for one applicant on stdin and compare the model with the rules.

    model -- a fitted classifier whose predict() accepts rows of
             [years_in_business, sbss] and returns 1 for an approval

    Prompts for years in business and SBSS, prints what the model predicts,
    what eval_approval decides, and whether the two agree.

    Returns False when either prompt is answered with an empty line (the
    caller's signal to stop asking), True otherwise. An answer that is not
    a number prints a hint and returns True so the caller can ask again.
    """
    # Read the raw text first: converting before the emptiness check would
    # raise ValueError on an empty answer and the exit path could never run.
    yib_text = input("How many years in business?:\t").strip()
    if yib_text == "":
        return False
    sbss_text = input("What is your SBSS?:\t").strip()
    if sbss_text == "":
        return False

    try:
        yib = float(yib_text)
        sbss = int(sbss_text)
    except ValueError:
        print("Please enter a number for years in business and a whole number for SBSS.\n")
        return True

    # predict() returns one label per input row; take the single label so we
    # compare and print a plain bool rather than a one-element array.
    model_pred = bool(model.predict([[yib, sbss]])[0] == 1)
    actual = eval_approval(sbss, yib) == 1
    print(f"The model predicts:\t{model_pred}\nThe rules say:\t{actual}.")
    print(f"The model was {'correct' if model_pred == actual else 'incorrect'}\n")
    return True
||||
|
||||
|
||||
# Generate (and save) the training sample. To reuse a previously saved sample
# instead of regenerating it, swap to the read_csv line below.
sample_data = generate_sample_data(100000, True, "default_cd_sample.csv")
#sample_data = pd.read_csv("default_cd_sample.csv", index_col="BusinessID")
prt(sample_data)
# "Approved" holds 0/1, so its mean is the share of approved rows.
# The ':.2%' format scales and rounds in one step, which avoids float noise
# such as 56.98999999999999% from multiplying a rounded value by 100.
approval_rate = sample_data["Approved"].mean()
print(f"Approval Rate: {approval_rate:.2%}\n")

#
# Generate a scatter plot
#
# Declined deals (Approved == 0) are red, approved deals are green.
colors = ['r' if approved == 0 else 'g' for approved in sample_data["Approved"]]
plt.scatter(sample_data["YearsInBusiness"], sample_data["SBSS"], c=colors)
plt.xlabel("# of Years in Business")
plt.ylabel("SBSS")
plt.title("Credit Decision")
plt.savefig("cd_scatter1.jpg")

# Now we need to combine our features (YIB & SBSS) into one row per business.
# The column order here must match the [yib, sbss] order used at predict time.
# A plain array (not a DataFrame) is used so the model is fitted without
# feature names, matching the bare lists passed to predict().
combined_data = sample_data[["YearsInBusiness", "SBSS"]].to_numpy()
# Feed that data into the model
LSVCClf = LinearSVC(dual=False, random_state=0, penalty='l2', tol=1e-5, max_iter=1000000)
model = LSVCClf.fit(combined_data, sample_data["Approved"])

# Let people play and see how the model does at prediction;
# keep asking until predict_with_model returns False.
while predict_with_model(model):
    pass
||||
@ -0,0 +1,17 @@ |
||||
from sklearn.svm import LinearSVC |
||||
from sklearn.datasets import make_classification |
||||
import numpy as np |
||||
from pprint import pprint as prt |
||||
|
||||
# 100 uniform random draws; not referenced again in this script.
x2 = np.random.rand(100)

# Synthetic classification problem with 5 features per sample; the fixed
# random_state makes the generated data the same on every run.
X, y = make_classification(n_features=5, random_state=0)

# Quick look at what was generated: feature-matrix shape, the matrix
# itself, a spacer, then the number of labels.
print(X.shape)
prt(X)
print('\n')
print(len(y))

# L1-penalised linear SVM solved in the primal form (dual=False).
LSVCClf = LinearSVC(penalty='l1', dual=False, tol=1e-5, random_state=0)
LSVCClf.fit(X, y)
||||
Loading…
Reference in new issue