commit
a1d93803d3
@ -0,0 +1,4 @@ |
|||||||
|
*.jpg |
||||||
|
*.png |
||||||
|
*.csv |
||||||
|
/venv |
||||||
@ -0,0 +1,114 @@ |
|||||||
|
from sklearn.svm import LinearSVC |
||||||
|
import numpy as np |
||||||
|
import pandas as pd |
||||||
|
from pandas import DataFrame |
||||||
|
import matplotlib.pyplot as plt |
||||||
|
from pprint import pprint as prt |
||||||
|
from datetime import datetime as dt |
||||||
|
|
||||||
|
|
||||||
|
def eval_approval(sbss: int, yib: float) -> int:
    """Return the rule-based credit decision for one business.

    The rules form a step function: each SBSS band has its own minimum
    years-in-business (YIB) requirement, and the requirement relaxes as the
    score rises. That stepped boundary is what a purely linear model has
    trouble matching near the band edges.

    Args:
        sbss: Small Business Scoring System score.
        yib: Years in business.

    Returns:
        1 when the business is approved, 0 when it is declined.
    """
    # (exclusive upper SBSS bound of the band, minimum YIB needed in that band)
    tiers = (
        (180, 10),
        (200, 8),
        (220, 6),
        (240, 5),
        (260, 3),
        (280, 2),
    )

    # Scores below 140 are declined regardless of years in business.
    if sbss < 140:
        return 0

    # The first band whose upper bound exceeds the score decides the outcome.
    for upper_bound, required_years in tiers:
        if sbss < upper_bound:
            return 0 if yib < required_years else 1

    # Scores of 280 and above are approved regardless of years in business.
    return 1
||||||
|
|
||||||
|
|
||||||
|
def generate_sample_data(data_size: int, save_data: bool = False, save_name: str = None) -> DataFrame:
    """Build a synthetic credit-decision data set.

    Each row is one business, indexed by ``BusinessID``, with columns
    ``YearsInBusiness``, ``SBSS`` and ``Approved``. ``Approved`` is computed
    with ``eval_approval`` from the other two columns.

    Args:
        data_size: Number of businesses (rows) to generate.
        save_data: When true, also write the frame to a CSV file.
        save_name: Path of the CSV file. When omitted, a name of the form
            ``basic_credit_sample_data_<YYYY-MM-DD>.csv`` is used.

    Returns:
        The generated DataFrame.
    """
    # Small Business Scoring System: normal distribution, mean 200, std 50.
    # NOTE: the draw is not clamped, so 'invalid' SBSS values > 300 can occur.
    sbss = np.random.normal(loc=200, scale=50, size=data_size)
    # Years in Business: uniform draw between 0 and 15 years.
    yib = np.random.uniform(low=0, high=15, size=data_size)
    # Business Id: sequential ids 0 .. data_size - 1.
    bid = np.arange(data_size)

    data_set = {"BusinessID": bid, "YearsInBusiness": yib, "SBSS": sbss}
    df = DataFrame(data_set).set_index("BusinessID")
    # SBSS should be whole numbers.
    df["SBSS"] = df["SBSS"].astype("int32")
    # Round YIB to 2 decimals (easy to look at).
    df["YearsInBusiness"] = df["YearsInBusiness"].round(2)
    # Label every row with the rule-based decision. Zipping the two columns
    # avoids building a Series per row, which DataFrame.apply(axis=1) does.
    df["Approved"] = [
        eval_approval(score, years)
        for score, years in zip(df["SBSS"], df["YearsInBusiness"])
    ]

    if save_data:
        if save_name is None:
            # %m is the month; %M (used previously) is the minute of the hour.
            save_name = f"basic_credit_sample_data_{dt.now().strftime('%Y-%m-%d')}.csv"
        df.to_csv(save_name)
    return df
||||||
|
|
||||||
|
|
||||||
|
def predict_with_model(model):
    """Prompt for one applicant and compare the model with the approval rules.

    Reads years in business and SBSS from standard input, asks ``model`` for
    a prediction on the single row ``[[yib, sbss]]``, evaluates the same
    inputs with ``eval_approval`` and prints both results.

    Args:
        model: A fitted classifier exposing ``predict``; it is called with
            features in (years in business, SBSS) order.

    Returns:
        False when either prompt is answered with an empty line (the signal
        to stop asking), True after a comparison has been printed.

    Raises:
        ValueError: If a non-empty answer cannot be parsed as a number.
    """
    # Test for the empty answer BEFORE converting: float("") / int("") raise
    # ValueError, so checking the converted value could never detect it.
    yib_answer = input("How many years in business?:\t")
    if yib_answer.strip() == "":
        return False
    yib = float(yib_answer)

    sbss_answer = input("What is your SBSS?:\t")
    if sbss_answer.strip() == "":
        return False
    sbss = int(sbss_answer)

    # predict() returns one label per input row; take the single entry so the
    # result is a plain bool rather than a one-element array.
    model_pred = bool(model.predict([[yib, sbss]])[0] == 1)
    actual = eval_approval(sbss, yib) == 1
    print(f"The model predicts:\t{model_pred}\nThe rules say:\t{actual}.")
    print(f"The model was {'correct' if model_pred == actual else 'incorrect'}\n")
    return True
||||||
|
|
||||||
|
|
||||||
|
# Generate a fresh 100,000-row sample and save it to disk. To reuse a file
# saved by an earlier run, use the commented-out read_csv line instead.
sample_data = generate_sample_data(100000, True, "default_cd_sample.csv")
#sample_data = pd.read_csv("default_cd_sample.csv", index_col="BusinessID")
prt(sample_data)
# Let the format spec build the percentage: round(x, 4) * 100 re-introduces
# float noise and could print values such as 56.989999999999995%.
print(f"Approval Rate: {(sample_data['Approved'] == 1).mean():.2%}\n")
||||||
|
|
||||||
|
# |
||||||
|
# Generate a scatter plot |
||||||
|
# |
||||||
|
# One colour per business: declined deals (Approved == 0) are red,
# approved deals are green.
colors = ["r" if approved == 0 else "g" for approved in sample_data["Approved"]]
plt.scatter(sample_data["YearsInBusiness"], sample_data["SBSS"], c=colors)
plt.xlabel("# of Years in Business")
plt.ylabel("SBSS")
plt.title("Credit Decision")
plt.savefig("cd_scatter1.jpg")
||||||
|
|
||||||
|
# Pair up the two features for every business, in (YIB, SBSS) order -- the
# same order predict_with_model uses when it queries the model.
years_in_business = sample_data["YearsInBusiness"].to_list()
sbss_scores = sample_data["SBSS"].to_list()
combined_data = list(zip(years_in_business, sbss_scores))

# Fit a linear support-vector classifier on the feature pairs, using the
# rule-based 'Approved' column as the target.
LSVCClf = LinearSVC(
    dual=False,
    random_state=0,
    penalty='l2',
    tol=1e-5,
    max_iter=1000000,
)
model = LSVCClf.fit(combined_data, sample_data["Approved"])

# Let people play and see how the model does at prediction; the loop ends
# as soon as predict_with_model reports a falsy result.
keep_asking = True
while keep_asking:
    keep_asking = predict_with_model(model)
||||||
@ -0,0 +1,17 @@ |
|||||||
|
from sklearn.svm import LinearSVC |
||||||
|
from sklearn.datasets import make_classification |
||||||
|
import numpy as np |
||||||
|
from pprint import pprint as prt |
||||||
|
|
||||||
|
# Build a toy classification problem with 5 features per sample; the fixed
# random_state makes the generated data the same on every run.
X, y = make_classification(n_features=5, random_state=0)

# Show the shape and contents of the feature matrix, then the label count.
print(X.shape)
prt(X)
print('\n')
print(len(y))

# Fit a linear SVM with an L1 penalty on the generated data.
LSVCClf = LinearSVC(dual=False, random_state=0, penalty='l1', tol=1e-5)
LSVCClf.fit(X, y)
||||||
Loading…
Reference in new issue