Working basic 2 feature model

3 years ago · a1d93803d3
commit a1d93803d3
3 changed files with 135 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,4 @@
 *.jpg
 *.png
 *.csv
 /venv
--- a/BasicCreditDec.py
+++ b/BasicCreditDec.py
@ -0,0 +1,114 @@
 from sklearn.svm import LinearSVC
 import numpy as np
 import pandas as pd
 from pandas import DataFrame
 import matplotlib.pyplot as plt
 from pprint import pprint as prt
 from datetime import datetime as dt
 def eval_approval(sbss: int, yib: float) -> int:
     """
     Rule-based credit decision that labels the sample data.

     Scores below 140 are always declined and scores of 280 or more are
     always approved.  In between, every SBSS band has its own minimum
     number of years in business (YIB), and that minimum shrinks as the
     score rises.  This is what produces the downward 'step' boundary
     between approvals and declines, and it is also why a basic linear
     model struggles around the pivot points of those steps.

     Returns 1 for an approval and 0 for a decline.
     """
     # Hard floor: no amount of tenure rescues a score under 140.
     if sbss < 140:
         return 0
     # (exclusive SBSS upper bound, minimum YIB for approval), ascending.
     yib_floor_by_band = (
         (180, 10),
         (200, 8),
         (220, 6),
         (240, 5),
         (260, 3),
         (280, 2),
     )
     for band_ceiling, yib_floor in yib_floor_by_band:
         if sbss < band_ceiling:
             return 0 if yib < yib_floor else 1
     # 280 and above: approved regardless of tenure.
     return 1
 def generate_sample_data(data_size: int, save_data: bool = False, save_name: str = None) -> DataFrame:
     """
     Build a synthetic credit-application data set.

     Each row is one business, indexed by "BusinessID" (0 .. data_size - 1),
     with the columns:
       * "YearsInBusiness" - uniform on [0, 15), rounded to 2 decimals
       * "SBSS"            - normal(mean=200, std=50), truncated to int32
       * "Approved"        - 1/0 label produced by eval_approval()

     data_size: number of rows to generate.
     save_data: when True, the frame is also written to CSV.
     save_name: CSV path; when omitted, a name stamped with today's date
                (basic_credit_sample_data_YYYY-MM-DD.csv) is used.

     Returns the generated DataFrame.
     """
     # Small Business Scoring System.
     # NOTE: the normal distribution is unbounded, so this can produce
     # 'invalid' SBSS values (> 300).
     sbss = np.random.normal(loc=200, scale=50, size=data_size)
     # Years in business: equal chance of anything from 0 up to 15 years.
     yib = np.random.uniform(low=0, high=15, size=data_size)
     df = DataFrame(
         {
             "BusinessID": np.arange(data_size),
             "YearsInBusiness": yib,
             "SBSS": sbss,
         }
     ).set_index("BusinessID")
     # SBSS should be whole numbers.
     df["SBSS"] = df["SBSS"].astype("int32")
     # Round YIB to 2 decimals (easy to look at).
     df["YearsInBusiness"] = df["YearsInBusiness"].round(2)
     # Label every row with the rule-based decision.  Zipping the two columns
     # avoids building a Series per row, which a row-wise apply would do.
     df["Approved"] = [
         eval_approval(score, years)
         for score, years in zip(df["SBSS"], df["YearsInBusiness"])
     ]
     if save_data:
         if save_name is None:
             # %m is the month; %M (used previously) is the minute.
             save_name = f"basic_credit_sample_data_{dt.now().strftime('%Y-%m-%d')}.csv"
         df.to_csv(save_name)
     return df
 def predict_with_model(model):
     """
     Prompt for one applicant and compare the model's answer with the rules.

     Asks for years in business and SBSS on stdin, runs model.predict() on
     the pair (in the same [YIB, SBSS] order the model was trained on) and
     prints whether the prediction matches eval_approval().

     model: a fitted classifier exposing predict().

     Returns False when either prompt is answered with a blank line (the
     caller's signal to stop), True otherwise - including after a
     non-numeric answer, so the caller simply prompts again.
     """
     yib_text = input("How many years in business?:\t").strip()
     # The blank check must happen before conversion: float("") raises.
     if not yib_text:
         return False
     try:
         yib = float(yib_text)
     except ValueError:
         print(f"'{yib_text}' is not a number, please try again.\n")
         return True
     sbss_text = input("What is your SBSS?:\t").strip()
     if not sbss_text:
         return False
     try:
         sbss = int(sbss_text)
     except ValueError:
         print(f"'{sbss_text}' is not a whole number, please try again.\n")
         return True
     # predict() returns a one-element array; unwrap it so a plain bool is
     # printed and compared instead of an array like "[ True]".
     model_pred = bool(model.predict([[yib, sbss]])[0] == 1)
     actual = eval_approval(sbss, yib) == 1
     print(f"The model predicts:\t{model_pred}\nThe rules say:\t{actual}.")
     print(f"The model was {'correct' if model_pred == actual else 'incorrect'}\n")
     return True
 # Generate (and save) a fresh sample; swap in the read_csv line below to
 # reuse a previously saved one instead.
 sample_data = generate_sample_data(100000, True, "default_cd_sample.csv")
 #sample_data = pd.read_csv("default_cd_sample.csv", index_col="BusinessID")
 prt(sample_data)
 # Share of rows with Approved == 1.  The % format spec does the scaling and
 # rounding in one step, avoiding float artifacts from round(x, 4) * 100
 # (e.g. 57.120000000000005%).
 print(f"Approval Rate: {(sample_data['Approved'] == 1).mean():.2%}\n")
 #
 #   Generate a scatter plot
 #
 # Declined deals (Approved == 0) are red, approved deals are green.
 colors = ['r' if a == 0 else 'g' for a in sample_data["Approved"].to_list()]
 plt.scatter(sample_data["YearsInBusiness"], sample_data["SBSS"], c=colors)
 plt.xlabel("# of Years in Business")
 plt.ylabel("SBSS")
 plt.title("Credit Decision")
 plt.savefig("cd_scatter1.jpg")
 # Pair up the two features, one (YIB, SBSS) tuple per business
 combined_data = list(
     sample_data[["YearsInBusiness", "SBSS"]].itertuples(index=False, name=None)
 )
 # Train a linear SVM on those pairs against the rule-based labels
 LSVCClf = LinearSVC(
     penalty='l2',
     dual=False,
     tol=1e-5,
     max_iter=1000000,
     random_state=0,
 )
 model = LSVCClf.fit(combined_data, sample_data["Approved"])
 # Let people play and see how the model does at prediction;
 # keep prompting until predict_with_model() reports it is time to stop
 while predict_with_model(model):
     pass
--- a/LinearSVC.py
+++ b/LinearSVC.py
@ -0,0 +1,17 @@
 from sklearn.svm import LinearSVC
 from sklearn.datasets import make_classification
 import numpy as np
 from pprint import pprint as prt
 # Synthetic 5-feature classification problem with a fixed seed.
 X, y = make_classification(n_features=5, random_state=0)
 print(X.shape)
 prt(X)
 print('\n')
 print(len(y))
 # L1-penalised linear SVM, solved in the primal form (dual=False).
 LSVCClf = LinearSVC(dual=False, random_state=0, penalty='l1', tol=1e-5)
 LSVCClf.fit(X, y)