https://wandb.ai/som/Zest-AI/reports/Lending-Club-Analysis--VmlldzoxNTgyODI4
Because plotly does not save its plots in a Jupyter notebook, they cannot be seen without running the notebook. For the fully executed notebook, see the attached PDF version.
The dataset is taken from Kaggle.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as mpatches
from matplotlib.colors import ListedColormap
from utils.accept_utils import *
from utils.reject_utils import *
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
class EDA:
    """
    Exploratory-data-analysis driver for one loan table.

    Holds a dataset together with a label saying which kind of loans it
    contains, and dispatches to the plotting helpers for that kind.
    """

    def __init__(self, data, conclusion):
        """
        Initialize the EDA class.

        inputs:
        data: the loan data; passed unchanged to every plotting helper.
        conclusion: "accept" or "reject"; selects which set of plots
        make_visualizations() produces.
        """
        self.data = data
        self.conclusion = conclusion

    def make_visualizations(self, plot_wandb=False):
        """
        Make the visualizations that match self.conclusion.

        inputs:
        plot_wandb: boolean, if True, plots will be saved to wandb.
        if False, plots will be saved to local directory.
        * you need to have wandb installed to use this option

        Any conclusion other than "accept" or "reject" produces no plots.
        """
        if self.conclusion == "accept":
            # Forward the caller's choice. This call used to hard-code
            # plot_wandb=False, so the flag was ignored for these plots.
            other_plots(self.data, plot_wandb=plot_wandb)
            plot_categotical_features_accept(self.data, plot_wandb)
        elif self.conclusion == "reject":
            plot_reject_yearwise(self.data, plot_wandb)
            plot_loan_title_category(self.data, plot_wandb)
            plot_risk_score_category(self.data, plot_wandb)
            plot_state_category(self.data, plot_wandb)
            plot_employment_length_category(self.data, plot_wandb)
            plot_policy_code_category(self.data, plot_wandb)
            plot_amount_requested_category(self.data, plot_wandb)
"""Since we don't have a clear understanding for many features, it would be unwise to include them into our analysis without truly understanding a feature. \ Moreover, not all features will be available to us before a person gets loan. \ Some features will only be available after a person gets a loan. We have the following features we will be proceeding with:
"""
# Load the sampled accepted and rejected loan tables (accepted first).
accept, reject = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('data/accept_sample.csv', 'data/reject_sample.csv')
)

# One EDA helper per table; the second argument selects the plot set.
acception_visual, rejection_visual = EDA(accept, 'accept'), EDA(reject, 'reject')

# Plot without wandb: accepted loans first, then rejected loans.
acception_visual.make_visualizations(plot_wandb=False)
rejection_visual.make_visualizations(plot_wandb=False)
<Figure size 720x432 with 0 Axes>
<Figure size 720x432 with 0 Axes>
Here we can observe that applicants with an employment length of < 1 year are rejected the most.
<Figure size 720x360 with 0 Axes>