{ "cells": [ { "cell_type": "markdown", "execution_count": null, "metadata": { "papermill": { "duration": 0.021928, "end_time": "2020-09-08T03:15:55.361283", "exception": false, "start_time": "2020-09-08T03:15:55.339355", "status": "completed" }, "tags": [] }, "source": [ "# Predicting Loan Repayment\n", "\n", "\n", "The dataset for this project was retrieved from Kaggle, the home of data science.\n", "\n", "The main aim of this project is to predict whether a customer will repay their loan. This is therefore a supervised classification problem." ] }, { "cell_type": "markdown", "execution_count": null, "metadata": { "papermill": { "duration": 0.02001, "end_time": "2020-09-08T03:15:55.401984", "exception": false, "start_time": "2020-09-08T03:15:55.381974", "status": "completed" }, "tags": [] }, "source": [ "### **1- Importing Libraries**" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2020-09-08T03:15:55.456123Z", "iopub.status.busy": "2020-09-08T03:15:55.455295Z", "iopub.status.idle": "2020-09-08T03:15:58.502367Z", "shell.execute_reply": "2020-09-08T03:15:58.501676Z" }, "papermill": { "duration": 3.080197, "end_time": "2020-09-08T03:15:58.502515", "exception": false, "start_time": "2020-09-08T03:15:55.422318", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "%matplotlib inline\n", "from sklearn.preprocessing import LabelEncoder\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import confusion_matrix, classification_report,accuracy_score\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.svm import SVC\n", "from 
sklearn.model_selection import GridSearchCV\n", "import plotly.express as px" ] }, { "cell_type": "markdown", "execution_count": null, "metadata": { "papermill": { "duration": 0.0203, "end_time": "2020-09-08T03:15:58.543518", "exception": false, "start_time": "2020-09-08T03:15:58.523218", "status": "completed" }, "tags": [] }, "source": [ "### **2- Getting Data**" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2020-09-08T03:15:58.596255Z", "iopub.status.busy": "2020-09-08T03:15:58.594912Z", "iopub.status.idle": "2020-09-08T03:15:58.615955Z", "shell.execute_reply": "2020-09-08T03:15:58.616654Z" }, "papermill": { "duration": 0.052871, "end_time": "2020-09-08T03:15:58.616870", "exception": false, "start_time": "2020-09-08T03:15:58.563999", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "df=pd.read_csv('../input/loan-prediction-problem-dataset/train_u6lujuX_CVtuZ9i.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2020-09-08T03:15:58.702993Z", "iopub.status.busy": "2020-09-08T03:15:58.701710Z", "iopub.status.idle": "2020-09-08T03:15:58.726499Z", "shell.execute_reply": "2020-09-08T03:15:58.727470Z" }, "papermill": { "duration": 0.08401, "end_time": "2020-09-08T03:15:58.727718", "exception": false, "start_time": "2020-09-08T03:15:58.643708", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", " | Loan_ID | \n", "Gender | \n", "Married | \n", "Dependents | \n", "Education | \n", "Self_Employed | \n", "ApplicantIncome | \n", "CoapplicantIncome | \n", "LoanAmount | \n", "Loan_Amount_Term | \n", "Credit_History | \n", "Property_Area | \n", "Loan_Status | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "LP001002 | \n", "Male | \n", "No | \n", "0 | \n", "Graduate | \n", "No | \n", "5849 | \n", "0.0 | \n", "NaN | \n", "360.0 | \n", "1.0 | \n", "Urban | \n", "Y | \n", "
1 | \n", "LP001003 | \n", "Male | \n", "Yes | \n", "1 | \n", "Graduate | \n", "No | \n", "4583 | \n", "1508.0 | \n", "128.0 | \n", "360.0 | \n", "1.0 | \n", "Rural | \n", "N | \n", "
2 | \n", "LP001005 | \n", "Male | \n", "Yes | \n", "0 | \n", "Graduate | \n", "Yes | \n", "3000 | \n", "0.0 | \n", "66.0 | \n", "360.0 | \n", "1.0 | \n", "Urban | \n", "Y | \n", "
3 | \n", "LP001006 | \n", "Male | \n", "Yes | \n", "0 | \n", "Not Graduate | \n", "No | \n", "2583 | \n", "2358.0 | \n", "120.0 | \n", "360.0 | \n", "1.0 | \n", "Urban | \n", "Y | \n", "
4 | \n", "LP001008 | \n", "Male | \n", "No | \n", "0 | \n", "Graduate | \n", "No | \n", "6000 | \n", "0.0 | \n", "141.0 | \n", "360.0 | \n", "1.0 | \n", "Urban | \n", "Y | \n", "
\n", " | loan_id | \n", "gender | \n", "married | \n", "dependents | \n", "education | \n", "self_employed | \n", "applicant_income | \n", "co-applicant_income | \n", "loan_amount | \n", "loan_amount_term | \n", "credit_history | \n", "property_area | \n", "loan_status | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "LP001002 | \n", "Male | \n", "No | \n", "0 | \n", "Graduate | \n", "No | \n", "5849 | \n", "0.0 | \n", "146.412162 | \n", "360.0 | \n", "1.0 | \n", "Urban | \n", "Y | \n", "
1 | \n", "LP001003 | \n", "Male | \n", "Yes | \n", "1 | \n", "Graduate | \n", "No | \n", "4583 | \n", "1508.0 | \n", "128.000000 | \n", "360.0 | \n", "1.0 | \n", "Rural | \n", "N | \n", "
2 | \n", "LP001005 | \n", "Male | \n", "Yes | \n", "0 | \n", "Graduate | \n", "Yes | \n", "3000 | \n", "0.0 | \n", "66.000000 | \n", "360.0 | \n", "1.0 | \n", "Urban | \n", "Y | \n", "
3 | \n", "LP001006 | \n", "Male | \n", "Yes | \n", "0 | \n", "Not Graduate | \n", "No | \n", "2583 | \n", "2358.0 | \n", "120.000000 | \n", "360.0 | \n", "1.0 | \n", "Urban | \n", "Y | \n", "
4 | \n", "LP001008 | \n", "Male | \n", "No | \n", "0 | \n", "Graduate | \n", "No | \n", "6000 | \n", "0.0 | \n", "141.000000 | \n", "360.0 | \n", "1.0 | \n", "Urban | \n", "Y | \n", "
\n", " | applicant_income | \n", "co-applicant_income | \n", "loan_amount | \n", "loan_amount_term | \n", "credit_history | \n", "
---|---|---|---|---|---|
count | \n", "542.000000 | \n", "542.000000 | \n", "542.000000 | \n", "542.000000 | \n", "542.000000 | \n", "
mean | \n", "5291.911439 | \n", "1590.699114 | \n", "145.518139 | \n", "341.955720 | \n", "0.863469 | \n", "
std | \n", "5506.461371 | \n", "2543.801628 | \n", "81.749385 | \n", "65.441582 | \n", "0.343669 | \n", "
min | \n", "150.000000 | \n", "0.000000 | \n", "9.000000 | \n", "12.000000 | \n", "0.000000 | \n", "
25% | \n", "2843.750000 | \n", "0.000000 | \n", "100.000000 | \n", "360.000000 | \n", "1.000000 | \n", "
50% | \n", "3848.000000 | \n", "1149.000000 | \n", "129.500000 | \n", "360.000000 | \n", "1.000000 | \n", "
75% | \n", "5820.500000 | \n", "2281.000000 | \n", "167.750000 | \n", "360.000000 | \n", "1.000000 | \n", "
max | \n", "81000.000000 | \n", "33837.000000 | \n", "650.000000 | \n", "480.000000 | \n", "1.000000 | \n", "