## **Initilize**

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plot

from sklearn.model_selection import train_test_split


import sklearn.gaussian_process as gp
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC


### **Import Bayesian Optimization Module by Thomas Huijskens**

**Source**: https://github.com/thuijskens/bayesian-optimization

In [13]:
from google.colab import files
src = list(files.upload().values())[0]
open('gp.py','wb').write(src)
import gp

%load gp.py
%run gp.py

Saving gp.py to gp (1).py


### **Import German Credit Data Set**

In [3]:
from google.colab import files
uploaded = files.upload()


import io

data = pd.read_csv(io.BytesIO(uploaded['German_Credit_Data.csv']))

Saving German_Credit_Data.csv to German_Credit_Data (1).csv


In [4]:
data.shape

(1000, 21)

In [5]:
data.head(n=7)

Unnamed: 0,Status_of_existing_checking_account,Duration_in_month,Credit_history,Purpose,Credit_amount,Savings_account,Present_employment_since,Installment_rate,Marital_status_and_gender,Guarantors,Present_residence_since,Property,Age,Other_installment_plans,Housing,No_of_Existing_credits,Job,liable_people_for_maintenance,Telephone,Foreign_worker,Good_Bad
0,A11,6,A34,A43,1169,A65,A75,4,A93,A101,4,A121,67,A143,A152,2,A173,1,A192,A201,1
1,A12,48,A32,A43,5951,A61,A73,2,A92,A101,2,A121,22,A143,A152,1,A173,1,A191,A201,2
2,A14,12,A34,A46,2096,A61,A74,2,A93,A101,3,A121,49,A143,A152,1,A172,2,A191,A201,1
3,A11,42,A32,A42,7882,A61,A74,2,A93,A103,4,A122,45,A143,A153,1,A173,2,A191,A201,1
4,A11,24,A33,A40,4870,A61,A73,3,A93,A101,4,A124,53,A143,A153,2,A173,2,A191,A201,2
5,A14,36,A32,A46,9055,A65,A73,2,A93,A101,4,A124,35,A143,A153,1,A172,2,A192,A201,1
6,A14,24,A32,A42,2835,A63,A75,3,A93,A101,4,A122,53,A143,A152,1,A173,1,A191,A201,1


### **Feature Engineering**

In [6]:
data['Duration_in_month_sqr']=data['Duration_in_month']**2
data['Credit_amount_sqr']=data['Credit_amount']**2
data['Installment_rate_sqr']=data['Installment_rate']**2

In [8]:
nms = list(data.columns.values)
p = len(nms)

x_nm =['Duration_in_month','Credit_amount','Installment_rate','Present_residence_since','Age'
       ,'No_of_Existing_credits','liable_people_for_maintenance','Duration_in_month_sqr'
       ,'Credit_amount_sqr','Installment_rate_sqr']
y_nm =['Good_Bad']

print('Feature variables are :', x_nm)
print('Target Variable is : ', y_nm)

x = data[x_nm]
y = data[y_nm]


Feature variables are : ['Duration_in_month', 'Credit_amount', 'Installment_rate', 'Present_residence_since', 'Age', 'No_of_Existing_credits', 'liable_people_for_maintenance', 'Duration_in_month_sqr', 'Credit_amount_sqr', 'Installment_rate_sqr']
Target Variable is :  ['Good_Bad']


In [10]:
# create training and testing vars
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
y_train = np.ravel(y_train)
y_test = np.ravel(y_test)

### **SVM Classification with Bayesian Optimization**

In [11]:
## Define loss function 

def sample_loss(params):
  C = params[0]
  gamma = params[1]

  # Sample C and gamma on the log-uniform scale
  model = SVC(C=10**C, gamma= 10**gamma, random_state=123)

  # Sample parameters on a log scale
  return cross_val_score(model,X=X_train,y=y_train,scoring='accuracy', cv=3).mean()

### **Define the boundary**

In [14]:
bounds = np.array([[-10, 10], [-10, 10]])
print(bounds)

xp, yp = bayesian_optimisation(n_iters=25, sample_loss=sample_loss, 
                               bounds=bounds,
                               n_pre_samples=10)


# The maximum is at:
xp_hat = xp[np.array(yp).argmax(), :]

print(xp_hat)

[[-10  10]
 [-10  10]]


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


[6.86151037 2.61298404]


In [16]:
print(10**xp_hat)

SVC_best = SVC(C=10**xp_hat[0],gamma=10**xp_hat[1])

SVC_best.fit(X_train, y_train) 


print("In-sample score = ",SVC_best.score(X_train,y_train)*100)
print("Out-sample score = ",SVC_best.score(X_test,y_test)*100)

[7.26959761e+06 4.10189031e+02]
In-sample score =  100.0
Out-sample score =  68.66666666666667


### **Neural Network Classification with Bayesian Optimization**

In [24]:
from sklearn.neural_network import MLPClassifier


#Initializing the MLPClassifier
NNclassifier = MLPClassifier(hidden_layer_sizes=(3,2,3,2)
                             , max_iter=100
                             ,activation = 'relu'
                             ,solver='sgd'
                             ,random_state=1)

#Fitting the training data to the network
NNclassifier.fit(X_train, y_train)

print("In-sample score = ",NNclassifier.score(X_train,y_train))
print("Out-sample score = ",NNclassifier.score(X_test,y_test))
print("cross val score = ",cross_val_score(NNclassifier,X=X_train,y=y_train,scoring='accuracy', cv=3))




In-sample score =  0.7057142857142857
Out-sample score =  0.6866666666666666
cross val score =  [0.70512821 0.70815451 0.70386266]


**This is an ad-hoc choice for hidden_layer_sizes**

In [25]:
## Define loss function 

def sample_loss_NN(params):
  h1 = np.int(params[0])
  h2 = np.int(params[1])
  h3 = np.int(params[2])
  h4 = np.int(params[3])
  
  # Sample C and gamma on the log-uniform scale
  model = MLPClassifier(hidden_layer_sizes=(h1,h2,h3,h4), max_iter=500,activation = 'relu',solver='sgd',random_state=1)
  model.fit(X_train, y_train)
  # Sample parameters on a log scale
  return cross_val_score(model,X=X_train,y=y_train,scoring='accuracy', cv=3).mean()

In [26]:
bounds = np.array([[2, 5], [2, 5], [2,5], [2,5]])
print(bounds)

xp, yp = bayesian_optimisation(n_iters=25, sample_loss=sample_loss_NN, 
                               bounds=bounds,
                               n_pre_samples=10)


[[2 5]
 [2 5]
 [2 5]
 [2 5]]


In [28]:
#print(xp)
#print(yp)

# The maximum is at:
xp_hat = np.round(xp[np.array(yp).argmax(), :])

print(np.round(xp_hat))

[4. 2. 5. 3.]


In [29]:
#Initializing the MLPClassifier
NNclassifier_best = MLPClassifier(hidden_layer_sizes=(4,2,4,3)
                                      , max_iter=100
                                       ,activation = 'relu'
                                       ,solver='sgd'
                                       ,random_state=1)

#Fitting the training data to the network
NNclassifier_best.fit(X_train, y_train)

print("In-sample score = ",NNclassifier_best.score(X_train,y_train))
print("Out-sample score = ",NNclassifier_best.score(X_test,y_test))


In-sample score =  0.7057142857142857
Out-sample score =  0.6866666666666666
