In [2]:
import seaborn as sns
import pandas as pd
In [ ]:
penguins = sns.load_dataset('penguins')
penguins
# penguins.columns
Out[ ]:
Index(['species', 'island', 'bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g', 'sex'], dtype='object')
In [ ]:
penguins['species'].unique()
penguins['island'].unique()
# penguins['sex'].unique()
# sns.pairplot(penguins, hue='island').add_legend()
# sns.pairplot(penguins, hue='sex').add_legend()
# sns.pairplot(penguins, hue='species').add_legend()
Out[ ]:
array(['Male', 'Female', nan], dtype=object)
In [18]:
print(penguins.isnull().sum())
# penguins_clean = penguins.drop(columns=[ 'sex'])
# penguins_clean = penguins_clean.dropna()
# penguins_clean
species 0 island 0 bill_length_mm 2 bill_depth_mm 2 flipper_length_mm 2 body_mass_g 2 sex 11 dtype: int64
In [19]:
# sns.pairplot(penguins, hue='island').add_legend()
sns.pairplot(penguins, hue='sex').add_legend()
# sns.pairplot(penguins, hue='species').add_legend()
Out[19]:
<seaborn.axisgrid.PairGrid at 0x169e2cd70>
In [53]:
from sklearn.preprocessing import LabelEncoder
import numpy as np
penguins_clean = penguins.drop(columns=[ 'sex'])
penguins_clean = penguins_clean.dropna()
feature_names = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']
X = np.array(penguins_clean[feature_names])
le = LabelEncoder()
y = le.fit_transform(penguins_clean['species'])
target_names = le.classes_
In [54]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, stratify=y, random_state=42
)
In [55]:
import xgboost as xgb
from sklearn.metrics import accuracy_score
model = xgb.XGBClassifier()
model.fit(X_train,y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print( f"{accuracy*100:.2f}%")
97.10%
In [56]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
cm = confusion_matrix(y_test, y_pred)
print(cm)
cm_display = ConfusionMatrixDisplay(confusion_matrix = cm, display_labels = target_names)
cm_display.plot()
plt.show()
[[29 1 0] [ 0 14 0] [ 0 1 24]]
In [57]:
import qmlmodel
X_min, X_max = np.min(X, axis=0), np.max(X, axis=0)
X_train_rescaled = qmlmodel.rescale_to_angle(X_train,
X)
X_test_rescaled = qmlmodel.rescale_to_angle(X_test,
X)
# X_train_rescaled = qmlmodel.rescale_to_angle(X_train,
# X_min, X_max)
# X_test_rescaled = qmlmodel.rescale_to_angle(X_test,
# X_min, X_max)
def objective_function(t):
return qmlmodel.loss(t,X_train_rescaled,y_train)
In [84]:
from scipy.optimize import minimize
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
np.random.seed(42)
# np.random.seed(15)
t0= np.random.rand(4)*np.pi*2
optimization_path = []
accuracy_path=[]
t_path = []
def objective_function_with_memory(t):
t_path.append(t)
z = objective_function(t)
optimization_path.append(z)
accuracy = accuracy_score(y_train, qmlmodel.predict(X_train_rescaled, t))
accuracy_path.append(accuracy*100)
return z
result = minimize(objective_function_with_memory,
t0, method='COBYLA' )
# t0, method='COBYLA',options={'maxiter': 31} )
print(result)
y_pred = qmlmodel.predict(X_train_rescaled, result.x)
accuracy = accuracy_score(y_train, y_pred)
print( f" \nAccuracy on training set: {accuracy*100:.2f}%")
message: Optimization terminated successfully. success: True status: 1 fun: 0.7455756825332729 x: [ 2.651e+00 8.121e+00 3.931e+00 7.059e+00] nfev: 230 maxcv: 0.0 Accuracy on training set: 84.25%
In [86]:
plt.plot(list(range(len(accuracy_path)))[20:50],
accuracy_path[20:50], marker='o', linestyle='-')
# plt.plot(list(range(len(optimization_path))),
# optimization_path, marker='o', linestyle='-')
plt.grid(True)
plt.show()
In [67]:
y_pred = qmlmodel.predict(X_test_rescaled, result.x)
accuracy = accuracy_score(y_test, y_pred)
print( f" \nAccuracy on test set: {accuracy*100:.2f}%")
Accuracy on test set: 86.96%
In [79]:
best_accuracy = 0
for seed in range(15,16):
np.random.seed(seed)
t0= np.random.rand(4)*np.pi*2
optimization_path = []
accuracy_path=[]
t_path = []
def objective_function_with_memory(t):
t_path.append(t)
z = objective_function(t)
optimization_path.append(z)
accuracy = accuracy_score(y_train, qmlmodel.predict(X_train_rescaled, t))
accuracy_path.append(accuracy*100)
return z
result = minimize(objective_function_with_memory,
t0, method='COBYLA',options={'maxiter': 50} )
iter_nb = np.array(accuracy_path).argmax()
t_best = t_path[iter_nb]
y_pred = qmlmodel.predict(X_train_rescaled, t_best)
accuracy = accuracy_score(y_train, y_pred)
if accuracy>best_accuracy:
best_accuracy = accuracy
print( f"\nSeed:{seed} - iter_nb:{iter_nb} - Accuracy on training: {accuracy*100:.2f}% - {t_best}")
else:
print(f"{seed}",end='-')
Seed:15 - iter_nb:17 - Accuracy on training: 94.51% - [ 3.87029391 1.8466505 -0.74787299 0.81880668]
In [82]:
y_pred = qmlmodel.predict(X_test_rescaled, t_best)
accuracy = accuracy_score(y_test, y_pred)
print( f" \nAccuracy on test set: {accuracy*100:.2f}%")
cm = confusion_matrix(y_test, y_pred)
print(cm)
cm_display = ConfusionMatrixDisplay(confusion_matrix = cm, display_labels = target_names)
cm_display.plot()
plt.show()
Accuracy on test set: 95.65% [[29 0 1] [ 1 12 1] [ 0 0 25]]
In [87]:
t_best
Out[87]:
array([ 3.87029391, 1.8466505 , -0.74787299, 0.81880668])