import seaborn as sns
import pandas as pd

# Load seaborn's example "penguins" dataset; the result is used as a pandas
# DataFrame throughout the rest of the file (.drop, .isnull, column indexing).
penguins = sns.load_dataset('penguins')
# Bare expression: shows the frame when this is the last expression of a
# notebook cell; it has no effect when run as a plain script.
penguins
# Disabled alternative inspection: list the column names instead.
# penguins.columns

Index(['species', 'island', 'bill_length_mm', 'bill_depth_mm',
       'flipper_length_mm', 'body_mass_g', 'sex'],
      dtype='object')

# Quick look at the distinct values of the categorical columns.
# Only the last bare expression of a notebook cell is displayed, so of the
# two active lines below just the 'island' values would be shown.
penguins['species'].unique()
penguins['island'].unique()
# Disabled alternatives: distinct 'sex' values, and pairwise scatter plots
# of the numeric columns coloured by each categorical column in turn.
# penguins['sex'].unique()
# sns.pairplot(penguins, hue='island').add_legend()
# sns.pairplot(penguins, hue='sex').add_legend()
# sns.pairplot(penguins, hue='species').add_legend()

array(['Male', 'Female', nan], dtype=object)

# Print the number of missing values in each column.
print(penguins.isnull().sum())
# Disabled draft of the cleaning step (drop the 'sex' column, then drop rows
# that still contain missing values); the active version appears later in
# the file, right before the feature matrix is built.
# penguins_clean = penguins.drop(columns=[ 'sex'])
# penguins_clean = penguins_clean.dropna()

# penguins_clean

species               0
island                0
bill_length_mm        2
bill_depth_mm         2
flipper_length_mm     2
body_mass_g           2
sex                  11
dtype: int64

# Pairwise scatter plots of the numeric columns, coloured by one categorical
# column. Only the 'sex' variant is active; the 'island' and 'species'
# variants are kept as disabled alternatives.
# sns.pairplot(penguins, hue='island').add_legend()
sns.pairplot(penguins, hue='sex').add_legend()
# sns.pairplot(penguins, hue='species').add_legend()

<seaborn.axisgrid.PairGrid at 0x169e2cd70>

from sklearn.preprocessing import LabelEncoder
import numpy as np

# The four numeric measurements used as model inputs.
feature_names = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']

# Remove the 'sex' column first, then discard every row that still has a
# missing value in any remaining column.
# NOTE(review): presumably 'sex' is dropped before dropna() so that rows
# missing only 'sex' are kept -- confirm this is the intent.
penguins_clean = penguins.drop(columns=['sex']).dropna()

# Feature matrix: one row per penguin, one column per entry of feature_names.
X = np.array(penguins_clean[feature_names])

# Integer-encode the species labels; target_names keeps the original species
# strings so that plots can show readable class names.
le = LabelEncoder()
y = le.fit_transform(penguins_clean['species'])
target_names = le.classes_

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set. stratify=y keeps the class
# proportions of y in both parts, and random_state=42 makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

import xgboost as xgb
from sklearn.metrics import accuracy_score

# Classical baseline: an XGBoost classifier with library-default settings,
# fitted on the raw (unscaled) training features.
model = xgb.XGBClassifier()
model.fit(X_train, y_train)

# Score the baseline on the held-out rows and report accuracy as a percentage.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print( f"{accuracy*100:.2f}%")

97.10%

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt


# Confusion matrix of the most recent test-set predictions held in y_pred
# (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Draw the same matrix with the original species names on the axes.
cm_display = ConfusionMatrixDisplay(cm, display_labels=target_names)
cm_display.plot()
plt.show()

[[29  1  0]
 [ 0 14  0]
 [ 0  1 24]]

import qmlmodel

# Column-wise minimum and maximum over the full feature matrix X.
# NOTE(review): X_min / X_max are not used by the active calls below; only
# the commented-out variant further down passes them.
X_min, X_max = np.min(X, axis=0), np.max(X, axis=0)

# Rescale the train and test features with qmlmodel.rescale_to_angle, passing
# the full matrix X as the second argument.
# NOTE(review): presumably the second argument is the reference data from
# which the scaling range is derived -- confirm against qmlmodel. If so, X
# also contains the test rows, so the scaling range is informed by test data.
X_train_rescaled = qmlmodel.rescale_to_angle(X_train, 
                                             X)
X_test_rescaled = qmlmodel.rescale_to_angle(X_test, 
                                             X)

# Disabled variant: pass explicit per-column bounds instead of the matrix.
# X_train_rescaled = qmlmodel.rescale_to_angle(X_train, 
#                                              X_min, X_max)
# X_test_rescaled = qmlmodel.rescale_to_angle(X_test, 
#                                              X_min, X_max)



def objective_function(t):
    """Return ``qmlmodel.loss`` evaluated on the training data for ``t``.

    Single-argument adapter for the optimizer: the rescaled training
    features and the training labels are read from module scope, so only
    the parameter vector ``t`` varies between calls.
    """
    training_loss = qmlmodel.loss(t, X_train_rescaled, y_train)
    return training_loss

from scipy.optimize import minimize
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt


# Reproducible starting point: four values drawn uniformly from [0, 2*pi).
# (Seed 15 was tried as an alternative start.)
np.random.seed(42)
t0 = np.random.rand(4) * np.pi * 2

# Per-evaluation histories, filled in by the tracking objective below:
# loss values, training accuracies (in percent) and parameter vectors.
optimization_path, accuracy_path, t_path = [], [], []


def objective_function_with_memory(t):
    """Evaluate the training loss at ``t`` and log the evaluation.

    Appends ``t`` to ``t_path``, the loss to ``optimization_path`` and the
    training accuracy (as a percentage) to ``accuracy_path``, then returns
    the loss so the function can be handed directly to the optimizer.
    """
    t_path.append(t)
    loss_value = objective_function(t)
    optimization_path.append(loss_value)
    train_predictions = qmlmodel.predict(X_train_rescaled, t)
    accuracy_path.append(accuracy_score(y_train, train_predictions) * 100)
    return loss_value


# Minimise the training loss with COBYLA using the solver's default options
# (a capped run with options={'maxiter': 31} was tried as an alternative).
result = minimize(objective_function_with_memory, t0, method='COBYLA')

print(result)

# Training accuracy at the parameters returned by the optimizer.
y_pred = qmlmodel.predict(X_train_rescaled, result.x)
accuracy = accuracy_score(y_train, y_pred)
print( f" \nAccuracy on training set: {accuracy*100:.2f}%")

 message: Optimization terminated successfully.
 success: True
  status: 1
     fun: 0.7455756825332729
       x: [ 2.651e+00  8.121e+00  3.931e+00  7.059e+00]
    nfev: 230
   maxcv: 0.0
 
Accuracy on training set: 84.25%

# Training accuracy (in percent) recorded at objective evaluations 20..49.
# The x axis is the evaluation index, the y axis the accuracy logged there.
# (Plotting the whole optimization_path loss history is the alternative view.)
evaluation_index = np.arange(len(accuracy_path))
plt.plot(
    evaluation_index[20:50],
    accuracy_path[20:50],
    marker='o',
    linestyle='-',
)

plt.grid(True)
plt.show()

# Test-set accuracy of the qmlmodel at the parameters returned by the
# optimizer (result.x); this rebinds the module-level y_pred and accuracy.
y_pred = qmlmodel.predict(X_test_rescaled, result.x)
accuracy = accuracy_score(y_test, y_pred)
print( f" \nAccuracy on test set: {accuracy*100:.2f}%")

 
Accuracy on test set: 86.96%

# Multi-start search: run a short COBYLA optimisation from several random
# starting points and keep the parameter vector that reaches the highest
# training accuracy over ALL seeds tried.
best_accuracy = 0
t_best = None  # best parameters found so far; only replaced on improvement


def objective_function_with_memory(t):
    """Evaluate the training loss at ``t`` and log the evaluation.

    Appends a copy of ``t`` to ``t_path``, the loss to ``optimization_path``
    and the training accuracy (as a percentage) to ``accuracy_path``, then
    returns the loss. The three lists are looked up in module scope at call
    time, so rebinding them to fresh lists (as the loop below does for every
    seed) starts a new history without redefining this function.
    """
    # Defensive copy: the history must not alias an array that the optimizer
    # could reuse or modify between evaluations.
    t_path.append(np.copy(t))
    z = objective_function(t)
    optimization_path.append(z)
    accuracy = accuracy_score(y_train, qmlmodel.predict(X_train_rescaled, t))
    accuracy_path.append(accuracy*100)
    return z


for seed in range(15, 16):
    np.random.seed(seed)
    t0 = np.random.rand(4)*np.pi*2

    # Fresh histories for this seed.
    optimization_path = []
    accuracy_path = []
    t_path = []

    result = minimize(objective_function_with_memory,
                      t0, method='COBYLA', options={'maxiter': 50})

    # Take the evaluation with the highest training accuracy seen during the
    # run, which is not necessarily the optimizer's final point.
    iter_nb = np.array(accuracy_path).argmax()
    t_candidate = t_path[iter_nb]

    y_pred = qmlmodel.predict(X_train_rescaled, t_candidate)
    accuracy = accuracy_score(y_train, y_pred)
    if t_best is None or accuracy > best_accuracy:
        # Improvement: only now does this seed's candidate become the overall
        # best, so t_best never regresses to a worse seed's parameters.
        best_accuracy = accuracy
        t_best = t_candidate
        print( f"\nSeed:{seed} - iter_nb:{iter_nb} - Accuracy on training: {accuracy*100:.2f}% - {t_best}")
    else:
        print(f"{seed}",end='-')

Seed:15 - iter_nb:17 - Accuracy on training: 94.51% - [ 3.87029391  1.8466505  -0.74787299  0.81880668]

# Evaluate the selected parameters t_best on the held-out test set.
y_pred = qmlmodel.predict(X_test_rescaled, t_best)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print( f" \nAccuracy on test set: {accuracy*100:.2f}%")

# Confusion matrix of those predictions (rows: true class, columns:
# predicted class), printed and then drawn with the species names as labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

cm_display = ConfusionMatrixDisplay(cm, display_labels=target_names)
cm_display.plot()
plt.show()

 
Accuracy on test set: 95.65%
[[29  0  1]
 [ 1 12  1]
 [ 0  0 25]]

# Bare expression: shows the selected parameter vector when this is the last
# expression of a notebook cell; it has no effect when run as a plain script.
t_best

array([ 3.87029391,  1.8466505 , -0.74787299,  0.81880668])