import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Read the raw samples from disk.
df = pd.read_csv("numbers.csv")

# The single feature is each sample's zero-based row position in the file
# (not a timestamp); the target is the "generated number" column.
X = pd.DataFrame(np.arange(df.shape[0]), columns=["rownumber"])
y = df.loc[:, "generated number"]

# Hold out 20% of the rows for evaluation. The split is seeded and stratified
# on the target column.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

# Fit the gradient-boosting classifier on the training partition.
# The estimator is seeded with the same value as the split so that the whole
# run -- not only the partitioning -- is reproducible and does not depend on
# the global NumPy random state.
model = GradientBoostingClassifier(n_estimators=9999, random_state=42)
model.fit(X_train, y_train)

# Score the held-out rows with the fitted model.
y_pred = model.predict(X_test)

# Fraction of exact matches, reported next to the hit rate expected from
# picking uniformly at random among the distinct target values.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
random_accuracy = 1 / y.nunique(dropna=False)

print(
    f"Model accuracy: {accuracy:.4f}",
    f"Random guessing baseline: {random_accuracy:.4f}",
    sep="\n",
)

from sklearn.metrics import mean_absolute_error

# Average absolute gap between true and predicted labels.
# NOTE(review): this assumes the labels are numeric -- confirm against the data.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print("Mean Absolute Error (|actual - predicted|): " + format(mae, ".4f"))

# Optional: Show confusion matrix
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Build the sorted label list once and use it for both the matrix ordering
# and the tick labels so the two always agree.
class_labels = sorted(y.unique())
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)

# plot() draws onto a new axes, exposed afterwards as ``disp.ax_``.
disp.plot(cmap="Blues")
disp.ax_.set_title("Confusion Matrix")
plt.show()
