# Demand Forecast

import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib  # For saving and loading the model
import warnings
from datetime import timedelta
from bokeh.plotting import figure, show
from bokeh.io import output_file
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource

# Suppress warnings
# NOTE: called without a category or module filter, this installs an "ignore"
# rule for every warning issued through the ``warnings`` module for the rest
# of the process, including deprecation notices from the imported libraries.
warnings.filterwarnings("ignore")

# Function to load data
def load_data(data_path):
    """Read the transactions CSV and attach a monthly period column.

    Args:
        data_path: Path (or any object ``pandas.read_csv`` accepts) of a CSV
            containing a ``transaction_date`` column in ``YYYY-MM-DD`` form.

    Returns:
        The loaded DataFrame with ``transaction_date`` converted to datetimes
        and a new ``year_month`` column holding the matching monthly Period.
    """
    frame = pd.read_csv(data_path)

    # Parse once with an explicit format, then derive the month bucket from
    # the parsed values.
    parsed_dates = pd.to_datetime(frame['transaction_date'], format='%Y-%m-%d')
    frame['transaction_date'] = parsed_dates
    frame['year_month'] = parsed_dates.dt.to_period('M')
    return frame

# Function to prepare the dataset
def prepare_data(df):
    """Aggregate transactions into per-item monthly demand with lag features.

    Args:
        df: DataFrame with ``item_id``, ``year_month`` (monthly periods) and
            ``quantity`` columns.

    Returns:
        DataFrame with one row per (item_id, year_month) containing the summed
        ``quantity``, the calendar ``month`` and ``year``, and ``lag_1`` ..
        ``lag_4``: the same item's demand in its 1-4 previous rows. Lags are
        row-based, so a month with no transactions for an item is skipped
        rather than counted as zero. Rows that do not have four earlier
        observations for the same item are dropped.
    """
    # groupby sorts its keys by default, so rows come out ordered by item and
    # then chronologically within each item.
    monthly_demand = df.groupby(['item_id', 'year_month'])['quantity'].sum().reset_index()
    monthly_demand['month'] = monthly_demand['year_month'].dt.month
    monthly_demand['year'] = monthly_demand['year_month'].dt.year

    # Create lag features. The shift is done inside each item's group so an
    # item's first months never inherit the previous item's last quantities.
    for lag in range(1, 5):
        monthly_demand[f'lag_{lag}'] = (
            monthly_demand.groupby('item_id')['quantity'].shift(lag)
        )

    monthly_demand.dropna(inplace=True)
    return monthly_demand

# Function to train the model
def train_model(X_train, y_train):
    """Tune an XGBoost regressor by grid search and return the best estimator.

    The search covers two values each for ``n_estimators``, ``max_depth``,
    ``learning_rate`` and ``subsample`` (16 combinations), scored by R² with
    5-fold cross-validation and ``n_jobs=-1``.

    Args:
        X_train: Training feature matrix.
        y_train: Training target values.

    Returns:
        The ``best_estimator_`` found by the grid search.
    """
    search_space = dict(
        n_estimators=[50, 100],
        max_depth=[3, 5],
        learning_rate=[0.1, 0.2],
        subsample=[0.8, 1.0],
    )
    base_regressor = XGBRegressor(random_state=42)  # fixed seed for repeatability
    tuner = GridSearchCV(
        estimator=base_regressor,
        param_grid=search_space,
        scoring='r2',
        cv=5,
        n_jobs=-1,
    )
    tuner.fit(X_train, y_train)
    return tuner.best_estimator_

# Function to predict future demand
def predict_demand(best_model, item_data, current_stock_level, months_ahead=6):
    """Forecast an item's demand month by month and compute reorder amounts.

    The forecast is recursive: each future month gets its own ``month`` and
    ``year`` features, and its lag features are taken from the most recent
    values in the running series of historical quantities followed by the
    predictions already made. (Reusing the last row's calendar and lag
    features for every future month would make all rows identical and yield
    the same prediction ``months_ahead`` times.)

    Args:
        best_model: Fitted regressor exposing ``predict`` on a DataFrame with
            columns ``item_id, month, year, lag_1, lag_2, lag_3, lag_4``.
        item_data: Chronologically ordered rows for a single item with
            ``item_id``, ``year_month`` (monthly Period), and ``quantity``.
        current_stock_level: Units currently in stock.
        months_ahead: Number of future months to forecast.

    Returns:
        List of ``(month, demand, reorder_amount)`` tuples, one per future
        month. ``month`` is the month-end Timestamp of the forecast period,
        ``demand`` the predicted quantity as a float, and ``reorder_amount``
        is ``max(0, demand - current_stock_level)``. Every month is compared
        against the same ``current_stock_level``; stock is not depleted from
        one month to the next.
    """
    feature_columns = ['item_id', 'month', 'year', 'lag_1', 'lag_2', 'lag_3', 'lag_4']
    item_id = item_data['item_id'].iloc[0]

    # Work in monthly Periods: avoids the deprecated 'M' date_range alias and
    # gives direct access to each future month's calendar fields.
    last_period = item_data['year_month'].iloc[-1]
    future_periods = pd.period_range(start=last_period + 1, periods=months_ahead, freq='M')

    # Running series of actual quantities followed by predictions made so far.
    history = item_data['quantity'].tolist()
    alerts = []

    for period in future_periods:
        row = {'item_id': item_id, 'month': period.month, 'year': period.year}
        for lag in range(1, 5):
            # Pad with 0 when the series is shorter than the lag.
            row[f'lag_{lag}'] = history[-lag] if len(history) >= lag else 0

        future_X = pd.DataFrame([row], columns=feature_columns)
        demand = float(best_model.predict(future_X)[0])
        history.append(demand)  # feeds the next month's lag features

        reorder_amount = max(0, demand - current_stock_level)
        alerts.append((period.end_time, demand, reorder_amount))

    return alerts

# Function to plot results
def plot_results(item_data, alerts, item_id):
    """Render historical and forecast demand side by side in an HTML page.

    Writes ``demand_forecasting.html`` and opens it via Bokeh's ``show``.

    Args:
        item_data: Rows for one item with ``year_month`` and ``quantity``.
        alerts: Sequence of ``(month, demand, reorder_amount)`` tuples; only
            the month (formatted ``%Y-%m``) and the demand are plotted.
        item_id: Identifier used in the two plot titles.
    """
    output_file("demand_forecasting.html")

    # Left panel data: recorded monthly demand.
    source_actual = ColumnDataSource(data={
        'months': item_data['year_month'].astype(str).tolist(),
        'actual_quantities': item_data['quantity'].tolist(),
    })

    # Right panel data: forecast months and quantities from the alert tuples.
    forecast_labels = [entry[0].strftime('%Y-%m') for entry in alerts]
    forecast_values = [entry[1] for entry in alerts]
    source_predicted = ColumnDataSource(data={
        'months': forecast_labels,
        'predicted_quantities': forecast_values,
    })

    # Both figures share labels and size; month strings form categorical axes.
    shared_options = dict(x_axis_label='Months', y_axis_label='Quantity', height=400, width=350)
    p_actual = figure(
        title=f"Actual Demand for Item ID {item_id}",
        x_range=source_actual.data['months'],
        **shared_options,
    )
    p_predicted = figure(
        title=f"Predicted Demand for Item ID {item_id}",
        x_range=source_predicted.data['months'],
        **shared_options,
    )

    p_actual.line(
        'months', 'actual_quantities',
        source=source_actual, line_width=2, color='green', legend_label="Actual Demand",
    )
    p_predicted.vbar(
        x='months', top='predicted_quantities',
        source=source_predicted, width=0.9, color='blue', legend_label="Predicted Demand",
    )

    show(gridplot([[p_actual, p_predicted]]))

# Main function to run the script
def main():
    """Run the interactive demand-forecasting workflow end to end.

    Loads and prepares the order data, makes a random 80/20 train/test split,
    reuses ``best_xgb_model.joblib`` when that file exists (otherwise trains
    and saves a model), prints MAE, MSE, and R² on the test split, then
    prompts for an item id and a stock level, prints the forecast with reorder
    alerts, and plots the results. Both prompts are parsed with ``int``, so
    non-integer input raises ``ValueError``.
    """
    data_path = 'data/orders2.csv'  # Update this path if necessary
    model_path = 'best_xgb_model.joblib'

    monthly_demand = prepare_data(load_data(data_path))

    # Split the data into features and target variable
    target = monthly_demand['quantity']
    features = monthly_demand.drop(columns=['quantity', 'year_month'])
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Reuse a saved model when one exists; train and save one otherwise.
    try:
        best_model = joblib.load(model_path)
    except FileNotFoundError:
        best_model = train_model(X_train, y_train)
        joblib.dump(best_model, model_path)
        print("Trained and saved model.")
    else:
        print("Loaded pre-trained model.")

    # Make predictions and evaluate
    y_pred = best_model.predict(X_test)
    print("Model Evaluation Metrics:")
    metric_table = (
        ("Mean Absolute Error (MAE)", mean_absolute_error),
        ("Mean Squared Error (MSE)", mean_squared_error),
        ("R² Score", r2_score),
    )
    for label, metric in metric_table:
        print(f"{label}: {metric(y_test, y_pred):.2f}")

    # User input for analysis
    item_id = int(input("Enter the item_id you want to analyze: "))
    current_stock_level = int(input("Enter the current stock level for this item: "))

    # Filter data for the selected item
    item_data = monthly_demand[monthly_demand['item_id'] == item_id].copy()

    # Nothing to forecast or plot without prepared rows for this item.
    if item_data.empty:
        print(f"No historical data available for item ID {item_id}.")
        return

    alerts = predict_demand(best_model, item_data, current_stock_level)
    for month, demand, reorder_amount in alerts:
        month_label = month.strftime('%Y-%m')
        print(f"Projected demand for item ID {item_id} in {month_label} is {demand:.2f}.")
        if reorder_amount > 0:
            print(
                f"Alert: You may need to reorder {reorder_amount:.2f} units of item ID {item_id} by {month_label} as the stock might run out.")
        else:
            print(f"No reorder necessary for item ID {item_id}. Sufficient stock available.")

    # Plot the results
    plot_results(item_data, alerts, item_id)

# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()