Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- import pandas as pd
- from sklearn.model_selection import train_test_split, GridSearchCV
- from xgboost import XGBRegressor
- from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
- import joblib # For saving and loading the model
- import warnings
- from datetime import timedelta
- from bokeh.plotting import figure, show
- from bokeh.io import output_file
- from bokeh.layouts import gridplot
- from bokeh.models import ColumnDataSource
# Install a blanket "ignore" filter so that no warning is reported for the
# rest of the process.
warnings.filterwarnings(action="ignore")
- # Function to load data
def load_data(data_path):
    """Read the transactions CSV and tag every row with its calendar month.

    Args:
        data_path: Location of the CSV file (anything ``pandas.read_csv``
            accepts). The file must contain a ``transaction_date`` column
            formatted as ``YYYY-MM-DD``.

    Returns:
        The loaded DataFrame with ``transaction_date`` converted to datetimes
        and an extra ``year_month`` column holding monthly periods.
    """
    frame = pd.read_csv(data_path)
    parsed_dates = pd.to_datetime(frame['transaction_date'], format='%Y-%m-%d')
    frame['transaction_date'] = parsed_dates
    # Collapse each date to its month so rows can later be grouped per month.
    frame['year_month'] = parsed_dates.dt.to_period('M')
    return frame
- # Function to prepare the dataset
def prepare_data(df):
    """Aggregate transactions into monthly demand per item with lag features.

    Args:
        df: Transactions with ``item_id``, ``year_month`` (monthly periods)
            and ``quantity`` columns, as produced by ``load_data``.

    Returns:
        A DataFrame with one row per (item, month) holding the summed
        ``quantity``, the calendar ``month`` and ``year``, and ``lag_1`` ..
        ``lag_4``: the same item's quantity 1 to 4 rows earlier. Rows that do
        not have all four lags (the first four months of every item) are
        dropped.
    """
    monthly_demand = (
        df.groupby(['item_id', 'year_month'])['quantity'].sum().reset_index()
    )
    monthly_demand['month'] = monthly_demand['year_month'].dt.month
    monthly_demand['year'] = monthly_demand['year_month'].dt.year

    # Shift inside each item's own history. A plain ``shift`` over the whole
    # frame would hand the first rows of every item the trailing quantities
    # of the previous item as "lags".
    # NOTE: months without any transaction have no row, so a lag counts
    # observed months of that item, not necessarily consecutive calendar
    # months.
    per_item_quantity = monthly_demand.groupby('item_id')['quantity']
    for lag in range(1, 5):
        monthly_demand[f'lag_{lag}'] = per_item_quantity.shift(lag)

    return monthly_demand.dropna()
- # Function to train the model
def train_model(X_train, y_train):
    """Grid-search XGBoost hyper-parameters and return the best estimator.

    Args:
        X_train: Training feature matrix.
        y_train: Training target values.

    Returns:
        The ``XGBRegressor`` refit with the parameter combination that scored
        the highest mean R² across 5 cross-validation folds.
    """
    search_space = dict(
        n_estimators=[50, 100],
        max_depth=[3, 5],
        learning_rate=[0.1, 0.2],
        subsample=[0.8, 1.0],
    )
    base_estimator = XGBRegressor(random_state=42)
    tuner = GridSearchCV(
        estimator=base_estimator,
        param_grid=search_space,
        cv=5,
        scoring='r2',
        n_jobs=-1,  # use every available core
    )
    tuner.fit(X_train, y_train)
    return tuner.best_estimator_
- # Function to predict future demand
def predict_demand(best_model, item_data, current_stock_level, months_ahead=6):
    """Forecast an item's monthly demand and the stock shortfall per month.

    The forecast is recursive: each future month is predicted on its own, and
    the prediction is then fed back as ``lag_1`` for the following month.
    Every row carries the calendar month and year of the month being
    forecast.

    Args:
        best_model: Fitted regressor exposing ``predict``. It is called with a
            one-row DataFrame whose columns are ``item_id``, ``month``,
            ``year``, ``lag_1`` .. ``lag_4``, in that order.
        item_data: History of a single item, ordered oldest to newest, with
            ``item_id``, ``year_month`` (monthly periods) and ``quantity``
            columns. Must contain at least one row.
        current_stock_level: Units on hand before the first forecast month.
        months_ahead: Number of future months to forecast.

    Returns:
        A list of ``(month, demand, reorder_amount)`` tuples, one per
        forecast month in chronological order. ``month`` is a month-end
        ``Timestamp``, ``demand`` the predicted quantity, and
        ``reorder_amount`` the part of that demand the remaining stock cannot
        cover. Stock is depleted by each month's demand before the next month
        is evaluated.
    """
    n_lags = 4
    lag_columns = [f'lag_{k}' for k in range(1, n_lags + 1)]
    feature_columns = ['item_id', 'month', 'year'] + lag_columns

    item_id = item_data['item_id'].iloc[0]
    first_period = item_data['year_month'].iloc[-1] + 1
    future_periods = pd.period_range(
        start=first_period, periods=months_ahead, freq='M'
    )

    # Most recent quantities, oldest first; histories shorter than n_lags are
    # padded with zeros at the old end.
    recent = [float(q) for q in item_data['quantity'].iloc[-n_lags:]]
    recent = [0.0] * (n_lags - len(recent)) + recent

    remaining_stock = current_stock_level
    alerts = []
    for period in future_periods:
        features = {
            'item_id': item_id,
            'month': period.month,
            'year': period.year,
        }
        for k, column in enumerate(lag_columns, start=1):
            features[column] = recent[-k]
        future_X = pd.DataFrame([features], columns=feature_columns)
        demand = float(best_model.predict(future_X)[0])

        # A regressor can return a negative value; never let that add stock.
        consumed = max(0.0, demand)
        reorder_amount = max(0, consumed - remaining_stock)
        remaining_stock = max(0, remaining_stock - consumed)

        alerts.append((period.end_time.normalize(), demand, reorder_amount))
        # Slide the lag window: this forecast becomes next month's lag_1.
        recent = recent[1:] + [demand]
    return alerts
- # Function to plot results
def plot_results(item_data, alerts, item_id):
    """Render historical and forecast demand side by side in an HTML page.

    Writes ``demand_forecasting.html`` and opens it: a green line chart of
    the item's historical monthly quantities next to a blue bar chart of the
    forecast quantities.

    Args:
        item_data: Per-month history with ``year_month`` and ``quantity``.
        alerts: Sequence whose entries hold the forecast month at index 0
            (an object with ``strftime``) and the forecast quantity at
            index 1.
        item_id: Identifier shown in both chart titles.
    """
    output_file("demand_forecasting.html")

    history_labels = item_data['year_month'].astype(str).tolist()
    history_values = item_data['quantity'].tolist()
    source_actual = ColumnDataSource(data={
        'months': history_labels,
        'actual_quantities': history_values,
    })

    forecast_labels = [entry[0].strftime('%Y-%m') for entry in alerts]
    forecast_values = [entry[1] for entry in alerts]
    source_predicted = ColumnDataSource(data={
        'months': forecast_labels,
        'predicted_quantities': forecast_values,
    })

    # Month labels are strings, so each figure gets a categorical x-range.
    p_actual = figure(
        title=f"Actual Demand for Item ID {item_id}",
        x_axis_label='Months',
        y_axis_label='Quantity',
        x_range=source_actual.data['months'],
        height=400,
        width=350,
    )
    p_predicted = figure(
        title=f"Predicted Demand for Item ID {item_id}",
        x_axis_label='Months',
        y_axis_label='Quantity',
        x_range=source_predicted.data['months'],
        height=400,
        width=350,
    )

    p_actual.line(
        'months',
        'actual_quantities',
        source=source_actual,
        line_width=2,
        color='green',
        legend_label="Actual Demand",
    )
    p_predicted.vbar(
        x='months',
        top='predicted_quantities',
        source=source_predicted,
        width=0.9,
        color='blue',
        legend_label="Predicted Demand",
    )

    show(gridplot([[p_actual, p_predicted]]))
- # Main function to run the script
def main():
    """Run the forecasting workflow from the command line.

    Loads the order history, builds the monthly feature table, loads a cached
    model from ``best_xgb_model.joblib`` (training and saving one when the
    file does not exist), prints evaluation metrics on a held-out 20% split,
    then asks for an item id and stock level and prints and plots that item's
    demand forecast.
    """
    data_path = 'data/orders2.csv'  # Update this path if necessary
    monthly_demand = prepare_data(load_data(data_path))

    # Separate the target from the feature columns.
    y = monthly_demand['quantity']
    X = monthly_demand.drop(columns=['quantity', 'year_month'])
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Reuse a previously saved model when one is on disk.
    try:
        best_model = joblib.load('best_xgb_model.joblib')
    except FileNotFoundError:
        best_model = train_model(X_train, y_train)
        joblib.dump(best_model, 'best_xgb_model.joblib')
        print("Trained and saved model.")
    else:
        print("Loaded pre-trained model.")

    # Score the model on the held-out rows.
    y_pred = best_model.predict(X_test)
    print("Model Evaluation Metrics:")
    mae = mean_absolute_error(y_test, y_pred)
    print(f"Mean Absolute Error (MAE): {mae:.2f}")
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error (MSE): {mse:.2f}")
    r2 = r2_score(y_test, y_pred)
    print(f"R² Score: {r2:.2f}")

    # Ask which item to analyse.
    item_id = int(input("Enter the item_id you want to analyze: "))
    current_stock_level = int(input("Enter the current stock level for this item: "))

    item_data = monthly_demand[monthly_demand['item_id'] == item_id].copy()
    if item_data.empty:
        print(f"No historical data available for item ID {item_id}.")
        return

    alerts = predict_demand(best_model, item_data, current_stock_level)
    for month, demand, reorder_amount in alerts:
        print(f"Projected demand for item ID {item_id} in {month.strftime('%Y-%m')} is {demand:.2f}.")
        if not reorder_amount > 0:
            print(f"No reorder necessary for item ID {item_id}. Sufficient stock available.")
            continue
        print(
            f"Alert: You may need to reorder {reorder_amount:.2f} units of item ID {item_id} by {month.strftime('%Y-%m')} as the stock might run out.")

    plot_results(item_data, alerts, item_id)
# Run the workflow only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please sign in to add a comment.