import pandas as pd
from io import StringIO
import pandas as pd
import numpy as np
import xgboost as xgb
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import plotly.express as px
import logging

from datetime import datetime

import plotly.graph_objects as go
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import pyplot
import whisper
from openai import AzureOpenAI
import json
import re
import gradio as gr

# Logging setup: records are appended to a log file, each tagged with the
# timestamp, level, source file, line number and function that emitted it.
logging.basicConfig(
    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S',
    filename='demand_forecasting.log',  # Change the log file name here
    filemode='a',
)
LOGGER = logging.getLogger(__name__)

# Name of the desired verbosity; change it here to adjust the log level.
log_level_env = 'INFO'

# Supported level names mapped to the numeric constants of the logging module.
log_level_dict = {
    level_name: getattr(logging, level_name)
    for level_name in ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
}

# Unknown level names fall back to INFO.
log_level = log_level_dict.get(log_level_env, log_level_dict['INFO'])
LOGGER.setLevel(log_level)

class DemandForecasting:
    """Speech-driven demand forecasting pipeline served through a Gradio UI.

    The pipeline trains an XGBoost regressor on an uploaded CSV file,
    transcribes a spoken request with Whisper, turns the transcript into
    feature values with an Azure OpenAI chat model and predicts the target
    value for those features.
    """

    # Shape and strptime format of the ``dd-mm-YYYY`` dates that ``parse_text``
    # asks the language model to produce.
    _DATE_PATTERN = re.compile(r'\d{2}-\d{2}-\d{4}')
    _DATE_FORMAT = '%d-%m-%Y'

    def __init__(self):
        """Create the Azure OpenAI client and load the Whisper speech model."""
        # NOTE(review): AzureOpenAI() is built without arguments, so its
        # endpoint/credentials presumably come from the environment - confirm.
        self.client = AzureOpenAI()
        self.whisper_model = whisper.load_model("medium.en")

    def get_column(self, train_csv_path: str):
        """
        Return the column names of a CSV file.

        Args:
            train_csv_path (str): Path to the training CSV file.

        Returns:
            list: Column names in file order.
        """
        # nrows=0 parses only the header line instead of loading every row.
        header_only = pd.read_csv(train_csv_path, nrows=0)
        return header_only.columns.tolist()

    def load_data(self, train_csv_path: str) -> pd.DataFrame:
        """
        Load training data from a CSV file.

        Args:
            train_csv_path (str): Path to the training CSV file.

        Returns:
            pd.DataFrame: DataFrame containing the training data, or None if
            the file could not be read (the error is logged).
        """
        try:
            return pd.read_csv(train_csv_path)

        except Exception as e:
            # Log an error message if an exception occurs during data loading
            LOGGER.error(f"Error loading data: {e}")
            return None

    def find_date_column(self, df_data: pd.DataFrame, list_columns: list) -> str:
        """
        Find the first listed column that holds date information.

        Args:
        - df_data (pd.DataFrame): Input DataFrame.
        - list_columns (list): Column names to search, in priority order.

        Returns:
        - str: Name of the first column that is already datetime typed or
          whose non-numeric values can be parsed as dates; None if there is
          no such column.
        """
        for column in list_columns:
            # A requested name that is absent from the frame cannot be the date column
            if column not in df_data.columns:
                continue

            values = df_data[column]

            if pd.api.types.is_datetime64_any_dtype(values):
                return column

            # pd.to_datetime accepts plain numbers (read as offsets from the
            # epoch), so numeric feature columns must not be tried at all.
            if pd.api.types.is_numeric_dtype(values):
                continue

            try:
                pd.to_datetime(values)
            except (ValueError, TypeError):
                continue
            return column

        # Return None if no date column is found
        return None

    @staticmethod
    def _add_date_features(frame: pd.DataFrame, dates: pd.Series) -> None:
        """Add the calendar features derived from *dates* to *frame* in place."""
        frame['day'] = dates.dt.day            # Day of the month
        frame['month'] = dates.dt.month        # Month
        frame['year'] = dates.dt.year          # Year

        # Cyclical encoding for months
        frame['month_sin'] = np.sin(2 * np.pi * frame['month'] / 12)
        frame['month_cos'] = np.cos(2 * np.pi * frame['month'] / 12)

        # Day of the week (0 = Monday, 6 = Sunday)
        frame['day_of_week'] = dates.dt.weekday

        # ISO week of the year, cast to a plain integer dtype
        frame['week_of_year'] = dates.dt.isocalendar().week.astype(int)

    def preprocess_data(self, df_data: pd.DataFrame, list_columns, target_column=None) -> pd.DataFrame:
        """
        Keep the requested columns and replace the date column by calendar features.

        Args:
        - df_data (pd.DataFrame): Input DataFrame to preprocess (not modified).
        - list_columns (list): Feature columns to keep; one of them must hold
          dates. The list is not modified.
        - target_column (str, optional): Column to keep in addition to the
          features, typically the value to predict.

        Returns:
        - pd.DataFrame: Preprocessed copy without the date column and with the
          columns day, month, year, month_sin, month_cos, day_of_week and
          week_of_year added; None if preprocessing fails (the error is logged).
        """
        try:
            feature_columns = list(list_columns)
            columns_to_keep = set(feature_columns)
            if target_column is not None:
                columns_to_keep.add(target_column)

            # Work on a copy restricted to the requested columns
            df_data = df_data.loc[:, [col for col in df_data.columns if col in columns_to_keep]].copy()

            # The date is looked up among the features only, never the target
            date_column = self.find_date_column(df_data, feature_columns)
            if date_column is None:
                raise ValueError("No date column found in the provided list of columns.")

            dates = pd.to_datetime(df_data[date_column])
            self._add_date_features(df_data, dates)

            return df_data.drop(columns=[date_column])

        except Exception as e:
            # Log an error message if an exception occurs during data preprocessing
            LOGGER.error(f"Error preprocessing data: {e}")

            # Return None in case of an error
            return None

    def train_model(self, train: pd.DataFrame, target_column, list_columns) -> tuple:
        """
        Train an XGBoost model using the provided training data.

        Args:
        - train (pd.DataFrame): Preprocessed training data including the target.
        - target_column (str): Name of the column to predict.
        - list_columns (list): Not used by this method; kept for existing callers.

        Returns:
        - tuple: (trained model, true validation labels, predicted validation
          labels, validation RMSE message). All four entries are None if
          training fails (the error is logged).
        """
        try:
            # Extract features and target variable
            X = train.drop(columns=[target_column])
            y = train[target_column]

            # NOTE(review): train_test_split shuffles rows by default, so the
            # validation rows are not guaranteed to be later in time than the
            # training rows.
            X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=333)

            # Convert data into DMatrix format for XGBoost
            dtrain = xgb.DMatrix(X_train, label=y_train)
            dval = xgb.DMatrix(X_val, label=y_val)

            # Parameters for XGBoost
            param = {
                'max_depth': 9,
                'eta': 0.3,
                'objective': 'reg:squarederror'
            }
            num_round = 60

            # Train the model
            model_xgb = xgb.train(param, dtrain, num_round)

            # Validate the model
            y_val_pred = model_xgb.predict(dval)
            mse = mean_squared_error(y_val, y_val_pred)
            validation = f"Validation RMSE: {np.sqrt(mse)}"

            return model_xgb, y_val, y_val_pred, validation

        except Exception as e:
            # Log an error message if an exception occurs during model training
            LOGGER.error(f"Error training model: {e}")

            # Same arity as the success path so callers can always unpack four values
            return None, None, None, None

    def plot_evaluation_interactive(self, y_true: np.ndarray, y_pred: np.ndarray, title: str,
                                    show: bool = True) -> "go.Figure":
        """
        Build an interactive Plotly scatter plot of predictions against true values.

        Args:
        - y_true (np.ndarray): True values.
        - y_pred (np.ndarray): Predicted values.
        - title (str): Title of the plot.
        - show (bool): Also display the figure through Plotly's own renderer.
          Pass False when the caller renders the returned figure itself.

        Returns:
        - The Plotly figure, or None if plotting fails (the error is logged).
        """
        try:
            # color_discrete_map only applies when a ``color`` argument is given,
            # so the purple markers are requested through the colour sequence.
            fig = px.scatter(
                x=y_true,
                y=y_pred,
                labels={'x': 'True Values', 'y': 'Predictions'},
                title=title,
                color_discrete_sequence=['purple'],
            )
            if show:
                fig.show()
            return fig

        except Exception as e:
            # Log an error message if an exception occurs during plot generation
            LOGGER.error(f"Error plotting evaluation: {e}")
            return None

    def predict_sales_for_date(self, input_data, model: "xgb.Booster") -> str:
        """
        Predict the target value for one set of feature values.

        Args:
        - input_data (dict): Feature name -> value. Exactly the date is
          expected as a 'dd-mm-YYYY' string; if several values look like such
          a date, the last one is used.
        - model (xgb.Booster): Trained XGBoost model.

        Returns:
        - str: Message containing the date and the predicted value, or None if
          the prediction fails (the error is logged).
        """
        try:
            # Locate the entry that holds the dd-mm-YYYY date
            date_column = None
            for key, value in input_data.items():
                if isinstance(value, str) and self._DATE_PATTERN.fullmatch(value.strip()):
                    date_column = key

            if date_column is None:
                raise ValueError("No date value in dd-mm-YYYY format found in the input data.")

            input_features = pd.DataFrame([input_data])

            # An explicit format keeps the day first; format inference would
            # read ambiguous dates such as 05-03-2024 month-first.
            dates = pd.to_datetime(input_features[date_column].str.strip(), format=self._DATE_FORMAT)
            self._add_date_features(input_features, dates)
            input_features = input_features.drop(columns=[date_column])

            # Align names and order with the features the model was trained on;
            # the key order of input_data is not under our control.
            expected_features = getattr(model, "feature_names", None)
            if expected_features:
                input_features = input_features[list(expected_features)]

            # Values may arrive as numeric strings; fail loudly if they are not numbers
            input_features = input_features.apply(pd.to_numeric)

            # Convert input features to DMatrix format
            dinput = xgb.DMatrix(input_features)

            # Make predictions using the trained model
            predicted_sales = model.predict(dinput)[0]

            predicted_result = f"""{input_data[str(date_column)]}Predicted Value Is {predicted_sales}"""
            return predicted_result

        except Exception as e:
            # Log an error message if an exception occurs during sales prediction
            LOGGER.error(f"Error predicting sales: {e}")

            # Return None in case of an error
            return None

    def audio_to_text(self, audio_path):
        """
        Transcribe an audio file to text with the loaded Whisper model.

        Args:
        - audio_path: Path of the audio file to transcribe.

        Returns:
        - str: The transcript.
        """
        result = self.whisper_model.transcribe(audio_path)
        print("audio_to_text", result["text"])
        return result["text"]

    def parse_text(self, text, column_list):
        """
        Extract the values of the given columns from free text with the chat model.

        Args:
        - text (str): Transcript to extract the values from.
        - column_list (list): Names of the values to extract.

        Returns:
        - The decoded JSON answer of the model (expected to be a dict of
          column name -> value).

        Raises:
        - ValueError: If the model returns no content or invalid JSON
          (json.JSONDecodeError is a ValueError).
        """
        # Define the prompt for the model
        user_prompt = (
            f" extract the {column_list}. all values should be integer data type."
            " if date in there the format is dd-mm-YYYY.\n"
            f"text```{text}```\n"
            "return result should be in JSON format:\n"
        )
        conversation = [
            {"role": "system", "content": ""},
            {"role": "user", "content": user_prompt},
        ]

        # NOTE(review): "GPT-3" is presumably the name of the Azure deployment - confirm.
        chat_completion = self.client.chat.completions.create(
            model="GPT-3",
            messages=conversation,
            max_tokens=500,
            temperature=0,
            n=1,
            stop=None,
        )

        # Extract the generated text from the API response
        generated_text = chat_completion.choices[0].message.content
        if not generated_text:
            raise ValueError("The language model returned an empty response.")

        # The answer may be wrapped in a Markdown code fence or surrounded by
        # prose; decode only the outermost {...} span when there is one.
        json_span = re.search(r"\{.*\}", generated_text, flags=re.DOTALL)
        json_data = json.loads(json_span.group(0) if json_span else generated_text)
        print("parse_text", json_data)
        return json_data

    def main(self, train_csv_path: str, audio_path, target_column, column_list) -> tuple:
        """
        Execute the demand forecasting pipeline for one recording.

        Args:
        - train_csv_path (str): Path to the training CSV file.
        - audio_path: Path to the recorded request.
        - target_column (str): Name of the column to predict.
        - column_list (str): Comma-separated names of the feature columns,
          including the date column.

        Returns:
        - tuple: (validation plot, prediction message, validation RMSE
          message). On failure the error is logged and (None, None, None) is
          returned so that the three-value shape is kept.
        """
        try:
            target_column = target_column.strip()

            # Accept "a, b" as well as "a,b"; ignore empty entries and keep the
            # target out of the features requested from the speech input.
            feature_columns = [
                name.strip()
                for name in column_list.split(",")
                if name.strip() and name.strip() != target_column
            ]

            print("train_csv_path", train_csv_path)
            print("audio_path", audio_path)
            print("column_list", feature_columns)
            print("target_column", target_column)

            text = self.audio_to_text(audio_path)

            input_data = self.parse_text(text, feature_columns)

            # Load data
            train_data = self.load_data(train_csv_path)
            if train_data is None:
                raise ValueError("Training data could not be loaded.")

            # Preprocess the train data
            train_df = self.preprocess_data(train_data, feature_columns, target_column)
            if train_df is None:
                raise ValueError("Training data could not be preprocessed.")

            # Train model and get validation predictions
            trained_model, y_val, y_val_pred, validation = self.train_model(train_df, target_column, feature_columns)
            if trained_model is None:
                raise RuntimeError("Model training failed.")

            # The figure is handed back to the caller, so it is not shown here
            plot = self.plot_evaluation_interactive(y_val, y_val_pred, title='Validation Set Evaluation', show=False)

            # Predict the target for the spoken feature values using the trained model
            predicted_value = self.predict_sales_for_date(input_data, trained_model)

            return plot, predicted_value, validation

        except Exception as e:
            # Log an error message if an exception occurs in the main function
            LOGGER.error(f"Error in main function: {e}")
            return None, None, None

    def gradio_interface(self):
        """Build the Gradio UI, wire its events to the pipeline and launch it."""
        with gr.Blocks(css="style.css", theme="freddyaboulton/test-blue") as demo:

            gr.HTML("""<center><h1 style="color:#fff">Demand Forecasting</h1></center>""")

            with gr.Row():
                with gr.Column(scale=0.50):
                    train_csv = gr.File(elem_classes="uploadbutton")
                with gr.Column(scale=0.50):
                    column_list = gr.Textbox(label="Column List")

            with gr.Row():
                with gr.Column(scale=0.50):
                    audio_path = gr.Audio(sources=["microphone"], type="filepath")
            with gr.Row():
                with gr.Column(scale=0.50):
                    selected_column = gr.Textbox(label="Select column")
                with gr.Column(scale=0.50):
                    target_column = gr.Textbox(label="target column")

            with gr.Row():
                validation = gr.Textbox(label="Validation")
                predicted_result = gr.Textbox(label="Predicted Result")
                plot = gr.Plot()

            # Show the CSV's column names as soon as a file is uploaded
            train_csv.upload(self.get_column, train_csv, column_list)

            # Run the whole pipeline when a recording is finished
            audio_path.stop_recording(self.main, [train_csv, audio_path, target_column, selected_column], [plot, predicted_result, validation])

        demo.launch(debug=True)

if __name__ == "__main__":
    # Script entry point: build the pipeline object, then start the Gradio UI.
    forecasting_app = DemandForecasting()
    forecasting_app.gradio_interface()