# mlproject/src/components/model_trainer.py
import os
import sys
from dataclasses import dataclass

from catboost import CatBoostRegressor
from sklearn.ensemble import (
    AdaBoostRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

from src.exception import CustomException
from src.logger import logging
from src.utils import save_object, evaluate_models
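
# ---------------------------------------------------------------------------
# Illustrative sketches (assumptions, not the project's real code): both
# `evaluate_models` and `save_object` live in src/utils.py and are not shown
# in this file. Judging from how they are called below, minimal versions
# could look like the two stand-ins here; they are renamed with a leading
# underscore so they do not shadow the real imports.
# ---------------------------------------------------------------------------
def _evaluate_models_sketch(X_train, y_train, X_test, y_test, models, param):
    # Hypothetical: grid-search each model over its parameter grid, refit the
    # dict's own estimator with the best params, and report test-set R^2.
    from sklearn.model_selection import GridSearchCV

    report = {}
    for name, model in models.items():
        gs = GridSearchCV(model, param[name], cv=3)
        gs.fit(X_train, y_train)
        model.set_params(**gs.best_params_)
        model.fit(X_train, y_train)  # leave a fitted estimator in `models`
        report[name] = r2_score(y_test, model.predict(X_test))
    return report


def _save_object_sketch(file_path, obj):
    # Hypothetical: pickle the object, creating the target directory first.
    import pickle

    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    with open(file_path, "wb") as f:
        pickle.dump(obj, f)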

@dataclass
class ModelTrainerConfig:
    # Where the trained model artifact is written.
    trained_model_file_path: str = os.path.join("artifacts", "model.pkl")


class ModelTrainer:
    def __init__(self):
        self.model_trainer_config = ModelTrainerConfig()
    def initiate_model_trainer(self, train_array, test_array):
        try:
            logging.info("Split training and test input data")
            # The last column of each array is the target; the rest are features.
            X_train, y_train, X_test, y_test = (
                train_array[:, :-1],
                train_array[:, -1],
                test_array[:, :-1],
                test_array[:, -1],
            )
            models = {
                "Random Forest": RandomForestRegressor(),
                "Decision Tree": DecisionTreeRegressor(),
                "Gradient Boosting": GradientBoostingRegressor(),
                "Linear Regression": LinearRegression(),
                "XGBRegressor": XGBRegressor(),
                "CatBoosting Regressor": CatBoostRegressor(verbose=False),
                "AdaBoost Regressor": AdaBoostRegressor(),
            }
            params = {
                "Decision Tree": {
                    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                    # 'splitter': ['best', 'random'],
                    # 'max_features': ['sqrt', 'log2'],
                },
                "Random Forest": {
                    # 'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                    # 'max_features': ['sqrt', 'log2', None],
                    'n_estimators': [8, 16, 32, 64, 128, 256],
                },
                "Gradient Boosting": {
                    # 'loss': ['squared_error', 'huber', 'absolute_error', 'quantile'],
                    'learning_rate': [0.1, 0.01, 0.05, 0.001],
                    'subsample': [0.6, 0.7, 0.75, 0.8, 0.85, 0.9],
                    # 'criterion': ['squared_error', 'friedman_mse'],
                    # 'max_features': ['sqrt', 'log2'],
                    'n_estimators': [8, 16, 32, 64, 128, 256],
                },
                "Linear Regression": {},
                "XGBRegressor": {
                    'learning_rate': [0.1, 0.01, 0.05, 0.001],
                    'n_estimators': [8, 16, 32, 64, 128, 256],
                },
                "CatBoosting Regressor": {
                    'depth': [6, 8, 10],
                    'learning_rate': [0.01, 0.05, 0.1],
                    'iterations': [30, 50, 100],
                },
                "AdaBoost Regressor": {
                    'learning_rate': [0.1, 0.01, 0.5, 0.001],
                    # 'loss': ['linear', 'square', 'exponential'],
                    'n_estimators': [8, 16, 32, 64, 128, 256],
                },
            }
            # Grid-search each model over its parameter grid and collect
            # test-set R^2 scores, keyed by model name.
            model_report: dict = evaluate_models(
                X_train=X_train, y_train=y_train,
                X_test=X_test, y_test=y_test,
                models=models, param=params,
            )

            # Pick the model with the highest test-set score.
            best_model_name = max(model_report, key=model_report.get)
            best_model_score = model_report[best_model_name]
            best_model = models[best_model_name]

            if best_model_score < 0.6:
                raise CustomException("No best model found: top R^2 is below 0.6", sys)
            logging.info(f"Best model on the test dataset: {best_model_name} (R^2={best_model_score:.4f})")
            # Persist the best model (evaluate_models is expected to have
            # fitted the estimators it was given) for later inference.
            save_object(
                file_path=self.model_trainer_config.trained_model_file_path,
                obj=best_model,
            )

            predicted = best_model.predict(X_test)
            r2_square = r2_score(y_test, predicted)
            return r2_square

        except Exception as e:
            raise CustomException(e, sys)
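
# ---------------------------------------------------------------------------
# Example usage (illustrative only; shapes and paths are assumptions). In the
# full pipeline, train_array/test_array come from the data transformation
# step, with the target as the last column. A quick smoke test on synthetic
# data might look like this:
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np

    rng = np.random.default_rng(42)
    X = rng.normal(size=(200, 5))
    y = X @ rng.normal(size=5) + rng.normal(scale=0.1, size=200)
    data = np.column_stack([X, y])

    trainer = ModelTrainer()
    score = trainer.initiate_model_trainer(data[:160], data[160:])
    print(f"Best model test R^2: {score:.4f}")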