mlproject / src /components /data_ingestion.py
adarshadda's picture
initial commit
54e6328
raw
history blame
2.17 kB
import os
import sys
import time
from src.exception import CustomException
from src.logger import logging
import pandas as pd
from sklearn.model_selection import train_test_split
from dataclasses import dataclass
from src.components.data_transformation import DataTransformation
from src.components.data_transformation import DataTransformationConfig
from src.components.model_trainer import ModelTrainerConfig
from src.components.model_trainer import ModelTrainer
@dataclass
class DataIngestionConfig:
train_data_path: str=os.path.join('artifacts',"train.csv")
test_data_path: str=os.path.join('artifacts',"test.csv")
raw_data_path: str=os.path.join('artifacts',"data.csv")
class DataIngestion:
def __init__(self):
self.ingestion_config=DataIngestionConfig()
def initiate_data_ingestion(self):
logging.info("Entered the data ingestion method or component")
try:
df=pd.read_csv('notebook/data/stud.csv')
logging.info('Read the dataset as dataframe')
os.makedirs(os.path.dirname(self.ingestion_config.train_data_path),exist_ok=True)
df.to_csv(self.ingestion_config.raw_data_path,index=False,header=True)
logging.info("Train test split initiated")
train_set,test_set=train_test_split(df,test_size=0.2,random_state=42)
train_set.to_csv(self.ingestion_config.train_data_path,index=False,header=True)
test_set.to_csv(self.ingestion_config.test_data_path,index=False,header=True)
logging.info("Inmgestion of the data iss completed")
return(
self.ingestion_config.train_data_path,
self.ingestion_config.test_data_path
)
except Exception as e:
raise CustomException(e,sys)
if __name__=="__main__":
obj=DataIngestion()
train_data,test_data=obj.initiate_data_ingestion()
data_transformation=DataTransformation()
train_arr,test_arr,_=data_transformation.initiate_data_transformation(train_data,test_data)
modeltrainer=ModelTrainer()
print(modeltrainer.initiate_model_trainer(train_arr,test_arr))