kgauvin603 committed on
Commit
78ecccd
1 Parent(s): bf20a70

Update app.py

Files changed (1)
  1. app.py +98 -49
app.py CHANGED
@@ -47,59 +47,108 @@ import warnings
 # Ignore all warnings
 warnings.filterwarnings("ignore")

-# Download the dataset
-url = "http://www.ulb.ac.be/di/map/adalpozz/data/creditcard.Rdata"
-dst_path = "./creditcard.Rdata"
-wget.download(url, dst_path)
-
-# Load the dataset
-parsed_res = rdata.parser.parse_file(dst_path)
-res = rdata.conversion.convert(parsed_res)
-dataset = res['creditcard'].reset_index(drop=True).drop(['Time'], axis=1)
-
-# Prepare the data
-y = dataset['Class'].astype(int)  # Convert to integers
-df = dataset.drop(['Class'], axis=1)
-df.columns = df.columns.astype(str)
-
-# Split the data
-X_train, X_test, y_train, y_test = train_test_split(df, y, train_size=0.6, random_state=0, stratify=y)
-X_train, _, y_train, _ = train_test_split(X_train, y_train, train_size=0.2, random_state=0, stratify=y_train)
-
-# Reset indices
-X_train.reset_index(drop=True, inplace=True)
-y_train.reset_index(drop=True, inplace=True)
-
-# Define the numerical features and the pipeline for numerical features
-numerical_features = ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
-                      'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
-                      'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount']
-
-numerical_pipeline = make_pipeline(
-    StandardScaler()  # Standardize numerical features
+# Run the training script placed in the same directory as app.py.
+# The training script trains and persists the anomaly-detection model
+# under the filename 'model.joblib'.
+subprocess.run(['python', 'train.py'])
+
+# Load the freshly trained model from disk
+model = joblib.load("model.joblib")
+
+# Define the predict function
+def predict(csv_filename):
+    # Read the CSV file (a single headerless row of feature values)
+    df = pd.read_csv(csv_filename, header=None)
+
+    # Convert the first row of the DataFrame to a list of floats
+    client_data = df.iloc[0].tolist()
+
+    # Check that exactly 29 values are present (V1-V28 plus Amount)
+    if len(client_data) != 29:
+        raise ValueError("The CSV file must contain exactly 29 values.")
+
+    # Unpack the list of values
+    V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V20, V21, V22, V23, V24, V25, V26, V27, V28, Amount = client_data
+
+    # Create the data dictionary
+    data = {
+        'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8, 'V9': V9, 'V10': V10,
+        'V11': V11, 'V12': V12, 'V13': V13, 'V14': V14, 'V15': V15, 'V16': V16, 'V17': V17, 'V18': V18, 'V19': V19, 'V20': V20,
+        'V21': V21, 'V22': V22, 'V23': V23, 'V24': V24, 'V25': V25, 'V26': V26, 'V27': V27, 'V28': V28, 'Amount': Amount
+    }
+
+    # Convert the data dictionary to a single-row DataFrame
+    input_df = pd.DataFrame([data])
+
+    # Make a prediction using the loaded model
+    prediction = model.predict(input_df)
+
+    return prediction[0], Amount  # Return both the prediction and the Amount
+
+# Map each name to its respective CSV filename
+def get_csv_filename(name):
+    name_to_filename = {
+        'Ted': 'Ted.csv',
+        'Bill': 'Bill.csv',
+        'Jill': 'Jill.csv',
+        'Juan': 'Juan.csv'
+    }
+    return name_to_filename.get(name, 'Ted.csv')  # Default to 'Ted.csv' if name not found
+
+# Define the Gradio interface function for a single prediction
+def gradio_predict(name):
+    csv_filename = get_csv_filename(name)
+    prediction, amount = predict(csv_filename)
+    return f"The flagged transaction amount is {amount} and the prediction is {prediction}"
+
+# Define the function for bulk analysis
+def bulk_analysis(file):
+    # Read the uploaded CSV file
+    df = pd.read_csv(file.name)
+
+    # Assume the last column is the label and the rest are features
+    X_test = df.iloc[:, :-1]
+    y_test = df.iloc[:, -1]
+
+    # Make predictions using the loaded model
+    y_test_pred = model.predict(X_test)
+
+    # Debugging: print counts of anomalies among actual and predicted labels
+    actual_anomalies = sum(y_test == 1)
+    predicted_anomalies = sum(y_test_pred == 1)
+    print(f"Actual anomalies: {actual_anomalies}, Predicted anomalies: {predicted_anomalies}")
+
+    # Find rows where actual and predicted are both 1
+    correctly_predicted_anomalies = X_test[(y_test == 1) & (y_test_pred == 1)]
+    print(f"Correctly predicted anomalies: {len(correctly_predicted_anomalies)}")
+
+    # Save the results to a CSV file
+    result_filename = "correct_anomalies.csv"
+    correctly_predicted_anomalies.to_csv(result_filename, index=False)
+
+    return result_filename  # Return the path to the saved file
+
+
+# Create the Gradio interface for single predictions
+iface = gr.Interface(
+    fn=gradio_predict,
+    inputs=gr.Dropdown(choices=['Ted', 'Bill', 'Jill', 'Juan'], label="Select a name"),
+    outputs="text"
 )

-# Create a column transformer named preprocessor to apply the numerical pipeline to the numerical features
-preprocessor = make_column_transformer(
-    (numerical_pipeline, numerical_features)
+# Add the bulk analysis upload interface
+bulk_iface = gr.Interface(
+    fn=bulk_analysis,
+    inputs=gr.File(label="Bulk Analysis"),
+    outputs=gr.File(label="Download Results")
 )

-# Create the MCD anomaly-detection model
-clf = MCD()
-
-# Create a pipeline combining the preprocessing step with the MCD anomaly detector
-model_pipeline = make_pipeline(
-    preprocessor,  # Apply preprocessing steps
-    clf  # MCD anomaly-detection model
+# Combine the interfaces into tabs
+combined_iface = gr.TabbedInterface(
+    [iface, bulk_iface],
+    tab_names=["Single Prediction", "Bulk Analysis"]
 )

-# Fit the model and predict anomalies on the test set
-model_pipeline.fit(X_train)
-y_test_pred = model_pipeline.predict(X_test)
-
-# Evaluate the model
-f1 = f1_score(y_test, y_test_pred)
-conf_matrix = confusion_matrix(y_test, y_test_pred)
-model_pipeline.named_steps
-#
+# Launch the interface
+combined_iface.launch(share=True)
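
For context on the new startup sequence: train.py is not part of this commit, so everything below is a sketch, not the author's actual script. It reassembles the training code deleted above (download, preprocessing, MCD pipeline) and adds the one step the new app.py clearly relies on, persisting the fitted pipeline as model.joblib with joblib.dump. The pyod import path for MCD is a guess, since the original import block sits above this hunk.

# train.py -- hypothetical sketch, reconstructed from the removed code
import warnings
import wget
import rdata
import joblib
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import make_column_transformer
from pyod.models.mcd import MCD  # assumed import; not shown in this hunk

warnings.filterwarnings("ignore")

# Download and parse the R dataset, as the removed code did
url = "http://www.ulb.ac.be/di/map/adalpozz/data/creditcard.Rdata"
dst_path = "./creditcard.Rdata"
wget.download(url, dst_path)
res = rdata.conversion.convert(rdata.parser.parse_file(dst_path))
dataset = res['creditcard'].reset_index(drop=True).drop(['Time'], axis=1)

# Separate features and labels
y = dataset['Class'].astype(int)
df = dataset.drop(['Class'], axis=1)
df.columns = df.columns.astype(str)

# Same split and subsampling as the removed code
X_train, X_test, y_train, y_test = train_test_split(df, y, train_size=0.6, random_state=0, stratify=y)
X_train, _, y_train, _ = train_test_split(X_train, y_train, train_size=0.2, random_state=0, stratify=y_train)

# Standardize the 29 numeric columns and fit the MCD anomaly detector
numerical_pipeline = make_pipeline(StandardScaler())
preprocessor = make_column_transformer((numerical_pipeline, list(df.columns)))
model_pipeline = make_pipeline(preprocessor, MCD())
model_pipeline.fit(X_train)

# Persist the fitted pipeline under the filename app.py expects (assumed step)
joblib.dump(model_pipeline, "model.joblib")

On the caller side, subprocess.run(['python', 'train.py'], check=True), ideally with sys.executable instead of 'python', would make a failed training run raise immediately instead of letting joblib.load fail on a missing model.joblib; the commit as written ignores the return code.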
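A note on the input format: predict() reads each per-name file with header=None and takes df.iloc[0], so Ted.csv, Bill.csv, Jill.csv, and Juan.csv must each be a single headerless row of 29 comma-separated numbers, V1 through V28 followed by Amount. A hypothetical Ted.csv with illustrative values only, not real data:

-1.36,-0.07,2.54,1.38,-0.34,0.46,0.24,0.10,0.36,0.09,-0.55,-0.62,-0.99,-0.31,1.47,-0.47,0.21,0.03,0.40,0.25,-0.02,0.28,-0.11,0.07,0.13,-0.19,0.13,-0.02,149.62

Calling the function directly then looks like the snippet below; assuming the persisted model is the MCD detector, pyod's labeling convention makes the prediction 1 for a flagged (anomalous) transaction and 0 otherwise.

# Assumes Ted.csv sits next to app.py
label, amount = predict("Ted.csv")
print(f"prediction={label}, amount={amount}")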