kgauvin603 committed on
Commit
78ecccd
1 Parent(s): bf20a70

Update app.py

Files changed (1)
  1. app.py +98 -49
app.py CHANGED
@@ -47,59 +47,108 @@ import warnings
 # Ignore all warnings
 warnings.filterwarnings("ignore")

-# Download the dataset
-url = "http://www.ulb.ac.be/di/map/adalpozz/data/creditcard.Rdata"
-dst_path = "./creditcard.Rdata"
-wget.download(url, dst_path)
-
-# Load the dataset
-parsed_res = rdata.parser.parse_file(dst_path)
-res = rdata.conversion.convert(parsed_res)
-dataset = res['creditcard'].reset_index(drop=True).drop(['Time'], axis=1)
-
-# Prepare the data
-y = dataset['Class'].astype(int)  # Convert to integers
-df = dataset.drop(['Class'], axis=1)
-df.columns = df.columns.astype(str)
-
-# Split the data
-X_train, X_test, y_train, y_test = train_test_split(df, y, train_size=0.6, random_state=0, stratify=y)
-X_train, _, y_train, _ = train_test_split(X_train, y_train, train_size=0.2, random_state=0, stratify=y_train)
-
-# Reset indices
-X_train.reset_index(drop=True, inplace=True)
-y_train.reset_index(drop=True, inplace=True)
-
-# Define the numerical features and the pipeline for numerical features
-numerical_features = ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
-                      'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
-                      'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount']
-
-numerical_pipeline = make_pipeline(
-    StandardScaler()  # Standardize numerical features
+# Run the training script placed in the same directory as app.py.
+# The training script trains and persists the anomaly-detection model
+# under the filename 'model.joblib'.
+subprocess.run(['python', 'train.py'])
+
+# Load the freshly trained model from disk
+model = joblib.load("model.joblib")
+
+# Define the predict function
+def predict(csv_filename):
+    # Read the CSV file (a single headerless row of feature values)
+    df = pd.read_csv(csv_filename, header=None)
+
+    # Convert the first row of the DataFrame to a list of floats
+    client_data = df.iloc[0].tolist()
+
+    # Check that exactly 29 values are present (V1-V28 plus Amount)
+    if len(client_data) != 29:
+        raise ValueError("The CSV file must contain exactly 29 values.")
+
+    # Unpack the list of values
+    V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V20, V21, V22, V23, V24, V25, V26, V27, V28, Amount = client_data
+
+    # Create the data dictionary
+    data = {
+        'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8, 'V9': V9, 'V10': V10,
+        'V11': V11, 'V12': V12, 'V13': V13, 'V14': V14, 'V15': V15, 'V16': V16, 'V17': V17, 'V18': V18, 'V19': V19, 'V20': V20,
+        'V21': V21, 'V22': V22, 'V23': V23, 'V24': V24, 'V25': V25, 'V26': V26, 'V27': V27, 'V28': V28, 'Amount': Amount
+    }
+
+    # Convert the data dictionary to a single-row DataFrame
+    input_df = pd.DataFrame([data])
+
+    # Make a prediction using the loaded model
+    prediction = model.predict(input_df)
+
+    return prediction[0], Amount  # Return both the prediction and the Amount
+
+# Map each name to its respective CSV filename
+def get_csv_filename(name):
+    name_to_filename = {
+        'Ted': 'Ted.csv',
+        'Bill': 'Bill.csv',
+        'Jill': 'Jill.csv',
+        'Juan': 'Juan.csv'
+    }
+    return name_to_filename.get(name, 'Ted.csv')  # Default to 'Ted.csv' if name not found
+
+# Define the Gradio interface function for a single prediction
+def gradio_predict(name):
+    csv_filename = get_csv_filename(name)
+    prediction, amount = predict(csv_filename)
+    return f"The flagged transaction amount is {amount} and the prediction is {prediction}"
+
+# Define the function for bulk analysis
+def bulk_analysis(file):
+    # Read the uploaded CSV file
+    df = pd.read_csv(file.name)
+
+    # Assume the last column is the label and the rest are features
+    X_test = df.iloc[:, :-1]
+    y_test = df.iloc[:, -1]
+
+    # Make predictions using the loaded model
+    y_test_pred = model.predict(X_test)
+
+    # Debugging: print counts of anomalies among actual and predicted labels
+    actual_anomalies = sum(y_test == 1)
+    predicted_anomalies = sum(y_test_pred == 1)
+    print(f"Actual anomalies: {actual_anomalies}, Predicted anomalies: {predicted_anomalies}")
+
+    # Find rows where actual and predicted are both 1
+    correctly_predicted_anomalies = X_test[(y_test == 1) & (y_test_pred == 1)]
+    print(f"Correctly predicted anomalies: {len(correctly_predicted_anomalies)}")
+
+    # Save the results to a CSV file
+    result_filename = "correct_anomalies.csv"
+    correctly_predicted_anomalies.to_csv(result_filename, index=False)
+
+    return result_filename  # Return the path to the saved file
+
+
+# Create the Gradio interface for single predictions
+iface = gr.Interface(
+    fn=gradio_predict,
+    inputs=gr.Dropdown(choices=['Ted', 'Bill', 'Jill', 'Juan'], label="Select a name"),
+    outputs="text"
 )

-# Create a column transformer named preprocessor to apply the numerical pipeline to the numerical features
-preprocessor = make_column_transformer(
-    (numerical_pipeline, numerical_features)
+# Add the bulk analysis upload interface
+bulk_iface = gr.Interface(
+    fn=bulk_analysis,
+    inputs=gr.File(label="Bulk Analysis"),
+    outputs=gr.File(label="Download Results")
 )

-# Create the MCD anomaly-detection model
-clf = MCD()
-
-# Create a pipeline combining the preprocessing step with the MCD anomaly detector
-model_pipeline = make_pipeline(
-    preprocessor,  # Apply preprocessing steps
-    clf  # MCD anomaly-detection model
+# Combine the interfaces into tabs
+combined_iface = gr.TabbedInterface(
+    [iface, bulk_iface],
+    tab_names=["Single Prediction", "Bulk Analysis"]
 )

-# Fit the model and predict anomalies on the test set
-model_pipeline.fit(X_train)
-y_test_pred = model_pipeline.predict(X_test)
-
-# Evaluate the model
-f1 = f1_score(y_test, y_test_pred)
-conf_matrix = confusion_matrix(y_test, y_test_pred)
-model_pipeline.named_steps
-#
+# Launch the interface
+combined_iface.launch(share=True)
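
For context on the new startup sequence: train.py is not part of this commit, so everything below is a sketch, not the author's actual script. It reassembles the training code deleted above (download, preprocessing, MCD pipeline) and adds the one step the new app.py clearly relies on, persisting the fitted pipeline as model.joblib with joblib.dump. The pyod import path for MCD is a guess, since the original import block sits above this hunk.

# train.py -- hypothetical sketch, reconstructed from the removed code
import warnings
import wget
import rdata
import joblib
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import make_column_transformer
from pyod.models.mcd import MCD  # assumed import; not shown in this hunk

warnings.filterwarnings("ignore")

# Download and parse the R dataset, as the removed code did
url = "http://www.ulb.ac.be/di/map/adalpozz/data/creditcard.Rdata"
dst_path = "./creditcard.Rdata"
wget.download(url, dst_path)
res = rdata.conversion.convert(rdata.parser.parse_file(dst_path))
dataset = res['creditcard'].reset_index(drop=True).drop(['Time'], axis=1)

# Separate features and labels
y = dataset['Class'].astype(int)
df = dataset.drop(['Class'], axis=1)
df.columns = df.columns.astype(str)

# Same split and subsampling as the removed code
X_train, X_test, y_train, y_test = train_test_split(df, y, train_size=0.6, random_state=0, stratify=y)
X_train, _, y_train, _ = train_test_split(X_train, y_train, train_size=0.2, random_state=0, stratify=y_train)

# Standardize the 29 numeric columns and fit the MCD anomaly detector
numerical_pipeline = make_pipeline(StandardScaler())
preprocessor = make_column_transformer((numerical_pipeline, list(df.columns)))
model_pipeline = make_pipeline(preprocessor, MCD())
model_pipeline.fit(X_train)

# Persist the fitted pipeline under the filename app.py expects (assumed step)
joblib.dump(model_pipeline, "model.joblib")

On the caller side, subprocess.run(['python', 'train.py'], check=True), ideally with sys.executable instead of 'python', would make a failed training run raise immediately instead of letting joblib.load fail on a missing model.joblib; the commit as written ignores the return code.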
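A note on the input format: predict() reads each per-name file with header=None and takes df.iloc[0], so Ted.csv, Bill.csv, Jill.csv, and Juan.csv must each be a single headerless row of 29 comma-separated numbers, V1 through V28 followed by Amount. A hypothetical Ted.csv with illustrative values only, not real data:

-1.36,-0.07,2.54,1.38,-0.34,0.46,0.24,0.10,0.36,0.09,-0.55,-0.62,-0.99,-0.31,1.47,-0.47,0.21,0.03,0.40,0.25,-0.02,0.28,-0.11,0.07,0.13,-0.19,0.13,-0.02,149.62

Calling the function directly then looks like the snippet below; assuming the persisted model is the MCD detector, pyod's labeling convention makes the prediction 1 for a flagged (anomalous) transaction and 0 otherwise.

# Assumes Ted.csv sits next to app.py
label, amount = predict("Ted.csv")
print(f"prediction={label}, amount={amount}")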