Spaces:
Sleeping
Sleeping
kgauvin603
committed on
Commit
•
78ecccd
1
Parent(s):
bf20a70
Update app.py
Browse files
app.py
CHANGED
@@ -47,59 +47,108 @@ import warnings
|
|
47 |
# Ignore all warnings
|
48 |
warnings.filterwarnings("ignore")
|
49 |
|
50 |
-
#
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
# Load the
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
#
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
#
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
)
|
81 |
|
82 |
-
#
|
83 |
-
|
84 |
-
|
|
|
|
|
85 |
)
|
86 |
|
87 |
-
#
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
model_pipeline = make_pipeline(
|
92 |
-
preprocessor, # Applying preprocessing steps
|
93 |
-
clf # Training linear regression model
|
94 |
)
|
95 |
|
96 |
-
#
|
97 |
-
|
98 |
-
y_test_pred = model_pipeline.predict(X_test)
|
99 |
-
|
100 |
-
# Evaluate the model
|
101 |
-
f1 = f1_score(y_test, y_test_pred)
|
102 |
-
conf_matrix = confusion_matrix(y_test, y_test_pred)
|
103 |
-
model_pipeline.named_steps
|
104 |
-
#
|
105 |
|
|
|
47 |
# Ignore all warnings
|
48 |
warnings.filterwarnings("ignore")
|
49 |
|
50 |
+
# Run the training script placed in the same directory as app.py.
# The training script trains and persists a linear regression model
# with the filename 'model.joblib', which is loaded immediately below.
import sys

# Use the interpreter that is running this app (not whatever 'python'
# happens to resolve to on PATH), and fail fast with CalledProcessError
# if training exits non-zero — otherwise joblib.load would fail later
# with a much less informative error.
subprocess.run([sys.executable, 'train.py'], check=True)

# Load the freshly trained model from disk
model = joblib.load("model.joblib")
|
57 |
+
|
58 |
+
# Define the predict function
def predict(csv_filename):
    """Score a single client record stored in *csv_filename*.

    The CSV file must contain exactly one data row of 29 numeric values:
    features V1..V28 followed by the transaction Amount.

    Args:
        csv_filename: Path to a header-less, single-row CSV file.

    Returns:
        tuple: (model prediction for the record, transaction Amount).

    Raises:
        ValueError: If the file does not hold exactly 29 values.
    """
    # Read the CSV file (no header row expected)
    df = pd.read_csv(csv_filename, header=None)

    # Convert the first row of the DataFrame to a list of values
    client_data = df.iloc[0].tolist()

    # Validate the expected record length (28 features + Amount)
    if len(client_data) != 29:
        raise ValueError("The CSV file must contain exactly 29 values.")

    # Build the feature dictionary V1..V28 plus Amount with a
    # comprehension instead of manually unpacking 29 variables.
    data = {f'V{i}': value for i, value in enumerate(client_data[:28], start=1)}
    amount = client_data[28]
    data['Amount'] = amount

    # Convert the data dictionary to a single-row DataFrame
    input_df = pd.DataFrame([data])

    # Make predictions using the module-level model loaded at startup
    prediction = model.predict(input_df)

    return prediction[0], amount  # Return both the prediction and Amount
|
87 |
+
|
88 |
+
# Resolve a person's name to the CSV file holding their transaction record.
def get_csv_filename(name):
    """Return the CSV filename mapped to *name*; unknown names fall back to 'Ted.csv'."""
    filename_by_name = {
        'Ted': 'Ted.csv',
        'Bill': 'Bill.csv',
        'Jill': 'Jill.csv',
        'Juan': 'Juan.csv',
    }
    # Default to Ted's record when the name is not recognized.
    return filename_by_name.get(name, 'Ted.csv')
|
97 |
+
|
98 |
+
# Gradio handler for the single-prediction tab.
def gradio_predict(name):
    """Score the record belonging to *name* and format the result for display."""
    prediction, amount = predict(get_csv_filename(name))
    return f"The flagged transaction amount is {amount} and the prediction is {prediction}"
|
103 |
+
|
104 |
+
# Gradio handler for the bulk-analysis tab: score a whole uploaded CSV.
def bulk_analysis(file):
    """Predict over every row of the uploaded file and save the rows where
    both the ground-truth label and the model agree on an anomaly (class 1).

    Returns the path of the saved results file so Gradio can offer it
    for download.
    """
    # Read the uploaded CSV file via the temp path Gradio provides
    frame = pd.read_csv(file.name)

    # Last column is assumed to be 'Amount'; everything before it is features.
    X_test, y_test = frame.iloc[:, :-1], frame.iloc[:, -1]

    # Score the whole feature matrix with the module-level model
    y_test_pred = model.predict(X_test)

    # Debugging: report how many anomalies exist vs. how many were predicted
    actual_anomalies = sum(y_test == 1)
    predicted_anomalies = sum(y_test_pred == 1)
    print(f"Actual anomalies: {actual_anomalies}, Predicted anomalies: {predicted_anomalies}")

    # Keep only rows flagged as anomalous by both ground truth and the model
    hit_mask = (y_test == 1) & (y_test_pred == 1)
    correctly_predicted_anomalies = X_test[hit_mask]
    print(f"Correctly predicted anomalies: {len(correctly_predicted_anomalies)}")

    # Persist the matching rows for download
    result_filename = "correct_anomalies.csv"
    correctly_predicted_anomalies.to_csv(result_filename, index=False)

    return result_filename  # Return the path to the saved file
|
130 |
+
|
131 |
+
|
132 |
+
# Build the Gradio UI: one tab for single predictions, one for bulk analysis.
name_dropdown = gr.Dropdown(choices=['Ted', 'Bill', 'Jill', 'Juan'], label="Select a name")
iface = gr.Interface(fn=gradio_predict, inputs=name_dropdown, outputs="text")

# File-upload interface: accepts a CSV, returns the results file for download.
bulk_iface = gr.Interface(
    fn=bulk_analysis,
    inputs=gr.File(label="Bulk Analysis"),
    outputs=gr.File(label="Download Results"),
)

# Combine both interfaces under named tabs and publish the app.
combined_iface = gr.TabbedInterface(
    [iface, bulk_iface],
    tab_names=["Single Prediction", "Bulk Analysis"],
)
combined_iface.launch(share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
|