Spaces:

kgauvin603
/

OCW-FraudDetection

Sleeping

App Files Files Community

OCW-FraudDetection / app.py

kgauvin603

Create app.py

4748e1b verified about 1 month ago

raw

history blame

No virus

3.71 kB


	# Retrain on startup by running train.py. The original note expects the
	# script to sit next to app.py; the relative path is resolved against the
	# current working directory.
	# The training script is expected to persist the fitted model as
	# 'model.joblib' (described in the original note as a linear regression
	# model -- NOTE(review): the rest of this file compares predictions to 1
	# like a class label; confirm the actual model type).
	# check=True makes a failed training run raise CalledProcessError instead
	# of silently continuing with a stale or missing model file.
	subprocess.run(['python', 'train.py'], check=True)

	# Load the freshly trained model from disk
	model = joblib.load("model.joblib")

	# Column names handed to the model, in the order the values appear in a
	# client CSV row: V1..V28 followed by Amount.
	_FEATURE_NAMES = [f"V{i}" for i in range(1, 29)] + ["Amount"]


	def predict(csv_filename):
	    """Score the first row of a header-less CSV file with the loaded model.

	    Args:
	        csv_filename: Path of a CSV file (read without a header row) whose
	            first row holds the 29 values V1..V28 and Amount, in that order.

	    Returns:
	        A ``(prediction, amount)`` tuple: the first element of the model's
	        prediction for that row, and the row's last value (Amount).

	    Raises:
	        ValueError: If the first row does not contain exactly 29 values.
	            Errors raised by pandas while reading the file propagate
	            unchanged.
	    """
	    df = pd.read_csv(csv_filename, header=None)

	    # Values are taken as parsed by pandas; no explicit float conversion
	    # is applied here.
	    client_data = df.iloc[0].tolist()

	    if len(client_data) != len(_FEATURE_NAMES):
	        raise ValueError("The CSV file must contain exactly 29 values.")

	    # Pair each value with its column name so the model receives a
	    # single-row DataFrame with named columns.
	    input_df = pd.DataFrame([dict(zip(_FEATURE_NAMES, client_data))])

	    prediction = model.predict(input_df)

	    # Amount is the last value of the row.
	    return prediction[0], client_data[-1]

	# Lookup table from a client name to the CSV file holding that client's row
	_CLIENT_CSV_FILES = {
	    'Ted': 'Ted.csv',
	    'Bill': 'Bill.csv',
	    'Jill': 'Jill.csv',
	    'Juan': 'Juan.csv',
	}

	# File returned when the requested name is not in the table
	_FALLBACK_CSV_FILE = 'Ted.csv'


	def get_csv_filename(name):
	    """Return the CSV filename registered for *name*.

	    Args:
	        name: Client name to look up (matched exactly, case-sensitive).

	    Returns:
	        The filename mapped to ``name``, or 'Ted.csv' when the name is not
	        one of the known clients.
	    """
	    return _CLIENT_CSV_FILES.get(name, _FALLBACK_CSV_FILE)

	# Gradio callback backing the single-prediction interface
	def gradio_predict(name):
	    """Build the result sentence for the selected client name.

	    Args:
	        name: Client name chosen in the UI; resolved to a CSV file through
	            get_csv_filename.

	    Returns:
	        A sentence reporting the transaction amount and the model's
	        prediction for that client's CSV row.
	    """
	    prediction, amount = predict(get_csv_filename(name))
	    return f"The flagged transaction amount is {amount} and the prediction is {prediction}"

	# Define the function for bulk analysis
	def bulk_analysis(file):
	    """Score every row of an uploaded CSV and export the correctly flagged rows.

	    All columns except the last are passed to the model as features; the
	    last column is compared against 1 as the actual label.
	    NOTE(review): the original comment called the last column 'Amount',
	    but the code uses it as a 0/1 anomaly label -- confirm the expected
	    upload schema.

	    Args:
	        file: The upload handed over by the Gradio File component: either
	            an object exposing the path as ``.name`` or the path itself as
	            a string.

	    Returns:
	        The path "correct_anomalies.csv", a file written to the working
	        directory containing the feature rows whose actual and predicted
	        labels are both 1.

	    Raises:
	        ValueError: If no file was uploaded.
	    """
	    if file is None:
	        raise ValueError("No file was uploaded.")

	    # Depending on the Gradio version/configuration the upload arrives as
	    # a plain path string or as a temp-file object carrying the path in
	    # .name; accept both.
	    csv_path = file if isinstance(file, str) else file.name
	    df = pd.read_csv(csv_path)

	    # Last column is the actual label, everything before it is a feature
	    X_test = df.iloc[:, :-1]
	    y_test = df.iloc[:, -1]

	    # Make predictions using the loaded model
	    y_test_pred = model.predict(X_test)

	    is_actual_anomaly = y_test == 1
	    is_predicted_anomaly = y_test_pred == 1

	    # Debugging: print counts of anomalies in actual and predicted
	    actual_anomalies = is_actual_anomaly.sum()
	    predicted_anomalies = is_predicted_anomaly.sum()
	    print(f"Actual anomalies: {actual_anomalies}, Predicted anomalies: {predicted_anomalies}")

	    # Keep only the rows where actual and predicted are both 1
	    correctly_predicted_anomalies = X_test[is_actual_anomaly & is_predicted_anomaly]
	    print(f"Correctly predicted anomalies: {len(correctly_predicted_anomalies)}")

	    # Save the results to a CSV file (fixed name, overwritten on each call)
	    result_filename = "correct_anomalies.csv"
	    correctly_predicted_anomalies.to_csv(result_filename, index=False)

	    return result_filename  # Return the path to the saved file


	# Single-prediction interface: a dropdown of client names feeds
	# gradio_predict, and the sentence it returns is shown as text.
	# The dropdown choices match the names handled by get_csv_filename.
	iface = gr.Interface(
	fn=gradio_predict,
	inputs=gr.Dropdown(choices=['Ted', 'Bill', 'Jill', 'Juan'], label="Select a name"),
	outputs="text"
	)

	# Bulk-analysis interface: an uploaded file is passed to bulk_analysis,
	# and the file path it returns is offered back as a download.
	bulk_iface = gr.Interface(
	fn=bulk_analysis,
	inputs=gr.File(label="Bulk Analysis"),
	outputs=gr.File(label="Download Results")
	)

	# Combine both interfaces into one app, one tab per workflow
	combined_iface = gr.TabbedInterface(
	[iface, bulk_iface],
	tab_names=["Single Prediction", "Bulk Analysis"]
	)

	# Launch the interface. This runs at module top level (no __main__ guard),
	# so it executes whenever the file is run or imported.
	# NOTE(review): share=True requests a public share link -- confirm it is
	# still wanted in the environment where this app is hosted.
	combined_iface.launch(share=True)