Spaces:

Mercurial-AI
/

NCCN_based_Clinical_OncoChat

Sleeping

App Files Files Community

NCCN_based_Clinical_OncoChat / graph_rag.py

AbdulMoid

Update graph_rag.py

88c46f2 verified about 1 month ago

raw

history blame contribute delete

7.34 kB

	import os
	import zipfile
	import re
	import shutil
	import logging
	import subprocess
	from dotenv import load_dotenv
	import gradio as gr
	from utils import patient_info # Importing patient_info from utils

	# Configure logging at import time: INFO level, with timestamp, logger name and level on each record.
	logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
	# Module-level logger shared by the functions below.
	logger = logging.getLogger(__name__)

	# Load environment variables (python-dotenv) before any function reads ZIP_PATH / EXTRACT_PATH via os.getenv.
	load_dotenv()

	def unzip_folder(zip_path, extract_path):
	    """Extract a GraphRAG archive and return the directory holding its contents.

	    The archive is unpacked into ``extract_path``. The data is expected to end
	    up in ``<extract_path>/ragtest/ragtest`` (the zip carries an extra nested
	    ``ragtest`` folder); when that nested folder is missing, the outer
	    ``<extract_path>/ragtest`` directory is returned instead.

	    Args:
	        zip_path: Path of the zip archive to extract.
	        extract_path: Directory the archive is extracted into.

	    Returns:
	        Path of the nested ``ragtest`` directory if it exists after
	        extraction, otherwise the outer ``ragtest`` directory.
	    """
	    # The outer directory is created up front so the fallback path always exists.
	    ragtest_root = os.path.join(extract_path, "ragtest")
	    os.makedirs(ragtest_root, exist_ok=True)

	    with zipfile.ZipFile(zip_path, 'r') as archive:
	        archive.extractall(extract_path)

	    # Account for the extra "ragtest" folder packaged inside the zip.
	    nested_dir = os.path.join(ragtest_root, "ragtest")

	    if not os.path.exists(nested_dir):
	        logger.error(f"Expected directory {nested_dir} does not exist. Check the structure of the zip file.")
	        # Fall back to the outer directory when the layout is not as expected.
	        return ragtest_root

	    logger.info(f"Extracted contents to {nested_dir}")
	    logger.info(f"Contents of {nested_dir}:")
	    for entry in os.listdir(nested_dir):
	        logger.info(os.path.join(nested_dir, entry))

	    return nested_dir

	def run_graphrag_query(query, ragtest_dir):
	    """Run a GraphRAG global-search query in a child process and return its output.

	    The query is executed as ``<interpreter> -m graphrag.query --root
	    <ragtest_dir> --method global <query>``; the child's stdout and stderr
	    are captured as text.

	    Args:
	        query: Prompt text passed as the positional argument of the command.
	        ragtest_dir: GraphRAG project root passed via ``--root``. Its entries
	            are logged before the query runs.

	    Returns:
	        The child's stdout when it exits with status 0; otherwise its stderr
	        (which is also logged at error level).

	    Raises:
	        OSError: If ``ragtest_dir`` cannot be listed or the child process
	            cannot be started.
	    """
	    # Local import keeps this block self-contained (the file does not import sys).
	    import sys

	    # Log the directory and its contents to ease debugging of a bad root.
	    logger.info(f"Running GraphRAG query with root: {ragtest_dir}")
	    logger.info(f"Contents of {ragtest_dir}:")
	    for entry in os.listdir(ragtest_dir):
	        logger.info(os.path.join(ragtest_dir, entry))

	    # Use the interpreter that is running this app so the child process sees
	    # the same installed packages (a bare "python" is resolved through PATH and
	    # may be a different interpreter). Fall back to "python" if unavailable.
	    interpreter = sys.executable or "python"
	    command = [
	        interpreter, "-m", "graphrag.query",
	        "--root", ragtest_dir,
	        "--method", "global",
	        query,
	    ]

	    # Argument list (no shell), so the prompt text is passed through verbatim.
	    result = subprocess.run(command, capture_output=True, text=True)

	    if result.returncode == 0:
	        return result.stdout

	    logger.error(f"GraphRAG query failed with error: {result.stderr}")
	    return result.stderr

	def clean_response(response):
	    """Strip everything up to and including the GraphRAG success banner.

	    Args:
	        response: Raw text produced by the GraphRAG query command.

	    Returns:
	        The text following the first ``"SUCCESS: Global Search Response:"``
	        marker with surrounding whitespace removed, or ``response`` unchanged
	        when the marker is absent.
	    """
	    marker = "SUCCESS: Global Search Response:"
	    # partition splits on the first occurrence; `found` is empty when absent.
	    _, found, tail = response.partition(marker)
	    return tail.strip() if found else response

	def qa_tool_graph_rag(user_question):
	    """Answer an oncology question with GraphRAG and return Gradio outputs.

	    Extracts the GraphRAG archive (``ZIP_PATH`` into ``EXTRACT_PATH``, both
	    overridable through environment variables), builds a prompt from
	    ``user_question`` and ``patient_info["description"]``, runs the global
	    query and strips the command banner from the result. The extracted
	    directory is removed and the working directory restored afterwards.

	    Args:
	        user_question: The question text inserted into the prompt.

	    Returns:
	        A 4-tuple ``(answer, images, update, update)``. On success ``answer``
	        is the cleaned response, ``images`` is an empty list and both updates
	        are ``gr.update(visible=True)``. On any exception ``answer`` is
	        ``"An error occurred: ..."`` and both updates are
	        ``gr.update(visible=False)``.
	    """
	    original_dir = os.getcwd()  # Restored in ``finally``.
	    output_dir = None  # Set once extraction succeeds; drives cleanup.
	    try:
	        zip_path = os.getenv('ZIP_PATH', '/home/user/app/ragtest.zip')
	        # Made absolute so paths derived from it stay valid after the chdir
	        # below (a relative EXTRACT_PATH would otherwise resolve twice).
	        extract_path = os.path.abspath(os.getenv('EXTRACT_PATH', '/home/user/app'))

	        output_dir = unzip_folder(zip_path, extract_path)

	        os.chdir(extract_path)

	        # Combine patient_info with user_question
	        combined_input = f"""
	You are an expert Oncologist assistant tasked with providing a personalized treatment recommendation based on the question, patient description and Knowledge Graph.

	Question: {user_question}
	Patient Description: {patient_info["description"]}

	Please provide a detailed and accurate answer to the question based on the patient description and the context. Ensure your response is tailored to the patient’s specific situation as described. Your response should include information found within this template below,
	1. Initial Work Up
	- Include relevant details such as History and Physical, Baseline Labs, Pathology Review/Tumor Biopsy, Additional Imaging, Genetic Testing, Fertility considerations, and Psychological Evaluation (therapist).
	2. Neoadjuvant Chemotherapy
	- Indicate if it's recommended, and specify the type (e.g., chemotherapy, HER2 Targeted Therapy, PARP Inhibitors) if applicable.
	3. Surgery
	- Outline surgical options like Lumpectomy, Lymph Node Dissection, Mastectomy, or Bilateral Mastectomy, as relevant. Include any additional surgical considerations such as Oophorectomy (if relevant).
	4. Systemic Therapies
	- Discuss Chemotherapy, Endocrine (hormone) therapy (indicate if not relevant), HER2 Targeted Therapy (indicate if not relevant), PARP Inhibitors (if relevant), and Immunotherapies (if relevant), with a focus on the relevance to the patient's condition.
	5. Radiation
	- Describe options such as Whole Breast, Boost to Tumor Bed, and Node Irradiation, and indicate relevance to the treatment plan.
	6. Long Term Therapies
	- Mention HER2 targeted therapies or other long-term treatments (if relevant), such as endocrine therapy or immune checkpoint inhibitors, and specify their duration and relevance.
	7. Special Considerations
	- Include additional considerations such as Psychotherapy, Lifestyle Modifications (e.g., sexual health and fertility counseling), and Clinical Trials (note relevance). Also, consider additional surgical options like Oophorectomy, if applicable.
	8. Follow Up/Monitoring
	- Provide recommendations for managing comorbidities (e.g., cardiac, diabetes, liver, kidney). Include suggestions for monitoring concerning symptoms (e.g., bone pain, unexplained weight loss) and follow-up visits to assess treatment effectiveness and long-term care.



	If the patient’s specific situation as described is not sufficient to answer the question completely, please state so and provide the best possible answer with the available information.

	The Knowledge Graph is built on 3 NCCN documents - NCCN Evidence Blocks (2024), NCCN Guidelines for High Risk Genetic Disorders (2024), Management of Immunotherapy-Related Toxicities (2024)

	Important: Include the exact references to the NCCN documents (using the full document names listed above) along with the corresponding page numbers from which the information was taken. Ensure that each piece of information in your response can be traced back to the specific page(s) of the document(s). Include REFERENCES section in the end listing documents and page numbers. And let the document year be 2024 always. Do not include Data Reports in your response.
	"""

	        # Run the GraphRAG query with the combined input
	        raw_answer = run_graphrag_query(combined_input, output_dir)

	        # Drop everything before "SUCCESS: Global Search Response:"
	        answer = clean_response(raw_answer)

	        logger.info(f"GraphRAG answer generated: {answer}")

	        images = []  # Adjust as needed for your application

	        return answer, images, gr.update(visible=True), gr.update(visible=True)

	    except Exception as e:
	        # UI boundary: report the failure to the user instead of raising.
	        # logger.exception keeps the traceback in the log.
	        logger.exception(f"Error in GraphRAG processing: {str(e)}")
	        return f"An error occurred: {str(e)}", [], gr.update(visible=False), gr.update(visible=False)

	    finally:
	        # Restore the working directory first so a cleanup failure can never
	        # leave the process stuck inside extract_path.
	        os.chdir(original_dir)

	        if output_dir is not None:
	            try:
	                shutil.rmtree(output_dir)
	            except OSError as cleanup_error:
	                # Best-effort cleanup: raising here would replace the value
	                # already being returned to the caller.
	                logger.warning(f"Could not remove {output_dir}: {cleanup_error}")