# AbdulMoid's picture
# Update graph_rag.py
# 88c46f2 verified
import logging
import os
import re
import shutil
import subprocess
import sys
import zipfile

import gradio as gr
from dotenv import load_dotenv

from utils import patient_info # Importing patient_info from utils
# Configure the root logger once at import time: INFO level, with each record
# rendered as "<timestamp> - <logger name> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Module-level logger, named after this module, used by every function below.
logger = logging.getLogger(__name__)
# Load environment variables via python-dotenv before any os.getenv lookup
# (ZIP_PATH and EXTRACT_PATH are read in qa_tool_graph_rag).
load_dotenv()
def unzip_folder(zip_path, extract_path):
    """Extract ``zip_path`` into ``extract_path`` and return the data directory.

    ``<extract_path>/ragtest`` is created up front, then the whole archive is
    extracted into ``extract_path``. If the archive produced the nested
    directory ``<extract_path>/ragtest/ragtest``, its entries are logged and
    its path is returned. Otherwise an error is logged and the outer
    ``<extract_path>/ragtest`` directory is returned as a fallback.
    """
    base_dir = os.path.join(extract_path, "ragtest")
    os.makedirs(base_dir, exist_ok=True)

    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_path)

    # Adjust to handle the extra ragtest folder inside the zip
    nested_dir = os.path.join(base_dir, "ragtest")

    if not os.path.exists(nested_dir):
        logger.error(f"Expected directory {nested_dir} does not exist. Check the structure of the zip file.")
        # Fallback in case the structure is not as expected.
        return base_dir

    logger.info(f"Extracted contents to {nested_dir}")
    logger.info(f"Contents of {nested_dir}:")
    for entry in os.listdir(nested_dir):
        logger.info(os.path.join(nested_dir, entry))
    return nested_dir
def run_graphrag_query(query, ragtest_dir):
    """Run a GraphRAG global-search query in a child Python process.

    Args:
        query: Prompt text, passed as the positional argument of
            ``python -m graphrag.query``.
        ragtest_dir: Directory passed as ``--root``. Its entries are logged
            before the query runs.

    Returns:
        The child's stdout when it exits with status 0; otherwise its stderr
        (which is also logged at error level). Callers receive text either
        way and cannot tell the two cases apart from the return value.

    Raises:
        OSError: If ``ragtest_dir`` cannot be listed or the interpreter
            cannot be started.
    """
    # Log the directory and its contents
    logger.info("Running GraphRAG query with root: %s", ragtest_dir)
    logger.info("Contents of %s:", ragtest_dir)
    for entry in os.listdir(ragtest_dir):
        logger.info(os.path.join(ragtest_dir, entry))

    # Launch the same interpreter that is running this module rather than
    # whatever "python" resolves to on PATH, so the child sees the same
    # installed packages (graphrag). Fall back to "python" only if the
    # interpreter path is unavailable.
    interpreter = sys.executable or "python"
    command = [
        interpreter, "-m", "graphrag.query",
        "--root", ragtest_dir,
        "--method", "global",
        query,
    ]

    # Argument list with no shell: the query text is never shell-interpreted.
    result = subprocess.run(command, capture_output=True, text=True)

    if result.returncode != 0:
        logger.error("GraphRAG query failed with error: %s", result.stderr)
        return result.stderr
    return result.stdout
def clean_response(response):
    """Drop everything up to and including the GraphRAG success banner.

    If ``"SUCCESS: Global Search Response:"`` occurs in ``response``, return
    the text after its first occurrence with surrounding whitespace stripped.
    If the banner is absent, return ``response`` unchanged (not stripped).
    """
    banner = "SUCCESS: Global Search Response:"
    _before, matched, remainder = response.partition(banner)
    # ``matched`` is empty exactly when the banner was not found.
    if not matched:
        return response
    return remainder.strip()
def _build_graphrag_prompt(user_question):
    """Return the GraphRAG prompt combining the question with patient_info["description"]."""
    description = patient_info["description"]
    return f"""
You are an expert Oncologist assistant tasked with providing a personalized treatment recommendation based on the question, patient description and Knowledge Graph.
**Question:** {user_question}
**Patient Description:** {description}
Please provide a detailed and accurate answer to the question based on the patient description and the context. Ensure your response is tailored to the patient’s specific situation as described. Your response should include information found within this template below,
1. Initial Work Up
- Include relevant details such as History and Physical, Baseline Labs, Pathology Review/Tumor Biopsy, Additional Imaging, Genetic Testing, Fertility considerations, and Psychological Evaluation (therapist).
2. Neoadjuvant Chemotherapy
- Indicate if it's recommended, and specify the type (e.g., chemotherapy, HER2 Targeted Therapy, PARP Inhibitors) if applicable.
3. Surgery
- Outline surgical options like Lumpectomy, Lymph Node Dissection, Mastectomy, or Bilateral Mastectomy, as relevant. Include any additional surgical considerations such as Oophorectomy (if relevant).
4. Systemic Therapies
- Discuss Chemotherapy, Endocrine (hormone) therapy (indicate if not relevant), HER2 Targeted Therapy (indicate if not relevant), PARP Inhibitors (if relevant), and Immunotherapies (if relevant), with a focus on the relevance to the patient's condition.
5. Radiation
- Describe options such as Whole Breast, Boost to Tumor Bed, and Node Irradiation, and indicate relevance to the treatment plan.
6. Long Term Therapies
- Mention HER2 targeted therapies or other long-term treatments (if relevant), such as endocrine therapy or immune checkpoint inhibitors, and specify their duration and relevance.
7. Special Considerations
- Include additional considerations such as Psychotherapy, Lifestyle Modifications (e.g., sexual health and fertility counseling), and Clinical Trials (note relevance). Also, consider additional surgical options like Oophorectomy, if applicable.
8. Follow Up/Monitoring
- Provide recommendations for managing comorbidities (e.g., cardiac, diabetes, liver, kidney). Include suggestions for monitoring concerning symptoms (e.g., bone pain, unexplained weight loss) and follow-up visits to assess treatment effectiveness and long-term care.
If the patient’s specific situation as described is not sufficient to answer the question completely, please state so and provide the best possible answer with the available information.
The Knowledge Graph is built on 3 NCCN documents - NCCN Evidence Blocks (2024), NCCN Guidelines for High Risk Genetic Disorders (2024), Management of Immunotherapy-Related Toxicities (2024)
**Important:** Include the exact references to the NCCN documents (using the full document names listed above) along with the corresponding page numbers from which the information was taken. Ensure that each piece of information in your response can be traced back to the specific page(s) of the document(s). Include REFERENCES section in the end listing documents and page numbers. And let the document year be 2024 always. Do not include Data Reports in your response.
"""


def qa_tool_graph_rag(user_question):
    """Answer ``user_question`` with GraphRAG and return values for the Gradio UI.

    Steps: unzip the knowledge-graph archive (``ZIP_PATH``) under
    ``EXTRACT_PATH``, change into ``EXTRACT_PATH``, run a global GraphRAG
    query with a prompt built from the question and
    ``patient_info["description"]``, and strip the CLI banner from the output.

    Args:
        user_question: The question text inserted into the prompt.

    Returns:
        A 4-tuple ``(answer, images, update_a, update_b)``. On success
        ``images`` is an empty list and both updates are
        ``gr.update(visible=True)``. On any exception ``answer`` is
        ``"An error occurred: <message>"`` and both updates are
        ``gr.update(visible=False)``.

    The extracted directory is removed and the original working directory is
    restored on every path, including failures.
    """
    # NOTE(review): os.chdir is process-wide, so overlapping calls would
    # interfere with each other — confirm how the Gradio app schedules calls.
    original_dir = os.getcwd()  # Store the original directory
    # Sentinel so the cleanup below knows whether extraction got far enough
    # to produce a directory to delete.
    output_dir = None
    try:
        zip_path = os.getenv('ZIP_PATH', '/home/user/app/ragtest.zip')
        extract_path = os.getenv('EXTRACT_PATH', '/home/user/app')
        output_dir = unzip_folder(zip_path, extract_path)
        os.chdir(extract_path)

        # Combine patient_info with user_question
        combined_input = _build_graphrag_prompt(user_question)

        # Run the GraphRAG query with the combined input
        raw_answer = run_graphrag_query(combined_input, output_dir)
        # Clean the response to remove everything before "SUCCESS: Global Search Response:"
        answer = clean_response(raw_answer)
        logger.info("GraphRAG answer generated: %s", answer)

        images = []  # Adjust as needed for your application
        return answer, images, gr.update(visible=True), gr.update(visible=True)
    except Exception as e:
        # Top-level UI boundary: report the failure to the user instead of
        # raising, and keep the traceback in the log.
        logger.exception("Error in GraphRAG processing: %s", e)
        return f"An error occurred: {e}", [], gr.update(visible=False), gr.update(visible=False)
    finally:
        try:
            if output_dir is not None:
                try:
                    shutil.rmtree(output_dir)
                except OSError as cleanup_error:
                    # A failed cleanup must not replace the computed result.
                    logger.warning("Could not remove %s: %s", output_dir, cleanup_error)
        finally:
            # Restore the working directory even if cleanup went wrong.
            os.chdir(original_dir)