# NOTE(review): the lines "Spaces: / Sleeping / Sleeping" were Hugging Face
# Spaces page-header residue captured in a copy-paste, not program text.
import streamlit as st
from Bio import Entrez, Medline
import pandas as pd
from transformers import pipeline
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Configure the Entrez API — NCBI requires a contact email on every request.
Entrez.email = "[email protected]"  # Change this to your actual email

# Load the summarization model from Hugging Face.
# NOTE(review): this downloads/caches ~1.6 GB of weights on first run and
# executes at import time, before the Streamlit UI renders.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def fetch_papers(author_name):
    """Search PubMed for articles authored by *author_name*.

    Queries the Entrez ``esearch`` endpoint with an ``[Author]``-tagged term,
    sorted by relevance, capped at 50 hits.

    Parameters
    ----------
    author_name : str
        Researcher name as typed by the user (no escaping is applied).

    Returns
    -------
    list[str]
        PubMed ID strings; empty if no match.
    """
    search_query = f"{author_name}[Author]"
    handle = Entrez.esearch(
        db="pubmed", term=search_query, retmax=50, sort="relevance"
    )
    try:
        record = Entrez.read(handle)
    finally:
        # Close the network handle even if parsing fails.
        handle.close()
    return record["IdList"]
def fetch_paper_details(pubmed_ids):
    """Fetch MEDLINE-format records for the given PubMed IDs.

    Parameters
    ----------
    pubmed_ids : list[str]
        IDs as returned by :func:`fetch_papers`.

    Returns
    -------
    list
        Parsed ``Medline`` record mappings (dict-like, keyed by MEDLINE
        field tags such as ``"AB"``, ``"OT"``, ``"JT"``, ``"PMID"``).
    """
    handle = Entrez.efetch(
        db="pubmed", id=pubmed_ids, rettype="medline", retmode="text"
    )
    try:
        # Materialize before closing: Medline.parse is a lazy generator
        # that reads from the open handle.
        records = list(Medline.parse(handle))
    finally:
        handle.close()
    return records
def extract_abstracts(records):
    """Return the non-empty abstracts from MEDLINE records.

    Parameters
    ----------
    records : iterable
        Dict-like MEDLINE records; the abstract lives under the ``"AB"`` tag.

    Returns
    -------
    list[str]
        Abstract texts, in record order, skipping records with a missing
        or empty ``"AB"`` field.
    """
    return [abstract for record in records if (abstract := record.get("AB"))]
def extract_keywords(records):
    """Collect all author keywords (``"OT"`` fields) across MEDLINE records.

    Parameters
    ----------
    records : iterable
        Dict-like MEDLINE records; ``"OT"`` maps to a list of keyword strings.

    Returns
    -------
    list[str]
        Keywords concatenated in record order (duplicates preserved).
    """
    return [kw for record in records for kw in record.get("OT", [])]
def extract_journals(records):
    """Return ``(journal_title, pubmed_url)`` pairs for the given records.

    Records missing either the journal title (``"JT"``) or the PubMed ID
    (``"PMID"``) are skipped.

    Parameters
    ----------
    records : iterable
        Dict-like MEDLINE records.

    Returns
    -------
    list[tuple[str, str]]
        One pair per usable record, in record order.
    """
    journals = []
    for record in records:
        journal = record.get("JT", "")
        pmid = record.get("PMID", "")
        if journal and pmid:
            journals.append((journal, f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"))
    return journals
def generate_summary(abstract):
    """Summarize one abstract with the module-level BART summarizer.

    Parameters
    ----------
    abstract : str
        Abstract text to condense.

    Returns
    -------
    str
        A 40–80 token deterministic summary (``do_sample=False``).

    NOTE(review): the truncation below caps the input at 1024 *characters*,
    while BART's limit is 1024 *tokens* — this is a conservative heuristic,
    not an exact model-limit check; confirm intent.
    """
    max_length = min(1024, len(abstract))  # adjust cap for short abstracts
    summary = summarizer(
        abstract[:max_length], max_length=80, min_length=40, do_sample=False
    )
    return summary[0]["summary_text"]
# ---------------------------------------------------------------------------
# Streamlit app: profile a researcher from their PubMed record.
# ---------------------------------------------------------------------------
st.title("Researcher Profile")

# Input from user
email = st.text_input("Enter your email")
author_name = st.text_input("Enter the researcher's name")

if st.button("Fetch Data"):
    if email and author_name:
        # BUGFIX: the collected email was validated but never used — NCBI
        # requests went out under the placeholder address. Apply it here.
        Entrez.email = email

        # Fetch papers and their MEDLINE details.
        pubmed_ids = fetch_papers(author_name)
        records = fetch_paper_details(pubmed_ids)
        if records:
            abstracts = extract_abstracts(records)
            if abstracts:
                # Summaries of up to five abstracts.
                st.subheader("Research Summary")
                st.write("Summary of the last abstracts of the researcher")
                for i, abstract in enumerate(abstracts[:5]):
                    summary = generate_summary(abstract)
                    st.write(f"{i+1}. {summary}")

                # Keyword word cloud.
                keywords = extract_keywords(records)
                if keywords:
                    # BUGFIX: heading read "Key of this researcher's papers".
                    st.subheader("Keywords of this researcher's papers")
                    wordcloud = WordCloud(
                        width=800, height=400, background_color='white'
                    ).generate(' '.join(keywords))
                    plt.figure(figsize=(10, 5))
                    plt.imshow(wordcloud, interpolation='bilinear')
                    plt.axis('off')
                    st.pyplot(plt)
                else:
                    st.write("No keywords found.")

                # Last three journals, each linked to its PubMed entry.
                journals = extract_journals(records)
                if journals:
                    st.subheader("Last Journals Published In")
                    for journal, link in journals[:3]:
                        st.markdown(f"[{journal}]({link})")
                else:
                    st.write("No journal information found.")
            else:
                st.write("No abstracts found.")
        else:
            st.write("No records found.")
    else:
        st.write("Please enter both email and researcher name.")