# Scraped from a Hugging Face Space (joinportiko / app.py, 4.17 kB).
# The original page chrome ("raw", "history", "blame") was removed so the
# file parses as Python.
import streamlit as st
from Bio import Entrez, Medline
import pandas as pd
from transformers import pipeline
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Configure the Entrez API
# NCBI requires a contact email on every Entrez request.
# NOTE(review): this placeholder is never overwritten with the user-supplied
# email collected in the UI below — confirm whether that is intended.
Entrez.email = "[email protected]" # Change this to your actual email
# Load the summarization model from Hugging Face
# Loaded once at import time; the first call downloads the model weights.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Function to fetch papers by author
def fetch_papers(author_name):
    """Return up to 50 PubMed IDs for papers by *author_name*, sorted by relevance."""
    handle = Entrez.esearch(
        db="pubmed",
        term=f"{author_name}[Author]",
        retmax=50,
        sort="relevance",
    )
    search_result = Entrez.read(handle)
    handle.close()
    return search_result["IdList"]
# Function to fetch details of PubMed articles
def fetch_paper_details(pubmed_ids):
    """Fetch the MEDLINE records for *pubmed_ids* and return them as a list."""
    handle = Entrez.efetch(db="pubmed", id=pubmed_ids, rettype="medline", retmode="text")
    parsed_records = list(Medline.parse(handle))
    handle.close()
    return parsed_records
# Function to extract abstracts
def extract_abstracts(records):
    """Collect the non-empty abstract ("AB") fields from MEDLINE records, in order."""
    return [rec.get("AB", "") for rec in records if rec.get("AB", "")]
# Function to extract keywords
def extract_keywords(records):
    """Flatten the keyword ("OT") lists of all MEDLINE records into one list."""
    return [kw for rec in records for kw in rec.get("OT", [])]
# Function to extract journal names and links
def extract_journals(records):
    """Return (journal_title, pubmed_url) pairs for records with both "JT" and "PMID"."""
    pairs = []
    for rec in records:
        title = rec.get("JT", "")
        pmid = rec.get("PMID", "")
        # Skip records missing either field — a link without a PMID is useless.
        if title and pmid:
            pairs.append((title, f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"))
    return pairs
# Function to generate a summary using the Hugging Face model
def generate_summary(abstract):
    """Summarize one paper abstract with the BART summarization pipeline.

    The abstract is truncated to its first 1024 characters (a cheap proxy for
    the model's input limit) before being passed to the model.

    Args:
        abstract: The abstract text; may be empty.

    Returns:
        The generated summary, or the abstract unchanged when it is too short
        to summarize.
    """
    # Bug fix: the pipeline is called with min_length=40, so an empty or very
    # short abstract makes the model warn or fail. Return such text as-is.
    if len(abstract.split()) < 40:
        return abstract
    # Equivalent to the original abstract[:min(1024, len(abstract))] slice.
    truncated = abstract[:1024]
    summary = summarizer(truncated, max_length=80, min_length=40, do_sample=False)
    return summary[0]['summary_text']
# ---------------------------------------------------------------------------
# Streamlit UI: collect a researcher's name, pull their PubMed records, and
# display abstract summaries, a keyword word cloud, and recent journals.
# ---------------------------------------------------------------------------
st.title("Researcher Profile")

# Input from user
email = st.text_input("Enter your email")
author_name = st.text_input("Enter the researcher's name")

if st.button("Fetch Data"):
    if email and author_name:
        # Bug fix: the user-supplied email was collected but never used, so
        # every request went out under the placeholder address. NCBI requires
        # a real contact email on Entrez calls.
        Entrez.email = email
        # Fetch papers and their MEDLINE details
        pubmed_ids = fetch_papers(author_name)
        records = fetch_paper_details(pubmed_ids)
        if records:
            abstracts = extract_abstracts(records)
            if abstracts:
                st.subheader("Research Summary")
                st.write("Summary of the last abstracts of the researcher")
                # Summarize only the first five abstracts to keep latency low.
                for i, abstract in enumerate(abstracts[:5]):
                    summary = generate_summary(abstract)
                    st.write(f"{i+1}. {summary}")
                keywords = extract_keywords(records)
                if keywords:
                    # Typo fix: header previously read "Key of ...".
                    st.subheader("Keywords of this researcher's papers")
                    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(' '.join(keywords))
                    # Pass an explicit Figure to st.pyplot: handing it the
                    # global pyplot module is deprecated in recent Streamlit.
                    fig = plt.figure(figsize=(10, 5))
                    plt.imshow(wordcloud, interpolation='bilinear')
                    plt.axis('off')
                    st.pyplot(fig)
                else:
                    st.write("No keywords found.")
                # Show the three most recent journals with PubMed links.
                journals = extract_journals(records)
                if journals:
                    st.subheader("Last Journals Published In")
                    for journal, link in journals[:3]:
                        st.markdown(f"[{journal}]({link})")
                else:
                    st.write("No journal information found.")
            else:
                st.write("No abstracts found.")
        else:
            st.write("No records found.")
    else:
        st.write("Please enter both email and researcher name.")