import streamlit as st
from Bio import Entrez, Medline
import pandas as pd
from transformers import pipeline
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Configure the Entrez API: set Entrez.email to your actual email address before querying NCBI
# Load the summarization model from Hugging Face.
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the pipeline is built inside a cached factory instead of
# being re-created (and the model weights re-loaded) on each rerun.
# NOTE: st.cache_resource requires Streamlit >= 1.18.
@st.cache_resource
def _load_summarizer():
    """Build the BART summarization pipeline; cached across script reruns."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


summarizer = _load_summarizer()
# Function to fetch papers by author
def fetch_papers(author_name):
    """Search PubMed for an author's papers and return their PubMed IDs.

    Args:
        author_name: Name of the researcher; it is searched with the
            ``[Author]`` field tag.

    Returns:
        The ``IdList`` entry of the ESearch result (at most 50 IDs, sorted by
        relevance).
    """
    # NOTE(review): the UI labels talk about the "last" abstracts/journals,
    # but the search is sorted by relevance, not by date -- confirm intent.
    search_query = f"{author_name}[Author]"
    handle = Entrez.esearch(
        db="pubmed", term=search_query, retmax=50, sort="relevance"
    )
    try:
        # Parse the ESearch response into a dict-like record.
        record = Entrez.read(handle)
    finally:
        # Always release the underlying HTTP response, even if parsing fails.
        handle.close()
    return record["IdList"]
# Function to fetch details of PubMed articles
def fetch_paper_details(pubmed_ids):
    """Download the MEDLINE records for the given PubMed IDs.

    Args:
        pubmed_ids: PubMed IDs to fetch, as returned by ``fetch_papers``.

    Returns:
        A list of parsed MEDLINE records (dict-like objects keyed by MEDLINE
        field tags). An empty list when ``pubmed_ids`` is empty.
    """
    # Nothing to fetch: skip the network round trip so the caller can report
    # "no records" instead of sending an EFetch request with no IDs.
    if not pubmed_ids:
        return []

    handle = Entrez.efetch(
        db="pubmed", id=pubmed_ids, rettype="medline", retmode="text"
    )
    try:
        # Medline.parse is lazy; materialize it before the handle is closed.
        return list(Medline.parse(handle))
    finally:
        handle.close()
# Function to extract abstracts
def extract_abstracts(records):
    """Collect the non-empty abstracts from a sequence of MEDLINE records.

    Args:
        records: Iterable of dict-like records; the abstract is read from
            the ``"AB"`` key.

    Returns:
        A list of abstract strings, in record order. Records with a missing
        or empty ``"AB"`` field are skipped.
    """
    return [record["AB"] for record in records if record.get("AB", "")]
# Function to extract keywords
def extract_keywords(records):
    """Gather the keywords of all records into one flat list.

    Args:
        records: Iterable of dict-like records; keywords are read from the
            ``"OT"`` key, which is treated as a list of strings.

    Returns:
        A single list with every keyword of every record, in record order.
        Duplicates are kept. Records without an ``"OT"`` field contribute
        nothing.
    """
    return [
        keyword
        for record in records
        for keyword in record.get("OT", [])
    ]
# Function to extract journal names and links
def extract_journals(records):
    """Build ``(journal name, PubMed link)`` pairs for the given records.

    Args:
        records: Iterable of dict-like records; the journal title is read
            from ``"JT"`` and the PubMed ID from ``"PMID"``.

    Returns:
        A list of ``(journal, link)`` tuples in record order. Records missing
        either the journal title or the PMID are skipped.
    """
    journals = []
    for record in records:
        journal = record.get("JT", "")
        pmid = record.get("PMID", "")
        if journal and pmid:
            # The link previously contained only "<pmid>/" (the host part had
            # been lost); point it at the article's PubMed page.
            link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
            journals.append((journal, link))
    return journals
# Function to generate a summary using the Hugging Face model
def generate_summary(abstract):
    """Summarize one abstract with the module-level ``summarizer`` pipeline.

    Args:
        abstract: Abstract text. Only its first 1024 characters are passed
            to the model.

    Returns:
        The generated summary text, or an empty string when ``abstract`` is
        empty or whitespace-only (the model is not invoked in that case).
    """
    if not abstract or not abstract.strip():
        return ""

    # Slicing already clamps to the string length, so no explicit min() with
    # len(abstract) is needed.
    text = abstract[:1024]
    # truncation=True additionally caps the input at the model's token limit;
    # a character cap alone does not bound the number of tokens.
    summary = summarizer(
        text, max_length=80, min_length=40, do_sample=False, truncation=True
    )
    return summary[0]['summary_text']
# Streamlit app
def _render_summaries(abstracts):
    """Show model-generated summaries for the first five abstracts."""
    st.subheader("Research Summary")
    st.write("Summary of the last abstracts of the researcher")
    for i, abstract in enumerate(abstracts[:5]):
        summary = generate_summary(abstract)
        st.write(f"{i+1}. {summary}")


def _render_keyword_cloud(keywords):
    """Show a word cloud of the keywords, or a notice when there are none."""
    if not keywords:
        st.write("No keywords found.")
        return
    st.subheader("Key of this researcher's papers")
    wordcloud = WordCloud(
        width=800, height=400, background_color='white'
    ).generate(' '.join(keywords))
    # Draw on an explicit figure and hand it to Streamlit; without a
    # st.pyplot call the image is never displayed in the app.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis("off")
    st.pyplot(fig)
    # Release the figure so repeated reruns do not accumulate open figures.
    plt.close(fig)


def _render_journals(journals):
    """List the first three (journal, link) pairs, or a notice when empty."""
    if not journals:
        st.write("No journal information found.")
        return
    st.subheader("Last Journals Published In")
    for journal, link in journals[:3]:
        # NOTE(review): the loop body was missing from the file; this output
        # format is a reconstruction -- adjust to the intended presentation.
        st.write(f"- {journal}: {link}")


st.title("Researcher Profile")

# Input from user
email = st.text_input("Enter your email")
author_name = st.text_input("Enter the researcher's name")

if st.button("Fetch Data"):
    if email and author_name:
        # Identify this client to NCBI with the address the user entered;
        # previously the email was collected but never passed to Entrez.
        Entrez.email = email

        # Fetch papers
        pubmed_ids = fetch_papers(author_name)
        records = fetch_paper_details(pubmed_ids)
        if records:
            abstracts = extract_abstracts(records)
            if abstracts:
                # NOTE(review): keywords and journals are only shown when at
                # least one abstract exists; this nesting is inferred from the
                # order of the fallback messages -- confirm.
                _render_summaries(abstracts)
                _render_keyword_cloud(extract_keywords(records))
                _render_journals(extract_journals(records))
            else:
                st.write("No abstracts found.")
        else:
            st.write("No records found.")
    else:
        st.write("Please enter both email and researcher name.")