# Scraped from a Hugging Face Space (joinportiko / app.py, 4.17 kB).
# The original page chrome ("raw", "history", "blame") was removed so the
# file parses as Python.
import streamlit as st
from Bio import Entrez, Medline
import pandas as pd
from transformers import pipeline
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Configure the Entrez API
# NCBI requires a contact email on every Entrez request.
# NOTE(review): this placeholder is never overwritten with the user-supplied
# email collected in the UI below — confirm whether that is intended.
Entrez.email = "[email protected]" # Change this to your actual email
# Load the summarization model from Hugging Face
# Loaded once at import time; the first call downloads the model weights.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Function to fetch papers by author
def fetch_papers(author_name):
    """Return up to 50 PubMed IDs for papers by *author_name*, sorted by relevance."""
    handle = Entrez.esearch(
        db="pubmed",
        term=f"{author_name}[Author]",
        retmax=50,
        sort="relevance",
    )
    search_result = Entrez.read(handle)
    handle.close()
    return search_result["IdList"]
# Function to fetch details of PubMed articles
def fetch_paper_details(pubmed_ids):
    """Fetch the MEDLINE records for *pubmed_ids* and return them as a list."""
    handle = Entrez.efetch(db="pubmed", id=pubmed_ids, rettype="medline", retmode="text")
    parsed_records = list(Medline.parse(handle))
    handle.close()
    return parsed_records
# Function to extract abstracts
def extract_abstracts(records):
    """Collect the non-empty abstract ("AB") fields from MEDLINE records, in order."""
    return [rec.get("AB", "") for rec in records if rec.get("AB", "")]
# Function to extract keywords
def extract_keywords(records):
    """Flatten the keyword ("OT") lists of all MEDLINE records into one list."""
    return [kw for rec in records for kw in rec.get("OT", [])]
# Function to extract journal names and links
def extract_journals(records):
    """Return (journal_title, pubmed_url) pairs for records with both "JT" and "PMID"."""
    pairs = []
    for rec in records:
        title = rec.get("JT", "")
        pmid = rec.get("PMID", "")
        # Skip records missing either field — a link without a PMID is useless.
        if title and pmid:
            pairs.append((title, f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"))
    return pairs
# Function to generate a summary using the Hugging Face model
def generate_summary(abstract):
    """Summarize one paper abstract with the BART summarization pipeline.

    The abstract is truncated to its first 1024 characters (a cheap proxy for
    the model's input limit) before being passed to the model.

    Args:
        abstract: The abstract text; may be empty.

    Returns:
        The generated summary, or the abstract unchanged when it is too short
        to summarize.
    """
    # Bug fix: the pipeline is called with min_length=40, so an empty or very
    # short abstract makes the model warn or fail. Return such text as-is.
    if len(abstract.split()) < 40:
        return abstract
    # Equivalent to the original abstract[:min(1024, len(abstract))] slice.
    truncated = abstract[:1024]
    summary = summarizer(truncated, max_length=80, min_length=40, do_sample=False)
    return summary[0]['summary_text']
# ---------------------------------------------------------------------------
# Streamlit UI: collect a researcher's name, pull their PubMed records, and
# display abstract summaries, a keyword word cloud, and recent journals.
# ---------------------------------------------------------------------------
st.title("Researcher Profile")

# Input from user
email = st.text_input("Enter your email")
author_name = st.text_input("Enter the researcher's name")

if st.button("Fetch Data"):
    if email and author_name:
        # Bug fix: the user-supplied email was collected but never used, so
        # every request went out under the placeholder address. NCBI requires
        # a real contact email on Entrez calls.
        Entrez.email = email
        # Fetch papers and their MEDLINE details
        pubmed_ids = fetch_papers(author_name)
        records = fetch_paper_details(pubmed_ids)
        if records:
            abstracts = extract_abstracts(records)
            if abstracts:
                st.subheader("Research Summary")
                st.write("Summary of the last abstracts of the researcher")
                # Summarize only the first five abstracts to keep latency low.
                for i, abstract in enumerate(abstracts[:5]):
                    summary = generate_summary(abstract)
                    st.write(f"{i+1}. {summary}")
                keywords = extract_keywords(records)
                if keywords:
                    # Typo fix: header previously read "Key of ...".
                    st.subheader("Keywords of this researcher's papers")
                    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(' '.join(keywords))
                    # Pass an explicit Figure to st.pyplot: handing it the
                    # global pyplot module is deprecated in recent Streamlit.
                    fig = plt.figure(figsize=(10, 5))
                    plt.imshow(wordcloud, interpolation='bilinear')
                    plt.axis('off')
                    st.pyplot(fig)
                else:
                    st.write("No keywords found.")
                # Show the three most recent journals with PubMed links.
                journals = extract_journals(records)
                if journals:
                    st.subheader("Last Journals Published In")
                    for journal, link in journals[:3]:
                        st.markdown(f"[{journal}]({link})")
                else:
                    st.write("No journal information found.")
            else:
                st.write("No abstracts found.")
        else:
            st.write("No records found.")
    else:
        st.write("Please enter both email and researcher name.")