import numpy as np import os import re import datetime import time import openai, tenacity import argparse import configparser import json import tiktoken from get_paper_from_pdf import Paper import gradio # 定义Reviewer类 class Reviewer: # 初始化方法,设置属性 def __init__(self, api, review_format, paper_pdf, language): self.api = api self.review_format = review_format self.language = language self.max_token_num = 4096 self.encoding = tiktoken.get_encoding("gpt2") def validateTitle(self, title): # 修正论文的路径格式 rstr = r"[\/\\\:\*\?\"\<\>\|]" # '/ \ : * ? " < > |' new_title = re.sub(rstr, "_", title) # 替换为下划线 return new_title def review_by_chatgpt(self, paper_list): for paper_index, paper in enumerate(paper_list): sections_of_interest = self.stage_1(paper) # extract the essential parts of the paper text = '' text += 'Title:' + paper.title + '. ' text += 'Abstract: ' + paper.section_texts['Abstract'] intro_title = next((item for item in paper.section_names if 'ntroduction' in item.lower()), None) if intro_title is not None: text += 'Introduction: ' + paper.section_texts[intro_title] # Similar for conclusion section conclusion_title = next((item for item in paper.section_names if 'onclusion' in item), None) if conclusion_title is not None: text += 'Conclusion: ' + paper.section_texts[conclusion_title] for heading in sections_of_interest: if heading in paper.section_names: text += heading + ': ' + paper.section_texts[heading] chat_review_text, total_token_used = self.chat_review(text=text) return chat_review_text, total_token_used def stage_1(self, paper): htmls = [] text = '' text += 'Title: ' + paper.title + '. ' text += 'Abstract: ' + paper.section_texts['Abstract'] openai.api_key = self.api messages = [ {"role": "system", "content": f"You are a professional reviewer. " f"I will give you a paper. You need to review this paper and discuss the novelty and originality of ideas, correctness, clarity, the significance of results, potential impact and quality of the presentation. " f"Due to the length limitations, I am only allowed to provide you the abstract, introduction, conclusion and at most two sections of this paper." f"Now I will give you the title and abstract and the headings of potential sections. " f"You need to reply at most two headings. Then I will further provide you the full information, includes aforementioned sections and at most two sections you called for.\n\n" f"Title: {paper.title}\n\n" f"Abstract: {paper.section_texts['Abstract']}\n\n" f"Potential Sections: {paper.section_names[2:-1]}\n\n" f"Follow the following format to output your choice of sections:" f"{{chosen section 1}}, {{chosen section 2}}\n\n"}, {"role": "user", "content": text}, ] response = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=messages, ) result = '' for choice in response.choices: result += choice.message.content # print(result) return result.split(',') @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10), stop=tenacity.stop_after_attempt(5), reraise=True) def chat_review(self, text): openai.api_key = self.api # 读取api review_prompt_token = 1000 text_token = len(self.encoding.encode(text)) input_text_index = int(len(text)*(self.max_token_num-review_prompt_token)/text_token) input_text = "This is the paper for your review:" + text[:input_text_index] messages=[ {"role": "system", "content": "You are a professional reviewer. Now I will give you a paper. You need to give a complete review opinion according to the following requirements and format:"+ self.review_format +" Please answer in {}.".format(self.language)}, {"role": "user", "content": input_text}, ] response = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=messages, ) result = '' for choice in response.choices: result += choice.message.content print("********"*10) print(result) print("********"*10) print("prompt_token_used:", response.usage.prompt_tokens) print("completion_token_used:", response.usage.completion_tokens) print("total_token_used:", response.usage.total_tokens) print("response_time:", response.response_ms/1000.0, 's') return result, response.usage.total_tokens def main(api, review_format, paper_pdf, language): start_time = time.time() if not api or not review_format or not paper_pdf: return "请输入完整内容!" # 判断PDF文件 else: paper_list = [Paper(path=paper_pdf)] # 创建一个Reader对象 reviewer1 = Reviewer(api, review_format, paper_pdf, language) # 开始判断是路径还是文件: comments, total_token_used = reviewer1.review_by_chatgpt(paper_list=paper_list) time_used = time.time() - start_time output2 ="使用token数:"+ str(total_token_used)+"\n花费时间:"+ str(time_used) +"秒" return comments, output2 ######################################################################################################## # 标题 title = "ChatReviewer" # 描述 description = '''