from typing import List
from app.rag.llm import LLM
# The LLM class uses the OPENAI_API_KEY env var as the default api_key.
async def summarize_it(question: str,
                       search_results: List[str],
                       model: str = 'gpt-3.5-turbo-0125',
                       ) -> str:
    # TODO: turn this into a class if time allows
    llm = LLM(model)
    system_message = """
    You are able to quickly understand a few paragraphs, or even short quips, generated by a vector search system,
    and generate a one-line summary.
    """
    searches = "\n".join([f"Search result {i}: {v}" for i, v in enumerate(search_results, 1)])
    user_prompt = f"""
    Use the context below, enclosed in triple backticks, to answer the question.
    The context comes from a vector search over a vector database built from the company's documents,
    so you can assume the context is accurate.

    Context:
    ```
    {searches}
    ```
    Question:
    {question}
    ------------------------
    1. If the context is not relevant to the question, simply say 'Irrelevant content' and nothing else.
       Pay great attention to making sure your answer is relevant to the question and the context.
       (For instance, never answer about a topic that is not explicitly mentioned in the question.)
    2. Using any external knowledge or resources to answer the question is forbidden.
    3. Generate a ONE-LINE summary within the limits of the context and the question.
    4. Avoid mentioning 'search results' in the answer.
       Instead, incorporate the information from the search results into the answer.
    5. Create a clean answer: no backticks and no leading newline, for instance.
    ------------------------
    Answer:
    """
    response = await llm.chat_completion(system_message=system_message,
                                         user_message=user_prompt,
                                         temperature=0.01,  # let's not allow the model to be creative
                                         stream=False,
                                         raw_response=False)
    return response
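

# Minimal usage sketch (an illustration, not part of the original module): it
# assumes OPENAI_API_KEY is set in the environment, as the LLM class expects,
# and uses hypothetical strings standing in for real vector-search output.
if __name__ == "__main__":
    import asyncio

    results = [
        "The company was founded in 2012 in Austin, Texas.",
        "Its headquarters moved to Denver in 2019.",
    ]
    summary = asyncio.run(summarize_it("Where is the company headquartered?", results))
    print(summary)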