# Retrieval-augmented chat: answer questions with context pulled from the
# DuckDuckGo Instant Answer API (falling back to a txtai Wikipedia index),
# summarized as JSON by a TinyLlama RAG model.
import requests
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from txtai import Embeddings

# Load the prebuilt Wikipedia embeddings index from the Hugging Face Hub.
embeddings = Embeddings()
embeddings.load(provider="huggingface-hub", container="neuml/txtai-wikipedia")
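
# Each search hit from the Wikipedia index includes a "text" field holding the
# matching passage; handle_query() below joins those passages into a context
# string when DuckDuckGo has no instant answer.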


def query_duckduckgo(query):
    """Query DuckDuckGo API for a given search term and return the results."""
    url = "https://api.duckduckgo.com/"
    params = {
        'q': query,
        'format': 'json',
        'pretty': '1',
        'no_html': '1'
    }

    try:
        response = requests.get(url, params=params)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        print(f"An error occurred: {e}")
        return None
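
# DuckDuckGo's Instant Answer JSON carries a short topic summary in its
# "AbstractText" field; when that field is empty, the script falls back to the
# Wikipedia embeddings index.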


def handle_query(user_input):
    """Retrieve context for the user input from DuckDuckGo, falling back to
    the Wikipedia embeddings index, and return it."""
    result = query_duckduckgo(user_input)
    if result and 'AbstractText' in result and result['AbstractText']:
        print(result['AbstractText'])
        return result['AbstractText']
    else:
        print("DuckDuckGo failed. Falling back to Wikipedia.")
        context = "\n".join([x["text"] for x in embeddings.search(user_input)])
        print("Results from Wikipedia:\n", context)
        return context
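
# Example (illustrative query; any search term works):
#   context = handle_query("retrieval augmented generation")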


# Load the TinyLlama RAG agent model and tokenizer on CPU.
model_path = "Josephgflowers/TinyLlama-Cinder-Agent-Rag"

device = torch.device("cpu")
model = AutoModelForCausalLM.from_pretrained(model_path, ignore_mismatched_sizes=True).to(device)

print(model)
total_params = sum(p.numel() for p in model.parameters())
print("Total number of parameters: ", total_params)

sequence_length = 2048

tokenizer = AutoTokenizer.from_pretrained(model_path)
stop_token = 2  # </s> end-of-sequence token id for the Llama tokenizer


def chat_with_model(prompt_text, stop_token, model, tokenizer):
    """Generate a completion for prompt_text and return only the new text."""
    encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors="pt").to(device)

    output_sequences = model.generate(
        input_ids=encoded_prompt,
        max_new_tokens=256,
        temperature=0.1,
        repetition_penalty=1.2,
        top_k=20,
        top_p=0.9,
        do_sample=True,
        num_return_sequences=1,
        eos_token_id=stop_token
    )

    # Decode the full sequence, then drop the prompt to keep only the response.
    generated_sequence = output_sequences[0].tolist()
    text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)
    response_text = text[len(prompt_text):].strip()

    return response_text
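
# Sampling is deliberately conservative (temperature=0.1, top_p=0.9, top_k=20,
# repetition_penalty=1.2) so answers stay close to the retrieved context and
# avoid verbatim repetition.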


conversation_history = ''

input_mode = 'text'
character_name = '<|user|>'

# Interactive loop: up to 20 turns of question -> retrieval -> summarization.
num_chat = 1
while num_chat <= 20:
    question = input(f"{character_name}: ")
    user_input = question

    # Retrieve context from DuckDuckGo or the Wikipedia index.
    context = handle_query(user_input)
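
    # Build the RAG prompt: <|system|>, <|data|>, <|user|>, and <|assistant|>
    # sections, each closed with </s>.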
    prompt_text = f"""
<s>
<|system|>
You will be given documentation as context to answer a user's question. You are an expert at summarization. Pay close attention to the key concepts. Use only information from the Context in your answer.
</s>
<|data|>
Context:
{context}
-Use only the above context to answer the question.
</s>
<|user|>
Here is information on "{question}". Extract only the above information into topic, category, keywords, and summary formatted in JSON. Think through the most critical information to provide, then respond with the JSON object of topic, category, keywords, and summary.
</s>
<|assistant|>

"""

    # Generate the answer and strip any leftover special tokens.
    response_text = chat_with_model(prompt_text, stop_token, model, tokenizer)
    response_text = response_text.replace('<s>', '')
    response_text = response_text.split('</s>', 1)[0]

    print(f"\n______________________________________________\n\nAssistant: {response_text}")

    # Keep a rolling window of the conversation so it does not grow without bound.
    conversation_history += f"{prompt_text}{response_text}</s>\n"
    if len(conversation_history) > 2048:
        conversation_history = conversation_history[1024:]

    num_chat += 1