from llama_cpp import Llama

def generate_topic(prompt, response):
    # Load the quantized GGUF model, offloading 32 layers to the GPU.
    llm = Llama(model_path="./PMB-7b.Q6_K.gguf", n_ctx=690, n_threads=8, n_gpu_layers=32)

    # Instruction asking the model to summarize the exchange into a short topic.
    system_prompt = (
        "Based on the following interaction between a user and an AI assistant, "
        "generate a concise topic for the conversation in 2-6 words:\n\n"
        f"User: {prompt}\nAssistant: {response}\n\nTopic:"
    )

    topic = llm(
        system_prompt,
        max_tokens=10,       # a 2-6 word topic fits comfortably within 10 tokens
        temperature=0.7,
        stop=["\n"],         # stop at the first newline, not the literal backslash-n
        echo=False,          # return only the completion, not the prompt
    )
    return topic['choices'][0]['text'].strip()
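
# Example usage (a minimal sketch; the prompt/response strings below are
# illustrative placeholders, not taken from the original document):
if __name__ == "__main__":
    user_prompt = "How do I schedule a weekly backup on Linux?"
    assistant_reply = "You can add an entry with `crontab -e`, for example `0 3 * * 0 /usr/local/bin/backup.sh`."
    print(generate_topic(user_prompt, assistant_reply))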