"""Minimal demo: load a lyraLLaMA checkpoint and greedily generate one reply.

Loads a 13B fp16 single-GPU model from ./models, wraps a Chinese prompt in
the <human>/<bot> chat template, and prints the generated text.
"""
from lyra_llama import lyraLLaMA

# NOTE(review): path spells "lamma" (sic) — presumably it matches the actual
# on-disk file name; confirm before "fixing" the typo.
model_path = "./models/lamma-13b-1-gpu-fp16.bin"
tokenizer_path = "./models/"
dtype = 'fp16'  # weights precision expected by the checkpoint above

prompt = "列出3个不同的机器学习算法,并说明它们的适用范围"
max_output_length = 512  # max number of tokens to generate

model = lyraLLaMA(model_path, tokenizer_path, dtype)

# Wrap the raw question in the chat template the model was tuned on.
prompt = '<human>:' + prompt.strip() + '\n<bot>:'

bs = 1  # batch size: the same prompt is replicated bs times
prompts = [prompt] * bs

# do_sample=False selects greedy decoding, so top_k/top_p/temperature are
# effectively inert here; they matter only if sampling is enabled.
output_texts = model.generate(
    prompts, output_length=max_output_length,
    top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
print(output_texts)