from transformers import AutoModelForCausalLM, LlamaTokenizer
import torch

def generate_response(model, tokenizer, instruction, input_text,
                      temperature, top_p, top_k, repeat_penalty):
    # Build the instruction-style prompt the model was fine-tuned on.
    prompt = f'''### Instruction:
{instruction}
### Input:
{input_text}

### Response:'''

    input_ids = tokenizer.encode(prompt, return_tensors='pt')

    gen_parameters = {
        'do_sample': True,  # without this, temperature/top_p/top_k are ignored
        'temperature': temperature,
        'top_p': top_p,
        'top_k': top_k,
        'repetition_penalty': repeat_penalty,
        'max_new_tokens': 50,  # cap the continuation at 50 new tokens
    }
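
    # Variation (an assumption, not part of the original script): for
    # deterministic, repeatable output, disable sampling and let generate()
    # decode greedily instead.
    # gen_parameters = {'do_sample': False, 'max_new_tokens': 50}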

    # Inference only, so no gradients are needed.
    with torch.no_grad():
        output = model.generate(input_ids, **gen_parameters)
    response = tokenizer.decode(output[0], skip_special_tokens=True)

    # Strip the echoed prompt (decode() typically reproduces it verbatim at
    # the start of the output) so only the model's continuation is returned.
    return response[len(prompt):]
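
# A sketch of a streaming variant (an addition, not part of the original
# script): transformers' TextStreamer prints tokens to stdout as they are
# generated, which is handy for watching long responses; skip_prompt=True
# suppresses the echoed prompt. The function name and signature here are
# illustrative assumptions.
def generate_response_streaming(model, tokenizer, prompt, gen_parameters):
    from transformers import TextStreamer  # deferred import: only needed here

    streamer = TextStreamer(tokenizer, skip_prompt=True)
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    with torch.no_grad():
        # Tokens are printed as they arrive; the full id sequence is still
        # returned for callers that want to decode it themselves.
        return model.generate(input_ids, streamer=streamer, **gen_parameters)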

def main():
    MODEL_NAME = 'Yoko-7B-Japanese-v1'

    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)
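
    # Optional (an assumption, not in the original): on a CUDA machine the
    # model can be loaded in half precision and moved to the GPU, e.g.
    #   model = AutoModelForCausalLM.from_pretrained(
    #       MODEL_NAME, torch_dtype=torch.float16).to('cuda')
    # in which case input_ids inside generate_response() must be moved to the
    # same device (input_ids.to(model.device)) before model.generate() runs.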

    instruction = '次の問題を回答してください。'  # "Please answer the following question."
    input_text = '東京は何国の都市ですか?'  # "Which country is Tokyo a city of?"

    # Sampling hyperparameters passed through to model.generate().
    temperature = 0.6
    top_p = 0.7
    top_k = 40
    repeat_penalty = 1.1

    response = generate_response(model, tokenizer, instruction, input_text,
                                 temperature, top_p, top_k, repeat_penalty)
    print('response: ' + response)


if __name__ == '__main__':
    main()