Text Generation · Transformers · PyTorch · English · gpt_neox · text-generation-inference · Inference Endpoints
zhangce committed · Commit 9c5de29 · 1 Parent(s): 6f2c961

Update README.md

Files changed (1)
  1. README.md +2 -2
README.md CHANGED
@@ -28,7 +28,7 @@ tokenizer = AutoTokenizer.from_pretrained("togethercomputer/RedPajama-Chat-INCITE-6.9B-v1")
 model = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-Chat-INCITE-6.9B-v1", torch_dtype=torch.float16)
 model = model.to('cuda:0')
 # infer
-inputs = tokenizer("Hello", return_tensors='pt').to(model.device)
+inputs = tokenizer("<human>: Hello!\n<bot>:", return_tensors='pt').to(model.device)
 outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
 output_str = tokenizer.decode(outputs[0])
 print(output_str)
@@ -44,7 +44,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 tokenizer = AutoTokenizer.from_pretrained("togethercomputer/RedPajama-Chat-INCITE-6.9B-v1")
 model = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-Chat-INCITE-6.9B-v1", device_map="auto", load_in_8bit=True)
 # infer
-inputs = tokenizer("Hello", return_tensors='pt').to(model.device)
+inputs = tokenizer("<human>: Hello!\n<bot>:", return_tensors='pt').to(model.device)
 outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
 output_str = tokenizer.decode(outputs[0])
 print(output_str)
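
For reference, here is the updated fp16 example from the first hunk assembled into a complete script: a minimal sketch assuming a CUDA GPU with torch and transformers installed, with the repo id folded into a MODEL constant for brevity (the constant is not in the README). The prompt uses the <human>:/<bot>: turn format that this commit introduces, which matches the model's chat fine-tuning.

# Minimal sketch of the updated inference example; assumes a CUDA GPU.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL = "togethercomputer/RedPajama-Chat-INCITE-6.9B-v1"

tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.float16)
model = model.to('cuda:0')

# The chat model expects <human>: / <bot>: turns, not a bare string.
prompt = "<human>: Hello!\n<bot>:"
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
print(tokenizer.decode(outputs[0]))

The second hunk's 8-bit variant is the same script with the two model-loading lines replaced by AutoModelForCausalLM.from_pretrained(MODEL, device_map="auto", load_in_8bit=True); that path additionally requires the accelerate and bitsandbytes packages.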