Text Generation
Transformers
PyTorch
llama
text-generation-inference
Inference Endpoints

Set `use_cache` to `true` in config.json for faster decoding (reuses cached key/value states during generation)

#27
by zxcvvxcz - opened
Files changed (1) hide show
  1. config.json +1 -1
config.json CHANGED
@@ -21,6 +21,6 @@
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "float16",
23
  "transformers_version": "4.29.2",
24
- "use_cache": false,
25
  "vocab_size": 32000
26
  }
 
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "float16",
23
  "transformers_version": "4.29.2",
24
+ "use_cache": true,
25
  "vocab_size": 32000
26
  }