LeroyDyer committed
Commit ea64e1a
Parent: 9d54788

Update README.md

Files changed (1): README.md (+44, -1)
README.md CHANGED
@@ -44,4 +44,47 @@ models:
  merge_method: linear
  dtype: float16

- ```
+ ```
+
+
+ ```python
+
+ %pip install llama-index-embeddings-huggingface
+ %pip install llama-index-llms-llama-cpp
+ !pip install llama-index
+
+ from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
+ from llama_index.llms.llama_cpp import LlamaCPP
+ from llama_index.llms.llama_cpp.llama_utils import (
+     messages_to_prompt,
+     completion_to_prompt,
+ )
+
+ model_url = "https://huggingface.co/LeroyDyer/Mixtral_BaseModel-gguf/resolve/main/mixtral_basemodel.q8_0.gguf"
+
+ llm = LlamaCPP(
+     # you can pass in the URL to a GGUF model to download it automatically
+     model_url=model_url,
+     # optionally, you can set the path to a pre-downloaded model instead of model_url
+     model_path=None,
+     temperature=0.1,
+     max_new_tokens=256,
+     # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
+     context_window=3900,
+     # kwargs to pass to __call__()
+     generate_kwargs={},
+     # kwargs to pass to __init__()
+     # set to at least 1 to use the GPU (requires a GPU-enabled llama-cpp-python build)
+     model_kwargs={"n_gpu_layers": 1},
+     # transform inputs into Llama2 format
+     messages_to_prompt=messages_to_prompt,
+     completion_to_prompt=completion_to_prompt,
+     verbose=True,
+ )
+
+ prompt = input("Enter your prompt: ")
+ response = llm.complete(prompt)
+ print(response.text)
+ ```
+
+ Works great!
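
Note (not part of this commit): the snippet imports `SimpleDirectoryReader` and `VectorStoreIndex` without using them, and it installs `llama-index-embeddings-huggingface`, which suggests a retrieval setup is the intended next step. A minimal sketch of that usage, assuming a local `./data` folder of documents and the `BAAI/bge-small-en-v1.5` embedding model (both are assumptions, not taken from the commit):

```python
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# assumption: any sentence-embedding model would do; bge-small-en-v1.5 is a common default
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# assumption: plain-text documents live in ./data
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

# reuse the LlamaCPP `llm` constructed above for answer synthesis
query_engine = index.as_query_engine(llm=llm)
print(query_engine.query("Summarize the documents."))
```

This pairs with the `%pip install llama-index-embeddings-huggingface` line already present in the snippet.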