Update README.md
Browse files
README.md
CHANGED
@@ -45,10 +45,13 @@ git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && make -j
|
|
45 |
>./llama-cli --temp 0.4 -m deepseek_0628_cpu_optimized_iq4xm-00001-of-00004.gguf -c 32000 -co -cnv -i -f prompt.txt
|
46 |
>```
|
47 |
|
|
|
|
|
|
|
|
|
|
|
48 |
```verilog
|
49 |
-
//
|
50 |
-
//./llama-perplexity -m ~/r/deepseek_0628_cpu-iq4xm-00001-of-00002.gguf --chunks 12 -f ~/wiki.test.raw
|
51 |
-
//the 4bit iq4xm gets best perplexity but it's likely just a rounding error
|
52 |
|
53 |
deepseek-0628-bf16-00001-of-00011.gguf
|
54 |
Model size: 440 GiB
|
@@ -56,6 +59,13 @@ perplexity: 735.50 seconds per pass - ETA 36.77 minutes
|
|
56 |
[1]2.4827,[2]3.3887,[3]2.9470,[4]3.4768,[5]3.9012,[6]4.5128,[7]4.7533,[8]4.9550,[9]5.2863,[10]5.6824,[11]5.7541,[12]5.8734,
|
57 |
Final estimate: PPL = 5.8734 +/- 0.26967
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
deepseek_0628_cpu-iq1m-00001-of-00002.gguf
|
60 |
model size = 73.27 GiB (2.67 BPW)
|
61 |
perplexity: 96.54 seconds per pass - ETA 4.82 minutes
|
@@ -67,12 +77,6 @@ model size = 58.42 GiB (2.13 BPW)
|
|
67 |
perplexity: 94.39 seconds per pass - ETA 4.72 minutes
|
68 |
[1]3.3257,[2]4.7059,[3]4.3868,[4]4.8870,[5]5.3162,[6]6.0753,[7]6.2931,[8]6.5085,[9]6.8913,[10]7.3148,[11]7.4235,[12]7.6295,
|
69 |
Final estimate: PPL = 7.6295 +/- 0.36143
|
70 |
-
|
71 |
-
deepseek_0628_cpu_optimized_iq4xm-00001-of-00004.gguf
|
72 |
-
size: 132.1 GiB
|
73 |
-
perplexity: 59.49 seconds per pass - ETA 2.97 minutes
|
74 |
-
[1]2.4954,[2]3.3941,[3]2.9607,[4]3.4755,[5]3.8889,[6]4.5036,[7]4.7364,[8]4.9401,[9]5.2737,[10]5.6651,[11]5.7354,[12]5.8620,
|
75 |
-
Final estimate: PPL = 5.8620 +/- 0.26853
|
76 |
```
|
77 |
|
78 |
>[!TIP]
|
|
|
45 |
>./llama-cli --temp 0.4 -m deepseek_0628_cpu_optimized_iq4xm-00001-of-00004.gguf -c 32000 -co -cnv -i -f prompt.txt
|
46 |
>```
|
47 |
|
48 |
+
### Perplexity benchmarks
|
49 |
+
|
50 |
+
```verilog
|
51 |
+
./llama-perplexity -m ~/r/deepseek_0628_cpu-iq4xm-00001-of-00002.gguf --chunks 12 -f ~/wiki.test.raw
|
52 |
+
```
|
53 |
```verilog
|
54 |
+
//the 4bit iq4xm gets better perplexity than bf16 lol but it's likely just a rounding error
|
|
|
|
|
55 |
|
56 |
deepseek-0628-bf16-00001-of-00011.gguf
|
57 |
Model size: 440 GiB
|
|
|
59 |
[1]2.4827,[2]3.3887,[3]2.9470,[4]3.4768,[5]3.9012,[6]4.5128,[7]4.7533,[8]4.9550,[9]5.2863,[10]5.6824,[11]5.7541,[12]5.8734,
|
60 |
Final estimate: PPL = 5.8734 +/- 0.26967
|
61 |
|
62 |
+
deepseek_0628_cpu_optimized_iq4xm-00001-of-00004.gguf
|
63 |
+
size: 132.1 GiB
|
64 |
+
perplexity: 59.49 seconds per pass - ETA 2.97 minutes
|
65 |
+
[1]2.4954,[2]3.3941,[3]2.9607,[4]3.4755,[5]3.8889,[6]4.5036,[7]4.7364,[8]4.9401,[9]5.2737,[10]5.6651,[11]5.7354,[12]5.8620,
|
66 |
+
Final estimate: PPL = 5.8620 +/- 0.26853
|
67 |
+
|
68 |
+
|
69 |
deepseek_0628_cpu-iq1m-00001-of-00002.gguf
|
70 |
model size = 73.27 GiB (2.67 BPW)
|
71 |
perplexity: 96.54 seconds per pass - ETA 4.82 minutes
|
|
|
77 |
perplexity: 94.39 seconds per pass - ETA 4.72 minutes
|
78 |
[1]3.3257,[2]4.7059,[3]4.3868,[4]4.8870,[5]5.3162,[6]6.0753,[7]6.2931,[8]6.5085,[9]6.8913,[10]7.3148,[11]7.4235,[12]7.6295,
|
79 |
Final estimate: PPL = 7.6295 +/- 0.36143
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
```
|
81 |
|
82 |
>[!TIP]
|