nisten committed on
Commit
da062f7
1 Parent(s): 18f7330

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -5
README.md CHANGED
@@ -39,30 +39,30 @@ aria2c -x 9 -o deepseek_0628_cpu_optimized_iq4xm-00004-of-00004.gguf \
39
  >./llama-cli --temp 0.4 -m deepseek_0628_cpu_optimized_iq4xm-00001-of-00004.gguf -c 32000 -co -cnv -i -f prompt.txt
40
  >```
41
 
42
- ```typescript
43
  //PERPLEXITY BENCHMARKS,
44
  //./llama-perplexity -m ~/r/deepseek_0628_cpu-iq4xm-00001-of-00002.gguf --chunks 12 -f ~/wiki.test.raw
45
  //the 4-bit iq4xm gets the best perplexity, but the difference is likely within the margin of error
46
 
47
- :deepseek-0628-bf16-00001-of-00011.gguf
48
  Model size: 440 GiB
49
  perplexity: 735.50 seconds per pass - ETA 36.77 minutes
50
  [1]2.4827,[2]3.3887,[3]2.9470,[4]3.4768,[5]3.9012,[6]4.5128,[7]4.7533,[8]4.9550,[9]5.2863,[10]5.6824,[11]5.7541,[12]5.8734,
51
  Final estimate: PPL = 5.8734 +/- 0.26967
52
 
53
- :deepseek_0628_cpu-iq1m-00001-of-00002.gguf
54
  model size = 73.27 GiB (2.67 BPW)
55
  perplexity: 96.54 seconds per pass - ETA 4.82 minutes
56
  [1]3.4340,[2]4.5503,[3]4.0118,[4]4.5807,[5]4.9540,[6]5.7353,[7]5.9430,[8]6.1320,[9]6.5690,[10]6.9401,[11]7.0363,[12]7.1857,
57
  Final estimate: PPL = 7.1857 +/- 0.33585
58
 
59
- :deepseek_0628_cpu_iq1_s-00001-of-00002.gguf
60
  model size = 58.42 GiB (2.13 BPW)
61
  perplexity: 94.39 seconds per pass - ETA 4.72 minutes
62
  [1]3.3257,[2]4.7059,[3]4.3868,[4]4.8870,[5]5.3162,[6]6.0753,[7]6.2931,[8]6.5085,[9]6.8913,[10]7.3148,[11]7.4235,[12]7.6295,
63
  Final estimate: PPL = 7.6295 +/- 0.36143
64
 
65
- :deepseek_0628_cpu_optimized_iq4xm-00001-of-00004.gguf
66
  size: 131Gb
67
  perplexity: 59.49 seconds per pass - ETA 2.97 minutes
68
  [1]2.4954,[2]3.3941,[3]2.9607,[4]3.4755,[5]3.8889,[6]4.5036,[7]4.7364,[8]4.9401,[9]5.2737,[10]5.6651,[11]5.7354,[12]5.8620,
 
39
  >./llama-cli --temp 0.4 -m deepseek_0628_cpu_optimized_iq4xm-00001-of-00004.gguf -c 32000 -co -cnv -i -f prompt.txt
40
  >```
41
 
42
+ ```verilog
43
  //PERPLEXITY BENCHMARKS,
44
  //./llama-perplexity -m ~/r/deepseek_0628_cpu-iq4xm-00001-of-00002.gguf --chunks 12 -f ~/wiki.test.raw
45
  //the 4-bit iq4xm gets the best perplexity, but the difference is likely within the margin of error
46
 
47
+ deepseek-0628-bf16-00001-of-00011.gguf
48
  Model size: 440 GiB
49
  perplexity: 735.50 seconds per pass - ETA 36.77 minutes
50
  [1]2.4827,[2]3.3887,[3]2.9470,[4]3.4768,[5]3.9012,[6]4.5128,[7]4.7533,[8]4.9550,[9]5.2863,[10]5.6824,[11]5.7541,[12]5.8734,
51
  Final estimate: PPL = 5.8734 +/- 0.26967
52
 
53
+ deepseek_0628_cpu-iq1m-00001-of-00002.gguf
54
  model size = 73.27 GiB (2.67 BPW)
55
  perplexity: 96.54 seconds per pass - ETA 4.82 minutes
56
  [1]3.4340,[2]4.5503,[3]4.0118,[4]4.5807,[5]4.9540,[6]5.7353,[7]5.9430,[8]6.1320,[9]6.5690,[10]6.9401,[11]7.0363,[12]7.1857,
57
  Final estimate: PPL = 7.1857 +/- 0.33585
58
 
59
+ deepseek_0628_cpu_iq1_s-00001-of-00002.gguf
60
  model size = 58.42 GiB (2.13 BPW)
61
  perplexity: 94.39 seconds per pass - ETA 4.72 minutes
62
  [1]3.3257,[2]4.7059,[3]4.3868,[4]4.8870,[5]5.3162,[6]6.0753,[7]6.2931,[8]6.5085,[9]6.8913,[10]7.3148,[11]7.4235,[12]7.6295,
63
  Final estimate: PPL = 7.6295 +/- 0.36143
64
 
65
+ deepseek_0628_cpu_optimized_iq4xm-00001-of-00004.gguf
66
  size: 131Gb
67
  perplexity: 59.49 seconds per pass - ETA 2.97 minutes
68
  [1]2.4954,[2]3.3941,[3]2.9607,[4]3.4755,[5]3.8889,[6]4.5036,[7]4.7364,[8]4.9401,[9]5.2737,[10]5.6651,[11]5.7354,[12]5.8620,