martia_f commited on
Commit
b519309
1 Parent(s): 1bb6980

Download indicator + Removed yapping

Browse files
Files changed (3) hide show
  1. .DS_Store +0 -0
  2. index.html +1 -1
  3. phiWorker.js +167 -129
.DS_Store ADDED
Binary file (6.15 kB). View file
 
index.html CHANGED
@@ -184,7 +184,7 @@ Very polite review:`,
184
  switch (data.status) {
185
  case "loading":
186
  outStatus.hidden = false;
187
- outStatus.textContent = data.message;
188
  outGen.hidden = true;
189
  outCounter.hidden = true;
190
  break;
 
184
  switch (data.status) {
185
  case "loading":
186
  outStatus.hidden = false;
187
+ outStatus.innerHTML = data.message.replaceAll('\n', '<br>\n');
188
  outGen.hidden = true;
189
  outCounter.hidden = true;
190
  break;
phiWorker.js CHANGED
@@ -1,155 +1,193 @@
1
  import init, { Model } from "./build/m.js";
2
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  async function fetchArrayBuffer(url) {
4
- const cacheName = "phi-mixformer-candle-cache";
5
- const cache = await caches.open(cacheName);
6
- const cachedResponse = await cache.match(url);
7
- if (cachedResponse) {
8
- const data = await cachedResponse.arrayBuffer();
9
- return new Uint8Array(data);
10
- }
11
- const res = await fetch(url, { cache: "force-cache" });
12
- cache.put(url, res.clone());
13
- return new Uint8Array(await res.arrayBuffer());
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  }
 
15
  async function concatenateArrayBuffers(urls) {
16
- const arrayBuffers = await Promise.all(urls.map(url => fetchArrayBuffer(url)));
17
 
18
- let totalLength = arrayBuffers.reduce((acc, arrayBuffer) => acc + arrayBuffer.byteLength, 0);
19
- let concatenatedBuffer = new Uint8Array(totalLength);
20
 
21
- let offset = 0;
22
- arrayBuffers.forEach(buffer => {
23
- concatenatedBuffer.set(new Uint8Array(buffer), offset);
24
- offset += buffer.byteLength;
25
- });
26
- return concatenatedBuffer;
27
  }
28
 
29
  class Phi {
30
- static instance = {};
31
-
32
- static async getInstance(
33
- weightsURL,
34
- modelID,
35
- tokenizerURL,
36
- configURL,
37
- quantized
38
- ) {
39
- // load individual modelID only once
40
- if (!this.instance[modelID]) {
41
- await init();
42
-
43
- self.postMessage({ status: "loading", message: "Loading Model" });
44
- const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
45
- await Promise.all([
46
- weightsURL instanceof Array ? concatenateArrayBuffers(weightsURL) : fetchArrayBuffer(weightsURL),
47
- fetchArrayBuffer(tokenizerURL),
48
- fetchArrayBuffer(configURL),
49
- ]);
50
-
51
- this.instance[modelID] = new Model(
52
- weightsArrayU8,
53
- tokenizerArrayU8,
54
- configArrayU8,
55
  quantized
56
- );
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  }
58
- return this.instance[modelID];
59
- }
60
  }
61
 
62
  let controller = null;
63
  self.addEventListener("message", (event) => {
64
- if (event.data.command === "start") {
65
- controller = new AbortController();
66
- generate(event.data);
67
- } else if (event.data.command === "abort") {
68
- controller.abort();
69
- }
70
  });
71
 
72
  async function generate(data) {
73
- const {
74
- weightsURL,
75
- modelID,
76
- tokenizerURL,
77
- configURL,
78
- quantized,
79
- prompt,
80
- temp,
81
- top_p,
82
- repeatPenalty,
83
- seed,
84
- maxSeqLen,
85
- } = data;
86
- try {
87
- self.postMessage({ status: "loading", message: "Starting Phi" });
88
- const model = await Phi.getInstance(
89
- weightsURL,
90
- modelID,
91
- tokenizerURL,
92
- configURL,
93
- quantized
94
- );
95
-
96
- self.postMessage({ status: "loading", message: "Initializing model" });
97
- const firstToken = model.init_with_prompt(
98
- prompt,
99
- temp,
100
- top_p,
101
- repeatPenalty,
102
- 64,
103
- BigInt(seed)
104
- );
105
- const seq_len = 2048;
106
-
107
- let sentence = firstToken;
108
- let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1;
109
- let startTime = performance.now();
110
- let tokensCount = 0;
111
- while (tokensCount < maxTokens) {
112
- await new Promise(async (resolve) => {
113
- if (controller && controller.signal.aborted) {
114
- self.postMessage({
115
- status: "aborted",
116
- message: "Aborted",
117
- output: prompt + sentence,
118
- });
119
- return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  }
121
- const token = await model.next_token();
122
- if (token === "<|endoftext|>") {
123
- self.postMessage({
124
  status: "complete",
125
  message: "complete",
126
  output: prompt + sentence,
127
- });
128
- return;
129
- }
130
- const tokensSec =
131
- ((tokensCount + 1) / (performance.now() - startTime)) * 1000;
132
-
133
- sentence += token;
134
- self.postMessage({
135
- status: "generating",
136
- message: "Generating token",
137
- token: token,
138
- sentence: sentence,
139
- totalTime: performance.now() - startTime,
140
- tokensSec,
141
- prompt: prompt,
142
  });
143
- setTimeout(resolve, 0);
144
- });
145
- tokensCount++;
146
  }
147
- self.postMessage({
148
- status: "complete",
149
- message: "complete",
150
- output: prompt + sentence,
151
- });
152
- } catch (e) {
153
- self.postMessage({ error: e });
154
- }
155
  }
 
1
  import init, { Model } from "./build/m.js";
2
 
3
/**
 * Truncate `x` to at most two decimal places (floors, so negatives round
 * toward -Infinity, matching the progress-display behavior).
 * @param {number} x
 * @returns {number}
 */
function fixTwo(x) {
  const scaled = Math.floor(x * 100);
  return scaled / 100;
}
4
+
5
/**
 * Format a byte count as a short human-readable string using decimal
 * (power-of-1000) units: "b", "kb", "mb", "gb", "tb".
 * @param {number} size - Byte count.
 * @returns {string} e.g. 1536 -> "1.53kb".
 */
function humanSize(size) {
  const scales = [
    [1e3, 1, "b"],
    [1e6, 1e3, "kb"],
    [1e9, 1e6, "mb"],
    [1e12, 1e9, "gb"],
  ];
  for (const [limit, divisor, unit] of scales) {
    if (size < limit) {
      return `${fixTwo(size / divisor)}${unit}`;
    }
  }
  return `${fixTwo(size / 1e12)}tb`;
}
12
+
13
// Timestamp (ms) of the last progress postMessage; throttles download
// progress updates to at most one every 250 ms.
let lastSend = 0

/**
 * Fetch `url` as a Uint8Array, caching the bytes in the Cache Storage API.
 * Streams the response body and posts throttled `loading` progress messages
 * to the main thread while downloading.
 *
 * @param {string} url - Resource to download.
 * @returns {Promise<Uint8Array>} The raw bytes of the response.
 * @throws {Error} When the HTTP response is not ok.
 */
async function fetchArrayBuffer(url) {
  const cacheName = "phi-mixformer-candle-cache";
  const cache = await caches.open(cacheName);
  const cachedResponse = await cache.match(url);
  if (cachedResponse) {
    const data = await cachedResponse.arrayBuffer();
    return new Uint8Array(data);
  }
  const res = await fetch(url, { cache: "force-cache" });
  // Don't cache (or silently return) error pages.
  if (!res.ok) {
    throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
  }
  // FIX: the previous `while (!res.body) { }` busy-waited forever when the
  // body stream was unavailable (res.body never changes). Fall back to a
  // plain arrayBuffer read instead.
  if (!res.body) {
    const data = new Uint8Array(await res.arrayBuffer());
    cache.put(url, new Response(data));
    return data;
  }
  const reader = res.body.getReader();
  const contentLength = +(res.headers.get('Content-Length') ?? 0);
  let receivedLength = 0;
  let chunks = [];
  while (true) {
    const { done, value } = await reader.read();
    if (done) {
      break;
    }
    chunks.push(value);
    receivedLength += value.length;
    // FIX: guard against a missing Content-Length header, which previously
    // produced "Infinity%" / NaN in the progress message (division by zero).
    const percent = contentLength > 0
      ? `${fixTwo((receivedLength / contentLength) * 100)}%`
      : "?%";
    const totalSize = contentLength > 0 ? humanSize(contentLength) : "unknown";
    const downloadMessage = `Downloading... ${percent} (${humanSize(receivedLength)})\nLink: ${url}\nTotal size: ${totalSize}`
    if (Date.now() - lastSend > 250) {
      self.postMessage({ status: "loading", message: downloadMessage })
      console.log(downloadMessage)
      lastSend = Date.now()
    }
  }
  // Reassemble the streamed chunks into one contiguous buffer.
  let chunksAll = new Uint8Array(receivedLength);
  let position = 0;
  for (let chunk of chunks) {
    chunksAll.set(chunk, position);
    position += chunk.length;
  }
  cache.put(url, new Response(chunksAll));
  return chunksAll;
}
52
+
53
/**
 * Download several URLs (via fetchArrayBuffer) in parallel and join their
 * bytes into a single Uint8Array, preserving the order of `urls`.
 * @param {string[]} urls
 * @returns {Promise<Uint8Array>}
 */
async function concatenateArrayBuffers(urls) {
  const parts = await Promise.all(urls.map((url) => fetchArrayBuffer(url)));
  const total = parts.reduce((sum, part) => sum + part.byteLength, 0);
  const joined = new Uint8Array(total);
  let cursor = 0;
  for (const part of parts) {
    joined.set(new Uint8Array(part), cursor);
    cursor += part.byteLength;
  }
  return joined;
}
66
 
67
/**
 * Lazy per-modelID cache of wasm Model instances. getInstance downloads the
 * weights/tokenizer/config once per modelID and reuses the constructed Model
 * on subsequent calls.
 */
class Phi {
  // modelID -> constructed Model
  static instance = {};

  static async getInstance(
    weightsURL,
    modelID,
    tokenizerURL,
    configURL,
    quantized
  ) {
    // Reuse a previously constructed model for this ID.
    const cached = this.instance[modelID];
    if (cached) {
      return cached;
    }
    await init();

    self.postMessage({ status: "loading", message: "Loading Model" });
    const weightsPromise = Array.isArray(weightsURL)
      ? concatenateArrayBuffers(weightsURL)
      : fetchArrayBuffer(weightsURL);
    const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
      await Promise.all([
        weightsPromise,
        fetchArrayBuffer(tokenizerURL),
        fetchArrayBuffer(configURL),
      ]);

    this.instance[modelID] = new Model(
      weightsArrayU8,
      tokenizerArrayU8,
      configArrayU8,
      quantized
    );
    return this.instance[modelID];
  }
}
99
 
100
// AbortController for the in-flight generation; replaced on every "start".
let controller = null;

// Commands from the main thread: "start" kicks off a generation run,
// "abort" cancels the current one via the shared AbortController.
self.addEventListener("message", (event) => {
  const { command } = event.data;
  switch (command) {
    case "start":
      controller = new AbortController();
      generate(event.data);
      break;
    case "abort":
      controller.abort();
      break;
  }
});
109
 
110
/**
 * Run token generation for one request from the main thread.
 *
 * Loads (or reuses) the model via Phi.getInstance, primes it with `prompt`,
 * then streams tokens back with `generating` messages until `maxTokens` is
 * reached, "<|endoftext|>" is produced, or the run is aborted. Always posts
 * a terminal message: "complete", "aborted", or an `{ error }` payload.
 *
 * @param {object} data - Message payload: { weightsURL, modelID, tokenizerURL,
 *   configURL, quantized, prompt, temp, top_p, repeatPenalty, seed, maxSeqLen }
 */
async function generate(data) {
  const {
    weightsURL,
    modelID,
    tokenizerURL,
    configURL,
    quantized,
    prompt,
    temp,
    top_p,
    repeatPenalty,
    seed,
    maxSeqLen,
  } = data;
  try {
    self.postMessage({ status: "loading", message: "Starting Phi" });
    const model = await Phi.getInstance(
      weightsURL,
      modelID,
      tokenizerURL,
      configURL,
      quantized
    );

    self.postMessage({ status: "loading", message: "Initializing model" });
    const firstToken = model.init_with_prompt(
      prompt,
      temp,
      top_p,
      repeatPenalty,
      64,
      BigInt(seed)
    );
    const seq_len = 2048;

    let sentence = firstToken;
    let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1;
    let startTime = performance.now();
    let tokensCount = 0;
    // FIX: the old loop wrapped this body in `new Promise(async (resolve) =>`
    // and returned from the abort / <|endoftext|> branches WITHOUT calling
    // resolve(), leaving the awaited promise pending forever (hung function,
    // leaked promise). Plain awaits with an explicit yield avoid that.
    while (tokensCount < maxTokens) {
      if (controller && controller.signal.aborted) {
        self.postMessage({
          status: "aborted",
          message: "Aborted",
          output: prompt + sentence,
        });
        return;
      }
      const token = await model.next_token();
      if (token === "<|endoftext|>") {
        self.postMessage({
          status: "complete",
          message: "complete",
          output: prompt + sentence,
        });
        return;
      }
      const tokensSec =
        ((tokensCount + 1) / (performance.now() - startTime)) * 1000;

      sentence += token;
      self.postMessage({
        status: "generating",
        message: "Generating token",
        token: token,
        sentence: sentence,
        totalTime: performance.now() - startTime,
        tokensSec,
        prompt: prompt,
      });
      tokensCount++;
      // Yield to the event loop so incoming "abort" messages get handled.
      await new Promise((resolve) => setTimeout(resolve, 0));
    }
    self.postMessage({
      status: "complete",
      message: "complete",
      output: prompt + sentence,
    });
  } catch (e) {
    self.postMessage({ error: e });
  }
}