plug committed on
Commit
2c12345
1 Parent(s): b519309

Update phiWorker.js

Browse files
Files changed (1) hide show
  1. phiWorker.js +115 -66
phiWorker.js CHANGED
@@ -1,4 +1,4 @@
1
- import init, { Model } from "./build/m.js";
2
 
3
  function fixTwo(x) { return Math.floor(x * 100) / 100 }
4
 
@@ -10,62 +10,109 @@ function humanSize(size) {
10
  return `${fixTwo(size/1e12)}tb`
11
  }
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  let lastSend = 0
 
 
14
 
15
  async function fetchArrayBuffer(url) {
16
- const cacheName = "phi-mixformer-candle-cache";
17
- const cache = await caches.open(cacheName);
18
- const cachedResponse = await cache.match(url);
19
  if (cachedResponse) {
20
- const data = await cachedResponse.arrayBuffer();
21
- return new Uint8Array(data);
22
  }
23
- const res = await fetch(url, { cache: "force-cache" });
24
  while (!res.body) { }
25
- const reader = res.body.getReader();
26
- const contentLength = +(res.headers.get('Content-Length') ?? 0);
27
- let receivedLength = 0;
28
- let chunks = [];
29
  while (true) {
30
- const { done, value } = await reader.read();
31
  if (done) {
32
- break;
33
  }
34
- chunks.push(value);
35
- receivedLength += value.length;
36
- let downloadMessage = `Downloading... ${fixTwo((receivedLength / contentLength) * 100)}% (${humanSize(Math.floor(receivedLength * 100) / 100)})\nLink: ${url}\nTotal size: ${humanSize(fixTwo(contentLength))}`
37
  if(Date.now() - lastSend > 250) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  self.postMessage({ status: "loading", message: downloadMessage })
39
- console.log(downloadMessage)
40
  lastSend = Date.now()
41
  }
42
  }
43
- let chunksAll = new Uint8Array(receivedLength);
44
- let position = 0;
45
  for (let chunk of chunks) {
46
- chunksAll.set(chunk, position);
47
- position += chunk.length;
48
  }
49
- cache.put(url, new Response(chunksAll));
50
- return chunksAll;
51
  }
52
 
53
  async function concatenateArrayBuffers(urls) {
54
- const arrayBuffers = await Promise.all(urls.map(url => fetchArrayBuffer(url)));
55
 
56
- let totalLength = arrayBuffers.reduce((acc, arrayBuffer) => acc + arrayBuffer.byteLength, 0);
57
- let concatenatedBuffer = new Uint8Array(totalLength);
58
 
59
- let offset = 0;
60
  arrayBuffers.forEach(buffer => {
61
- concatenatedBuffer.set(new Uint8Array(buffer), offset);
62
- offset += buffer.byteLength;
63
- });
64
- return concatenatedBuffer;
65
  }
66
 
67
  class Phi {
68
- static instance = {};
69
 
70
  static async getInstance(
71
  weightsURL,
@@ -76,36 +123,36 @@ class Phi {
76
  ) {
77
  // load individual modelID only once
78
  if (!this.instance[modelID]) {
79
- await init();
80
 
81
- self.postMessage({ status: "loading", message: "Loading Model" });
82
  const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
83
  await Promise.all([
84
  weightsURL instanceof Array ? concatenateArrayBuffers(weightsURL) : fetchArrayBuffer(weightsURL),
85
  fetchArrayBuffer(tokenizerURL),
86
  fetchArrayBuffer(configURL),
87
- ]);
88
 
89
  this.instance[modelID] = new Model(
90
  weightsArrayU8,
91
  tokenizerArrayU8,
92
  configArrayU8,
93
  quantized
94
- );
95
  }
96
- return this.instance[modelID];
97
  }
98
  }
99
 
100
- let controller = null;
101
  self.addEventListener("message", (event) => {
102
  if (event.data.command === "start") {
103
- controller = new AbortController();
104
- generate(event.data);
105
  } else if (event.data.command === "abort") {
106
- controller.abort();
107
  }
108
- });
109
 
110
  async function generate(data) {
111
  const {
@@ -120,18 +167,19 @@ async function generate(data) {
120
  repeatPenalty,
121
  seed,
122
  maxSeqLen,
123
- } = data;
 
124
  try {
125
- self.postMessage({ status: "loading", message: "Starting Phi" });
126
  const model = await Phi.getInstance(
127
  weightsURL,
128
  modelID,
129
  tokenizerURL,
130
  configURL,
131
  quantized
132
- );
133
 
134
- self.postMessage({ status: "loading", message: "Initializing model" });
135
  const firstToken = model.init_with_prompt(
136
  prompt,
137
  temp,
@@ -139,13 +187,13 @@ async function generate(data) {
139
  repeatPenalty,
140
  64,
141
  BigInt(seed)
142
- );
143
- const seq_len = 2048;
144
 
145
- let sentence = firstToken;
146
- let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1;
147
- let startTime = performance.now();
148
- let tokensCount = 0;
149
  while (tokensCount < maxTokens) {
150
  await new Promise(async (resolve) => {
151
  if (controller && controller.signal.aborted) {
@@ -153,22 +201,23 @@ async function generate(data) {
153
  status: "aborted",
154
  message: "Aborted",
155
  output: prompt + sentence,
156
- });
157
- return;
158
  }
159
- const token = await model.next_token();
160
- if (token === "<|endoftext|>") {
 
161
  self.postMessage({
162
  status: "complete",
163
  message: "complete",
164
  output: prompt + sentence,
165
- });
166
- return;
167
  }
168
  const tokensSec =
169
- ((tokensCount + 1) / (performance.now() - startTime)) * 1000;
170
 
171
- sentence += token;
172
  self.postMessage({
173
  status: "generating",
174
  message: "Generating token",
@@ -177,17 +226,17 @@ async function generate(data) {
177
  totalTime: performance.now() - startTime,
178
  tokensSec,
179
  prompt: prompt,
180
- });
181
- setTimeout(resolve, 0);
182
- });
183
- tokensCount++;
184
  }
185
  self.postMessage({
186
  status: "complete",
187
  message: "complete",
188
  output: prompt + sentence,
189
- });
190
  } catch (e) {
191
- self.postMessage({ error: e });
192
  }
193
  }
 
1
+ import init, { Model } from "./build/m.js"
2
 
3
/**
 * Truncate a number to two decimal places (no rounding).
 * @param {number} x
 * @returns {number} x with everything past the hundredths digit dropped
 */
function fixTwo(x) {
  const hundredths = Math.floor(x * 100)
  return hundredths / 100
}
4
 
 
10
  return `${fixTwo(size/1e12)}tb`
11
  }
12
 
13
/**
 * Format a duration in seconds as a human-readable string, e.g.
 * "1 day 2 hours 5 minutes 3 seconds". Zero-count units are omitted.
 * @param {number} seconds - duration in seconds; may be fractional
 * @returns {string} formatted duration; "0 seconds" for zero, negative,
 *   or non-finite input (the remaining-time estimate can be Infinity/NaN
 *   when the download rate is still unknown)
 */
function humanTime(seconds) {
  // Guard non-finite/negative input up front; the original cascade produced
  // NaN counts (and an empty string) for Infinity.
  if (!Number.isFinite(seconds) || seconds <= 0) return '0 seconds'

  // Unit sizes in seconds, largest first.
  const units = [
    ['year', 31536e3],
    ['month', 2592e3],
    ['day', 864e2],
    ['hour', 36e2],
    ['minute', 60],
    ['second', 1],
  ]

  const parts = []
  let rem = Math.floor(seconds)
  for (const [name, size] of units) {
    const count = Math.floor(rem / size)
    rem -= count * size
    if (count > 0) parts.push(`${count} ${name}${count === 1 ? '' : 's'}`)
  }
  // Fix: the original tested `seconds > 0` for the seconds segment, which
  // emitted "0 seconds" for any exact multiple of a minute; it also left a
  // trailing space after every segment. join() avoids both.
  return parts.length > 0 ? parts.join(' ') : '0 seconds'
}
43
+
44
// Timestamp (ms since epoch) of the most recent progress postMessage;
// fetchArrayBuffer uses it to throttle updates to at most one per 250ms.
let lastSend = 0
// Previous remaining-time estimate (seconds); fetchArrayBuffer uses it to
// damp upward spikes in the next estimate so the displayed ETA is steadier.
let lastTime = Infinity
// Rolling window of the last four receivedLength samples (bytes), taken
// ~250ms apart, from which the download speed is estimated.
let times = [0, 0, 0, 0]
47
 
48
/**
 * Fetch a URL as a Uint8Array, serving from the Cache Storage API when
 * possible and caching the result on a miss. While downloading, posts
 * throttled "loading" progress messages (percent, size, estimated time
 * remaining) to the main thread.
 * @param {string} url - resource to download
 * @returns {Promise<Uint8Array>} the full response body
 * @throws {Error} if the fetch fails or the response has no body
 */
async function fetchArrayBuffer(url) {
  const cacheName = "phi-mixformer-candle-cache"
  const cache = await caches.open(cacheName)
  const cachedResponse = await cache.match(url)
  if (cachedResponse) {
    const data = await cachedResponse.arrayBuffer()
    return new Uint8Array(data)
  }

  const res = await fetch(url, { cache: "force-cache" })
  if (!res.ok) throw new Error(`Failed to fetch ${url}: HTTP ${res.status}`)
  // Fix: the original `while (!res.body) { }` busy-wait could never make
  // res.body appear and would spin the worker forever; fail fast instead.
  if (!res.body) throw new Error(`Response for ${url} has no readable body`)

  const reader = res.body.getReader()
  // 0 when the server omits Content-Length; guarded below to avoid
  // Infinity% / NaN in the progress message.
  const contentLength = +(res.headers.get('Content-Length') ?? 0)
  let receivedLength = 0
  const chunks = []
  while (true) {
    const { done, value } = await reader.read()
    if (done) {
      break
    }
    chunks.push(value)
    receivedLength += value.length
    // Throttle progress messages to at most one per 250ms.
    if (Date.now() - lastSend > 250) {
      // Slide the rolling window of byte counts sampled ~250ms apart.
      times.push(receivedLength)
      times = times.slice(1)
      const deltas = [times[3] - times[2], times[2] - times[1], times[1] - times[0]]
      // Mean of the last three inter-sample deltas (the original called this
      // "median", but it is an average), scaled by 4 samples/second.
      const meanDelta = (deltas[0] + deltas[1] + deltas[2]) / 3
      const bytesPerSecond = meanDelta * 4
      const leftBytes = contentLength - receivedLength
      let leftTime = Math.abs(leftBytes / bytesPerSecond)
      if (!Number.isFinite(leftTime)) leftTime = 0 // unknown size or zero rate
      // Damp sudden upward spikes so the displayed ETA doesn't jump around.
      if (leftTime > lastTime * 1.5 && lastTime != 0) leftTime = lastTime * 1.2
      lastTime = leftTime
      const percent = contentLength > 0
        ? fixTwo((receivedLength / contentLength) * 100)
        : 0
      const downloadMessage = `Downloading... ${percent}% (${humanSize(Math.floor(receivedLength * 100) / 100)})
Estimated time remaining: ${humanTime(leftTime)} (may be inaccurate)
Total size: ${humanSize(fixTwo(contentLength))}
Download URL: ${url}`
      self.postMessage({ status: "loading", message: downloadMessage })
      lastSend = Date.now()
    }
  }

  // Reassemble the chunks into one contiguous buffer, cache it, return it.
  const chunksAll = new Uint8Array(receivedLength)
  let position = 0
  for (const chunk of chunks) {
    chunksAll.set(chunk, position)
    position += chunk.length
  }
  // Fix: await the cache write so a failure surfaces here instead of as an
  // unhandled rejection.
  await cache.put(url, new Response(chunksAll))
  return chunksAll
}
99
 
100
/**
 * Download every URL (via fetchArrayBuffer, in parallel) and join the
 * results into a single contiguous Uint8Array, preserving the order of
 * `urls`.
 * @param {string[]} urls - resources to download and concatenate
 * @returns {Promise<Uint8Array>} all bodies joined back-to-back
 */
async function concatenateArrayBuffers(urls) {
  const buffers = await Promise.all(urls.map(url => fetchArrayBuffer(url)))

  const total = buffers.reduce((sum, buffer) => sum + buffer.byteLength, 0)
  const joined = new Uint8Array(total)

  let cursor = 0
  for (const buffer of buffers) {
    joined.set(new Uint8Array(buffer), cursor)
    cursor += buffer.byteLength
  }
  return joined
}
113
 
114
  class Phi {
115
+ static instance = {}
116
 
117
  static async getInstance(
118
  weightsURL,
 
123
  ) {
124
  // load individual modelID only once
125
  if (!this.instance[modelID]) {
126
+ await init()
127
 
128
+ self.postMessage({ status: "loading", message: "Loading Model" })
129
  const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
130
  await Promise.all([
131
  weightsURL instanceof Array ? concatenateArrayBuffers(weightsURL) : fetchArrayBuffer(weightsURL),
132
  fetchArrayBuffer(tokenizerURL),
133
  fetchArrayBuffer(configURL),
134
+ ])
135
 
136
  this.instance[modelID] = new Model(
137
  weightsArrayU8,
138
  tokenizerArrayU8,
139
  configArrayU8,
140
  quantized
141
+ )
142
  }
143
+ return this.instance[modelID]
144
  }
145
  }
146
 
147
// AbortController for the in-flight generation; null until "start" arrives.
let controller = null

// Command dispatch from the main thread: "start" kicks off generation with a
// fresh controller, "abort" cancels the current one.
self.addEventListener("message", (event) => {
  if (event.data.command === "start") {
    controller = new AbortController()
    generate(event.data)
  } else if (event.data.command === "abort") {
    // Fix: an "abort" received before any "start" used to throw a TypeError
    // on the null controller; optional chaining makes it a no-op.
    controller?.abort()
  }
})
156
 
157
  async function generate(data) {
158
  const {
 
167
  repeatPenalty,
168
  seed,
169
  maxSeqLen,
170
+ stuff
171
+ } = data
172
  try {
173
+ self.postMessage({ status: "loading", message: "Starting Phi" })
174
  const model = await Phi.getInstance(
175
  weightsURL,
176
  modelID,
177
  tokenizerURL,
178
  configURL,
179
  quantized
180
+ )
181
 
182
+ self.postMessage({ status: "loading", message: "Initializing model" })
183
  const firstToken = model.init_with_prompt(
184
  prompt,
185
  temp,
 
187
  repeatPenalty,
188
  64,
189
  BigInt(seed)
190
+ )
191
+ const seq_len = 2048
192
 
193
+ let sentence = firstToken
194
+ let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1
195
+ let startTime = performance.now()
196
+ let tokensCount = 0
197
  while (tokensCount < maxTokens) {
198
  await new Promise(async (resolve) => {
199
  if (controller && controller.signal.aborted) {
 
201
  status: "aborted",
202
  message: "Aborted",
203
  output: prompt + sentence,
204
+ })
205
+ return
206
  }
207
+ const token = await model.next_token()
208
+ const terminates = `<|endoftext|>, <|user|>, <|system|>, <|assistant|>`.split(', ').map(e => e.trim())
209
+ if (terminates.includes(token)) {
210
  self.postMessage({
211
  status: "complete",
212
  message: "complete",
213
  output: prompt + sentence,
214
+ })
215
+ return
216
  }
217
  const tokensSec =
218
+ ((tokensCount + 1) / (performance.now() - startTime)) * 1000
219
 
220
+ sentence += token
221
  self.postMessage({
222
  status: "generating",
223
  message: "Generating token",
 
226
  totalTime: performance.now() - startTime,
227
  tokensSec,
228
  prompt: prompt,
229
+ })
230
+ setTimeout(resolve, 0)
231
+ })
232
+ tokensCount++
233
  }
234
  self.postMessage({
235
  status: "complete",
236
  message: "complete",
237
  output: prompt + sentence,
238
+ })
239
  } catch (e) {
240
+ self.postMessage({ error: e })
241
  }
242
  }