Update phiWorker.js

phiWorker.js  CHANGED  (+115 -66)
@@ -1,4 +1,4 @@
-import init, { Model } from "./build/m.js"
+import init, { Model } from "./build/m.js"
 
 function fixTwo(x) { return Math.floor(x * 100) / 100 }
 
@@ -10,62 +10,109 @@ function humanSize(size) {
   return `${fixTwo(size/1e12)}tb`
 }
 
+function humanTime(seconds) {
+  const _year = 31536e3
+  const _mon = 2592e3
+  const _day = 864e2
+  const _hour = 36e2
+  const _min = 60
+  const _sec = 1
+
+  const year_rem = seconds % _year
+  const years = (seconds - year_rem) / _year
+
+  const month_rem = year_rem % _mon
+  const months = (year_rem - month_rem) / _mon
+
+  const day_rem = month_rem % _day
+  const days = (month_rem - day_rem) / _day
+
+  const hour_rem = day_rem % _hour
+  const hours = (day_rem - hour_rem) / _hour
+
+  const minute_rem = hour_rem % _min
+  const minutes = (hour_rem - minute_rem) / _min
+
+  const second_rem = minute_rem % _sec
+  const second = (minute_rem - second_rem) / _sec
+
+  return (years > 0 ? `${years} year${years == 1 ? '' : 's'} ` : '') + (months > 0 ? `${months} month${months == 1 ? '' : 's'} `: '') +
+    (days > 0 ? `${days} day${days == 1 ? '' : 's'} ` : '') + (hours > 0 ? `${hours} hour${hours == 1 ? '' : 's'} ` : '') +
+    (minutes > 0 ? `${minutes} minute${minutes == 1 ? '' : 's'} ` : '') + (seconds > 0 ? `${second} second${second == 1 ? '' : 's'} ` : '')
+}
+
 let lastSend = 0
+let lastTime = Infinity
+let times = [0, 0, 0, 0]
 
 async function fetchArrayBuffer(url) {
-  const cacheName = "phi-mixformer-candle-cache"
-  const cache = await caches.open(cacheName)
-  const cachedResponse = await cache.match(url)
+  const cacheName = "phi-mixformer-candle-cache"
+  const cache = await caches.open(cacheName)
+  const cachedResponse = await cache.match(url)
   if (cachedResponse) {
-    const data = await cachedResponse.arrayBuffer()
-    return new Uint8Array(data)
+    const data = await cachedResponse.arrayBuffer()
+    return new Uint8Array(data)
   }
-  const res = await fetch(url, { cache: "force-cache" })
+  const res = await fetch(url, { cache: "force-cache" })
   while (!res.body) { }
-  const reader = res.body.getReader()
-  const contentLength = +(res.headers.get('Content-Length') ?? 0)
-  let receivedLength = 0
-  let chunks = []
+  const reader = res.body.getReader()
+  const contentLength = +(res.headers.get('Content-Length') ?? 0)
+  let receivedLength = 0
+  let chunks = []
   while (true) {
-    const { done, value } = await reader.read()
+    const { done, value } = await reader.read()
     if (done) {
-      break
+      break
     }
-    chunks.push(value)
-    receivedLength += value.length
-    let downloadMessage = `Downloading... ${fixTwo((receivedLength / contentLength) * 100)}% (${humanSize(Math.floor(receivedLength * 100) / 100)})\nLink: ${url}\nTotal size: ${humanSize(fixTwo(contentLength))}`
+    chunks.push(value)
+    receivedLength += value.length
     if(Date.now() - lastSend > 250) {
+      times.push(receivedLength)
+      times = times.slice(1)
+      let max = [times[3] - times[2], times[2] - times[1], times[1] - times[0]]
+      let median = (max[0] + max[1] + max[2]) / 3
+      let lengthPerSecond = median * 4
+      let leftSize = contentLength - receivedLength
+      let leftTime = Math.abs(leftSize / lengthPerSecond)
+
+      if(leftTime > lastTime * 1.5 && lastTime != 0) leftTime = lastTime * 1.2
+      // if(leftTime > lastTime) leftTime = lastTime
+      lastTime = leftTime
+      let downloadMessage = `Downloading... ${fixTwo((receivedLength / contentLength) * 100)}% (${humanSize(Math.floor(receivedLength * 100) / 100)})
+Estimated time remaining: ${humanTime(leftTime)} (may be inaccurate)
+Total size: ${humanSize(fixTwo(contentLength))}
+Download URL: ${url}`
       self.postMessage({ status: "loading", message: downloadMessage })
-      console.log(downloadMessage)
+      // console.log(downloadMessage)
       lastSend = Date.now()
     }
   }
-  let chunksAll = new Uint8Array(receivedLength)
-  let position = 0
+  let chunksAll = new Uint8Array(receivedLength)
+  let position = 0
   for (let chunk of chunks) {
-    chunksAll.set(chunk, position)
-    position += chunk.length
+    chunksAll.set(chunk, position)
+    position += chunk.length
   }
-  cache.put(url, new Response(chunksAll))
-  return chunksAll
+  cache.put(url, new Response(chunksAll))
+  return chunksAll
 }
 
 async function concatenateArrayBuffers(urls) {
-  const arrayBuffers = await Promise.all(urls.map(url => fetchArrayBuffer(url)))
+  const arrayBuffers = await Promise.all(urls.map(url => fetchArrayBuffer(url)))
 
-  let totalLength = arrayBuffers.reduce((acc, arrayBuffer) => acc + arrayBuffer.byteLength, 0)
-  let concatenatedBuffer = new Uint8Array(totalLength)
+  let totalLength = arrayBuffers.reduce((acc, arrayBuffer) => acc + arrayBuffer.byteLength, 0)
+  let concatenatedBuffer = new Uint8Array(totalLength)
 
-  let offset = 0
+  let offset = 0
   arrayBuffers.forEach(buffer => {
-    concatenatedBuffer.set(new Uint8Array(buffer), offset)
-    offset += buffer.byteLength
-  })
-  return concatenatedBuffer
+    concatenatedBuffer.set(new Uint8Array(buffer), offset)
+    offset += buffer.byteLength
+  })
+  return concatenatedBuffer
 }
 
 class Phi {
-  static instance = {}
+  static instance = {}
 
   static async getInstance(
     weightsURL,
@@ -76,36 +123,36 @@ class Phi {
   ) {
     // load individual modelID only once
     if (!this.instance[modelID]) {
-      await init()
+      await init()
 
-      self.postMessage({ status: "loading", message: "Loading Model" })
+      self.postMessage({ status: "loading", message: "Loading Model" })
       const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
         await Promise.all([
           weightsURL instanceof Array ? concatenateArrayBuffers(weightsURL) : fetchArrayBuffer(weightsURL),
           fetchArrayBuffer(tokenizerURL),
           fetchArrayBuffer(configURL),
-        ])
+        ])
 
       this.instance[modelID] = new Model(
         weightsArrayU8,
         tokenizerArrayU8,
        configArrayU8,
        quantized
-      )
+      )
     }
-    return this.instance[modelID]
+    return this.instance[modelID]
   }
 }
 
-let controller = null
+let controller = null
 self.addEventListener("message", (event) => {
   if (event.data.command === "start") {
-    controller = new AbortController()
-    generate(event.data)
+    controller = new AbortController()
+    generate(event.data)
   } else if (event.data.command === "abort") {
-    controller.abort()
+    controller.abort()
   }
-})
+})
 
 async function generate(data) {
   const {
@@ -120,18 +167,19 @@ async function generate(data) {
     repeatPenalty,
     seed,
     maxSeqLen,
-  } = data
+    stuff
+  } = data
   try {
-    self.postMessage({ status: "loading", message: "Starting Phi" })
+    self.postMessage({ status: "loading", message: "Starting Phi" })
     const model = await Phi.getInstance(
       weightsURL,
       modelID,
       tokenizerURL,
       configURL,
       quantized
-    )
+    )
 
-    self.postMessage({ status: "loading", message: "Initializing model" })
+    self.postMessage({ status: "loading", message: "Initializing model" })
     const firstToken = model.init_with_prompt(
       prompt,
       temp,
@@ -139,13 +187,13 @@ async function generate(data) {
      repeatPenalty,
      64,
      BigInt(seed)
-    )
-    const seq_len = 2048
+    )
+    const seq_len = 2048
 
-    let sentence = firstToken
-    let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1
-    let startTime = performance.now()
-    let tokensCount = 0
+    let sentence = firstToken
+    let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1
+    let startTime = performance.now()
+    let tokensCount = 0
     while (tokensCount < maxTokens) {
       await new Promise(async (resolve) => {
         if (controller && controller.signal.aborted) {
@@ -153,22 +201,23 @@ async function generate(data) {
            status: "aborted",
            message: "Aborted",
            output: prompt + sentence,
-          })
-          return
+          })
+          return
         }
-        const token = await model.next_token()
-        if (token === "<|endoftext|>") {
+        const token = await model.next_token()
+        const terminates = `<|endoftext|>, <|user|>, <|system|>, <|assistant|>`.split(', ').map(e => e.trim())
+        if (terminates.includes(token)) {
          self.postMessage({
            status: "complete",
            message: "complete",
            output: prompt + sentence,
-          })
-          return
+          })
+          return
         }
         const tokensSec =
-          ((tokensCount + 1) / (performance.now() - startTime)) * 1000
+          ((tokensCount + 1) / (performance.now() - startTime)) * 1000
 
-        sentence += token
+        sentence += token
         self.postMessage({
           status: "generating",
           message: "Generating token",
@@ -177,17 +226,17 @@ async function generate(data) {
          totalTime: performance.now() - startTime,
          tokensSec,
          prompt: prompt,
-        })
-        setTimeout(resolve, 0)
-      })
-      tokensCount
+        })
+        setTimeout(resolve, 0)
+      })
+      tokensCount++
     }
     self.postMessage({
       status: "complete",
       message: "complete",
       output: prompt + sentence,
-    })
+    })
   } catch (e) {
-    self.postMessage({ error: e })
+    self.postMessage({ error: e })
   }
 }