// phi-offline / phiWorker.js — "Update phiWorker.js" by plug (rev 2c12345, 7.95 kB)
import init, { Model } from "./build/m.js"
// Truncate a number to at most two decimal places (floor, not round).
function fixTwo(x) {
  const scaled = Math.floor(x * 100)
  return scaled / 100
}
// Format a byte count as a human-readable size string ("b", "kb", "mb",
// "gb", "tb"), with the value truncated to two decimal places.
function humanSize(size) {
  // Inlined two-decimal truncation (same math as fixTwo).
  const trunc = (x) => Math.floor(x * 100) / 100
  const scales = [
    [1e3, 1, 'b'],
    [1e6, 1e3, 'kb'],
    [1e9, 1e6, 'mb'],
    [1e12, 1e9, 'gb'],
  ]
  for (const [limit, divisor, suffix] of scales) {
    if (size < limit) return `${trunc(size / divisor)}${suffix}`
  }
  return `${trunc(size / 1e12)}tb`
}
// Render a duration in seconds as "X years Y months Z days ... N seconds "
// (each unit followed by a space, zero-count units omitted). Fractional
// seconds are truncated. Returns "0 seconds" for durations under one second.
//
// Bug fix: the original's last clause tested the INPUT `seconds` instead of
// the computed remainder `second`, so round durations (e.g. exactly one
// hour) were rendered with a spurious trailing "0 seconds".
function humanTime(seconds) {
  const units = [
    [31536e3, 'year'],
    [2592e3, 'month'],
    [864e2, 'day'],
    [36e2, 'hour'],
    [60, 'minute'],
    [1, 'second'],
  ]
  let rem = seconds
  let out = ''
  for (const [span, name] of units) {
    const count = Math.floor(rem / span)
    rem -= count * span
    if (count > 0) out += `${count} ${name}${count === 1 ? '' : 's'} `
  }
  // Fallback so a sub-second ETA still shows something meaningful.
  return out === '' ? '0 seconds' : out
}
// Download-progress state shared across fetchArrayBuffer calls.
let lastSend = 0 // timestamp (ms) of the last progress postMessage; throttles updates to ~4/sec
let lastTime = Infinity // previous ETA estimate (seconds); used to damp sudden spikes
let times = [0, 0, 0, 0] // sliding window of cumulative received-byte counts, one sample per progress tick
// Fetch `url` as a Uint8Array, streaming the body so download progress can
// be posted to the worker's client, and memoize the bytes in the Cache API
// so subsequent loads skip the network entirely.
async function fetchArrayBuffer(url) {
  const cacheName = "phi-mixformer-candle-cache"
  const cache = await caches.open(cacheName)
  const cachedResponse = await cache.match(url)
  if (cachedResponse) {
    const data = await cachedResponse.arrayBuffer()
    return new Uint8Array(data)
  }
  const res = await fetch(url, { cache: "force-cache" })
  // Fail fast instead of busy-waiting: `res.body` is null for error/opaque
  // responses and never becomes non-null, so the original
  // `while (!res.body) { }` spun forever. Checking res.ok also prevents an
  // HTML error page from being cached and returned as model weights.
  if (!res.ok || !res.body) {
    throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`)
  }
  const reader = res.body.getReader()
  const contentLength = +(res.headers.get('Content-Length') ?? 0)
  let receivedLength = 0
  const chunks = []
  while (true) {
    const { done, value } = await reader.read()
    if (done) {
      break
    }
    chunks.push(value)
    receivedLength += value.length
    // Throttle progress messages to at most one every 250ms.
    if (Date.now() - lastSend > 250) {
      times.push(receivedLength)
      times = times.slice(1)
      // Mean bytes-per-tick over the last three window deltas (the original
      // called this "median" but computes an arithmetic mean); ticks are
      // ~250ms apart, so *4 approximates bytes per second.
      const deltas = [times[3] - times[2], times[2] - times[1], times[1] - times[0]]
      const meanDelta = (deltas[0] + deltas[1] + deltas[2]) / 3
      const lengthPerSecond = meanDelta * 4
      const leftSize = contentLength - receivedLength
      let leftTime = Math.abs(leftSize / lengthPerSecond)
      // Damp sudden ETA spikes so the displayed estimate doesn't jump.
      if (leftTime > lastTime * 1.5 && lastTime != 0) leftTime = lastTime * 1.2
      lastTime = leftTime
      const downloadMessage = `Downloading... ${fixTwo((receivedLength / contentLength) * 100)}% (${humanSize(Math.floor(receivedLength * 100) / 100)})
Estimated time remaining: ${humanTime(leftTime)} (may be inaccurate)
Total size: ${humanSize(fixTwo(contentLength))}
Download URL: ${url}`
      self.postMessage({ status: "loading", message: downloadMessage })
      lastSend = Date.now()
    }
  }
  const chunksAll = new Uint8Array(receivedLength)
  let position = 0
  for (const chunk of chunks) {
    chunksAll.set(chunk, position)
    position += chunk.length
  }
  // Await the cache write so failures surface here instead of floating.
  await cache.put(url, new Response(chunksAll))
  return chunksAll
}
// Fetch every URL in parallel and splice the resulting byte arrays, in
// order, into one contiguous Uint8Array.
async function concatenateArrayBuffers(urls) {
  const buffers = await Promise.all(urls.map((url) => fetchArrayBuffer(url)))
  const total = buffers.reduce((sum, buf) => sum + buf.byteLength, 0)
  const joined = new Uint8Array(total)
  let cursor = 0
  for (const buffer of buffers) {
    joined.set(new Uint8Array(buffer), cursor)
    cursor += buffer.byteLength
  }
  return joined
}
// Lazy, per-modelID singleton holder for loaded Models.
class Phi {
  // modelID -> Promise<Model>. Caching the *promise* (instead of assigning
  // only after the load resolves, as the original did) means two concurrent
  // getInstance calls for the same modelID share one download/build instead
  // of racing and loading the weights twice.
  static instance = {}

  // Resolve (downloading and constructing on first use) the Model for
  // `modelID`. `weightsURL` may be a single URL or an array of shard URLs.
  static async getInstance(
    weightsURL,
    modelID,
    tokenizerURL,
    configURL,
    quantized
  ) {
    if (!this.instance[modelID]) {
      this.instance[modelID] = (async () => {
        await init()
        self.postMessage({ status: "loading", message: "Loading Model" })
        const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
          await Promise.all([
            weightsURL instanceof Array ? concatenateArrayBuffers(weightsURL) : fetchArrayBuffer(weightsURL),
            fetchArrayBuffer(tokenizerURL),
            fetchArrayBuffer(configURL),
          ])
        return new Model(
          weightsArrayU8,
          tokenizerArrayU8,
          configArrayU8,
          quantized
        )
      })()
    }
    return this.instance[modelID]
  }
}
// AbortController for the in-flight generation, or null when idle.
let controller = null
self.addEventListener("message", (event) => {
  if (event.data.command === "start") {
    controller = new AbortController()
    // Fire-and-forget: generate reports progress via postMessage; the catch
    // keeps the promise from floating if it ever rejects.
    generate(event.data).catch((e) => self.postMessage({ error: e }))
  } else if (event.data.command === "abort") {
    // Optional chaining: an "abort" arriving before any "start" used to
    // throw TypeError on the null controller.
    controller?.abort()
  }
})
// Stream tokens from the model for `data.prompt`, posting "generating"
// progress per token and a final "complete" (or "aborted") message with the
// full output. All failures are reported via postMessage({ error }).
async function generate(data) {
  const {
    weightsURL,
    modelID,
    tokenizerURL,
    configURL,
    quantized,
    prompt,
    temp,
    top_p,
    repeatPenalty,
    seed,
    maxSeqLen,
  } = data
  try {
    self.postMessage({ status: "loading", message: "Starting Phi" })
    const model = await Phi.getInstance(
      weightsURL,
      modelID,
      tokenizerURL,
      configURL,
      quantized
    )
    self.postMessage({ status: "loading", message: "Initializing model" })
    const firstToken = model.init_with_prompt(
      prompt,
      temp,
      top_p,
      repeatPenalty,
      64,
      BigInt(seed)
    )
    const seq_len = 2048
    // Terminal tokens, hoisted out of the loop (the original rebuilt this
    // list via split() on every token).
    const terminates = ["<|endoftext|>", "<|user|>", "<|system|>", "<|assistant|>"]
    let sentence = firstToken
    const maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1
    const startTime = performance.now()
    let tokensCount = 0
    // Plain loop instead of the original `new Promise(async resolve => ...)`
    // wrapper, whose executor returned without resolving on abort/terminal
    // tokens — leaving the outer `await` hung forever and leaking the frame.
    while (tokensCount < maxTokens) {
      if (controller && controller.signal.aborted) {
        self.postMessage({
          status: "aborted",
          message: "Aborted",
          output: prompt + sentence,
        })
        return
      }
      const token = await model.next_token()
      if (terminates.includes(token)) break
      const tokensSec =
        ((tokensCount + 1) / (performance.now() - startTime)) * 1000
      sentence += token
      self.postMessage({
        status: "generating",
        message: "Generating token",
        token: token,
        sentence: sentence,
        totalTime: performance.now() - startTime,
        tokensSec,
        prompt: prompt,
      })
      // Yield to the event loop so incoming "abort" messages get handled.
      await new Promise((resolve) => setTimeout(resolve, 0))
      tokensCount++
    }
    self.postMessage({
      status: "complete",
      message: "complete",
      output: prompt + sentence,
    })
  } catch (e) {
    self.postMessage({ error: e })
  }
}