Spaces:
Duplicated from radames/Candle-phi1-phi2-wasm-demo

plug
/

phi-offline

Running

App Files Files Community

phi-offline / phiWorker.js

plug's picture

Update phiWorker.js

2c12345 11 months ago

7.95 kB

	import init, { Model } from "./build/m.js"

	function fixTwo(x) { return Math.floor(x * 100) / 100 }

	function humanSize(size) {
	if(size < 1e3) return `${fixTwo(size)}b`
	if(size < 1e6) return `${fixTwo(size/1e3)}kb`
	if(size < 1e9) return `${fixTwo(size/1e6)}mb`
	if(size < 1e12) return `${fixTwo(size/1e9)}gb`
	return `${fixTwo(size/1e12)}tb`
	}

	function humanTime(seconds) {
	const _year = 31536e3
	const _mon = 2592e3
	const _day = 864e2
	const _hour = 36e2
	const _min = 60
	const _sec = 1

	const year_rem = seconds % _year
	const years = (seconds - year_rem) / _year

	const month_rem = year_rem % _mon
	const months = (year_rem - month_rem) / _mon

	const day_rem = month_rem % _day
	const days = (month_rem - day_rem) / _day

	const hour_rem = day_rem % _hour
	const hours = (day_rem - hour_rem) / _hour

	const minute_rem = hour_rem % _min
	const minutes = (hour_rem - minute_rem) / _min

	const second_rem = minute_rem % _sec
	const second = (minute_rem - second_rem) / _sec

	return (years > 0 ? `${years} year${years == 1 ? '' : 's'} ` : '') + (months > 0 ? `${months} month${months == 1 ? '' : 's'} `: '') +
	(days > 0 ? `${days} day${days == 1 ? '' : 's'} ` : '') + (hours > 0 ? `${hours} hour${hours == 1 ? '' : 's'} ` : '') +
	(minutes > 0 ? `${minutes} minute${minutes == 1 ? '' : 's'} ` : '') + (seconds > 0 ? `${second} second${second == 1 ? '' : 's'} ` : '')
	}

	let lastSend = 0
	let lastTime = Infinity
	let times = [0, 0, 0, 0]

	async function fetchArrayBuffer(url) {
	const cacheName = "phi-mixformer-candle-cache"
	const cache = await caches.open(cacheName)
	const cachedResponse = await cache.match(url)
	if (cachedResponse) {
	const data = await cachedResponse.arrayBuffer()
	return new Uint8Array(data)
	}
	const res = await fetch(url, { cache: "force-cache" })
	while (!res.body) { }
	const reader = res.body.getReader()
	const contentLength = +(res.headers.get('Content-Length') ?? 0)
	let receivedLength = 0
	let chunks = []
	while (true) {
	const { done, value } = await reader.read()
	if (done) {
	break
	}
	chunks.push(value)
	receivedLength += value.length
	if(Date.now() - lastSend > 250) {
	times.push(receivedLength)
	times = times.slice(1)
	let max = [times[3] - times[2], times[2] - times[1], times[1] - times[0]]
	let median = (max[0] + max[1] + max[2]) / 3
	let lengthPerSecond = median * 4
	let leftSize = contentLength - receivedLength
	let leftTime = Math.abs(leftSize / lengthPerSecond)

	if(leftTime > lastTime * 1.5 && lastTime != 0) leftTime = lastTime * 1.2
	// if(leftTime > lastTime) leftTime = lastTime
	lastTime = leftTime
	let downloadMessage = `Downloading... ${fixTwo((receivedLength / contentLength) * 100)}% (${humanSize(Math.floor(receivedLength * 100) / 100)})
	Estimated time remaining: ${humanTime(leftTime)} (may be inaccurate)
	Total size: ${humanSize(fixTwo(contentLength))}
	Download URL: ${url}`
	self.postMessage({ status: "loading", message: downloadMessage })
	// console.log(downloadMessage)
	lastSend = Date.now()
	}
	}
	let chunksAll = new Uint8Array(receivedLength)
	let position = 0
	for (let chunk of chunks) {
	chunksAll.set(chunk, position)
	position += chunk.length
	}
	cache.put(url, new Response(chunksAll))
	return chunksAll
	}

	async function concatenateArrayBuffers(urls) {
	const arrayBuffers = await Promise.all(urls.map(url => fetchArrayBuffer(url)))

	let totalLength = arrayBuffers.reduce((acc, arrayBuffer) => acc + arrayBuffer.byteLength, 0)
	let concatenatedBuffer = new Uint8Array(totalLength)

	let offset = 0
	arrayBuffers.forEach(buffer => {
	concatenatedBuffer.set(new Uint8Array(buffer), offset)
	offset += buffer.byteLength
	})
	return concatenatedBuffer
	}

	class Phi {
	static instance = {}

	static async getInstance(
	weightsURL,
	modelID,
	tokenizerURL,
	configURL,
	quantized
	) {
	// load individual modelID only once
	if (!this.instance[modelID]) {
	await init()

	self.postMessage({ status: "loading", message: "Loading Model" })
	const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
	await Promise.all([
	weightsURL instanceof Array ? concatenateArrayBuffers(weightsURL) : fetchArrayBuffer(weightsURL),
	fetchArrayBuffer(tokenizerURL),
	fetchArrayBuffer(configURL),
	])

	this.instance[modelID] = new Model(
	weightsArrayU8,
	tokenizerArrayU8,
	configArrayU8,
	quantized
	)
	}
	return this.instance[modelID]
	}
	}

	let controller = null
	self.addEventListener("message", (event) => {
	if (event.data.command === "start") {
	controller = new AbortController()
	generate(event.data)
	} else if (event.data.command === "abort") {
	controller.abort()
	}
	})

	async function generate(data) {
	const {
	weightsURL,
	modelID,
	tokenizerURL,
	configURL,
	quantized,
	prompt,
	temp,
	top_p,
	repeatPenalty,
	seed,
	maxSeqLen,
	stuff
	} = data
	try {
	self.postMessage({ status: "loading", message: "Starting Phi" })
	const model = await Phi.getInstance(
	weightsURL,
	modelID,
	tokenizerURL,
	configURL,
	quantized
	)

	self.postMessage({ status: "loading", message: "Initializing model" })
	const firstToken = model.init_with_prompt(
	prompt,
	temp,
	top_p,
	repeatPenalty,
	64,
	BigInt(seed)
	)
	const seq_len = 2048

	let sentence = firstToken
	let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1
	let startTime = performance.now()
	let tokensCount = 0
	while (tokensCount < maxTokens) {
	await new Promise(async (resolve) => {
	if (controller && controller.signal.aborted) {
	self.postMessage({
	status: "aborted",
	message: "Aborted",
	output: prompt + sentence,
	})
	return
	}
	const token = await model.next_token()
	const terminates = `<\|endoftext\|>, <\|user\|>, <\|system\|>, <\|assistant\|>`.split(', ').map(e => e.trim())
	if (terminates.includes(token)) {
	self.postMessage({
	status: "complete",
	message: "complete",
	output: prompt + sentence,
	})
	return
	}
	const tokensSec =
	((tokensCount + 1) / (performance.now() - startTime)) * 1000

	sentence += token
	self.postMessage({
	status: "generating",
	message: "Generating token",
	token: token,
	sentence: sentence,
	totalTime: performance.now() - startTime,
	tokensSec,
	prompt: prompt,
	})
	setTimeout(resolve, 0)
	})
	tokensCount++
	}
	self.postMessage({
	status: "complete",
	message: "complete",
	output: prompt + sentence,
	})
	} catch (e) {
	self.postMessage({ error: e })
	}
	}