pabrik-komik-AI / src /types.ts
jbilcke-hf's picture
jbilcke-hf HF staff
try to improve robustness of LLM responses
2f7798c
raw
history blame
1.92 kB
export type ProjectionMode = 'cartesian' | 'spherical'
export type CacheMode = "use" | "renew" | "ignore"
export interface RenderRequest {
prompt: string
// whether to use video segmentation
// disabled (default)
// firstframe: we only analyze the first frame
// allframes: we analyze all the frames
segmentation: 'disabled' | 'firstframe' | 'allframes'
// segmentation will only be executed if we have a non-empty list of actionnables
// actionnables are names of things like "chest", "key", "tree", "chair" etc
actionnables: string[]
// note: this is the number of frames for Zeroscope,
// which is currently configured to only output 3 seconds, so:
// nbFrames=8 -> 1 sec
// nbFrames=16 -> 2 sec
// nbFrames=24 -> 3 sec
nbFrames: number // min: 1, max: 24
nbSteps: number // min: 1, max: 50
seed: number
width: number // fixed at 1024 for now
height: number // fixed at 512 for now
// upscaling factor
// 0: no upscaling
// 1: no upscaling
// 2: 2x larger
// 3: 3x larger
// 4x: 4x larger, up to 4096x4096 (warning: a PNG of this size can be 50 Mb!)
upscalingFactor: number
projection: ProjectionMode
cache: CacheMode
wait: boolean // wait until the job is completed
analyze: boolean // analyze the image to generate a caption (optional)
}
export interface ImageSegment {
id: number
box: number[]
color: number[]
label: string
score: number
}
export type RenderedSceneStatus =
| "pending"
| "completed"
| "error"
export interface RenderedScene {
renderId: string
status: RenderedSceneStatus
assetUrl: string
alt: string
error: string
maskUrl: string
segments: ImageSegment[]
}
export interface ImageAnalysisRequest {
image: string // in base64
prompt: string
}
export interface ImageAnalysisResponse {
result: string
error?: string
}
export type LLMResponse = Array<{panel: number; caption: string }>