Api
API
Public protocol and SDK-facing type shapes.
RPC Methods
Health and Runtime
`health`, `doctor.run`
Models
`models.list`, `models.install`, `models.preload`
Warm-Up
`warmup.status`, `warmup.start`, `warmup.schedule`
Transcription
`transcribe.file`, `transcribe.startSession`, `transcribe.sessionStatus`, `transcribe.stopSession`, `transcribe.cancelSession`
Annotation
annotate.file
Synthesis
`synthesize.voices`, `synthesize.generate`, `synthesize.startSession`, `synthesize.sessionStatus`, `synthesize.cancel`
Performance samples
Every performance sample recorded to ~/.vox/performance.jsonl has the shape below. Fields marked with `?` are optional and may be absent from a given sample.
/**
 * Route name carried by a performance sample.
 *
 * The five literal members are the routes this page lists as currently
 * emitted. The trailing `string` member keeps the type open, so any other
 * route name also type-checks; as a consequence the compiler treats the
 * whole union as plain `string`, and the literals serve as documentation.
 */
type PerformanceRoute =
  | "annotate.file"
  | "transcribe.file"
  | "transcribe.live"
  | "synthesize.generate"
  | "synthesize.startSession"
  | string;
/**
 * Shape of one performance sample (the records this page says are written
 * to ~/.vox/performance.jsonl).
 *
 * `voiceId`, `error` and `metrics` are optional; every other field is
 * required by the type.
 */
interface PerformanceSample {
  /** Stored as a string; the exact timestamp format is not stated here. */
  timestamp: string;
  clientId: string;
  route: PerformanceRoute;
  modelId: string;
  /** Optional. Presumably only set for synthesis routes (confirm). */
  voiceId?: string;
  /** Known values are "ok", "error" and "cancelled"; other strings are allowed. */
  outcome: "ok" | "error" | "cancelled" | string;
  textLength: number;
  /** Optional. Presumably set when `outcome` is "error" (confirm). */
  error?: string;
  /** Optional timing breakdown for the request. */
  metrics?: PerformanceMetrics;
}
/**
 * Timing and size breakdown attached to a performance sample.
 *
 * The first group of fields is required. The optional fields share their
 * names with fields of TranscriptionMetrics / AnnotationMetrics (input side)
 * and SynthesisMetrics (output side), so which ones appear presumably depends
 * on the route (confirm against the runtime).
 *
 * Fields ending in `Ms` presumably hold durations in milliseconds (confirm).
 */
interface PerformanceMetrics {
  // Required on every metrics object.
  traceId: string;
  audioDurationMs: number;
  wasPreloaded: boolean;
  modelCheckMs: number;
  modelLoadMs: number;
  inferenceMs: number;
  totalMs: number;

  // Optional: names shared with the transcription/annotation metrics.
  inputBytes?: number;
  fileCheckMs?: number;
  audioLoadMs?: number;
  audioPrepareMs?: number;

  // Optional: names shared with the synthesis metrics.
  characterCount?: number;
  outputBytes?: number;
  voiceResolveMs?: number;
  synthesisMs?: number;

  // Optional: present in all three SDK metrics shapes.
  realtimeFactor?: number;
}
Current emitted performance routes:
`transcribe.file`, `annotate.file`, `transcribe.live`, `synthesize.generate`, `synthesize.startSession`
Core TypeScript SDK Entry Points
VoxClient
`connect()`, `disconnect()`, `doctor()`, `listModels()`, `listVoices()`, `installModel()`, `preloadModel()`, `getWarmupStatus()`, `startWarmup()`, `scheduleWarmup()`, `transcribeFile()`, `annotateFile()`, `synthesize()`, `getLiveSessionStatus()`, `cancelLiveSession()`, `createLiveSession()`
FileTranscriptionResult
`modelId`, `text`, `elapsedMs`, `metrics`, `words`
SynthesisResult
`modelId`, `voiceId`, `format`, `contentType`, `audio`, `audioBytes`, `elapsedMs`, `metrics`
FileAnnotationResult
`modelId`, `text`, `elapsedMs`, `metrics`, `words`, `speakers`
TranscriptionMetrics
`traceId`, `audioDurationMs`, `inputBytes`, `wasPreloaded`, `fileCheckMs`, `modelCheckMs`, `modelLoadMs`, `audioLoadMs`, `audioPrepareMs`, `inferenceMs`, `totalMs`, `realtimeFactor`
SynthesisMetrics
`traceId`, `characterCount`, `audioDurationMs`, `outputBytes`, `wasPreloaded`, `modelCheckMs`, `modelLoadMs`, `voiceResolveMs`, `synthesisMs`, `inferenceMs`, `totalMs`, `realtimeFactor`
AnnotationMetrics
`traceId`, `audioDurationMs`, `inputBytes`, `wasPreloaded`, `fileCheckMs`, `modelCheckMs`, `modelLoadMs`, `audioLoadMs`, `audioPrepareMs`, `diarizationMs`, `totalMs`, `realtimeFactor`
Interface shapes
/**
 * Metrics object optionally attached to a file transcription result.
 *
 * Only `traceId` is required; every other field is optional, so consumers
 * must handle `undefined` for each of them.
 *
 * Fields ending in `Ms` presumably hold durations in milliseconds (confirm).
 */
interface TranscriptionMetrics {
  /** The only required field. */
  traceId: string;

  // Input description.
  audioDurationMs?: number;
  inputBytes?: number;
  wasPreloaded?: boolean;

  // Per-stage timings.
  fileCheckMs?: number;
  modelCheckMs?: number;
  modelLoadMs?: number;
  audioLoadMs?: number;
  audioPrepareMs?: number;
  inferenceMs?: number;

  // Totals.
  totalMs?: number;
  realtimeFactor?: number;
}
/**
 * Result shape for a file transcription.
 *
 * Presumably what `VoxClient.transcribeFile()` resolves to (confirm against
 * the SDK). `metrics` is the only optional field; `WordTiming` is declared
 * outside this section.
 */
interface FileTranscriptionResult {
  modelId: string;
  /** Required here, unlike the optional `text` on FileAnnotationResult. */
  text: string;
  elapsedMs: number;
  /** Optional timing breakdown. */
  metrics?: TranscriptionMetrics;
  words: WordTiming[];
}
/**
 * One entry of `FileAnnotationResult.speakers`: a span attributed to a
 * speaker.
 *
 * The unit of `start` / `end` is not stated in this document (confirm
 * before doing arithmetic with the `*Ms` metric fields).
 */
interface SpeakerSegment {
  speakerId: string;
  start: number;
  end: number;
  /** May be omitted or explicitly `null`. */
  confidence?: number | null;
}
/**
 * One entry of `FileAnnotationResult.words`: a word with its timing and an
 * optional speaker attribution.
 *
 * The unit of `start` / `end` is not stated in this document (confirm).
 */
interface AttributedWordTiming {
  word: string;
  start: number;
  end: number;
  /** Required here, unlike the optional `confidence` on SpeakerSegment. */
  confidence: number;
  /** May be omitted or explicitly `null`. */
  speakerId?: string | null;
}
/**
 * Metrics object optionally attached to a file annotation result.
 *
 * Every field is required. The field set matches TranscriptionMetrics
 * except that `diarizationMs` appears in place of `inferenceMs`.
 *
 * Fields ending in `Ms` presumably hold durations in milliseconds (confirm).
 */
interface AnnotationMetrics {
  traceId: string;

  // Input description.
  audioDurationMs: number;
  inputBytes: number;
  wasPreloaded: boolean;

  // Per-stage timings.
  fileCheckMs: number;
  modelCheckMs: number;
  modelLoadMs: number;
  audioLoadMs: number;
  audioPrepareMs: number;
  diarizationMs: number;

  // Totals.
  totalMs: number;
  realtimeFactor: number;
}
/**
 * Result shape for a file annotation.
 *
 * Presumably what `VoxClient.annotateFile()` resolves to (confirm against
 * the SDK). `text` and `metrics` are optional; `words` and `speakers` are
 * always present as arrays.
 */
interface FileAnnotationResult {
  modelId: string;
  /** Optional here, unlike the required `text` on FileTranscriptionResult. */
  text?: string;
  elapsedMs: number;
  /** Optional timing breakdown. */
  metrics?: AnnotationMetrics;
  words: AttributedWordTiming[];
  speakers: SpeakerSegment[];
}
/**
 * Options bag for a synthesis request.
 *
 * Presumably the options argument of `VoxClient.synthesize()` (confirm).
 * Every field is optional, so `{}` is a valid value; the defaults applied
 * for omitted fields are not stated in this document.
 */
interface SynthesisOptions {
  modelId?: string;
  voiceId?: string;
  /** Accepted format names are not listed here. */
  format?: string;
  /** Valid range and neutral value are not stated here. */
  speed?: number;
  instructions?: string;
}
/**
 * Metrics object optionally attached to a synthesis result.
 *
 * Every field is required.
 *
 * Fields ending in `Ms` presumably hold durations in milliseconds (confirm).
 */
interface SynthesisMetrics {
  traceId: string;

  // Request and output description.
  characterCount: number;
  audioDurationMs: number;
  outputBytes: number;
  wasPreloaded: boolean;

  // Per-stage timings.
  modelCheckMs: number;
  modelLoadMs: number;
  voiceResolveMs: number;
  synthesisMs: number;
  inferenceMs: number;

  // Totals.
  totalMs: number;
  realtimeFactor: number;
}
/**
 * Result shape for a synthesis request.
 *
 * Presumably what `VoxClient.synthesize()` resolves to (confirm against the
 * SDK). `metrics` is the only optional field.
 */
interface SynthesisResult {
  modelId: string;
  voiceId: string;
  format: string;
  contentType: string;
  /** The audio payload as raw bytes. */
  audio: Uint8Array;
  /** Presumably equal to `audio.byteLength` (confirm). */
  audioBytes: number;
  elapsedMs: number;
  /** Optional timing breakdown. */
  metrics?: SynthesisMetrics;
}
Warm-up states
/**
 * Closed set of warm-up states (no open `string` member, so a `switch`
 * over it can be checked for exhaustiveness).
 *
 * The transitions between states are not described in this document.
 */
type WarmupState =
  | "idle"
  | "scheduled"
  | "warming"
  | "ready"
  | "failed";
Apps use this to tell whether the runtime is cold, warming, or ready for hot-path speech.