Parameters for text-to-speech generation requests

/**
 * Parameters for text-to-speech generation requests.
 *
 * Only `text` and `voice` are required; every other field is optional.
 */
interface TextToSpeechParams {
    /** Text normalization setting to control how numbers and special characters are handled. */
    apply_text_normalization?: "auto" | "on" | "off";
    /** Special instructions for the voice generation. */
    instructions?: string;
    /** ISO 639-1 language code for the speech generation. */
    language_code?: string;
    /** Optimization level (0-4), where 0 is no optimization and 4 is maximum optimization. */
    latency?: number;
    /** Model ID to use, defaults to "eleven_monolingual_v1". */
    model_id?: string;
    /** Array of IDs for next generations. */
    next_request_ids?: string[];
    /** Output format specified as codec_samplerate_bitrate (e.g., "mp3_44100_128"). */
    output_format?: string;
    /** ID of the previous generation request. */
    previous_request_id?: string;
    /** Text that came before this generation, used for maintaining continuity. */
    previous_text?: string;
    /**
     * Array of pronunciation dictionary locators for custom word pronunciations.
     *
     * NOTE(review): elements are typed `any`; the locator shape is not visible
     * in this section — consider a concrete element type once it is confirmed.
     */
    pronunciation_dictionary_locators?: any[];
    /** Whether to stream the response. */
    stream?: boolean;
    /** Text content to convert to speech. */
    text: string;
    /** Voice ID or name to use for speech generation. */
    voice: string;
    /**
     * Voice configuration settings.
     *
     * NOTE(review): `VoiceSettings` is not declared in this section; its
     * fields must be checked against its own definition.
     */
    voice_settings?: VoiceSettings;
}

Properties

apply_text_normalization?: "auto" | "on" | "off"

Text normalization setting to control how numbers and special characters are handled

instructions?: string

Special instructions for the voice generation

language_code?: string

ISO 639-1 language code for the speech generation

latency?: number

Latency optimization level (0-4), where 0 applies no optimization and 4 applies maximum optimization

model_id?: string

Model ID to use, defaults to "eleven_monolingual_v1"

next_request_ids?: string[]

Array of IDs of the next generation requests

output_format?: string

Output format specified as codec_samplerate_bitrate (e.g., "mp3_44100_128")

previous_request_id?: string

ID of the previous generation request

previous_text?: string

Text that came before this generation, used for maintaining continuity

pronunciation_dictionary_locators?: any[]

Array of pronunciation dictionary locators for custom word pronunciations

stream?: boolean

Whether to stream the response

text: string

Text content to convert to speech

voice: string

Voice ID or name to use for speech generation

voice_settings?: VoiceSettings

Voice configuration settings