cURL

# POST a text-to-speech request and receive JSON with base64 audio and timestamps.
# The heredoc delimiter is quoted ('EOF') so the shell passes the JSON through
# verbatim: no $variable, `command` or backslash expansion happens inside the body.
curl --request POST \
  --url https://api.typecast.ai/v1/text-to-speech/with-timestamps \
  --header 'Content-Type: application/json' \
  --header 'X-API-KEY: <api-key>' \
  --data @- <<'EOF'
{
  "voice_id": "tc_60e5426de8b95f1d3000d7b5",
  "text": "집중력이 떨어질 땐 5분간 스트레칭을 해보세요.",
  "model": "ssfm-v30",
  "language": "kor",
  "prompt": {
    "emotion_type": "preset",
    "emotion_preset": "normal",
    "emotion_intensity": 1.0
  }
}
EOF

import base64
import requests

API_HOST = "https://api.typecast.ai"

# Request body: one Korean sentence using the "preset" emotion type.
payload = {
    "voice_id": "tc_60e5426de8b95f1d3000d7b5",
    "text": "집중력이 떨어질 땐 5분간 스트레칭을 해보세요.",
    "model": "ssfm-v30",
    "language": "kor",
    "prompt": {
        "emotion_type": "preset",
        "emotion_preset": "normal",
        "emotion_intensity": 1.0,
    },
}
headers = {
    "X-API-KEY": "<api-key>",
    "Content-Type": "application/json",
}

endpoint = f"{API_HOST}/v1/text-to-speech/with-timestamps"
response = requests.post(endpoint, headers=headers, json=payload, timeout=60)
# Raise on any 4xx/5xx status before trying to parse the body.
response.raise_for_status()
data = response.json()

# The audio is delivered base64-encoded inside the JSON; decode it while writing.
with open("output.wav", "wb") as audio_file:
    encoded_audio = data["audio"]
    audio_file.write(base64.b64decode(encoded_audio))
print(f"저장 완료: duration={data['audio_duration']}초")

# Show the first three word-level segments; a missing or null "words" yields none.
word_segments = data.get("words") or []
for segment in word_segments[:3]:
    print(f"  단어: {segment['text']!r} {segment['start']:.3f}s - {segment['end']:.3f}s")

// JSON request body: text, voice and model selection, an emotion_type 'smart'
// prompt with previous/next text, explicit output settings, and a fixed seed.
const requestBody = {
  voice_id: 'tc_60e5426de8b95f1d3000d7b5',
  text: '모든 것이 너무나 완벽해서 마치 꿈을 꾸는 것 같습니다.',
  model: 'ssfm-v30',
  language: 'kor',
  prompt: {
    emotion_type: 'smart',
    previous_text: "I feel like I'm walking on air and I just want to scream with joy!",
    next_text: 'I am literally bursting with happiness and I never want this feeling to end!'
  },
  output: {
    target_lufs: -14,
    volume: 100,
    audio_pitch: 0,
    audio_tempo: 1,
    audio_format: 'wav'
  },
  seed: 42
};

const options = {
  method: 'POST',
  headers: {
    'X-API-KEY': '<api-key>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify(requestBody)
};

// Send the request and log the parsed JSON response; log any failure instead.
(async () => {
  try {
    const res = await fetch('https://api.typecast.ai/v1/text-to-speech/with-timestamps', options);
    const data = await res.json();
    console.log(data);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Request body for the with-timestamps endpoint; JSON-encoded just before sending.
$requestBody = [
  'voice_id' => 'tc_60e5426de8b95f1d3000d7b5',
  'text' => '모든 것이 너무나 완벽해서 마치 꿈을 꾸는 것 같습니다.',
  'model' => 'ssfm-v30',
  'language' => 'kor',
  'prompt' => [
    'emotion_type' => 'smart',
    'previous_text' => "I feel like I'm walking on air and I just want to scream with joy!",
    'next_text' => 'I am literally bursting with happiness and I never want this feeling to end!',
  ],
  'output' => [
    'target_lufs' => -14,
    'volume' => 100,
    'audio_pitch' => 0,
    'audio_tempo' => 1,
    'audio_format' => 'wav',
  ],
  'seed' => 42,
];

$ch = curl_init();

curl_setopt_array($ch, [
  CURLOPT_URL => "https://api.typecast.ai/v1/text-to-speech/with-timestamps",
  // Return the response as a string from curl_exec() instead of printing it.
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($requestBody),
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json",
    "X-API-KEY: <api-key>",
  ],
]);

$response = curl_exec($ch);
$error = curl_error($ch);
curl_close($ch);

// Print the cURL error message when there is one, otherwise the raw response.
echo $error ? ("cURL Error #:" . $error) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a text-to-speech request to the with-timestamps endpoint and
// prints the raw response body. Each step's error is checked; on failure the
// error is printed and the program stops instead of continuing with nil values.
func main() {

	url := "https://api.typecast.ai/v1/text-to-speech/with-timestamps"

	payload := strings.NewReader("{\n  \"voice_id\": \"tc_60e5426de8b95f1d3000d7b5\",\n  \"text\": \"모든 것이 너무나 완벽해서 마치 꿈을 꾸는 것 같습니다.\",\n  \"model\": \"ssfm-v30\",\n  \"language\": \"kor\",\n  \"prompt\": {\n    \"emotion_type\": \"smart\",\n    \"previous_text\": \"I feel like I'm walking on air and I just want to scream with joy!\",\n    \"next_text\": \"I am literally bursting with happiness and I never want this feeling to end!\"\n  },\n  \"output\": {\n    \"target_lufs\": -14,\n    \"volume\": 100,\n    \"audio_pitch\": 0,\n    \"audio_tempo\": 1,\n    \"audio_format\": \"wav\"\n  },\n  \"seed\": 42\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("creating request:", err)
		return
	}

	req.Header.Add("X-API-KEY", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// The response must not be touched when Do fails; deferring
		// res.Body.Close() before this check would dereference nil.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

// JSON request body, written one JSON line per source line for readability.
String requestBody =
    "{\n"
  + "  \"voice_id\": \"tc_60e5426de8b95f1d3000d7b5\",\n"
  + "  \"text\": \"모든 것이 너무나 완벽해서 마치 꿈을 꾸는 것 같습니다.\",\n"
  + "  \"model\": \"ssfm-v30\",\n"
  + "  \"language\": \"kor\",\n"
  + "  \"prompt\": {\n"
  + "    \"emotion_type\": \"smart\",\n"
  + "    \"previous_text\": \"I feel like I'm walking on air and I just want to scream with joy!\",\n"
  + "    \"next_text\": \"I am literally bursting with happiness and I never want this feeling to end!\"\n"
  + "  },\n"
  + "  \"output\": {\n"
  + "    \"target_lufs\": -14,\n"
  + "    \"volume\": 100,\n"
  + "    \"audio_pitch\": 0,\n"
  + "    \"audio_tempo\": 1,\n"
  + "    \"audio_format\": \"wav\"\n"
  + "  },\n"
  + "  \"seed\": 42\n"
  + "}";

// POST the body with the API key and content-type headers; read the reply as a string.
HttpResponse<String> response = Unirest.post("https://api.typecast.ai/v1/text-to-speech/with-timestamps")
  .header("X-API-KEY", "<api-key>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

endpoint = URI("https://api.typecast.ai/v1/text-to-speech/with-timestamps")

# JSON request body sent verbatim. The quoted heredoc disables interpolation and
# escape processing; chomp removes the trailing newline the heredoc appends.
json_body = <<'JSON'.chomp
{
  "voice_id": "tc_60e5426de8b95f1d3000d7b5",
  "text": "모든 것이 너무나 완벽해서 마치 꿈을 꾸는 것 같습니다.",
  "model": "ssfm-v30",
  "language": "kor",
  "prompt": {
    "emotion_type": "smart",
    "previous_text": "I feel like I'm walking on air and I just want to scream with joy!",
    "next_text": "I am literally bursting with happiness and I never want this feeling to end!"
  },
  "output": {
    "target_lufs": -14,
    "volume": 100,
    "audio_pitch": 0,
    "audio_tempo": 1,
    "audio_format": "wav"
  },
  "seed": 42
}
JSON

client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

req = Net::HTTP::Post.new(endpoint)
req["X-API-KEY"] = '<api-key>'
req["Content-Type"] = 'application/json'
req.body = json_body

# Send the request and print the response body.
response = client.request(req)
puts response.read_body

{
  "audio": "UklGRs...(base64 오디오 생략)",
  "audio_format": "wav",
  "audio_duration": 3.2,
  "words": [
    {
      "text": "집중력이",
      "start": 0.08,
      "end": 0.76
    },
    {
      "text": "떨어질",
      "start": 0.8,
      "end": 1.26
    },
    {
      "text": "땐",
      "start": 1.3,
      "end": 1.52
    },
    {
      "text": "5분간",
      "start": 1.56,
      "end": 2.02
    },
    {
      "text": "스트레칭을",
      "start": 2.06,
      "end": 2.7
    },
    {
      "text": "해보세요.",
      "start": 2.74,
      "end": 3.2
    }
  ],
  "characters": [
    {
      "text": "집",
      "start": 0.08,
      "end": 0.26
    },
    {
      "text": "중",
      "start": 0.26,
      "end": 0.43
    },
    {
      "text": "력",
      "start": 0.43,
      "end": 0.6
    },
    {
      "text": "이",
      "start": 0.6,
      "end": 0.76
    },
    {
      "text": " ",
      "start": 0.76,
      "end": 0.8
    },
    {
      "text": "떨",
      "start": 0.8,
      "end": 0.94
    },
    {
      "text": "어",
      "start": 0.94,
      "end": 1.1
    },
    {
      "text": "질",
      "start": 1.1,
      "end": 1.26
    },
    {
      "text": " ",
      "start": 1.26,
      "end": 1.3
    },
    {
      "text": "땐",
      "start": 1.3,
      "end": 1.52
    },
    {
      "text": " ",
      "start": 1.52,
      "end": 1.56
    },
    {
      "text": "5",
      "start": 1.56,
      "end": 1.68
    },
    {
      "text": "분",
      "start": 1.68,
      "end": 1.84
    },
    {
      "text": "간",
      "start": 1.84,
      "end": 2.02
    },
    {
      "text": " ",
      "start": 2.02,
      "end": 2.06
    },
    {
      "text": "스",
      "start": 2.06,
      "end": 2.18
    },
    {
      "text": "트",
      "start": 2.18,
      "end": 2.3
    },
    {
      "text": "레",
      "start": 2.3,
      "end": 2.42
    },
    {
      "text": "칭",
      "start": 2.42,
      "end": 2.56
    },
    {
      "text": "을",
      "start": 2.56,
      "end": 2.7
    },
    {
      "text": " ",
      "start": 2.7,
      "end": 2.74
    },
    {
      "text": "해",
      "start": 2.74,
      "end": 2.88
    },
    {
      "text": "보",
      "start": 2.88,
      "end": 3.02
    },
    {
      "text": "세",
      "start": 3.02,
      "end": 3.14
    },
    {
      "text": "요",
      "start": 3.14,
      "end": 3.18
    },
    {
      "text": ".",
      "start": 3.18,
      "end": 3.2
    }
  ]
}

{
  "detail": "Invalid voice_id"
}

{
  "detail": "Invalid API key"
}

{
  "detail": "Insufficient credit"
}

{
  "detail": "Voice not found"
}

{
  "detail": "Invalid request format"
}

{
  "detail": "Too many requests"
}

{
  "detail": "An unexpected error occurred"
}

Text-to-Speech

타임스탬프 포함 텍스트 음성 변환(TTS with Timestamps)

텍스트로부터 음성을 생성하면서 단어·문자 단위 타임스탬프를 함께 반환합니다. 자막 싱크, 문자 단위 하이라이트 애니메이션, 발화 구간 시각화 등에 활용할 수 있습니다.

요청 본문은 표준 /v1/text-to-speech 엔드포인트와 동일합니다(voice_id, text, model, language, prompt, output, seed). 응답은 바이너리 오디오가 아닌 JSON 이며, base64 로 인코딩된 오디오와 함께 words / characters 배열을 포함합니다.

필요에 따라 granularity 쿼리 파라미터로 단어 단위 또는 문자 단위 중 한쪽만 받아 응답 크기를 줄일 수 있습니다.

언어 주의. 일본어(jpn), 중국어(zho) 처럼 단어 사이에 공백이 없는 언어는 word 단위 정렬이 문장 전체를 하나의 “단어” 로 묶어 버립니다. 이런 언어에서는 항상 granularity=char 를 지정해 문자 단위 타임스탬프를 받으세요.

사용 가능한 보이스 목록은 보이스 목록 조회 를 참조하세요.

POST

text-to-speech

with-timestamps

cURL

# POST a text-to-speech request and receive JSON with base64 audio and timestamps.
# The heredoc delimiter is quoted ('EOF') so the shell passes the JSON through
# verbatim: no $variable, `command` or backslash expansion happens inside the body.
curl --request POST \
  --url https://api.typecast.ai/v1/text-to-speech/with-timestamps \
  --header 'Content-Type: application/json' \
  --header 'X-API-KEY: <api-key>' \
  --data @- <<'EOF'
{
  "voice_id": "tc_60e5426de8b95f1d3000d7b5",
  "text": "집중력이 떨어질 땐 5분간 스트레칭을 해보세요.",
  "model": "ssfm-v30",
  "language": "kor",
  "prompt": {
    "emotion_type": "preset",
    "emotion_preset": "normal",
    "emotion_intensity": 1.0
  }
}
EOF

import base64
import requests

API_HOST = "https://api.typecast.ai"

# Request body: one Korean sentence using the "preset" emotion type.
payload = {
    "voice_id": "tc_60e5426de8b95f1d3000d7b5",
    "text": "집중력이 떨어질 땐 5분간 스트레칭을 해보세요.",
    "model": "ssfm-v30",
    "language": "kor",
    "prompt": {
        "emotion_type": "preset",
        "emotion_preset": "normal",
        "emotion_intensity": 1.0,
    },
}
headers = {
    "X-API-KEY": "<api-key>",
    "Content-Type": "application/json",
}

endpoint = f"{API_HOST}/v1/text-to-speech/with-timestamps"
response = requests.post(endpoint, headers=headers, json=payload, timeout=60)
# Raise on any 4xx/5xx status before trying to parse the body.
response.raise_for_status()
data = response.json()

# The audio is delivered base64-encoded inside the JSON; decode it while writing.
with open("output.wav", "wb") as audio_file:
    encoded_audio = data["audio"]
    audio_file.write(base64.b64decode(encoded_audio))
print(f"저장 완료: duration={data['audio_duration']}초")

# Show the first three word-level segments; a missing or null "words" yields none.
word_segments = data.get("words") or []
for segment in word_segments[:3]:
    print(f"  단어: {segment['text']!r} {segment['start']:.3f}s - {segment['end']:.3f}s")

// JSON request body: text, voice and model selection, an emotion_type 'smart'
// prompt with previous/next text, explicit output settings, and a fixed seed.
const requestBody = {
  voice_id: 'tc_60e5426de8b95f1d3000d7b5',
  text: '모든 것이 너무나 완벽해서 마치 꿈을 꾸는 것 같습니다.',
  model: 'ssfm-v30',
  language: 'kor',
  prompt: {
    emotion_type: 'smart',
    previous_text: "I feel like I'm walking on air and I just want to scream with joy!",
    next_text: 'I am literally bursting with happiness and I never want this feeling to end!'
  },
  output: {
    target_lufs: -14,
    volume: 100,
    audio_pitch: 0,
    audio_tempo: 1,
    audio_format: 'wav'
  },
  seed: 42
};

const options = {
  method: 'POST',
  headers: {
    'X-API-KEY': '<api-key>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify(requestBody)
};

// Send the request and log the parsed JSON response; log any failure instead.
(async () => {
  try {
    const res = await fetch('https://api.typecast.ai/v1/text-to-speech/with-timestamps', options);
    const data = await res.json();
    console.log(data);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Request body for the with-timestamps endpoint; JSON-encoded just before sending.
$requestBody = [
  'voice_id' => 'tc_60e5426de8b95f1d3000d7b5',
  'text' => '모든 것이 너무나 완벽해서 마치 꿈을 꾸는 것 같습니다.',
  'model' => 'ssfm-v30',
  'language' => 'kor',
  'prompt' => [
    'emotion_type' => 'smart',
    'previous_text' => "I feel like I'm walking on air and I just want to scream with joy!",
    'next_text' => 'I am literally bursting with happiness and I never want this feeling to end!',
  ],
  'output' => [
    'target_lufs' => -14,
    'volume' => 100,
    'audio_pitch' => 0,
    'audio_tempo' => 1,
    'audio_format' => 'wav',
  ],
  'seed' => 42,
];

$ch = curl_init();

curl_setopt_array($ch, [
  CURLOPT_URL => "https://api.typecast.ai/v1/text-to-speech/with-timestamps",
  // Return the response as a string from curl_exec() instead of printing it.
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($requestBody),
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json",
    "X-API-KEY: <api-key>",
  ],
]);

$response = curl_exec($ch);
$error = curl_error($ch);
curl_close($ch);

// Print the cURL error message when there is one, otherwise the raw response.
echo $error ? ("cURL Error #:" . $error) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a text-to-speech request to the with-timestamps endpoint and
// prints the raw response body. Each step's error is checked; on failure the
// error is printed and the program stops instead of continuing with nil values.
func main() {

	url := "https://api.typecast.ai/v1/text-to-speech/with-timestamps"

	payload := strings.NewReader("{\n  \"voice_id\": \"tc_60e5426de8b95f1d3000d7b5\",\n  \"text\": \"모든 것이 너무나 완벽해서 마치 꿈을 꾸는 것 같습니다.\",\n  \"model\": \"ssfm-v30\",\n  \"language\": \"kor\",\n  \"prompt\": {\n    \"emotion_type\": \"smart\",\n    \"previous_text\": \"I feel like I'm walking on air and I just want to scream with joy!\",\n    \"next_text\": \"I am literally bursting with happiness and I never want this feeling to end!\"\n  },\n  \"output\": {\n    \"target_lufs\": -14,\n    \"volume\": 100,\n    \"audio_pitch\": 0,\n    \"audio_tempo\": 1,\n    \"audio_format\": \"wav\"\n  },\n  \"seed\": 42\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("creating request:", err)
		return
	}

	req.Header.Add("X-API-KEY", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// The response must not be touched when Do fails; deferring
		// res.Body.Close() before this check would dereference nil.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

// JSON request body, written one JSON line per source line for readability.
String requestBody =
    "{\n"
  + "  \"voice_id\": \"tc_60e5426de8b95f1d3000d7b5\",\n"
  + "  \"text\": \"모든 것이 너무나 완벽해서 마치 꿈을 꾸는 것 같습니다.\",\n"
  + "  \"model\": \"ssfm-v30\",\n"
  + "  \"language\": \"kor\",\n"
  + "  \"prompt\": {\n"
  + "    \"emotion_type\": \"smart\",\n"
  + "    \"previous_text\": \"I feel like I'm walking on air and I just want to scream with joy!\",\n"
  + "    \"next_text\": \"I am literally bursting with happiness and I never want this feeling to end!\"\n"
  + "  },\n"
  + "  \"output\": {\n"
  + "    \"target_lufs\": -14,\n"
  + "    \"volume\": 100,\n"
  + "    \"audio_pitch\": 0,\n"
  + "    \"audio_tempo\": 1,\n"
  + "    \"audio_format\": \"wav\"\n"
  + "  },\n"
  + "  \"seed\": 42\n"
  + "}";

// POST the body with the API key and content-type headers; read the reply as a string.
HttpResponse<String> response = Unirest.post("https://api.typecast.ai/v1/text-to-speech/with-timestamps")
  .header("X-API-KEY", "<api-key>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

endpoint = URI("https://api.typecast.ai/v1/text-to-speech/with-timestamps")

# JSON request body sent verbatim. The quoted heredoc disables interpolation and
# escape processing; chomp removes the trailing newline the heredoc appends.
json_body = <<'JSON'.chomp
{
  "voice_id": "tc_60e5426de8b95f1d3000d7b5",
  "text": "모든 것이 너무나 완벽해서 마치 꿈을 꾸는 것 같습니다.",
  "model": "ssfm-v30",
  "language": "kor",
  "prompt": {
    "emotion_type": "smart",
    "previous_text": "I feel like I'm walking on air and I just want to scream with joy!",
    "next_text": "I am literally bursting with happiness and I never want this feeling to end!"
  },
  "output": {
    "target_lufs": -14,
    "volume": 100,
    "audio_pitch": 0,
    "audio_tempo": 1,
    "audio_format": "wav"
  },
  "seed": 42
}
JSON

client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

req = Net::HTTP::Post.new(endpoint)
req["X-API-KEY"] = '<api-key>'
req["Content-Type"] = 'application/json'
req.body = json_body

# Send the request and print the response body.
response = client.request(req)
puts response.read_body

{
  "audio": "UklGRs...(base64 오디오 생략)",
  "audio_format": "wav",
  "audio_duration": 3.2,
  "words": [
    {
      "text": "집중력이",
      "start": 0.08,
      "end": 0.76
    },
    {
      "text": "떨어질",
      "start": 0.8,
      "end": 1.26
    },
    {
      "text": "땐",
      "start": 1.3,
      "end": 1.52
    },
    {
      "text": "5분간",
      "start": 1.56,
      "end": 2.02
    },
    {
      "text": "스트레칭을",
      "start": 2.06,
      "end": 2.7
    },
    {
      "text": "해보세요.",
      "start": 2.74,
      "end": 3.2
    }
  ],
  "characters": [
    {
      "text": "집",
      "start": 0.08,
      "end": 0.26
    },
    {
      "text": "중",
      "start": 0.26,
      "end": 0.43
    },
    {
      "text": "력",
      "start": 0.43,
      "end": 0.6
    },
    {
      "text": "이",
      "start": 0.6,
      "end": 0.76
    },
    {
      "text": " ",
      "start": 0.76,
      "end": 0.8
    },
    {
      "text": "떨",
      "start": 0.8,
      "end": 0.94
    },
    {
      "text": "어",
      "start": 0.94,
      "end": 1.1
    },
    {
      "text": "질",
      "start": 1.1,
      "end": 1.26
    },
    {
      "text": " ",
      "start": 1.26,
      "end": 1.3
    },
    {
      "text": "땐",
      "start": 1.3,
      "end": 1.52
    },
    {
      "text": " ",
      "start": 1.52,
      "end": 1.56
    },
    {
      "text": "5",
      "start": 1.56,
      "end": 1.68
    },
    {
      "text": "분",
      "start": 1.68,
      "end": 1.84
    },
    {
      "text": "간",
      "start": 1.84,
      "end": 2.02
    },
    {
      "text": " ",
      "start": 2.02,
      "end": 2.06
    },
    {
      "text": "스",
      "start": 2.06,
      "end": 2.18
    },
    {
      "text": "트",
      "start": 2.18,
      "end": 2.3
    },
    {
      "text": "레",
      "start": 2.3,
      "end": 2.42
    },
    {
      "text": "칭",
      "start": 2.42,
      "end": 2.56
    },
    {
      "text": "을",
      "start": 2.56,
      "end": 2.7
    },
    {
      "text": " ",
      "start": 2.7,
      "end": 2.74
    },
    {
      "text": "해",
      "start": 2.74,
      "end": 2.88
    },
    {
      "text": "보",
      "start": 2.88,
      "end": 3.02
    },
    {
      "text": "세",
      "start": 3.02,
      "end": 3.14
    },
    {
      "text": "요",
      "start": 3.14,
      "end": 3.18
    },
    {
      "text": ".",
      "start": 3.18,
      "end": 3.2
    }
  ]
}

{
  "detail": "Invalid voice_id"
}

{
  "detail": "Invalid API key"
}

{
  "detail": "Insufficient credit"
}

{
  "detail": "Voice not found"
}

{
  "detail": "Invalid request format"
}

{
  "detail": "Too many requests"
}

{
  "detail": "An unexpected error occurred"
}

인증

X-API-KEY

string

header

필수

인증을 위한 API 키. 타입캐스트 API 콘솔에서 API 키를 생성할 수 있습니다.

쿼리 매개변수

granularity

enum<string>

반환할 타임스탬프 배열을 선택합니다.

생략: words 와 characters 모두 반환
word: words 만 반환 (characters 는 null)
char: characters 만 반환 (words 는 null)

공백 없는 언어(예: jpn, zho): word 정렬은 문장 전체를 하나의 구간으로 반환하므로, 의미 있는 타임스탬프를 얻으려면 char 를 사용하세요.

사용 가능한 옵션:

word,

char

본문

application/json

TTSRequestWith-timestamps parameters

voice_id

string

필수

보이스 식별자. 두 가지 prefix 를 지원합니다.

tc_ — 기본 제공되는 타입캐스트 보이스 (예: tc_60e5426de8b95f1d3000d7b5). 사용 가능한 ID 는 보이스 목록 조회 를 참조하세요.
uc_ — 퀵 클로닝 으로 생성한 커스텀 보이스 (예: uc_64a1b2c3d4e5f6a7b8c9d0e1). 본인이 소유한 클로닝 보이스만 사용할 수 있습니다.

대소문자 구분: prefix 는 소문자만 사용합니다.

예시:

"tc_60e5426de8b95f1d3000d7b5"

text

string

필수

음성으로 변환할 텍스트. 최소 1자, 최대 2000자. 텍스트 길이에 따라 크레딧이 소비됩니다. 영어, 한국어, 일본어, 중국어를 포함한 여러 언어를 지원합니다. 특수 문자와 구두점은 자동으로 처리됩니다.

필수 문자열 길이: 1 - 2000

예시:

"모든 것이 너무나 완벽해서 마치 꿈을 꾸는 것 같습니다."

model

enum<string>

필수

음성 합성에 사용할 보이스 모델.

ssfm-v30: 향상된 플로우와 추가 감정 프리셋이 있는 최신 모델(권장)
ssfm-v21: 빠르고 안정적인 모델로 신뢰할 수 있는 품질 제공

사용 가능한 옵션:

ssfm-v30,

ssfm-v21

예시:

"ssfm-v30"

language

string

ISO 639-3 표준을 따르는 언어 코드. 대소문자 구분 안 함("KOR"과 "kor" 모두 허용). 제공하지 않으면 텍스트 내용을 기반으로 자동 감지됩니다.

ssfm-v30 지원 언어 (37개)

코드	언어	코드	언어	코드	언어
ARA	아랍어	IND	인도네시아어	POR	포르투갈어
BEN	벵골어	ITA	이탈리아어	RON	루마니아어
BUL	불가리아어	JPN	일본어	RUS	러시아어
CES	체코어	KOR	한국어	SLK	슬로바키아어
DAN	덴마크어	MSA	말레이어	SPA	스페인어
DEU	독일어	NAN	민남어	SWE	스웨덴어
ELL	그리스어	NLD	네덜란드어	TAM	타밀어
ENG	영어	NOR	노르웨이어	TGL	타갈로그어
FIN	핀란드어	PAN	펀자브어	THA	태국어
FRA	프랑스어	POL	폴란드어	TUR	터키어
HIN	힌디어	UKR	우크라이나어	VIE	베트남어
HRV	크로아티아어	YUE	광둥어	ZHO	중국어
HUN	헝가리어

ssfm-v21 지원 언어 (27개)

코드	언어	코드	언어	코드	언어
ARA	아랍어	IND	인도네시아어	RON	루마니아어
BUL	불가리아어	ITA	이탈리아어	RUS	러시아어
CES	체코어	JPN	일본어	SLK	슬로바키아어
DAN	덴마크어	KOR	한국어	SPA	스페인어
DEU	독일어	MSA	말레이어	SWE	스웨덴어
ELL	그리스어	NLD	네덜란드어	TAM	타밀어
ENG	영어	POL	폴란드어	TGL	타갈로그어
FIN	핀란드어	POR	포르투갈어	UKR	우크라이나어
FRA	프랑스어	HRV	크로아티아어	ZHO	중국어

타임스탬프 엔드포인트 주의. 일본어(jpn) · 중국어(zho) 처럼 단어 사이에 공백이 없는 언어는 word 단위 정렬이 문장 전체를 하나의 구간으로 묶어 버립니다. 이런 언어에서는 항상 granularity=char 를 함께 지정해 문자 단위 타임스탬프를 받으세요.

예시:

"kor"

prompt

스마트 프롬프트 (ssfm-v30) · object

생성된 음성의 감정 및 스타일 설정.

스마트 프롬프트 (ssfm-v30)
프리셋 프롬프트 (ssfm-v30)
프롬프트 (ssfm-v21)

Show child attributes

예시:

{
  "emotion_type": "smart",
  "previous_text": "I feel like I'm walking on air and I just want to scream with joy!",
  "next_text": "I am literally bursting with happiness and I never want this feeling to end!"
}

output

Output · object

볼륨(0-200), 피치(-12~+12 반음), 템포(0.5배~2.0배), 형식(wav/mp3)을 포함한 오디오 출력 설정으로, 최종 오디오 특성을 제어합니다.

Show child attributes

seed

integer<uint32>

재현 가능한 음성 생성을 위한 부호 없는 정수 시드. 동일한 시드와 동일한 입력 파라미터로 항상 같은 오디오 결과를 생성합니다.

0 이상의 정수만 허용됩니다. 음수 값은 사용할 수 없습니다.
생략하면 서버가 매번 랜덤 시드를 생성하여 약간의 변이가 발생합니다.

필수 범위: 0 <= x <= 4294967295

예시:

42

응답

성공 - base64 로 인코딩된 오디오와 타임스탬프를 반환합니다.

TTS 생성 + 타임스탬프 정렬 통합 응답.

audio

string

필수

base64 로 인코딩된 오디오 바이트. base64 디코딩한 뒤 audio_format 에 해당하는 확장자(.wav 또는 .mp3)의 파일로 저장할 수 있습니다.

audio_format

enum<string>

필수

audio 필드의 오디오 인코딩 포맷 — wav 또는 mp3 (요청의 output.audio_format 에 따라 결정).

사용 가능한 옵션:

wav,

mp3

audio_duration

number

필수

생성된 오디오의 길이(초).

words

AlignmentSegmentWord · object[] | null

필수

단어 단위 타임스탬프(문장부호 포함). 요청이 granularity=char 일 때는 null.

Show child attributes

characters

AlignmentSegmentCharacter · object[] | null

필수

문자 단위 타임스탬프(문장부호와 공백 포함). 요청이 granularity=word 일 때는 null.

Show child attributes

Compose text to speech Streaming text to speech

⌘I

Text-to-Speech

Voices

Subscription

타임스탬프 포함 텍스트 음성 변환(TTS with Timestamps)

인증

쿼리 매개변수

본문

응답