ChatTTS-14.py
· 774 B · Python
Sin formato
# Demo: synthesize an English self-introduction with ChatTTS and write it to
# "self_introduction_output.wav".
# Relies on names that are not defined in this snippet: the `ChatTTS`, `torch`
# and `torchaudio` modules, and `chat` (presumably an already-loaded
# ChatTTS.Chat instance — TODO confirm against the surrounding script).

# The text is assembled from adjacent string literals instead of a
# triple-quoted string with the newlines stripped: stripping the newlines
# fused the line-wrapped words "speaker" and "capabilities" into one token.
# The only character added relative to the old value is the space that keeps
# those two words apart; the bracketed control tags are passed through as-is.
inputs_en = (
    "chat T T S is a text to speech model designed for dialogue applications."
    "[uv_break]it supports mixed language input [uv_break]and offers multi speaker "
    "capabilities with precise control over prosodic elements like"
    "[uv_break]laughter[uv_break][laugh], [uv_break]pauses, [uv_break]and intonation."
    "[uv_break]it delivers natural and expressive speech,[uv_break]so please"
    "[uv_break] use the project responsibly at your own risk.[uv_break]"
)  # English is still experimental.

# Refinement prompt forwarded to chat.infer() below.
params_refine_text = ChatTTS.Chat.RefineTextParams(
    prompt="[oral_2][laugh_0][break_4]",
)

audio_array_en = chat.infer(inputs_en, params_refine_text=params_refine_text)

# torchaudio.save() requires a 2-D (channels, frames) tensor. Promote a 1-D
# waveform to a single channel; an already 2-D waveform is left untouched.
# NOTE(review): whether infer() yields 1-D or 2-D arrays appears to depend on
# the installed ChatTTS version — confirm.
waveform_en = torch.from_numpy(audio_array_en[0])
if waveform_en.dim() == 1:
    waveform_en = waveform_en.unsqueeze(0)

# 24000 is passed as the sample rate of the saved file.
torchaudio.save("self_introduction_output.wav", waveform_en, 24000)
1 | inputs_en = """ |
2 | chat T T S is a text to speech model designed for dialogue applications. |
3 | [uv_break]it supports mixed language input [uv_break]and offers multi speaker |
4 | capabilities with precise control over prosodic elements like |
5 | [uv_break]laughter[uv_break][laugh], [uv_break]pauses, [uv_break]and intonation. |
6 | [uv_break]it delivers natural and expressive speech,[uv_break]so please |
7 | [uv_break] use the project responsibly at your own risk.[uv_break] |
8 | """.replace('\n', '') # English is still experimental. |
9 | |
10 | params_refine_text = ChatTTS.Chat.RefineTextParams( |
11 | prompt='[oral_2][laugh_0][break_4]', |
12 | ) |
13 | |
14 | audio_array_en = chat.infer(inputs_en, params_refine_text=params_refine_text) |
15 | torchaudio.save("self_introduction_output.wav", torch.from_numpy(audio_array_en[0]), 24000) |