Zuletzt aktiv 1730823818

Änderung ee983f3806b5029dff480520709ce27a837d684d

ChatTTS-13.py Originalformat
###################################
# Sample a speaker from Gaussian.

# NOTE(review): `chat` and the `ChatTTS` module are not defined in this
# snippet; they are expected to be set up earlier in the script.
rand_spk = chat.sample_random_speaker()
print(rand_spk)  # save it for later timbre recovery

# Decoding parameters for inference; reused by the `chat.infer` calls below.
params_infer_code = ChatTTS.Chat.InferCodeParams(
    spk_emb=rand_spk,  # add sampled speaker
    temperature=0.3,  # using custom temperature
    top_P=0.7,  # top P decode
    top_K=20,  # top K decode
)

###################################
# For sentence level manual control.

# use oral_(0-9), laugh_(0-2), break_(0-7)
# to generate special token in text to synthesize.
params_refine_text = ChatTTS.Chat.RefineTextParams(
    prompt='[oral_2][laugh_0][break_6]',
)

# NOTE(review): `texts` is not defined in this snippet; it is expected to be
# provided earlier in the script, as are `chat` and `params_infer_code`.
wavs = chat.infer(
    texts,
    params_refine_text=params_refine_text,
    params_infer_code=params_infer_code,
)

###################################
# For word level manual control.

# Control tokens are written inline in the text; the refine-text step is
# skipped via `skip_refine_text=True`.
text = 'What is [uv_break]your favorite english food?[laugh][lbreak]'
wavs = chat.infer(
    text,
    skip_refine_text=True,
    params_refine_text=params_refine_text,
    params_infer_code=params_infer_code,
)

# Which call works depends on the installed torchaudio version: some versions
# accept the tensor with an added leading dimension (`unsqueeze(0)`), others
# accept the tensor as returned. Try the first form and fall back to the
# second. `Exception` (rather than a bare `except`) keeps KeyboardInterrupt
# and SystemExit from being swallowed by the fallback.
try:
    torchaudio.save("word_level_output.wav", torch.from_numpy(wavs[0]).unsqueeze(0), 24000)
except Exception:
    torchaudio.save("word_level_output.wav", torch.from_numpy(wavs[0]), 24000)