Dernière activité 1730823818

knox a révisé ce gist 1730823818. Aller à la révision

1 file changed, 40 insertions

ChatTTS-13.py(fichier créé)

@@ -0,0 +1,40 @@
1 + ###################################
2 + # Sample a speaker from Gaussian.
3 +
4 + rand_spk = chat.sample_random_speaker()
5 + print(rand_spk) # save it for later timbre recovery
6 +
7 + params_infer_code = ChatTTS.Chat.InferCodeParams(
8 + spk_emb = rand_spk, # add sampled speaker
9 + temperature = .3, # using custom temperature
10 + top_P = 0.7, # top P decode
11 + top_K = 20, # top K decode
12 + )
13 +
14 + ###################################
15 + # For sentence level manual control.
16 +
17 + # use oral_(0-9), laugh_(0-2), break_(0-7)
18 + # to generate special token in text to synthesize.
19 + params_refine_text = ChatTTS.Chat.RefineTextParams(
20 + prompt='[oral_2][laugh_0][break_6]',
21 + )
22 +
23 + wavs = chat.infer(
24 + texts,
25 + params_refine_text=params_refine_text,
26 + params_infer_code=params_infer_code,
27 + )
28 +
29 + ###################################
30 + # For word level manual control.
31 +
32 + text = 'What is [uv_break]your favorite english food?[laugh][lbreak]'
33 + wavs = chat.infer(text, skip_refine_text=True, params_refine_text=params_refine_text, params_infer_code=params_infer_code)
34 + """
35 + In some versions of torchaudio, the first line works but in other versions, so does the second line.
36 + """
37 + try:
38 + torchaudio.save("word_level_output.wav", torch.from_numpy(wavs[0]).unsqueeze(0), 24000)
39 + except:
40 + torchaudio.save("word_level_output.wav", torch.from_numpy(wavs[0]), 24000)
Plus récent Plus ancien