knox a révisé ce gist . Aller à la révision
1 file changed, 40 insertions
ChatTTS-13.py(fichier créé)
@@ -0,0 +1,40 @@ | |||
###################################
# Sample a speaker from Gaussian.

rand_spk = chat.sample_random_speaker()
print(rand_spk)  # keep the printed value so this timbre can be recovered later

# Inference settings: the sampled speaker plus explicit decoding options.
params_infer_code = ChatTTS.Chat.InferCodeParams(
    spk_emb=rand_spk,  # use the speaker sampled above
    temperature=0.3,  # custom temperature
    top_P=0.7,  # top-P decoding
    top_K=20,  # top-K decoding
)
13 | + | ||
###################################
# Sentence-level manual control.

# The prompt takes oral_(0-9), laugh_(0-2) and break_(0-7) tags, which are
# used to generate special tokens in the text to synthesize.
params_refine_text = ChatTTS.Chat.RefineTextParams(prompt="[oral_2][laugh_0][break_6]")

# NOTE(review): `texts` is not defined in this snippet; presumably it is set
# up earlier in the walkthrough -- confirm before running this file standalone.
wavs = chat.infer(
    texts,
    params_refine_text=params_refine_text,
    params_infer_code=params_infer_code,
)
28 | + | ||
###################################
# Word-level manual control.

# The control tokens are embedded directly in the text, and text refinement
# is skipped for this call (skip_refine_text=True).
text = 'What is [uv_break]your favorite english food?[laugh][lbreak]'
wavs = chat.infer(
    text,
    skip_refine_text=True,
    params_refine_text=params_refine_text,
    params_infer_code=params_infer_code,
)

# Depending on the installed torchaudio version, either the first call (with an
# extra leading dimension added via unsqueeze) or the second call (tensor passed
# as-is) is the one that works, so try the first and fall back to the second.
try:
    torchaudio.save("word_level_output.wav", torch.from_numpy(wavs[0]).unsqueeze(0), 24000)
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not swallowed by the fallback.
    torchaudio.save("word_level_output.wav", torch.from_numpy(wavs[0]), 24000)
Plus récent
Plus ancien