```yaml
am_stat:
phones_dict:
tones_dict:
speaker_dict:

# voc (vocoder)
# choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', 'pwgan_vctk', 'mb_melgan_csmsc']
voc: 'pwgan_csmsc'
voc_config:
voc_ckpt:
voc_stat:

# others
lang: 'zh'
device: # set 'gpu:id' or 'cpu'
```
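As a minimal sketch of how such a config can be loaded and overridden from Python, assuming PyYAML is available and the file is saved as `conf/tts.yaml` (a hypothetical name, not part of the original recipe):

```python
# Minimal sketch: load the TTS config above and pick a vocoder.
# "conf/tts.yaml" is a hypothetical path; PyYAML is an assumed dependency.
import yaml

with open("conf/tts.yaml") as f:
    cfg = yaml.safe_load(f)

cfg["voc"] = "pwgan_csmsc"   # must be one of the choices listed above
cfg["lang"] = "zh"
cfg["device"] = "cpu"        # or "gpu:0"
print(cfg)
```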
```python
import os

import soundfile as sf

# text_dict, the predictors, and the frontend come from the setup code above.
sentences = list(text_dict.items())
merge_sentences = True
fs = 24000
for utt_id, sentence in sentences:
    # acoustic model: text -> mel-spectrogram features
    am_output_data = get_am_output(
        input=sentence,
        am_predictor=am_predictor,
        am="fastspeech2_mix",
        frontend=frontend,
        lang="mix",
        merge_sentences=merge_sentences,
        speaker_dict=os.path.join(am_inference_dir, "phone_id_map.txt"),
        spk_id=0, )
    # vocoder: features -> waveform
    wav = get_voc_output(
        voc_predictor=voc_predictor, input=am_output_data)
    # save the file
    sf.write(output_dir / (utt_id + ".wav"), wav, samplerate=fs)
```
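The loop above assumes `text_dict` maps utterance ids to sentences. A minimal sketch of building it from a plain text file with one `utt_id sentence` pair per line; the file name `sentences.txt` is a placeholder, not part of the original example:

```python
# Hypothetical input: each non-empty line of sentences.txt is "<utt_id> <sentence>".
text_dict = {}
with open("sentences.txt", encoding="utf-8") as f:
    for line in f:
        if line.strip():
            utt_id, sentence = line.strip().split(maxsplit=1)
            text_dict[utt_id] = sentence
```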
```python
speaker_encoder = LSTMSpeakerEncoder(
    n_mels=40, num_layers=3, hidden_size=256, output_size=256)
speaker_encoder.set_state_dict(paddle.load(self.ge2e_params_path))
speaker_encoder.eval()
self.speaker_encoder = speaker_encoder
print("GE2E Done!")
with open(self.phones_dict, "r") as ...
```
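Once loaded, the encoder maps the mel frames of a reference utterance to a single fixed-size speaker embedding. A hedged sketch with random frames standing in for real mel partials; the `embed_utterance` call and the `(num_partials, frames, n_mels)` layout follow typical GE2E voice-cloning pipelines and should be treated as assumptions here:

```python
import paddle

# Random stand-in for real mel partials: 5 partials of 160 frames x 40 mel bins,
# matching n_mels=40 in the constructor above.
mel_partials = paddle.randn([5, 160, 40])
with paddle.no_grad():
    spk_emb = speaker_encoder.embed_utterance(mel_partials)  # assumed API
print(spk_emb.shape)  # expected: a 256-dim speaker embedding
```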
```python
# Load the models; the model is downloaded the first time vec_executor is used.
# speaker encoder
vec_executor = VectorExecutor()
frontend = Frontend(phone_vocab_path=phones_dict)
print("frontend done!")
# acoustic model
am_inference = get_am_inference(
    am=am,
    am_config=am_config,
    am_ckpt=am_ckpt,
    am_stat=am_stat,
    ...
```
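For reference, a hedged sketch of extracting a speaker embedding from a reference wav with the executor just created; `ref.wav` is a placeholder, and the `audio_file` keyword follows the `paddlespeech.cli` executor convention (an assumption here, not shown in the snippet above):

```python
from paddlespeech.cli.vector import VectorExecutor

vec_executor = VectorExecutor()
# Downloads the model on first use, then returns an embedding for the audio.
spk_emb = vec_executor(audio_file="ref.wav")
print(spk_emb.shape)
```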
speaker_dict=os.path.join(am_inference_dir, "phone_id_map.txt"), spk_id=0, ) wav = get_voc_output( voc_predictor=voc_predictor, input=am_output_data) # 保存文件 sf.write(output_dir / (utt_id + ".wav"), wav, samplerate=fs) ...
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]
                [--speaker-dict SPEAKER_DICT] [--voice-cloning VOICE_CLONING]

Train a VITS model.

optional arguments:
  -h, ...
```
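For illustration, a hedged example invocation built only from the flags listed above; every path is a placeholder, not a value shipped with the recipe:

```bash
python3 train.py \
    --config=conf/default.yaml \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --output-dir=exp/default \
    --ngpu=1 \
    --phones-dict=dump/phone_id_map.txt
```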
speaker_dict=os.path.join(am_inference_dir,"phone_id_map.txt"), spk_id=0, ) wav = get_voc_output( voc_predictor=voc_predictor,input=am_output_data)# 保存文件sf.write(output_dir / (utt_id +".wav"), wav, samplerate=fs)if__name__ =='__main__': ...