Skip to content

Commit

Permalink
bugfix for npu when sampling a real speaker.
Browse files Browse the repository at this point in the history
  • Loading branch information
shanshan shen committed Oct 18, 2024
1 parent 0045fcc commit 0460523
Showing 1 changed file with 18 additions and 8 deletions.
26 changes: 18 additions & 8 deletions ChatTTS/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,11 @@ def sample_random_speaker(self) -> str:
return self.speaker.sample_random()

def sample_audio_speaker(self, wav: Union[np.ndarray, torch.Tensor]) -> str:
return self.speaker.encode_prompt(self.dvae.sample_audio(wav))
sample_audio = self.dvae.sample_audio(wav)
if "npu" in str(self.device):
# reset dvae to npu
self.dvae.to(self.device)
return self.speaker.encode_prompt(sample_audio)

@dataclass(repr=False, eq=False)
class RefineTextParams:
Expand Down Expand Up @@ -268,13 +272,19 @@ def _load(
self.vocos = vocos
self.logger.log(logging.INFO, "vocos loaded.")

dvae = DVAE(
decoder_config=asdict(self.config.dvae.decoder),
encoder_config=asdict(self.config.dvae.encoder),
vq_config=asdict(self.config.dvae.vq),
dim=self.config.dvae.decoder.idim,
coef=coef,
device=device,
# Computing MelSpectrogram on npu is not supported yet, so fall back to cpu.
dvae_device = torch.device("cpu") if "npu" in str(self.device) else device
dvae = (
DVAE(
decoder_config=asdict(self.config.dvae.decoder),
encoder_config=asdict(self.config.dvae.encoder),
vq_config=asdict(self.config.dvae.vq),
dim=self.config.dvae.decoder.idim,
coef=coef,
device=dvae_device,
)
.to(dvae_device)
.eval()
)
coef = str(dvae)
assert dvae_ckpt_path, "dvae_ckpt_path should not be None"
Expand Down

0 comments on commit 0460523

Please sign in to comment.