From 362c462380091ef8692d933df3817e8640c06433 Mon Sep 17 00:00:00 2001
From: zhangchenchen
Date: Sat, 14 Sep 2024 14:19:32 +0800
Subject: [PATCH 1/3] fix: thread use_refine, spk_emb and text_mask through
 the velocity pipeline

Code inference and text refinement share the same velocity LLM entry
point, but only the code path should receive a speaker embedding.
Carry three new per-request attributes through the stack:

- Chat._infer_code passes use_refine=False, the speaker embedding from
  params.spk_emb, and the precomputed text_mask; Chat._refine_text
  passes use_refine=True.
- LLM.generate forwards the values through LLM._add_request and
  LLMEngine.add_request into Sequence/SequenceData, so every request
  carries its own settings.
- ModelRunner reads them back in _prepare_prompt, returns them from
  prepare_input_tensors, and in execute_model applies the speaker
  embedding (speaker token id 21143) to the prompt embeddings only
  when use_refine is False. The request's text_mask is used when
  provided, with the old input_tokens != 0 mask kept as a fallback.
- Speaker.decorate_code_prompts additionally strips the [Ebreak] tag.
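For reviewers, a minimal sketch of how the two call sites differ after
this patch (setup elided; variable names follow ChatTTS/core.py, and
the surrounding SamplingParams/input-id plumbing is illustrative only):

    # _infer_code: the model runner may apply a speaker embedding
    result = gpt.llm.generate(
        None,
        sample_params,
        input_ids,
        use_refine=False,
        spk_emb=params.spk_emb,  # None simply skips Speaker.apply
        text_mask=text_mask,     # None falls back to input_tokens != 0
    )

    # _refine_text: never applies a speaker embedding
    result = gpt.llm.generate(
        None, sample_params, input_ids_list, params.show_tqdm, use_refine=True
    )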
---
 ChatTTS/core.py                        |  5 ++++-
 ChatTTS/model/speaker.py               |  1 +
 ChatTTS/model/velocity/llm.py          | 10 +++++++--
 ChatTTS/model/velocity/llm_engine.py   |  2 +-
 ChatTTS/model/velocity/model_runner.py | 31 +++++++++++++++++++++-----
 ChatTTS/model/velocity/sequence.py     | 11 ++++++++-
 6 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/ChatTTS/core.py b/ChatTTS/core.py
index c38ad8957..ba8249983 100644
--- a/ChatTTS/core.py
+++ b/ChatTTS/core.py
@@ -523,6 +523,9 @@ def _infer_code(
                 None,
                 sample_params,
                 input_ids,
+                use_refine=False,
+                spk_emb=params.spk_emb,
+                text_mask=text_mask,
             )
 
             token_ids = []
@@ -625,7 +628,7 @@ def _refine_text(
         del input_ids
 
         result = gpt.llm.generate(
-            None, sample_params, input_ids_list, params.show_tqdm
+            None, sample_params, input_ids_list, params.show_tqdm, use_refine=True
         )
         token_ids = []
         hidden_states = []
diff --git a/ChatTTS/model/speaker.py b/ChatTTS/model/speaker.py
index 5435922ab..63117aa81 100644
--- a/ChatTTS/model/speaker.py
+++ b/ChatTTS/model/speaker.py
@@ -64,6 +64,7 @@ def decorate_code_prompts(
                 t.replace("[Stts]", "")
                 .replace("[spk_emb]", "")
                 .replace("[empty_spk]", "")
+                .replace("[Ebreak]", "")
                 .strip()
             )
             """
diff --git a/ChatTTS/model/velocity/llm.py b/ChatTTS/model/velocity/llm.py
index a37f5cb34..a0b5a0e43 100644
--- a/ChatTTS/model/velocity/llm.py
+++ b/ChatTTS/model/velocity/llm.py
@@ -125,6 +125,9 @@ def generate(
         sampling_params: Optional[SamplingParams] = None,
         prompt_token_ids: Optional[List[List[int]]] = None,
         use_tqdm: bool = True,
+        use_refine: bool = False,
+        spk_emb: Optional[str] = None,
+        text_mask=None,
     ) -> List[RequestOutput]:
         """Generates the completions for the input prompts.
 
@@ -166,7 +169,7 @@ def generate(
         for i in range(num_requests):
             prompt = prompts[i] if prompts is not None else None
             token_ids = None if prompt_token_ids is None else prompt_token_ids[i]
-            self._add_request(prompt, sampling_params, token_ids)
+            self._add_request(prompt, sampling_params, token_ids, use_refine, spk_emb, text_mask)
 
         rtns = self._run_engine(use_tqdm)
         for i, rtn in enumerate(rtns):
@@ -184,10 +187,13 @@ def _add_request(
         prompt: Optional[str],
         sampling_params: SamplingParams,
         prompt_token_ids: Optional[List[int]],
+        use_refine: bool,
+        spk_emb: Optional[str],
+        text_mask=None,
     ) -> None:
         request_id = str(next(self.request_counter))
         self.llm_engine.add_request(
-            request_id, prompt, sampling_params, prompt_token_ids
+            request_id, prompt, sampling_params, prompt_token_ids, use_refine, spk_emb, text_mask
         )
 
     def _run_engine(self, use_tqdm: bool) -> List[RequestOutput]:
diff --git a/ChatTTS/model/velocity/llm_engine.py b/ChatTTS/model/velocity/llm_engine.py
index 0d144d0fd..69edbeb0e 100644
--- a/ChatTTS/model/velocity/llm_engine.py
+++ b/ChatTTS/model/velocity/llm_engine.py
@@ -354,7 +354,7 @@ def add_request(
         # Create the sequences.
         block_size = self.cache_config.block_size
         seq_id = next(self.seq_counter)
-        seq = Sequence(seq_id, prompt, prompt_token_ids, block_size)
+        seq = Sequence(seq_id, prompt, prompt_token_ids, block_size, use_refine, spk_emb, text_mask)
 
         # Create the sequence group.
         seq_group = SequenceGroup(request_id, [seq], sampling_params, arrival_time)
diff --git a/ChatTTS/model/velocity/model_runner.py b/ChatTTS/model/velocity/model_runner.py
index e5c703e09..7f5df4073 100644
--- a/ChatTTS/model/velocity/model_runner.py
+++ b/ChatTTS/model/velocity/model_runner.py
@@ -122,6 +122,9 @@ def _prepare_prompt(
             prompt_tokens = seq_data.get_token_ids()
             prompt_len = len(prompt_tokens)
             prompt_lens.append(prompt_len)
+            use_refine = seq_data.use_refine
+            spk_emb = seq_data.spk_emb
+            text_mask = seq_data.text_mask
 
             input_tokens.append(prompt_tokens)
             # NOTE(woosuk): Here we assume that the first token in the prompt
@@ -174,7 +177,7 @@ def _prepare_prompt(
             block_tables=None,
             use_cuda_graph=False,
         )
-        return input_tokens, input_positions, input_metadata, prompt_lens
+        return input_tokens, input_positions, input_metadata, prompt_lens, use_refine, spk_emb, text_mask
 
     def _prepare_decode(
         self,
@@ -354,13 +357,16 @@ def prepare_input_tensors(
         self,
         seq_group_metadata_list: Optional[List[SequenceGroupMetadata]],
     ) -> Tuple[torch.Tensor, torch.Tensor, InputMetadata, SamplingMetadata]:
+        use_refine = False
+        spk_emb = None
+        text_mask = None
         if self.is_driver_worker:
             # NOTE: We assume that all sequences in the group are all prompts or
             # all decodes.
             is_prompt = seq_group_metadata_list[0].is_prompt
             # Prepare input tensors.
             if is_prompt:
-                (input_tokens, input_positions, input_metadata, prompt_lens) = (
+                (input_tokens, input_positions, input_metadata, prompt_lens, use_refine, spk_emb, text_mask) = (
                     self._prepare_prompt(seq_group_metadata_list)
                 )
             else:
@@ -454,7 +460,7 @@ def get_size_or_none(x: Optional[torch.Tensor]):
             perform_sampling=False,
         )
 
-        return input_tokens, input_positions, input_metadata, sampling_metadata
+        return input_tokens, input_positions, input_metadata, sampling_metadata, use_refine, spk_emb, text_mask
 
     @torch.inference_mode()
     def execute_model(
@@ -462,7 +468,7 @@ def execute_model(
         seq_group_metadata_list: Optional[List[SequenceGroupMetadata]],
         kv_caches: List[Tuple[torch.Tensor, torch.Tensor]],
     ) -> Optional[SamplerOutput]:
-        input_tokens, input_positions, input_metadata, sampling_metadata = (
+        input_tokens, input_positions, input_metadata, sampling_metadata, use_refine, spk_emb, text_mask = (
             self.prepare_input_tensors(seq_group_metadata_list)
         )
         # print(sampling_metadata.seq_data)
@@ -495,8 +501,11 @@ def execute_model(
             input_tokens_history = input_tokens_history.unsqueeze(2).repeat(1, 1, 4)
         # print(input_tokens_history.shape)
         # print("it2",input_tokens.shape)
-        text_mask = input_tokens != 0
-        text_mask = text_mask[:, :, 0]
+        # text_mask = input_tokens != 0
+        # text_mask = text_mask[:, :, 0]
+        if text_mask is None:
+            text_mask = input_tokens != 0
+            text_mask = text_mask[:, :, 0]
 
         if input_metadata.use_cuda_graph:
             graph_batch_size = input_tokens.shape[0]
@@ -533,6 +542,16 @@ def execute_model(
             )
         else:
             input_emb = self.post_model(input_tokens, text_mask)
+            if not use_refine:
+                if spk_emb is not None:
+                    self.speaker.apply(
+                        input_emb,
+                        spk_emb,
+                        input_tokens,
+                        21143,
+                        'cuda:0',
+                    )
+
         # print(input_emb.shape)
         hidden_states = model_executable(
             input_emb=input_emb,
diff --git a/ChatTTS/model/velocity/sequence.py b/ChatTTS/model/velocity/sequence.py
index 76f9cf4e7..d31bcc16b 100644
--- a/ChatTTS/model/velocity/sequence.py
+++ b/ChatTTS/model/velocity/sequence.py
@@ -65,12 +65,18 @@ class SequenceData:
     def __init__(
         self,
        prompt_token_ids: List[int],
+        use_refine: bool = True,
+        spk_emb: Optional[str] = None,
+        text_mask=None,
    ) -> None:
        self.prompt_token_ids = prompt_token_ids
        self.output_token_ids: List[int] = []
        self.cumulative_logprob = 0.0
        self.hidden_states: Optional[torch.Tensor] = None
        self.finished = False
+        self.use_refine = use_refine
+        self.spk_emb = spk_emb
+        self.text_mask = text_mask
 
    def append_token_id(self, token_id: int, logprob: float) -> None:
        if isinstance(self.cumulative_logprob, float):
@@ -132,12 +138,15 @@ def __init__(
         seq_id: int,
         prompt: str,
         prompt_token_ids: List[int],
         block_size: int,
+        use_refine: bool,
+        spk_emb: Optional[str],
+        text_mask,
     ) -> None:
         self.seq_id = seq_id
         self.prompt = prompt
         self.block_size = block_size
 
-        self.data = SequenceData(prompt_token_ids)
+        self.data = SequenceData(prompt_token_ids, use_refine, spk_emb, text_mask)
         self.output_logprobs: SampleLogprobs = []
         self.output_text = ""

From 82f6dfd91e128b7a43a6ced4dcd3e2e65adf2dcc Mon Sep 17 00:00:00 2001
From: zhangchenchen
Date: Sat, 14 Sep 2024 16:14:16 +0800
Subject: [PATCH 2/3] fix: declare the new add_request parameters and pass
 them by keyword

PATCH 1/3 forwarded use_refine, spk_emb and text_mask to
LLMEngine.add_request positionally, but add_request did not declare
them: the first extra value would bind to arrival_time and the rest
would not fit the signature at all. Declare the three parameters with
safe defaults and forward them from LLM._add_request by keyword.

ModelRunner.__init__ now also constructs the Speaker instance that
execute_model needs, taking gpt.hidden_size and spk_stat from the
ChatTTS Config; a hard-coded 768/speaker-stat variant is kept as a
comment for reference.
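As a standalone illustration of the binding problem (signature
abbreviated from this series; rid, params, ids, emb and mask are
placeholder values, not real objects):

    def add_request(request_id, prompt, sampling_params,
                    prompt_token_ids=None, arrival_time=None,
                    use_refine=False, spk_emb=None, text_mask=None):
        ...

    # positional, as in PATCH 1/3: True lands in arrival_time,
    # emb in use_refine, mask in spk_emb -- silently wrong
    add_request(rid, None, params, ids, True, emb, mask)

    # keyword, as of this patch: every value reaches its parameter
    add_request(rid, None, params, ids,
                use_refine=True, spk_emb=emb, text_mask=mask)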
---
 ChatTTS/model/velocity/llm.py          |  2 +-
 ChatTTS/model/velocity/llm_engine.py   |  3 +++
 ChatTTS/model/velocity/model_runner.py | 17 +++++++++++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/ChatTTS/model/velocity/llm.py b/ChatTTS/model/velocity/llm.py
index a0b5a0e43..a68b154f0 100644
--- a/ChatTTS/model/velocity/llm.py
+++ b/ChatTTS/model/velocity/llm.py
@@ -193,7 +193,7 @@ def _add_request(
     ) -> None:
         request_id = str(next(self.request_counter))
         self.llm_engine.add_request(
-            request_id, prompt, sampling_params, prompt_token_ids, use_refine, spk_emb, text_mask
+            request_id, prompt, sampling_params, prompt_token_ids, use_refine=use_refine, spk_emb=spk_emb, text_mask=text_mask
         )
 
     def _run_engine(self, use_tqdm: bool) -> List[RequestOutput]:
diff --git a/ChatTTS/model/velocity/llm_engine.py b/ChatTTS/model/velocity/llm_engine.py
index 69edbeb0e..57327a128 100644
--- a/ChatTTS/model/velocity/llm_engine.py
+++ b/ChatTTS/model/velocity/llm_engine.py
@@ -330,6 +330,9 @@ def add_request(
         sampling_params: SamplingParams,
         prompt_token_ids: Optional[List[int]] = None,
         arrival_time: Optional[float] = None,
+        use_refine: bool = False,
+        spk_emb: Optional[str] = None,
+        text_mask=None,
     ) -> None:
         """Add a request to the engine's request pool.
 
diff --git a/ChatTTS/model/velocity/model_runner.py b/ChatTTS/model/velocity/model_runner.py
index 7f5df4073..6c465dd99 100644
--- a/ChatTTS/model/velocity/model_runner.py
+++ b/ChatTTS/model/velocity/model_runner.py
@@ -77,6 +77,23 @@ def __init__(
         # cache in_wsl result
         self.in_wsl = in_wsl()
 
+        from ...config import Config
+        self.config = Config()
+
+        from ..speaker import Speaker
+        device = None
+        if device is None:
+            device = select_device()
+            self.logger.info("use device %s", str(device))
+
+        self.speaker = Speaker(
+            self.config.gpt.hidden_size, self.config.spk_stat, device
+        )
+        # spek_sta = "愐穤巩噅廷戇笉屈癐媄垹垧帶爲漈塀殐慄亅倴庲舴猂瑈圐狴夥圓帍戛挠腉耐劤坽喳幾战謇聀崒栄呥倸庭燡欈杁襐褄乭埗幺爃弔摁斐捔兕佖廐舏竾豃磐姓趡佄幒爚欄豄讐皳訵仩帆投謌荃蝐叄圝伆幦抂茁呄掑斃讹傮庞爣蜀橁偐祄亥兡常爂欍扉丐浔佱僈強払伅扂蛐徴憍傞巀戺欀艂琐嗴啥値彷刂權穈扒卤俔贲庛初笂卄贐枴仭亁庛剎猢扃缐趤刁偵幪舏伌煁婐潤晍位弾舙茥穁葏蠣訑企庤刊笍橁溑僔云偁庯戚伍潉膐脴僵噔廃艅匊祂唐憴壝嗙席爥欁虁谐牴帽势弿牳蜁兀蛐傄喩丿帔刔圆衁廐罤庁促帙劢伈汄樐檄勵伴弝舑欍罅虐昴劭勅帜刼朊蕁虐蓴樑伫幨扑謪剀堐稴丵伱弐舮諸赁習俔容厱幫牶謃孄糐答嗝僊帜燲笄終瀒判久僤帘爴茇千孑冄凕佳引扐蜁歁缏裄剽儺恘爋朏眿廐呄塍嘇幻爱茠詁訐剴唭俐幾戊欀硁菐贄楕偒巡爀弎屄莐睳賙凶彎刅漄區唐溴剑劋庽舽猄煃跐夔惥伾庮舎伈罁垑坄怅业怯刁朇獁嶏覔坩俳巶爜朐潁崐萄俹凛常爺笌穀聐此夡倛帡刀匉終窏舣販侽怿扉伥贿憐忓謩姆幌犊漂慆癒却甝兎帼戏欅詂浐朔仹壭帰臷弎恇菐獤帡偖帘爞伅腂皐纤囅充幓戠伥灂丐訤戱倱弋爮嬌癁恐孄侥劬忶刓國詀桒古偩嘄庬戚茝赂监燤嘑勌幦舽持呂諐棤姑再底舡笍艃瀐孴倉傔弋爔猠乁濑塄偽嘧恂舛缇襃厐窴仡刱忕別漇穁岏缴廽价庌爊謈硄讑惤倁儂庭爋伇蝂嶐莔摝傠库刞茄歃戏薤伍伯廮创笠塄熐兴勽俄帅剉最腀砐敤卝侍弆戺朒虃旐蚄梕亖幔牻朣扅贐玔堝噅帡剌圅摀崐彤流僳庙爖嬇啁渐悤堁丛幆刧挜彃悐幤刹嚟恕芁看聀摐焔向乁帖爭欁癃糒圄弙佱廜戤謍婀咐昴焍亩廦艏拼謿芐癤怹兽幸舳朇畁喐稔毝丼弈懲挀譂勑哴啁伎常舭笯晁堑俄叩剔廟爍欦絁夒伤休傑廳戌蜅潆癐彴摑勯床刽欅艁砐忄搉从廡舊猥潂唐委仱僜廼爤朄呃弐礔滵垓幩爄挂筁乐籤刕凟幵爠弉癅乑吴勥伖帪舩茆婁碐幤叭乢巜艳猁桀桐啄唩俊幍舮猀艅焐螔琽亀帋爜缅噃咐斤喩予幩爛笆摀浐猴依侹幃刕園慄蛐栤澹仑座爼謉桃慐浔斕偻幛懰嬓衁愐氄悅仿应芔漄衃敐謤傁匩幹抃圉癄廐裄屵噉幍利謍聂搐蛔嚙坍怗舁圐畃膐栄刵东巆戤諾呃偑媤嗨跞忶爝眄祂朒嶔僭劉忾刐匋癄袐翴珅僷廲芄茈恈皐擄崑伄廉牍匃剃犏澤唑丄庺戃伃煀某杄偙亽帴切缌罄挐尴噙倰带舞漄橄塐糴俩僯帀般漀坂栐更両俇廱舌猁慂拐偤嶱卶应刪眉獁茐伔嘅偺帟舊漂恀栐暄喡乞庙舆匂敀潑恔劑侖延戦盽怶唯慳蝘蟃孫娎益袰玍屃痶翮笪儚裀倹椌玻翀詵筽舘惯堿某侰晈藏缮詗廦夸妎瑻瀒裔媀憞唃冶璭狻渠荑奬熹茅愺氰菣滠翦岓褌泣崲嚭欓湒聙宺爄蛅愸庍匃帆誔穮懌蓪玷澌氋抌訙屌臞廛玸听屺希疭孝凂紋新煎彃膲跱尪懁眆窴珏卓揨菸紭概囥显壌榄垫嘮嬭覤媸侵佮烒耸觌婀秋狃帹葯訤桜糨笾腢伀肶悍炂艤禖岅臺惘梷瞍友盁佨岧憳瓧嘴汬藊愌蘤嶠硴绤蜲襏括勾谂縨妥蓪澭竭萢藜纞糲煮愆瀯孯琓罂諺塿燗狟弙衯揻縷丱糅臄梱瀮杰巳猙亊符胠匃泀廏圃膂蒃籏礩岈簹缌劺燲褡孓膜拔蠿觮呋煣厌尷熜論弲牭紫寊誃紀橴賬傸箍弚窃侫簲慯烣渽祌壓媥噜夽夛諛玹疮禄冪謇媽衤盰缺繑薫兾萧嵱打滽箺嚯凣狢蠜崼覽烸簶盯籓摀苶峸懗泲涻凮愳緗剋笔懆廡瞿椏礤惐藥崍腈烄伹亯昣翬褍絋桫僨吨莌丛矄蜞娈憊苆塁蓏嚢嫼绻崱婋囱蠸篯晣芀繼索兓僖誹岯圪褰蠇唓妷胅巁渮砛傈蝷嵚冃購赁峍裋荂舾符熻岳墩寮粃凲袑彚太绲头摯繳狁俥籌冝諝註坎幫擤詒宒凕賐唶梎噔弼課屿覍囨焬櫱撪蝮蝬簸懰櫫涺嵍睻屪翔峞慘滟熲昱军烊舿尦舄糖奁溏凂彆蝲糴禍困皻灏牋睒诙嶱臀开蓈眎腼丢纻廏憤嫖暭袭崲肸螛妒榗紉谨窮袃瑠聍绊腆亿冲葐喋縔詖岑兾给堸赏旻桀蛨媆訂峦紷敯囬偐筨岸焸拭笵殒哜墒萍屓娓諙械臮望摰芑寭准僞谹氍旋憢菮屃划欣瘫谎蘻哐繁籥禦僿誵皯墓燀縿笞熦绗稹榎矻綞蓓帡戓沺区才畃洊詪糐裶盰窶耎偌劂誐庩惝滜沺哮呃煐譠崄槀猄肼蔐擋湌蠺篃恥諌瞦宍堫挪裕崑慩狲悠煋仛愞砈粵八棁害楐妋萔貨尵奂苰怫誎傫岆蕯屇脉夈仆茎刓繸芺壸碗曛汁戭炻獻凉媁兎狜爴怰賃纎袏娷禃蓥膹薪渻罸窿粫凾褄舺窮墫干苊繁冏僮訸夯绛蓪虛羽慲烏憷趎睊蠰莍塞成廎盁欏喓蜮譤崆楁囘矇薭伣艘虝帴奮苢渶虎暣翐蝃尾稈糶瀴罐嵚氮葯笫慐棌悶炯竻爅们媡姢嫺窷刮歫劈裩屬椕賑蜹薊刲義哯尗褦瓀稾礋揣窼舫尋姁椄侸嗫珺修纘媃腽蛛稹梭呛瀈蘟縀礉論夵售主梮蠉娅娭裀誼嶭観枳倊簈褃擞綿催瞃溶苊笛襹櫲盅六囫獩佃粨慯瓢眸旱荃婨蔞岋祗墼焻网牻琖詆峋秉胳媴袭澓賢経稟壩胫碯偏囫嶎纆窈槊賐撹璬莃缘誾宭愊眗喷监劋萘訯總槿棭戾墮犄恌縈簍樥蛔杁袭嫛憫倆篏墵賈羯茎觳蒜致娢慄勒覸蘍曲栂葭宆妋皽缽免盳猼蔂糥觧烳檸佯憓煶蔐筼种繷琲膌塄剰讎対腕棥渽忲俛浪譬秛惛壒嘸淫冻曄睻砃奫貯庴爅粓脮脡娎妖峵蘲討惋泊蠀㴆"
+        # self.speaker = Speaker(
+        #     768, spek_sta, 'cuda:0'
+        # )
+
     def load_model(self) -> None:
         self.model = get_model(self.model_config)
         self.post_model = Embed(

From b2bd01407e8a5ecda4a79f1e6a4d8321e9399101 Mon Sep 17 00:00:00 2001
From: zhangchenchen
Date: Sat, 14 Sep 2024 16:37:22 +0800
Subject: [PATCH 3/3] fix: select the runtime device instead of hard-coding
 cuda:0

Import select_device from ChatTTS.utils (PATCH 2/3 called it without
an import), store the selected device on self.device, and pass it to
both the Speaker constructor and the Speaker.apply call in
execute_model, which previously hard-coded 'cuda:0'. Drop the
device-is-None scaffolding, the self.logger call, and the
commented-out speaker-stat block.
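The resulting initialization, as a condensed sketch (comments are
annotations, not code from the tree; select_device comes from
ChatTTS/utils and picks a device such as cuda:0 or cpu at runtime):

    from ..speaker import Speaker
    from ...utils import select_device

    self.device = select_device()  # e.g. cuda:0 when a GPU is usable
    self.speaker = Speaker(
        self.config.gpt.hidden_size, self.config.spk_stat, self.device
    )

    # later, in execute_model, the same device reaches Speaker.apply:
    self.speaker.apply(input_emb, spk_emb, input_tokens, 21143, self.device)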
---
 ChatTTS/model/velocity/model_runner.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/ChatTTS/model/velocity/model_runner.py b/ChatTTS/model/velocity/model_runner.py
index 6c465dd99..c38fb5cd3 100644
--- a/ChatTTS/model/velocity/model_runner.py
+++ b/ChatTTS/model/velocity/model_runner.py
@@ -81,19 +81,13 @@ def __init__(
         self.config = Config()
 
         from ..speaker import Speaker
-        device = None
-        if device is None:
-            device = select_device()
-            self.logger.info("use device %s", str(device))
+        from ...utils import select_device
+        self.device = select_device()
 
         self.speaker = Speaker(
-            self.config.gpt.hidden_size, self.config.spk_stat, device
+            self.config.gpt.hidden_size, self.config.spk_stat, self.device
         )
-        # spek_sta = "愐穤巩噅廷戇笉屈癐媄垹垧帶爲漈塀殐慄亅倴庲舴猂瑈圐狴夥圓帍戛挠腉耐劤坽喳幾战謇聀崒栄呥倸庭燡欈杁襐褄乭埗幺爃弔摁斐捔兕佖廐舏竾豃磐姓趡佄幒爚欄豄讐皳訵仩帆投謌荃蝐叄圝伆幦抂茁呄掑斃讹傮庞爣蜀橁偐祄亥兡常爂欍扉丐浔佱僈強払伅扂蛐徴憍傞巀戺欀艂琐嗴啥値彷刂權穈扒卤俔贲庛初笂卄贐枴仭亁庛剎猢扃缐趤刁偵幪舏伌煁婐潤晍位弾舙茥穁葏蠣訑企庤刊笍橁溑僔云偁庯戚伍潉膐脴僵噔廃艅匊祂唐憴壝嗙席爥欁虁谐牴帽势弿牳蜁兀蛐傄喩丿帔刔圆衁廐罤庁促帙劢伈汄樐檄勵伴弝舑欍罅虐昴劭勅帜刼朊蕁虐蓴樑伫幨扑謪剀堐稴丵伱弐舮諸赁習俔容厱幫牶謃孄糐答嗝僊帜燲笄終瀒判久僤帘爴茇千孑冄凕佳引扐蜁歁缏裄剽儺恘爋朏眿廐呄塍嘇幻爱茠詁訐剴唭俐幾戊欀硁菐贄楕偒巡爀弎屄莐睳賙凶彎刅漄區唐溴剑劋庽舽猄煃跐夔惥伾庮舎伈罁垑坄怅业怯刁朇獁嶏覔坩俳巶爜朐潁崐萄俹凛常爺笌穀聐此夡倛帡刀匉終窏舣販侽怿扉伥贿憐忓謩姆幌犊漂慆癒却甝兎帼戏欅詂浐朔仹壭帰臷弎恇菐獤帡偖帘爞伅腂皐纤囅充幓戠伥灂丐訤戱倱弋爮嬌癁恐孄侥劬忶刓國詀桒古偩嘄庬戚茝赂监燤嘑勌幦舽持呂諐棤姑再底舡笍艃瀐孴倉傔弋爔猠乁濑塄偽嘧恂舛缇襃厐窴仡刱忕別漇穁岏缴廽价庌爊謈硄讑惤倁儂庭爋伇蝂嶐莔摝傠库刞茄歃戏薤伍伯廮创笠塄熐兴勽俄帅剉最腀砐敤卝侍弆戺朒虃旐蚄梕亖幔牻朣扅贐玔堝噅帡剌圅摀崐彤流僳庙爖嬇啁渐悤堁丛幆刧挜彃悐幤刹嚟恕芁看聀摐焔向乁帖爭欁癃糒圄弙佱廜戤謍婀咐昴焍亩廦艏拼謿芐癤怹兽幸舳朇畁喐稔毝丼弈懲挀譂勑哴啁伎常舭笯晁堑俄叩剔廟爍欦絁夒伤休傑廳戌蜅潆癐彴摑勯床刽欅艁砐忄搉从廡舊猥潂唐委仱僜廼爤朄呃弐礔滵垓幩爄挂筁乐籤刕凟幵爠弉癅乑吴勥伖帪舩茆婁碐幤叭乢巜艳猁桀桐啄唩俊幍舮猀艅焐螔琽亀帋爜缅噃咐斤喩予幩爛笆摀浐猴依侹幃刕園慄蛐栤澹仑座爼謉桃慐浔斕偻幛懰嬓衁愐氄悅仿应芔漄衃敐謤傁匩幹抃圉癄廐裄屵噉幍利謍聂搐蛔嚙坍怗舁圐畃膐栄刵东巆戤諾呃偑媤嗨跞忶爝眄祂朒嶔僭劉忾刐匋癄袐翴珅僷廲芄茈恈皐擄崑伄廉牍匃剃犏澤唑丄庺戃伃煀某杄偙亽帴切缌罄挐尴噙倰带舞漄橄塐糴俩僯帀般漀坂栐更両俇廱舌猁慂拐偤嶱卶应刪眉獁茐伔嘅偺帟舊漂恀栐暄喡乞庙舆匂敀潑恔劑侖延戦盽怶唯慳蝘蟃孫娎益袰玍屃痶翮笪儚裀倹椌玻翀詵筽舘惯堿某侰晈藏缮詗廦夸妎瑻瀒裔媀憞唃冶璭狻渠荑奬熹茅愺氰菣滠翦岓褌泣崲嚭欓湒聙宺爄蛅愸庍匃帆誔穮懌蓪玷澌氋抌訙屌臞廛玸听屺希疭孝凂紋新煎彃膲跱尪懁眆窴珏卓揨菸紭概囥显壌榄垫嘮嬭覤媸侵佮烒耸觌婀秋狃帹葯訤桜糨笾腢伀肶悍炂艤禖岅臺惘梷瞍友盁佨岧憳瓧嘴汬藊愌蘤嶠硴绤蜲襏括勾谂縨妥蓪澭竭萢藜纞糲煮愆瀯孯琓罂諺塿燗狟弙衯揻縷丱糅臄梱瀮杰巳猙亊符胠匃泀廏圃膂蒃籏礩岈簹缌劺燲褡孓膜拔蠿觮呋煣厌尷熜論弲牭紫寊誃紀橴賬傸箍弚窃侫簲慯烣渽祌壓媥噜夽夛諛玹疮禄冪謇媽衤盰缺繑薫兾萧嵱打滽箺嚯凣狢蠜崼覽烸簶盯籓摀苶峸懗泲涻凮愳緗剋笔懆廡瞿椏礤惐藥崍腈烄伹亯昣翬褍絋桫僨吨莌丛矄蜞娈憊苆塁蓏嚢嫼绻崱婋囱蠸篯晣芀繼索兓僖誹岯圪褰蠇唓妷胅巁渮砛傈蝷嵚冃購赁峍裋荂舾符熻岳墩寮粃凲袑彚太绲头摯繳狁俥籌冝諝註坎幫擤詒宒凕賐唶梎噔弼課屿覍囨焬櫱撪蝮蝬簸懰櫫涺嵍睻屪翔峞慘滟熲昱军烊舿尦舄糖奁溏凂彆蝲糴禍困皻灏牋睒诙嶱臀开蓈眎腼丢纻廏憤嫖暭袭崲肸螛妒榗紉谨窮袃瑠聍绊腆亿冲葐喋縔詖岑兾给堸赏旻桀蛨媆訂峦紷敯囬偐筨岸焸拭笵殒哜墒萍屓娓諙械臮望摰芑寭准僞谹氍旋憢菮屃划欣瘫谎蘻哐繁籥禦僿誵皯墓燀縿笞熦绗稹榎矻綞蓓帡戓沺区才畃洊詪糐裶盰窶耎偌劂誐庩惝滜沺哮呃煐譠崄槀猄肼蔐擋湌蠺篃恥諌瞦宍堫挪裕崑慩狲悠煋仛愞砈粵八棁害楐妋萔貨尵奂苰怫誎傫岆蕯屇脉夈仆茎刓繸芺壸碗曛汁戭炻獻凉媁兎狜爴怰賃纎袏娷禃蓥膹薪渻罸窿粫凾褄舺窮墫干苊繁冏僮訸夯绛蓪虛羽慲烏憷趎睊蠰莍塞成廎盁欏喓蜮譤崆楁囘矇薭伣艘虝帴奮苢渶虎暣翐蝃尾稈糶瀴罐嵚氮葯笫慐棌悶炯竻爅们媡姢嫺窷刮歫劈裩屬椕賑蜹薊刲義哯尗褦瓀稾礋揣窼舫尋姁椄侸嗫珺修纘媃腽蛛稹梭呛瀈蘟縀礉論夵售主梮蠉娅娭裀誼嶭観枳倊簈褃擞綿催瞃溶苊笛襹櫲盅六囫獩佃粨慯瓢眸旱荃婨蔞岋祗墼焻网牻琖詆峋秉胳媴袭澓賢経稟壩胫碯偏囫嶎纆窈槊賐撹璬莃缘誾宭愊眗喷监劋萘訯總槿棭戾墮犄恌縈簍樥蛔杁袭嫛憫倆篏墵賈羯茎觳蒜致娢慄勒覸蘍曲栂葭宆妋皽缽免盳猼蔂糥觧烳檸佯憓煶蔐筼种繷琲膌塄剰讎対腕棥渽忲俛浪譬秛惛壒嘸淫冻曄睻砃奫貯庴爅粓脮脡娎妖峵蘲討惋泊蠀㴆"
"愐穤巩噅廷戇笉屈癐媄垹垧帶爲漈塀殐慄亅倴庲舴猂瑈圐狴夥圓帍戛挠腉耐劤坽喳幾战謇聀崒栄呥倸庭燡欈杁襐褄乭埗幺爃弔摁斐捔兕佖廐舏竾豃磐姓趡佄幒爚欄豄讐皳訵仩帆投謌荃蝐叄圝伆幦抂茁呄掑斃讹傮庞爣蜀橁偐祄亥兡常爂欍扉丐浔佱僈強払伅扂蛐徴憍傞巀戺欀艂琐嗴啥値彷刂權穈扒卤俔贲庛初笂卄贐枴仭亁庛剎猢扃缐趤刁偵幪舏伌煁婐潤晍位弾舙茥穁葏蠣訑企庤刊笍橁溑僔云偁庯戚伍潉膐脴僵噔廃艅匊祂唐憴壝嗙席爥欁虁谐牴帽势弿牳蜁兀蛐傄喩丿帔刔圆衁廐罤庁促帙劢伈汄樐檄勵伴弝舑欍罅虐昴劭勅帜刼朊蕁虐蓴樑伫幨扑謪剀堐稴丵伱弐舮諸赁習俔容厱幫牶謃孄糐答嗝僊帜燲笄終瀒判久僤帘爴茇千孑冄凕佳引扐蜁歁缏裄剽儺恘爋朏眿廐呄塍嘇幻爱茠詁訐剴唭俐幾戊欀硁菐贄楕偒巡爀弎屄莐睳賙凶彎刅漄區唐溴剑劋庽舽猄煃跐夔惥伾庮舎伈罁垑坄怅业怯刁朇獁嶏覔坩俳巶爜朐潁崐萄俹凛常爺笌穀聐此夡倛帡刀匉終窏舣販侽怿扉伥贿憐忓謩姆幌犊漂慆癒却甝兎帼戏欅詂浐朔仹壭帰臷弎恇菐獤帡偖帘爞伅腂皐纤囅充幓戠伥灂丐訤戱倱弋爮嬌癁恐孄侥劬忶刓國詀桒古偩嘄庬戚茝赂监燤嘑勌幦舽持呂諐棤姑再底舡笍艃瀐孴倉傔弋爔猠乁濑塄偽嘧恂舛缇襃厐窴仡刱忕別漇穁岏缴廽价庌爊謈硄讑惤倁儂庭爋伇蝂嶐莔摝傠库刞茄歃戏薤伍伯廮创笠塄熐兴勽俄帅剉最腀砐敤卝侍弆戺朒虃旐蚄梕亖幔牻朣扅贐玔堝噅帡剌圅摀崐彤流僳庙爖嬇啁渐悤堁丛幆刧挜彃悐幤刹嚟恕芁看聀摐焔向乁帖爭欁癃糒圄弙佱廜戤謍婀咐昴焍亩廦艏拼謿芐癤怹兽幸舳朇畁喐稔毝丼弈懲挀譂勑哴啁伎常舭笯晁堑俄叩剔廟爍欦絁夒伤休傑廳戌蜅潆癐彴摑勯床刽欅艁砐忄搉从廡舊猥潂唐委仱僜廼爤朄呃弐礔滵垓幩爄挂筁乐籤刕凟幵爠弉癅乑吴勥伖帪舩茆婁碐幤叭乢巜艳猁桀桐啄唩俊幍舮猀艅焐螔琽亀帋爜缅噃咐斤喩予幩爛笆摀浐猴依侹幃刕園慄蛐栤澹仑座爼謉桃慐浔斕偻幛懰嬓衁愐氄悅仿应芔漄衃敐謤傁匩幹抃圉癄廐裄屵噉幍利謍聂搐蛔嚙坍怗舁圐畃膐栄刵东巆戤諾呃偑媤嗨跞忶爝眄祂朒嶔僭劉忾刐匋癄袐翴珅僷廲芄茈恈皐擄崑伄廉牍匃剃犏澤唑丄庺戃伃煀某杄偙亽帴切缌罄挐尴噙倰带舞漄橄塐糴俩僯帀般漀坂栐更両俇廱舌猁慂拐偤嶱卶应刪眉獁茐伔嘅偺帟舊漂恀栐暄喡乞庙舆匂敀潑恔劑侖延戦盽怶唯慳蝘蟃孫娎益袰玍屃痶翮笪儚裀倹椌玻翀詵筽舘惯堿某侰晈藏缮詗廦夸妎瑻瀒裔媀憞唃冶璭狻渠荑奬熹茅愺氰菣滠翦岓褌泣崲嚭欓湒聙宺爄蛅愸庍匃帆誔穮懌蓪玷澌氋抌訙屌臞廛玸听屺希疭孝凂紋新煎彃膲跱尪懁眆窴珏卓揨菸紭概囥显壌榄垫嘮嬭覤媸侵佮烒耸觌婀秋狃帹葯訤桜糨笾腢伀肶悍炂艤禖岅臺惘梷瞍友盁佨岧憳瓧嘴汬藊愌蘤嶠硴绤蜲襏括勾谂縨妥蓪澭竭萢藜纞糲煮愆瀯孯琓罂諺塿燗狟弙衯揻縷丱糅臄梱瀮杰巳猙亊符胠匃泀廏圃膂蒃籏礩岈簹缌劺燲褡孓膜拔蠿觮呋煣厌尷熜論弲牭紫寊誃紀橴賬傸箍弚窃侫簲慯烣渽祌壓媥噜夽夛諛玹疮禄冪謇媽衤盰缺繑薫兾萧嵱打滽箺嚯凣狢蠜崼覽烸簶盯籓摀苶峸懗泲涻凮愳緗剋笔懆廡瞿椏礤惐藥崍腈烄伹亯昣翬褍絋桫僨吨莌丛矄蜞娈憊苆塁蓏嚢嫼绻崱婋囱蠸篯晣芀繼索兓僖誹岯圪褰蠇唓妷胅巁渮砛傈蝷嵚冃購赁峍裋荂舾符熻岳墩寮粃凲袑彚太绲头摯繳狁俥籌冝諝註坎幫擤詒宒凕賐唶梎噔弼課屿覍囨焬櫱撪蝮蝬簸懰櫫涺嵍睻屪翔峞慘滟熲昱军烊舿尦舄糖奁溏凂彆蝲糴禍困皻灏牋睒诙嶱臀开蓈眎腼丢纻廏憤嫖暭袭崲肸螛妒榗紉谨窮袃瑠聍绊腆亿冲葐喋縔詖岑兾给堸赏旻桀蛨媆訂峦紷敯囬偐筨岸焸拭笵殒哜墒萍屓娓諙械臮望摰芑寭准僞谹氍旋憢菮屃划欣瘫谎蘻哐繁籥禦僿誵皯墓燀縿笞熦绗稹榎矻綞蓓帡戓沺区才畃洊詪糐裶盰窶耎偌劂誐庩惝滜沺哮呃煐譠崄槀猄肼蔐擋湌蠺篃恥諌瞦宍堫挪裕崑慩狲悠煋仛愞砈粵八棁害楐妋萔貨尵奂苰怫誎傫岆蕯屇脉夈仆茎刓繸芺壸碗曛汁戭炻獻凉媁兎狜爴怰賃纎袏娷禃蓥膹薪渻罸窿粫凾褄舺窮墫干苊繁冏僮訸夯绛蓪虛羽慲烏憷趎睊蠰莍塞成廎盁欏喓蜮譤崆楁囘矇薭伣艘虝帴奮苢渶虎暣翐蝃尾稈糶瀴罐嵚氮葯笫慐棌悶炯竻爅们媡姢嫺窷刮歫劈裩屬椕賑蜹薊刲義哯尗褦瓀稾礋揣窼舫尋姁椄侸嗫珺修纘媃腽蛛稹梭呛瀈蘟縀礉論夵售主梮蠉娅娭裀誼嶭観枳倊簈褃擞綿催瞃溶苊笛襹櫲盅六囫獩佃粨慯瓢眸旱荃婨蔞岋祗墼焻网牻琖詆峋秉胳媴袭澓賢経稟壩胫碯偏囫嶎纆窈槊賐撹璬莃缘誾宭愊眗喷监劋萘訯總槿棭戾墮犄恌縈簍樥蛔杁袭嫛憫倆篏墵賈羯茎觳蒜致娢慄勒覸蘍曲栂葭宆妋皽缽免盳猼蔂糥觧烳檸佯憓煶蔐筼种繷琲膌塄剰讎対腕棥渽忲俛浪譬秛惛壒嘸淫冻曄睻砃奫貯庴爅粓脮脡娎妖峵蘲討惋泊蠀㴆" - # self.speaker = Speaker( - # 768, spek_sta, 'cuda:0' - # ) def load_model(self) -> None: self.model = get_model(self.model_config) @@ -566,7 +562,7 @@ def execute_model( spk_emb, input_tokens, 21143, - 'cuda:0', + self.device, ) # print(input_emb.shape)