Skip to content

Commit

Permalink
BugFix
Browse files Browse the repository at this point in the history
  • Loading branch information
NaruseMioShirakana committed Jun 2, 2024
1 parent 604fc92 commit 7597c82
Show file tree
Hide file tree
Showing 11 changed files with 392 additions and 250 deletions.
15 changes: 15 additions & 0 deletions DotNetApi/LibSvcApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,11 @@ public UnionModel(void* _Obj)
throw new Exception(GetError(0));
}

/// <summary>
/// Exposes the raw pointer stored in <c>Model_</c> so it can be handed to
/// other wrappers or parameter structs.
/// </summary>
/// <returns>The value of <c>Model_</c>, unchanged.</returns>
public void* GetModel() => Model_;

public Int16Vector Inference(
Slice _Slice,
ref Params _InferParams,
Expand Down Expand Up @@ -592,11 +597,21 @@ public Params() { }

public int UseShallowDiffusion = 0; //使用浅扩散
public void* _VocoderModel = null;
public void* _ShallowDiffusionModel = null;
public int ShallowDiffusionUseSrcAudio = 1;
public int VocoderHopSize = 512;
public int VocoderMelBins = 128;
public int VocoderSamplingRate = 44100;
public long ShallowDiffuisonSpeaker = 0;

/// <summary>
/// Stores the pointer returned by <paramref name="Vocoder"/>.GetModel()
/// in <c>_VocoderModel</c>.
/// </summary>
/// <param name="Vocoder">Vocoder wrapper whose model pointer is copied.</param>
public void SetVocoder(ref VocoderModel Vocoder) => _VocoderModel = Vocoder.GetModel();
/// <summary>
/// Stores the pointer returned by <paramref name="UnionMod"/>.GetModel()
/// in <c>_ShallowDiffusionModel</c>.
/// </summary>
/// <param name="UnionMod">Union model wrapper whose model pointer is copied.</param>
public void SetShallowDiffusion(ref UnionModel UnionMod) => _ShallowDiffusionModel = UnionMod.GetModel();
};

[StructLayout(LayoutKind.Sequential, Pack = 4, CharSet = CharSet.Unicode)]
Expand Down
27 changes: 24 additions & 3 deletions libsvc/Api/header/NativeApi.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
#pragma once
#include "../../framework.h"
// LibSvcDeprecated: placed on a function declaration to mark it as deprecated.
//  - compilers that define __GNUC__ : __attribute__((deprecated))
//  - compilers that define _MSC_VER : __declspec(deprecated)
//  - anything else                  : expands to nothing, so declarations that
//                                     use the macro still compile.
#if defined(__GNUC__)
#define LibSvcDeprecated __attribute__((deprecated))
#elif defined(_MSC_VER)
#define LibSvcDeprecated __declspec(deprecated)
#else
#define LibSvcDeprecated
#endif
#ifdef _WIN32
#include "windows.h"
#endif
Expand Down Expand Up @@ -72,8 +79,14 @@ extern "C" {
LPWSTR Sampler; //Diffusion采样器 ["Pndm" "DDim"]
LPWSTR ReflowSampler; //Reflow采样器 ["Eular" "Rk4" "Heun" "Pecece"]
LPWSTR F0Method; //F0提取算法 ["Dio" "Harvest" "RMVPE" "FCPE"]
INT32 UseShallowDiffusion; //是否使用浅扩散 [0(false)/1(true)]
INT32 UseShallowDiffusionOrEnhancer; //是否使用浅扩散/声码器增强 [0(false)/1(true)]
void* _VocoderModel; //声码器模型 Diffusion模型必须设定该项目
void* _ShallowDiffusionModel; //扩散模型 浅扩散必需设置为扩散模型地址
INT32 ShallowDiffusionUseSrcAudio; //浅扩散模型是否使用原始音频 [0(false)/1(true)]
INT32 VocoderHopSize; //声码器HopSize [ Hop ]
INT32 VocoderMelBins; //声码器MelBins [ Bins ]
INT32 VocoderSamplingRate; //声码器采样率 [ SR ]
INT64 ShallowDiffuisonSpeaker; //浅扩散中Vits模型输入的角色ID [ 0 ~ NS ]
};

struct DiffusionSvcPaths
Expand Down Expand Up @@ -276,7 +289,15 @@ extern "C" {
Int16Vector _Output //std::vector<int16_t> By "LibSvcAllocateAudio()"
);

LibSvcApi INT32 LibSvcShallowDiffusionInference(
/*
 * Runs inference on a whole PCM buffer and stores the converted audio in _Output.
 *
 * _T selects how _Model is interpreted by the implementation:
 *   0 -> VitsSvc, 1 -> UnionSvc (LibSvcParams::_VocoderModel must be non-null);
 *   any other value is rejected as an unsupported model type.
 *
 * Returns 0 on success. On failure (null argument, unsupported _T, or an
 * exception raised during inference) the implementation records an error
 * message and returns 1.
 */
LibSvcApi INT32 LibSvcInferPCMData(
SvcModel _Model, //SingingVoiceConversion Model
UINT32 _T,
CInt16Vector _PCMData,
const void* _InferParams, //Ptr Of LibSvcParams
Int16Vector _Output //std::vector<int16_t> By "LibSvcAllocateAudio()"
);

LibSvcApi LibSvcDeprecated INT32 LibSvcShallowDiffusionInference(
SvcModel _Model, //SingingVoiceConversion Model
CInt16Vector _16KAudioHubert, //SamplingRate Must Be 16000
MelType _Mel, //Mel By "LibSvcAllocateMel()"
Expand All @@ -289,7 +310,7 @@ extern "C" {
Int16Vector _Output //std::vector<int16_t> By "LibSvcAllocateAudio()"
);

LibSvcApi INT32 LibSvcVocoderEnhance(
LibSvcApi LibSvcDeprecated INT32 LibSvcVocoderEnhance(
VocoderModel _Model, //Vocoder Model
MelType _Mel, //Mel By "LibSvcAllocateMel()"
CFloatVector _F0,
Expand Down
62 changes: 9 additions & 53 deletions libsvc/Api/header/readme.md → libsvc/Api/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,10 @@ void func(){

//声码器增强
{
//推理出一个基础结果
_Params.VocoderHopSize = 512;
_Params.VocoderMelBins = 128;
_Params.VocoderSamplingRate = 44100;
_Params._VocoderModel = _Vocoder;
LibSvcInferSlice(
_Model, //模型
0, //模型类型
Expand All @@ -208,32 +211,15 @@ void func(){
&_Process, //当前进度
_OutPutAudio //输出
);

/*
此处自行将_OutPutAudio重采样至声码器的采样率,或是保证满足以下函数的要求
*/

//短时傅里叶变换,并将其变换到Mel空间。注意:至少要保证该函数参数中(_SamplingRate / _HopSize)与声码器参数的(_SamplingRate / _HopSize)相等
LibSvcStft(
_OutPutAudio, //输入音频
44100, //声码器采样率
512, //STFT HopSize(声码器的HopSize)
128, //Mel Bins(必须为声码器的MelBins)
_Mel //输出的Mel
);

LibSvcVocoderEnhance(
_Vocoder, //声码器模型
_Mel, //上一步输出的Mel
_F0, //该切片的F0数据(必须为同一切片的数据)
128, //Mel Bins(必须为声码器的MelBins)
_OutPutAudio //输出
);
}

//浅扩散推理
{
//用Vits推理出一个基础结果
_Params.VocoderHopSize = 512;
_Params.VocoderMelBins = 128;
_Params.VocoderSamplingRate = 44100;
_Params._VocoderModel = _Vocoder;
_Params._ShallowDiffusionModel = nullptr; //改为你的Diffusion模型
LibSvcInferSlice(
_Model, //模型
0, //模型类型
Expand All @@ -242,36 +228,6 @@ void func(){
&_Process, //当前进度
_OutPutAudio //输出
);

/*
此处自行将_OutPutAudio重采样至Diffusion模型的采样率,或是保证满足以下函数的要求
*/

//短时傅里叶变换,并将其变换到Mel空间。注意:至少要保证该函数参数中(_SamplingRate / _HopSize)与Diffusion模型的(_SamplingRate / _HopSize)相等
LibSvcStft(
_OutPutAudio, //输入音频
44100, //Diffusion模型采样率
512, //STFT HopSize(Diffusion模型的HopSize)
128, //Mel Bins(必须为Diffusion模型的MelBins)
_Mel //输出的Mel
);

/*
此处自行将_OutPutAudio重采样至16000采样率
*/

LibSvcShallowDiffusionInference(
_Model, //此处的模型必须为Diffusion模型,写教程的时候为了方便我写成了同一个
_OutPutAudio, //16K采样率的输入音频
_Mel, //上一步得到的Mel
_F0, //该切片的F0
_Volume, //该切片的音量
_Speaker, //该切片的的角色
LibSvcGetSrcLength(_SingleSlice), //该切片的原始数据大小
&_Params, //推理参数
&_Process, //当前进度
&_OutPutAudio //输出
);
}

//释放模型,第一个参数为类型
Expand Down
120 changes: 115 additions & 5 deletions libsvc/Api/src/NativeApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,14 @@ void InitLibSvcParams(LibSvcParams* _Input)
_Input->Sampler = nullptr; //Diffusion采样器
_Input->ReflowSampler = nullptr; //Reflow采样器
_Input->F0Method = nullptr; //F0提取算法
_Input->UseShallowDiffusion = false; //使用浅扩散
_Input->UseShallowDiffusionOrEnhancer = false; //使用浅扩散
_Input->_VocoderModel = nullptr;
_Input->_ShallowDiffusionModel = nullptr;
_Input->ShallowDiffusionUseSrcAudio = 1;
_Input->VocoderHopSize = 512;
_Input->VocoderMelBins = 128;
_Input->VocoderSamplingRate = 44100;
_Input->ShallowDiffuisonSpeaker = 0;
}

void InitLibSvcSlicerSettings(LibSvcSlicerSettings* _Input)
Expand Down Expand Up @@ -564,8 +570,14 @@ INT32 LibSvcInferSlice(
LibSvcNullStrCheck(InpParam.Sampler),
LibSvcNullStrCheck(InpParam.ReflowSampler),
LibSvcNullStrCheck(InpParam.F0Method),
(bool)InpParam.UseShallowDiffusion,
InpParam._VocoderModel
(bool)InpParam.UseShallowDiffusionOrEnhancer,
InpParam._VocoderModel,
InpParam._ShallowDiffusionModel,
(bool)InpParam.ShallowDiffusionUseSrcAudio,
InpParam.VocoderHopSize,
InpParam.VocoderMelBins,
InpParam.VocoderSamplingRate,
InpParam.ShallowDiffuisonSpeaker
};

try
Expand All @@ -589,6 +601,98 @@ INT32 LibSvcInferSlice(
return 0;
}

/**
 * @brief Runs inference on a whole PCM buffer and writes the result to _Output.
 *
 * @param _Model       Model handle; treated as VitsSvc* when _T == 0 and as
 *                     UnionSvc* when _T == 1.
 * @param _T           Model type selector (0 or 1). Any other value fails with
 *                     "UnSupported Model Type!".
 * @param _PCMData     Input audio, read as a const AudioContainer.
 * @param _InferParams Pointer to a LibSvcParams instance.
 * @param _Output      Destination AudioContainer; overwritten with the result.
 * @return 0 on success, 1 on failure (the message is passed to RaiseError).
 *
 * All work that can throw (building the internal Params, which copies the
 * string settings, and the inference call itself) runs inside the try block so
 * that a std::exception never propagates out of this C-ABI entry point.
 */
INT32 LibSvcInferPCMData(
    SvcModel _Model,            //SingingVoiceConversion Model
    UINT32 _T,
    CInt16Vector _PCMData,
    const void* _InferParams,   //Ptr Of LibSvcParams
    Int16Vector _Output         //std::vector<int16_t> By "LibSvcAllocateAudio()"
)
{
    if (!_Model)
    {
        RaiseError(L"_Model Could Not Be Null!");
        return 1;
    }

    if (!_PCMData)
    {
        RaiseError(L"_PCMData Could Not Be Null!");
        return 1;
    }

    if (!_InferParams)
    {
        RaiseError(L"_InferParams Could Not Be Null!");
        return 1;
    }

    if (!_Output)
    {
        RaiseError(L"_Output Could Not Be Null!");
        return 1;
    }

    // Reject unknown model types before copying any parameters.
    if (_T != 0 && _T != 1)
    {
        RaiseError(L"UnSupported Model Type!");
        return 1;
    }

    const auto& InpParam = *static_cast<const LibSvcParams*>(_InferParams);

    // The UnionSvc path (_T == 1) cannot run without a vocoder.
    if (_T == 1 && !InpParam._VocoderModel)
    {
        RaiseError(L"_VocoderModel Could Not Be Null!");
        return 1;
    }

    // Opaque handle types are declared elsewhere; the C-style casts on them are
    // kept to match the other entry points in this file.
    const auto& InputData = *(const AudioContainer*)(_PCMData);

    try
    {
        const Params Param
        {
            InpParam.NoiseScale,
            InpParam.Seed,
            InpParam.SpeakerId,
            InpParam.SrcSamplingRate,
            InpParam.SpkCount,
            InpParam.IndexRate,
            InpParam.ClusterRate,
            InpParam.DDSPNoiseScale,
            InpParam.Keys,
            InpParam.MeanWindowLength,
            InpParam.Pndm,
            InpParam.Step,
            InpParam.TBegin,
            InpParam.TEnd,
            LibSvcNullStrCheck(InpParam.Sampler),
            LibSvcNullStrCheck(InpParam.ReflowSampler),
            LibSvcNullStrCheck(InpParam.F0Method),
            (bool)InpParam.UseShallowDiffusionOrEnhancer,
            InpParam._VocoderModel,
            InpParam._ShallowDiffusionModel,
            (bool)InpParam.ShallowDiffusionUseSrcAudio,
            InpParam.VocoderHopSize,
            InpParam.VocoderMelBins,
            InpParam.VocoderSamplingRate,
            InpParam.ShallowDiffuisonSpeaker
        };

        const long SampleCount = static_cast<long>(InputData.size());

        if (_T == 0)
            *(AudioContainer*)(_Output) = ((VitsSvc*)(_Model))->InferPCMData(InputData, SampleCount, Param);
        else
            *(AudioContainer*)(_Output) = ((UnionSvc*)(_Model))->InferPCMData(InputData, SampleCount, Param);
    }
    catch (const std::exception& e)
    {
        RaiseError(to_wide_string(e.what()));
        return 1;
    }

    return 0;
}

INT32 LibSvcShallowDiffusionInference(
void* _Model,
const void* _16KAudioHubert,
Expand Down Expand Up @@ -683,8 +787,14 @@ INT32 LibSvcShallowDiffusionInference(
LibSvcNullStrCheck(InpParam.Sampler),
LibSvcNullStrCheck(InpParam.ReflowSampler),
LibSvcNullStrCheck(InpParam.F0Method),
(bool)InpParam.UseShallowDiffusion,
InpParam._VocoderModel
(bool)InpParam.UseShallowDiffusionOrEnhancer,
InpParam._VocoderModel,
InpParam._ShallowDiffusionModel,
(bool)InpParam.ShallowDiffusionUseSrcAudio,
InpParam.VocoderHopSize,
InpParam.VocoderMelBins,
InpParam.VocoderSamplingRate,
InpParam.ShallowDiffuisonSpeaker
};

auto _NormalizedAudio = InferTools::InterpResample(
Expand Down
44 changes: 8 additions & 36 deletions libsvc/Modules/header/Models/MoeVSProject.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,40 +97,6 @@ namespace MoeVSProjectSpace
}
};

// Non-owning view over the data of one audio slice.
// Audio, F0, Volume, Speaker and Path are stored as const references, so the
// referenced objects must stay alive for as long as this view is used.
struct MoeVSAudioSliceRef
{
    const std::vector<int16_t>& Audio;                  // PCM samples of the slice
    const std::vector<float>& F0;                       // F0 values of the slice
    const std::vector<float>& Volume;                   // volume values of the slice
    const std::vector<std::vector<float>>& Speaker;     // speaker data of the slice
    bool IsNotMute;
    long OrgLen;
    const std::wstring& Path;
    size_t Slice = 0;
    void* Mel = nullptr;                                // optional, defaults to nullptr

    // Binds every reference member to the corresponding argument and copies
    // the scalar values; nothing is owned or duplicated.
    MoeVSAudioSliceRef(
        const std::vector<int16_t>& AudioData,
        const std::vector<float>& F0Data,
        const std::vector<float>& VolumeData,
        const std::vector<std::vector<float>>& SpeakerData,
        bool NotMute,
        long OriginalLength,
        const std::wstring& SourcePath,
        size_t SliceIndex,
        void* MelTensorPtr = nullptr
    )
        : Audio(AudioData)
        , F0(F0Data)
        , Volume(VolumeData)
        , Speaker(SpeakerData)
        , IsNotMute(NotMute)
        , OrgLen(OriginalLength)
        , Path(SourcePath)
        , Slice(SliceIndex)
        , Mel(MelTensorPtr)
    {
    }
};

struct MoeVSParams
{
//通用
Expand All @@ -146,15 +112,21 @@ namespace MoeVSProjectSpace
float DDSPNoiseScale = 0.8f; //DDSP噪声修正因子 0-10
float Keys = 0.f; //升降调 -64-64
size_t MeanWindowLength = 2; //均值滤波器窗口大小 1-20
size_t Pndm = 100; //Diffusion加速倍数 1-200
size_t Step = 1000; //Diffusion总步数 1-1000
size_t Pndm = 1; //Diffusion加速倍数 1-200
size_t Step = 100; //Diffusion总步数 1-1000
float TBegin = 0.f;
float TEnd = 1.f;
std::wstring Sampler = L"Pndm"; //Diffusion采样器
std::wstring ReflowSampler = L"Eular"; //Reflow采样器
std::wstring F0Method = L"Dio"; //F0提取算法
bool UseShallowDiffusion = false; //使用浅扩散
void* _VocoderModel = nullptr;
void* _ShallowDiffusionModel = nullptr;
bool ShallowDiffusionUseSrcAudio = true;
int VocoderHopSize = 512;
int VocoderMelBins = 128;
int VocoderSamplingRate = 44100;
int64_t ShallowDiffuisonSpeaker = 0;

//SVCRTInfer
int64_t RTSampleSize = 44100;
Expand Down
Loading

0 comments on commit 7597c82

Please sign in to comment.