From 6ecf94489fec708dcb7db28656f2dc4c31ad3d23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E5=8F=B6=20=E8=97=A4=E5=8E=9F?= <1751842477@qq.com> Date: Sat, 11 Nov 2023 18:26:10 +0800 Subject: [PATCH] =?UTF-8?q?=E5=86=8D=E6=AC=A1=E6=94=AF=E6=8C=81VITS=20?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=AF=B9BertVits=E7=9A=84=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Lib/MJson/MJson.h | 17 +- .../LibDLVoiceCodec/base.h | 52 +- .../LibDLVoiceCodec/operator.cpp | 7 + .../LibDLVoiceCodec/operator.h | 27 + .../LibDLVoiceCodec/value.cpp | 216 +++++- .../LibDLVoiceCodec/value.h | 160 +++- .../BaseF0Extractor/BaseF0Extractor.hpp | 8 +- .../DioF0Extractor/DioF0Extractor.cpp | 4 +- .../DioF0Extractor/DioF0Extractor.hpp | 4 +- .../F0Extractor/F0ExtractorManager.cpp | 4 +- .../F0Extractor/F0ExtractorManager.hpp | 4 +- .../HarvestF0Extractor/HarvestF0Extractor.cpp | 4 +- .../HarvestF0Extractor/HarvestF0Extractor.hpp | 4 +- .../NetF0Predictors/NetF0Predictors.cpp | 4 +- .../NetF0Predictors/NetF0Predictors.hpp | 4 +- .../Modules/InferTools/G2P/MoeVSG2P.cpp | 644 ++++++++++++++++ .../Modules/InferTools/G2P/MoeVSG2P.hpp | 254 +++++++ .../Modules/Models/header/DiffSvc.hpp | 14 + .../Modules/Models/header/ModelBase.hpp | 6 +- .../Modules/Models/header/MoeVSProject.hpp | 64 +- .../Modules/Models/header/TTS.hpp | 151 ++++ .../Modules/Models/header/Tacotron.hpp | 39 + .../Modules/Models/header/Vits.hpp | 86 +++ .../Modules/Models/header/VitsSvc.hpp | 4 + .../Modules/Models/src/DiffSvc.cpp | 127 ++++ .../Modules/Models/src/ModelBase.cpp | 4 +- .../Modules/Models/src/TTS.cpp | 321 ++++++++ .../Modules/Models/src/Vits.cpp | 699 ++++++++++++++++++ .../Modules/Models/src/VitsSvc.cpp | 142 ++++ .../Modules/Modules.cpp | 7 + .../Modules/Modules.hpp | 1 + .../MoeVoiceStudioSvc - Core - Cmd.cpp | 388 +--------- .../MoeVoiceStudioSvc - Core - Cmd.vcxproj | 13 + ...oiceStudioSvc - Core - Cmd.vcxproj.filters | 93 ++- 34 files changed, 3128 insertions(+), 448 deletions(-) create mode 100644 MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/operator.cpp create mode 100644 MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/operator.h create mode 100644 MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/G2P/MoeVSG2P.cpp create mode 100644 MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/G2P/MoeVSG2P.hpp create mode 100644 MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/TTS.hpp create mode 100644 MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/Tacotron.hpp create mode 100644 MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/Vits.hpp create mode 100644 MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/TTS.cpp create mode 100644 MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/Vits.cpp diff --git a/Lib/MJson/MJson.h b/Lib/MJson/MJson.h index 4e673b4..fd2497f 100644 --- a/Lib/MJson/MJson.h +++ b/Lib/MJson/MJson.h @@ -130,7 +130,8 @@ class MJsonValue { if (!IsArray() && !IsString()) return true; - const auto _max = yyjson_arr_size(_Ptr); + auto _max = yyjson_arr_size(_Ptr); + if (IsString()) _max = yyjson_get_len(_Ptr); return !_max; } [[nodiscard]] size_t GetMemberCount() const @@ -148,6 +149,10 @@ class MJsonValue } return ret; } + [[nodiscard]] bool HasMember(const std::string& _key) const + { + return yyjson_obj_get(_Ptr, _key.c_str()); + } private: yyjson_val* _Ptr = nullptr; }; @@ -163,6 +168,16 @@ class MJson throw std::exception("Json Parse Error !"); root = yyjson_doc_get_root(_document); } + MJson(const std::string& _data, bool _read_from_string) + { + if 
(_read_from_string) + _document = yyjson_read(_data.c_str(), _data.length(), YYJSON_READ_NOFLAG); + else + _document = yyjson_read_file(_data.c_str(), YYJSON_READ_NOFLAG, nullptr, nullptr); + if (!_document) + throw std::exception("Json Parse Error !"); + root = yyjson_doc_get_root(_document); + } ~MJson() { if(_document) diff --git a/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/base.h b/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/base.h index 0fd95b4..9abea1f 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/base.h +++ b/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/base.h @@ -3,6 +3,7 @@ #include #include #include +#include #define LibDLVoiceCodecBegin namespace libdlvcodec { #define LibDLVoiceCodecEnd } #define LIBDVCND [[nodiscard]] @@ -24,6 +25,12 @@ using uint16 = uint16_t; using uint32 = uint32_t; using uint64 = uint64_t; +class TensorView; +class Tensor; + +const std::unordered_map __Dtype {{"int8", 1}, { "int16", 2 }, { "int32", 4 }, { "int64", 8 }, + { "float8", 1 }, { "float16", 2 }, { "bfloat16", 2 }, { "float32", 4 }, { "float64", 8 }, { "bool", 1 } }; + template class BaseAllocator { @@ -54,12 +61,16 @@ class MResource { data_ = allocator_.allocate(_Count * 2); size_ = _Count; + this_ = data_ + _Count; + end_ = data_ + _Count * 2; } MResource(size_t _Count, Type _Value) { data_ = allocator_.allocate(_Count * 2); size_ = _Count; + this_ = data_ + _Count; + end_ = data_ + _Count * 2; auto _ptr = data_; const auto _end = data_ + size_; while (_ptr != _end) @@ -73,12 +84,16 @@ class MResource { data_ = _Ptr; size_ = _Size; + this_ = data_ + _Size; + end_ = data_ + _Size; } MResource(const MResource& _Left) { size_ = _Left.size_; - data_ = allocator_.allocate(_Left.size_); + data_ = allocator_.allocate(_Left.capacity()); + this_ = data_ + size_; + end_ = data_ + _Left.capacity(); auto _ptr = data_, _ptrl = _Left.data_; const auto _end = data_ + size_; while (_ptr != _end) @@ -93,6 +108,8 @@ class MResource { size_ = _Right.size_; data_ = _Right.data_; + this_ = _Right.this_; + end_ = _Right.end_; _Right.size_ = 0ull; _Right.data_ = nullptr; } @@ -118,7 +135,7 @@ class MResource LIBDVCND ptr_t end() const { - return data_ + size_; + return this_; } ptr_t release() @@ -134,12 +151,37 @@ class MResource return *(data_ + _Index); } + template + reference at(size_t _Index) const + { + assert(_Index * sizeof(__Ty) < size_); + return *((__Ty*)data_ + _Index); + } + + reference at(size_t _Index) const + { + assert(_Index < size_); + return *(data_ + _Index); + } + + LIBDVCND size_t size() const + { + return size_; + } + + LIBDVCND size_t capacity() const + { + return end_ - data_; + } + MResource& operator=(const MResource& _Left) { if (&_Left == this) return *this; size_ = _Left.size_; - data_ = allocator_.allocate(_Left.size_); + data_ = allocator_.allocate(_Left.capacity()); + this_ = data_ + size_; + end_ = data_ + _Left.capacity(); auto _ptr = data_, _ptrl = _Left.data_; const auto _end = data_ + size_; while (_ptr != _end) @@ -155,12 +197,16 @@ class MResource { size_ = _Right.size_; data_ = _Right.data_; + this_ = _Right.this_; + end_ = _Right.end_; _Right.size_ = 0ull; _Right.data_ = nullptr; return *this; } protected: ptr_t data_ = nullptr; + ptr_t this_ = nullptr; + ptr_t end_ = nullptr; size_t size_ = 0ull; Allocator allocator_; }; diff --git a/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/operator.cpp b/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/operator.cpp new file mode 100644 index 0000000..4e4201d --- /dev/null +++ b/MoeVoiceStudioSvc - Core - 
Cmd/LibDLVoiceCodec/operator.cpp @@ -0,0 +1,7 @@ +#include "operator.h" +#include "value.h" +#include + +LibDLVoiceCodecBegin + +LibDLVoiceCodecEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/operator.h b/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/operator.h new file mode 100644 index 0000000..00c7441 --- /dev/null +++ b/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/operator.h @@ -0,0 +1,27 @@ +#pragma once +#include "base.h" + +LibDLVoiceCodecBegin +Tensor equal(const Tensor& _A, const Tensor& _B); +Tensor add(const Tensor& _A, const Tensor& _B); +Tensor sub(const Tensor& _A, const Tensor& _B); +Tensor mul(const Tensor& _A, const Tensor& _B); +Tensor div(const Tensor& _A, const Tensor& _B); +void selfAdd(Tensor& _Self, const Tensor& _O); +void selfSub(Tensor& _Self, const Tensor& _O); +void selfMul(Tensor& _Self, const Tensor& _O); +void selfDiv(Tensor& _Self, const Tensor& _O); +Tensor matmul(const Tensor& _A, const Tensor& _B); +Tensor conv1d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, + int64 _Stride = 1, int64 _Padding = 0, int64 _Dilation = 1, int64 _Groups = 1); +Tensor conv2d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, + int64 _Stride = 1, int64 _Padding = 0, int64 _Dilation = 1, int64 _Groups = 1); +Tensor conv3d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, + int64 _Stride = 1, int64 _Padding = 0, int64 _Dilation = 1, int64 _Groups = 1); +Tensor conv_transpose1d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, + int64 _Stride = 1, int64 _Padding = 0, int64 _OutputPadding = 0, int64 _Dilation = 1, int64 _Groups = 1); +Tensor conv_transpose2d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, + int64 _Stride = 1, int64 _Padding = 0, int64 _OutputPadding = 0, int64 _Dilation = 1, int64 _Groups = 1); +Tensor conv_transpose3d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, + int64 _Stride = 1, int64 _Padding = 0, int64 _OutputPadding = 0, int64 _Dilation = 1, int64 _Groups = 1); +LibDLVoiceCodecEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/value.cpp b/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/value.cpp index 5449b70..05c6d5f 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/value.cpp +++ b/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/value.cpp @@ -1,7 +1,6 @@ #include "value.h" LibDLVoiceCodecBegin - Value& Value::load(const std::wstring& _Path) { FileWrapper file; @@ -86,7 +85,7 @@ void Value::saveData(FileWrapper& _File) LibDLVoiceCodecThrow("Not implemented error!"); } -void Tensor::loadData(WeightDict& _Dict) +void TensorData::loadData(WeightDict& _Dict) { const auto res = _Dict.find(RegName_); if (res != _Dict.end()) @@ -95,17 +94,103 @@ void Tensor::loadData(WeightDict& _Dict) size_t TotalSize = 1; for (const auto i : Shape_) TotalSize *= i; - if (TotalSize * sizeof(DType) != res->second.Size) + if (TotalSize * __Dtype.at(Type_) != res->second.Size) LibDLVoiceCodecThrow("Expected size does not match actual size!"); Data_ = std::move(res->second.Data); + DataPtr_ = Data_.data(); } } -void Tensor::saveData(FileWrapper& _File) +void TensorData::saveData(FileWrapper& _File) { } +TensorView TensorData::operator[](int64_t index) const +{ + if (index < 0) + { + if (index < -Shape_[0]) + LibDLVoiceCodecThrow("Index Out Of Range"); + index += Shape_[0]; + std::vector NewShape{Shape_.begin() + 1, Shape_.end()}; + if (NewShape.empty()) + NewShape.emplace_back(1); + 
return { std::move(NewShape) ,DataPtr_ + index * (step() * __Dtype.at(Type_)) }; + } + if (index > Shape_[0]) + LibDLVoiceCodecThrow("Index Out Of Range"); + std::vector NewShape{Shape_.begin() + 1, Shape_.end()}; + if (NewShape.empty()) + NewShape.emplace_back(1); + return { std::move(NewShape) ,DataPtr_ + index * (step() * __Dtype.at(Type_)) }; +} + +template +TensorData& TensorData::operator=(const _TypeName& _Val) +{ + assert(sizeof(_TypeName) == __Dtype.at(Type_)); + if(Type_ == "int8") + { + auto it = begin(); + const auto en = end(); + while (it != en) + *(it++) = (int8)_Val; + } + else if(Type_ == "int16") + { + auto it = begin(); + const auto en = end(); + while (it != en) + *(it++) = (int16)_Val; + } + else if (Type_ == "int32") + { + auto it = begin(); + const auto en = end(); + while (it != en) + *(it++) = (int32)_Val; + } + else if (Type_ == "int64") + { + auto it = begin(); + const auto en = end(); + while (it != en) + *(it++) = (int64)_Val; + } + else if (Type_ == "float8") + { + } + else if (Type_ == "float16") + { + } + else if (Type_ == "bfloat16") + { + } + else if (Type_ == "float32") + { + auto it = begin(); + const auto en = end(); + while (it != en) + *(it++) = (float32)_Val; + } + else if (Type_ == "float64") + { + auto it = begin(); + const auto en = end(); + while (it != en) + *(it++) = (float64)_Val; + } + else + { + auto it = begin(); + const auto en = end(); + while (it != en) + *(it++) = (bool)_Val; + } + return *this; +} + void Module::loadData(WeightDict& _Dict) { for(const auto& it : Layers_) @@ -122,4 +207,127 @@ void Module::saveData(FileWrapper& _File) } } +Tensor::Tensor(const std::initializer_list& _Shape, const std::string& _Dtype, const std::string& _Name, bool _TensorLayer) +{ + if (__Dtype.find(_Dtype) == __Dtype.end()) + LibDLVoiceCodecThrow("DType Not Recognized"); + Shape_ = _Shape; + size_t TotalSize = 1; + for (const auto i : Shape_) + TotalSize *= i; + Data_ = MResource(__Dtype.at(_Dtype) * TotalSize); + RegName_ = _Name; + TensorLayer_ = _TensorLayer; + DataPtr_ = Data_.data(); + Type_ = _Dtype; +} + +Tensor::Tensor(const std::vector& _Shape, const std::string& _Dtype, const std::string& _Name, bool _TensorLayer) +{ + if (__Dtype.find(_Dtype) == __Dtype.end()) + LibDLVoiceCodecThrow("DType Not Recognized"); + Shape_ = _Shape; + size_t TotalSize = 1; + for (const auto i : Shape_) + TotalSize *= i; + Data_ = MResource(__Dtype.at(_Dtype) * TotalSize); + RegName_ = _Name; + TensorLayer_ = _TensorLayer; + DataPtr_ = Data_.data(); + Type_ = _Dtype; +} + +Tensor::Tensor(const Tensor& _Left) +{ + Shape_ = _Left.Shape_; + size_t TotalSize = 1; + for (const auto i : Shape_) + TotalSize *= i; + Data_ = MResource(__Dtype.at(_Left.Type_) * TotalSize); + RegName_ = _Left.RegName_; + TensorLayer_ = _Left.TensorLayer_; + DataPtr_ = Data_.data(); + Type_ = _Left.Type_; +} + +Tensor::Tensor(Tensor&& _Right) noexcept +{ + Shape_ = _Right.Shape_; + Data_ = std::move(_Right.Data_); + DataPtr_ = Data_.data(); + TensorLayer_ = _Right.TensorLayer_; + Type_ = _Right.Type_; + RegName_ = _Right.RegName_; +} + +Tensor& Tensor::operator=(const Tensor& _Left) +{ + if (&_Left == this) + return *this; + Shape_ = _Left.Shape_; + size_t TotalSize = 1; + for (const auto i : Shape_) + TotalSize *= i; + Data_ = MResource(__Dtype.at(_Left.Type_) * TotalSize); + RegName_ = _Left.RegName_; + TensorLayer_ = _Left.TensorLayer_; + DataPtr_ = Data_.data(); + Type_ = _Left.Type_; + return *this; +} + +Tensor& Tensor::operator=(Tensor&& _Right) noexcept +{ + Shape_ = _Right.Shape_; + 
Data_ = std::move(_Right.Data_); + DataPtr_ = Data_.data(); + TensorLayer_ = _Right.TensorLayer_; + Type_ = _Right.Type_; + RegName_ = _Right.RegName_; + return *this; +} + +Tensor Tensor::zeros(const std::initializer_list& _Shape, const std::string& _Dtype, const std::string& _Name, bool _TensorLayer) +{ + Tensor Output{ _Shape ,_Dtype, _Name, _TensorLayer }; + memset(Output.Data_.data(), 0, Output.Data_.size()); + return Output; +} + +Tensor Tensor::zeros_like(const Tensor& _O, bool _TensorLayer) +{ + Tensor Output{ _O.shape() ,_O.dtype(), _O.RegName_, _TensorLayer }; + memset(Output.Data_.data(), 0, Output.Data_.size()); + return Output; +} + +Tensor Tensor::ones(const std::initializer_list& _Shape, const std::string& _Dtype, const std::string& _Name, bool _TensorLayer) +{ + return {}; +} + +Tensor Tensor::ones_like(const Tensor& _O, bool _TensorLayer) +{ + return {}; +} + +Tensor Tensor::rand(const std::initializer_list& _Shape, int _Seed, const std::string& _Dtype, const std::string& _Name, bool _TensorLayer) +{ + return {}; +} + +Tensor Tensor::rand_like(const Tensor& _O, int _Seed, bool _TensorLayer) +{ + return {}; +} + +Tensor Tensor::randn(const std::initializer_list& _Shape, int _Seed, const std::string& _Dtype, const std::string& _Name, bool _TensorLayer) +{ + return {}; +} + +Tensor Tensor::randn_like(const Tensor& _O, int _Seed, bool _TensorLayer) +{ + return {}; +} LibDLVoiceCodecEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/value.h b/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/value.h index a2a9775..72b27f2 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/value.h +++ b/MoeVoiceStudioSvc - Core - Cmd/LibDLVoiceCodec/value.h @@ -1,5 +1,4 @@ #pragma once -#include #include #include "base.h" @@ -19,8 +18,9 @@ class Value { public: Value() = default; + Value(const Value& _Left) = delete; virtual ~Value() = default; - using WeightDict = std::map; + using WeightDict = std::unordered_map; protected: std::string RegName_; @@ -46,32 +46,168 @@ class Module : public Value else RegName_ = _Name; } - + ~Module() override = default; private: - std::map Layers_; + std::unordered_map Layers_; public: void loadData(WeightDict& _Dict) override; void saveData(FileWrapper& _File) override; }; -class Tensor : Value +class TensorData : public Value { public: - using DType = float; - Tensor(const std::string& _Name = "Tensor") - { - RegName_ = _Name; - TensorLayer_ = false; - } + TensorData() = default; + TensorData(const TensorData& _Left) = delete; + TensorData(TensorData&& _Right) = delete; + ~TensorData() override = default; protected: std::vector Shape_; - MResource Data_; bool TensorLayer_ = false; + std::string Type_ = "float32"; + public: void loadData(WeightDict& _Dict) override; void saveData(FileWrapper& _File) override; + +protected: + MResource Data_; + +public: + LIBDVCND const std::string& dtype() const { return Type_; } + LIBDVCND const std::vector& shape() const { return Shape_; } + LIBDVCND size_t size() const { + if (Shape_.empty()) return 0; + return Shape_[0]; + } + LIBDVCND size_t total_size() const { + if (Shape_.empty()) return 0; + size_t ttsize = 1; + for (const auto i : Shape_) + ttsize *= i; + return ttsize; + } + LIBDVCND size_t buf_size() const { + return total_size() * __Dtype.at(Type_); + } + LIBDVCND size_t step() const { + if (Shape_.empty()) return 0; + return total_size() / Shape_[0]; + } + LIBDVCND byte* data() const { return DataPtr_; } + template + LIBDVCND _ValueType& item() + { + 
assert(sizeof(_ValueType) == __Dtype.at(Type_)); + return *(_ValueType*)(DataPtr_); + } + template + LIBDVCND _ValueType* begin() + { + assert(sizeof(_ValueType) == __Dtype.at(Type_)); + return (_ValueType*)(DataPtr_); + } + template + LIBDVCND _ValueType* end() + { + assert(sizeof(_ValueType) == __Dtype.at(Type_)); + return (_ValueType*)(DataPtr_)+total_size(); + } + +protected: + byte* DataPtr_ = nullptr; + +public: + LIBDVCND TensorView operator[](int64_t index) const; + template + LIBDVCND TensorData& operator=(const _TypeName& _Val); +}; + +class Tensor : public TensorData +{ +public: + using DType = float; + Tensor() = default; + Tensor(const std::initializer_list& _Shape, const std::string& _Dtype = "float32", const std::string& _Name = "Tensor", bool _TensorLayer = false); + Tensor(const std::vector& _Shape, const std::string& _Dtype = "float32", const std::string& _Name = "Tensor", bool _TensorLayer = false); + Tensor(const Tensor& _Left); + Tensor(Tensor&& _Right) noexcept; + ~Tensor() override = default; + Tensor& operator=(const Tensor& _Left); + Tensor& operator=(Tensor&& _Right) noexcept; + + static Tensor zeros(const std::initializer_list& _Shape, const std::string& _Dtype = "float32", const std::string& _Name = "Tensor", bool _TensorLayer = false); + static Tensor zeros_like(const Tensor& _O, bool _TensorLayer = false); + static Tensor ones(const std::initializer_list& _Shape, const std::string& _Dtype = "float32", const std::string& _Name = "Tensor", bool _TensorLayer = false); + static Tensor ones_like(const Tensor& _O, bool _TensorLayer = false); + static Tensor rand(const std::initializer_list& _Shape, int _Seed = 114514, const std::string& _Dtype = "float32", const std::string& _Name = "Tensor", bool _TensorLayer = false); + static Tensor rand_like(const Tensor& _O, int _Seed = 114514, bool _TensorLayer = false); + static Tensor randn(const std::initializer_list& _Shape, int _Seed = 114514, const std::string& _Dtype = "float32", const std::string& _Name = "Tensor", bool _TensorLayer = false); + static Tensor randn_like(const Tensor& _O, int _Seed = 114514, bool _TensorLayer = false); +}; + +class TensorView : public TensorData +{ +public: + TensorView() = default; + ~TensorView() override = default; + TensorView(const Tensor& _T) + { + Shape_ = _T.shape(); + DataPtr_ = _T.data(); + } + TensorView(Tensor&& _T) = delete; + TensorView(const TensorView& _T) + { + Shape_ = _T.shape(); + DataPtr_ = _T.data(); + } + TensorView(TensorView&& _T) noexcept + { + Shape_ = _T.shape(); + DataPtr_ = _T.data(); + } + TensorView(const std::initializer_list& _Shape, byte* _DataPtr) + { + Shape_ = _Shape; + DataPtr_ = _DataPtr; + } + TensorView(const std::vector& _Shape, byte* _DataPtr) + { + Shape_ = _Shape; + DataPtr_ = _DataPtr; + } + TensorView(std::vector&& _Shape, byte* _DataPtr) + { + Shape_ = _Shape; + DataPtr_ = _DataPtr; + } + TensorView& operator=(const TensorView& _Left) + { + DataPtr_ = _Left.DataPtr_; + Shape_ = _Left.Shape_; + return *this; + } + TensorView& operator=(TensorView&& _Right) noexcept + { + DataPtr_ = _Right.DataPtr_; + Shape_ = _Right.Shape_; + return *this; + } + TensorView& operator=(const Tensor& _T) + { + Shape_ = _T.shape(); + DataPtr_ = _T.data(); + return *this; + } + TensorView& operator=(Tensor&& _T) + { + Shape_ = _T.shape(); + DataPtr_ = _T.data(); + return *this; + } }; LibDLVoiceCodecEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/BaseF0Extractor/BaseF0Extractor.hpp b/MoeVoiceStudioSvc - 
Core - Cmd/Modules/InferTools/F0Extractor/BaseF0Extractor/BaseF0Extractor.hpp index 555b366..d4cf1d7 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/BaseF0Extractor/BaseF0Extractor.hpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/BaseF0Extractor/BaseF0Extractor.hpp @@ -23,10 +23,10 @@ #include #include #include -#define MOEVSFOEXTRACTORHEADER namespace MoeVSF0Extractor{ -#define MOEVSFOEXTRACTOREND } +#define MoeVoiceStudioF0ExtractorHeader namespace MoeVSF0Extractor{ +#define MoeVoiceStudioF0ExtractorEnd } -MOEVSFOEXTRACTORHEADER +MoeVoiceStudioF0ExtractorHeader #define __NAME__MOEVS(x) std::wstring ClassName = (x) class BaseF0Extractor { @@ -82,4 +82,4 @@ class BaseF0Extractor double f0_mel_max; }; #undef __NAME__MOEVS -MOEVSFOEXTRACTOREND \ No newline at end of file +MoeVoiceStudioF0ExtractorEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/DioF0Extractor/DioF0Extractor.cpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/DioF0Extractor/DioF0Extractor.cpp index 8f1f455..b4054c6 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/DioF0Extractor/DioF0Extractor.cpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/DioF0Extractor/DioF0Extractor.cpp @@ -3,7 +3,7 @@ #include "stonemask.h" #include "matlabfunctions.h" -MOEVSFOEXTRACTORHEADER +MoeVoiceStudioF0ExtractorHeader DioF0Extractor::DioF0Extractor(int sampling_rate, int hop_size, int n_f0_bins, double max_f0, double min_f0): BaseF0Extractor(sampling_rate, hop_size, n_f0_bins, max_f0, min_f0) { @@ -58,4 +58,4 @@ void DioF0Extractor::compute_f0(const double* PCMData, size_t PCMLen) Dio(PCMData, (int)PCMLen, int(fs), &Doption, temporal_positions.data(), raw_f0.data()); StoneMask(PCMData, (int)PCMLen, int(fs), temporal_positions.data(), raw_f0.data(), (int)f0Length, refined_f0.data()); } -MOEVSFOEXTRACTOREND \ No newline at end of file +MoeVoiceStudioF0ExtractorEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/DioF0Extractor/DioF0Extractor.hpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/DioF0Extractor/DioF0Extractor.hpp index 06d3ade..0f908ea 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/DioF0Extractor/DioF0Extractor.hpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/DioF0Extractor/DioF0Extractor.hpp @@ -22,7 +22,7 @@ #pragma once #include "../BaseF0Extractor/BaseF0Extractor.hpp" -MOEVSFOEXTRACTORHEADER +MoeVoiceStudioF0ExtractorHeader class DioF0Extractor : public BaseF0Extractor { public: @@ -38,4 +38,4 @@ class DioF0Extractor : public BaseF0Extractor private: std::vector refined_f0; }; -MOEVSFOEXTRACTOREND \ No newline at end of file +MoeVoiceStudioF0ExtractorEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/F0ExtractorManager.cpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/F0ExtractorManager.cpp index 6c1510c..b0d977f 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/F0ExtractorManager.cpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/F0ExtractorManager.cpp @@ -3,7 +3,7 @@ #include #include "../../Logger/MoeSSLogger.hpp" -MOEVSFOEXTRACTORHEADER +MoeVoiceStudioF0ExtractorHeader std::map RegisteredF0Extractors; F0Extractor GetF0Extractor(const std::wstring& _name, @@ -38,4 +38,4 @@ std::vector GetF0ExtractorList() return 
F0ExtractorsVec; } -MOEVSFOEXTRACTOREND \ No newline at end of file +MoeVoiceStudioF0ExtractorEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/F0ExtractorManager.hpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/F0ExtractorManager.hpp index e409f0e..8f818c1 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/F0ExtractorManager.hpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/F0ExtractorManager.hpp @@ -23,7 +23,7 @@ #include "BaseF0Extractor/BaseF0Extractor.hpp" #include -MOEVSFOEXTRACTORHEADER +MoeVoiceStudioF0ExtractorHeader class F0Extractor { @@ -81,4 +81,4 @@ F0Extractor GetF0Extractor(const std::wstring& _name, std::vector GetF0ExtractorList(); -MOEVSFOEXTRACTOREND \ No newline at end of file +MoeVoiceStudioF0ExtractorEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/HarvestF0Extractor/HarvestF0Extractor.cpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/HarvestF0Extractor/HarvestF0Extractor.cpp index 7b4a215..17d2452 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/HarvestF0Extractor/HarvestF0Extractor.cpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/HarvestF0Extractor/HarvestF0Extractor.cpp @@ -3,7 +3,7 @@ #include "harvest.h" #include "stonemask.h" -MOEVSFOEXTRACTORHEADER +MoeVoiceStudioF0ExtractorHeader HarvestF0Extractor::HarvestF0Extractor(int sampling_rate, int hop_size, int n_f0_bins, double max_f0, double min_f0): BaseF0Extractor(sampling_rate, hop_size, n_f0_bins, max_f0, min_f0) { @@ -58,4 +58,4 @@ void HarvestF0Extractor::compute_f0(const double* PCMData, size_t PCMLen) Harvest(PCMData, (int)PCMLen, int(fs), &Doption, temporal_positions.data(), raw_f0.data()); StoneMask(PCMData, (int)PCMLen, int(fs), temporal_positions.data(), raw_f0.data(), (int)f0Length, refined_f0.data()); } -MOEVSFOEXTRACTOREND \ No newline at end of file +MoeVoiceStudioF0ExtractorEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/HarvestF0Extractor/HarvestF0Extractor.hpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/HarvestF0Extractor/HarvestF0Extractor.hpp index f14b930..e1c678d 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/HarvestF0Extractor/HarvestF0Extractor.hpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/HarvestF0Extractor/HarvestF0Extractor.hpp @@ -22,7 +22,7 @@ #pragma once #include "../BaseF0Extractor/BaseF0Extractor.hpp" -MOEVSFOEXTRACTORHEADER +MoeVoiceStudioF0ExtractorHeader class HarvestF0Extractor : public BaseF0Extractor { public: @@ -39,4 +39,4 @@ class HarvestF0Extractor : public BaseF0Extractor private: std::vector refined_f0; }; -MOEVSFOEXTRACTOREND \ No newline at end of file +MoeVoiceStudioF0ExtractorEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/NetF0Predictors/NetF0Predictors.cpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/NetF0Predictors/NetF0Predictors.cpp index cfde173..b8c0799 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/NetF0Predictors/NetF0Predictors.cpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/NetF0Predictors/NetF0Predictors.cpp @@ -10,7 +10,7 @@ #error #endif -MOEVSFOEXTRACTORHEADER +MoeVoiceStudioF0ExtractorHeader NetF0Class::NetF0Class() #ifdef INITF0NETPREDICTOR @@ 
-324,4 +324,4 @@ void EmptyCache() MELPECORE.Destory(); } -MOEVSFOEXTRACTOREND \ No newline at end of file +MoeVoiceStudioF0ExtractorEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/NetF0Predictors/NetF0Predictors.hpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/NetF0Predictors/NetF0Predictors.hpp index d78444d..3e6c312 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/NetF0Predictors/NetF0Predictors.hpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/F0Extractor/NetF0Predictors/NetF0Predictors.hpp @@ -23,7 +23,7 @@ #include "../BaseF0Extractor/BaseF0Extractor.hpp" #include -MOEVSFOEXTRACTORHEADER +MoeVoiceStudioF0ExtractorHeader class NetF0Class { @@ -80,4 +80,4 @@ class MELPEF0Extractor : public BaseF0Extractor void EmptyCache(); -MOEVSFOEXTRACTOREND \ No newline at end of file +MoeVoiceStudioF0ExtractorEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/G2P/MoeVSG2P.cpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/G2P/MoeVSG2P.cpp new file mode 100644 index 0000000..9e55309 --- /dev/null +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/G2P/MoeVSG2P.cpp @@ -0,0 +1,644 @@ +#include "MoeVSG2P.hpp" +#include "MJson.h" +#include "../../StringPreprocess.hpp" +#include + +MoeVoiceStudioG2PHeader + +std::wregex SignRegex(L"[!@#$%^&*()_+\\-=`~,./;'\\[\\]<>?:\"{}|\\\\。?!,、;:“”‘’『』「」()〔〕【】─…·—~《》〈〉]+"); +std::wregex WordRegex(L"[^!@#$%^&*()_+\\-=`~,./;'\\[\\]<>?:\"{}|\\\\。?!,、;:“”‘’『』「」()〔〕【】─…·—~《》〈〉]+"); +std::wregex BlankRegex(L"[ ]+"); +std::wregex ChineseRegex(L"^[\\u4e00-\\u9fa5]{0,}$"); +std::wregex NumberRegex(L"\\d+(?:\\.?\\d+)?"); +std::wstring ChineseNumber[] = { L"零",L"一",L"二",L"三",L"四",L"五",L"六",L"七",L"八",L"九",L"十" }; +std::wstring ChineseNumberDigit[] = { L"",L"十",L"百",L"千",L"万",L"十万",L"百万",L"千万",L"亿" }; +std::wstring JapaneseNumber[] = { L"零",L"一",L"ニ",L"三",L"四",L"五",L"六",L"七",L"八",L"九",L"十" }; +std::wstring JapaneseNumberDigit[] = { L"",L"十",L"百",L"千",L"万",L"十万",L"百万",L"千万",L"億" }; +std::unordered_map _PUNCTUATION_MAP{ + { L":", L"," }, { L";", L"," }, { L",", L"," }, { L"。", L"." }, { L"!", L"!" }, { L"?", L"?" }, + { L"·", L"," }, { L"、", L"," }, { L"...", L"…" }, { L"$", L"." 
}, { L"“", L"'" }, + { L"”", L"'" }, { L"‘", L"'" }, { L"’", L"'" }, { L"(", L"'" }, { L")", L"'" }, { L"(", L"'" }, + { L")", L"'" }, { L"《", L"'" }, { L"》", L"'" }, { L"【", L"'" }, { L"】", L"'" }, { L"[", L"'" }, + { L"]", L"'" }, { L"—", L"-" }, { L"~", L"-" }, { L"~", L"-" }, { L"「", L"'" }, { L"」", L"'" } +}; +std::unordered_map _ALPHASYMBOL_MAP{ + {L"#", L"シャープ"}, { L"%", L"パーセント" }, { L"&", L"アンド" }, { L"+", L"プラス" }, { L"-", L"マイナス" }, + { L":", L"コロン" }, { L";", L"セミコロン" }, { L"<", L"小なり" }, { L"=", L"イコール" }, { L">", L"大なり" }, + { L"@", L"アット" }, { L"a", L"エー" }, { L"b", L"ビー" }, { L"c", L"シー" }, { L"d", L"ディー" }, { L"e", L"イー" }, + { L"f", L"エフ" }, { L"g", L"ジー" }, { L"h", L"エイチ" }, { L"i", L"アイ" }, { L"j", L"ジェー" }, { L"k", L"ケー" }, + { L"l", L"エル" }, { L"m", L"エム" }, { L"n", L"エヌ" }, { L"o", L"オー" }, { L"p", L"ピー" }, { L"q", L"キュー" }, + { L"r", L"アール" }, { L"s", L"エス" }, { L"t", L"ティー" }, { L"u", L"ユー" }, { L"v", L"ブイ" }, { L"w", L"ダブリュー" }, + { L"x", L"エックス" }, { L"y", L"ワイ" }, { L"z", L"ゼット" }, { L"α", L"アルファ" }, { L"β", L"ベータ" }, { L"γ", L"ガンマ" }, + { L"δ", L"デルタ" }, { L"ε", L"イプシロン" }, { L"ζ", L"ゼータ" }, { L"η", L"イータ" }, { L"θ", L"シータ" }, { L"ι", L"イオタ" }, + { L"κ", L"カッパ" }, { L"λ", L"ラムダ" }, { L"μ", L"ミュー" }, { L"ν", L"ニュー" }, { L"ξ", L"クサイ" }, { L"ο", L"オミクロン" }, + { L"π", L"パイ" }, { L"ρ", L"ロー" }, { L"σ", L"シグマ" }, { L"τ", L"タウ" }, { L"υ", L"ウプシロン" }, { L"φ", L"ファイ" }, + { L"χ", L"カイ" }, { L"ψ", L"プサイ" }, { L"ω", L"オメガ", }}; +std::vector> _CURRENCY_MAP{{L"\\$", L"ドル"}, { L"¥", L"円" }, { L"£", L"ポンド" }, { L"€", L"ユーロ" }}; + +MVSCleaner DefaultCleaner; + +MVSCleaner* GetDefCleaner() +{ + return &DefaultCleaner; +} + +#ifdef WIN32 +MoeVoiceStudioG2PApi::~MoeVoiceStudioG2PApi() +{ + unLoad(); +} + +MoeVoiceStudioG2PApi& MoeVoiceStudioG2PApi::operator=(MoeVoiceStudioG2PApi&& move) noexcept +{ + func = move.func; + m_hDynLib = move.m_hDynLib; + move.func = nullptr; + move.m_hDynLib = nullptr; + return *this; +} + +bool MoeVoiceStudioG2PApi::enabled() const +{ + return m_hDynLib != nullptr; +} + +MoeVoiceStudioG2PApi::SplitData MoeVoiceStudioG2PApi::GetSplitWords(const std::wstring& inputLen) const +{ + SplitData TempData; + if (getvocab) + TempData = (*(SplitData*)getvocab(inputLen.c_str())); + return TempData; +} + +void MoeVoiceStudioG2PApi::LoadDict(const std::wstring& Path) const +{ + if (loaddic) + loaddic(Path.c_str()); +} + +char MoeVoiceStudioG2PApi::Load(const std::wstring& PluginName) +{ + func = nullptr; + frel = nullptr; + if (m_hDynLib) + { + FreeLibrary(m_hDynLib); + m_hDynLib = nullptr; + } + m_hDynLib = LoadLibrary((PluginName).c_str()); + if (m_hDynLib == nullptr) + return -1; + func = reinterpret_cast( + reinterpret_cast( + GetProcAddress(m_hDynLib, "PluginMain") + ) + ); + frel = reinterpret_cast( + reinterpret_cast( + GetProcAddress(m_hDynLib, "Release") + ) + ); + getvocab = reinterpret_cast( + reinterpret_cast( + GetProcAddress(m_hDynLib, "GetSplitData") + ) + ); + vocabrel = reinterpret_cast( + reinterpret_cast( + GetProcAddress(m_hDynLib, "RefreshTokenizer") + ) + ); + loaddic = reinterpret_cast( + reinterpret_cast( + GetProcAddress(m_hDynLib, "LoadDict") + ) + ); + if (func == nullptr) + return 1; + return 0; +} + +std::wstring MoeVoiceStudioG2PApi::functionAPI(const std::wstring& inputLen, const std::wstring& placeholderSymbol, + const std::wstring& extraInfo, int64_t languageID) const +{ + if (func) + { + const auto tmp = func(inputLen.c_str(), placeholderSymbol.c_str(), extraInfo.c_str(), languageID); + std::wstring ret = tmp; + return ret; + } + 
return inputLen; +} + +void MoeVoiceStudioG2PApi::unLoad() +{ + if (frel) + frel(); + if (vocabrel) + vocabrel(); + vocabrel = nullptr; + getvocab = nullptr; + loaddic = nullptr; + func = nullptr; + frel = nullptr; + if (m_hDynLib) + FreeLibrary(m_hDynLib); + m_hDynLib = nullptr; +} +#endif + +void MVSDict::GetDict(const std::wstring& path) +{ + PlaceholderSymbol = L"|"; + std::string phoneInfo, phoneInfoAll; + std::ifstream phonefile(path.c_str()); + if (!phonefile.is_open()) + throw std::exception("phone file not found"); + while (std::getline(phonefile, phoneInfo)) + phoneInfoAll += phoneInfo; + phonefile.close(); + MJson PhoneJson; + PhoneJson.Parse(phoneInfoAll); + if (PhoneJson.HasParseError()) + throw std::exception("json file error"); + for (const auto& itr : PhoneJson.GetMemberArray()) + { + std::wstring Key = to_wide_string(itr.first); + if (Key == L"PlaceholderSymbol") + { + if (itr.second.IsString() && itr.second.GetStringLength()) + PlaceholderSymbol = to_wide_string(itr.second.GetString()); + if (PlaceholderSymbol.length() > 1) + PlaceholderSymbol = L"|"; + continue; + } + const auto Value = itr.second.GetArray(); + _Dict[Key] = std::vector(); + for (const auto& it : Value) + _Dict[Key].push_back(to_wide_string(it.GetString())); + } +} + +std::vector MVSDict::DictReplace(const std::vector& input) const +{ + std::vector _out; + for (const auto& i : input) + if (_Dict.find(i) != _Dict.end()) + { + const auto& Value = _Dict.at(i); + _out.insert(_out.end(), Value.begin(), Value.end()); + } + else + _out.emplace_back(i); + return _out; +} + +std::vector MVSDict::DictReplace(const std::wstring& input, const std::wstring& tPlaceholderSymbol) const +{ + std::vector _output; + auto tmp = input; + tmp += tPlaceholderSymbol; + while (!tmp.empty()) + { + const size_t pos = tmp.find(tPlaceholderSymbol); + const auto Key = tmp.substr(0, pos); + tmp = tmp.substr(pos + 1); + if (_Dict.find(Key) != _Dict.end()) + { + const auto& Value = _Dict.at(Key); + _output.insert(_output.end(), Value.begin(), Value.end()); + } + else + _output.emplace_back(Key); + } + return _output; +} + +std::wstring MVSDict::DictReplaceGetStr(const std::wstring& input, const std::wstring& tPlaceholderSymbol, bool usePlaceholderSymbol) const +{ + const auto tmp = DictReplace(input, tPlaceholderSymbol); + std::wstring output; + for (const auto& i : tmp) + if (usePlaceholderSymbol) + output += i + tPlaceholderSymbol; + else + output += i; + return output; +} + +void Tokenizer::load(const std::wstring& _Path) +{ + const MJson _VocabJson(to_byte_string(_Path).c_str()); + if (!_VocabJson.HasMember("ContinuingSubwordPrefix") || + !_VocabJson.HasMember("Type") || + !_VocabJson.HasMember("Vocab") || + _VocabJson["ContinuingSubwordPrefix"].Empty() || + _VocabJson["Type"].Empty() || + !_VocabJson["ContinuingSubwordPrefix"].IsString() || + !_VocabJson["Type"].IsString()) + throw std::exception("Vocab.json Error"); + const std::string Type = _VocabJson["Type"].GetString(); + if (Type == "Unigram") Model = TokenizerModel::Unigram; + Symbol = to_wide_string(_VocabJson["ContinuingSubwordPrefix"].GetString()); + + if(Model == TokenizerModel::WordPiece) + { + if(_VocabJson["Vocab"].IsArray()) + { + const auto _VocabArray = _VocabJson["Vocab"].GetArray(); + int64_t Index = 0; + for (const auto& Object : _VocabArray) + Vocab[to_wide_string(Object.GetString())] = Index++; + } + else + { + const auto _VocabDict = _VocabJson["Vocab"].GetMemberArray(); + for (const auto& Pair : _VocabDict) + { + if (Pair.second.IsInt()) + 
Vocab[to_wide_string(Pair.first)] = TokenizerType(Pair.second.GetInt()); + else if (Pair.second.IsFloat()) + Vocab[to_wide_string(Pair.first)] = TokenizerType(Pair.second.GetFloat()); + } + } + } + else + { + const auto _VocabArray = _VocabJson["Vocab"].GetArray(); + int64_t Index = 0; + for (const auto& Object : _VocabArray) + Vocab[to_wide_string(Object.GetArray()[0].GetString())] = Index++; + } + if (_VocabJson.HasMember("UseSplit") && _VocabJson["UseSplit"].IsBool()) + UseSplit = _VocabJson["UseSplit"].GetBool(); +} + +void Tokenizer::loadCleaner(const std::wstring& _Path) const +{ + if (Cleaner) + Cleaner->loadG2p(_Path); +} + +void Tokenizer::loadDict(const std::wstring& _Path) const +{ + if (Cleaner) + Cleaner->loadDict(_Path); +} + +std::vector Tokenizer::UnigramMethod(const std::wstring& Seq, size_t MaxWordLength, TokenizerMethod Method) const +{ + if (Seq.empty()) + return {}; + //auto SeqVector = SplitString(Seq, SignRegex); + std::vector Tokens; + Tokens.emplace_back(Vocab.at(L"[CLS]")); + const auto UNKId = Vocab.at(L"[UNK]"); + std::wstring SeqWord = Seq; + if (Method == TokenizerMethod::Left) + { + bool FirstTime = true; + while (!SeqWord.empty()) + { + for (size_t SearchLength = min(MaxWordLength, SeqWord.length()); SearchLength > 0; --SearchLength) + { + if (FirstTime) + { + size_t SubVal = 0; + if (SearchLength > Symbol.length()) + SubVal = Symbol.length(); + const auto SearchResult = Vocab.find(Symbol + SeqWord.substr(0, SearchLength - SubVal)); + if (SearchResult != Vocab.end()) + { + Tokens.emplace_back(SearchResult->second); + SeqWord = SeqWord.substr(SearchLength - SubVal); + FirstTime = false; + break; + } + } + const auto SearchResult = Vocab.find(SeqWord.substr(0, SearchLength)); + if (SearchResult != Vocab.end()) + { + Tokens.emplace_back(SearchResult->second); + SeqWord = SeqWord.substr(SearchLength); + if (FirstTime) FirstTime = false; + break; + } + if (SearchLength == 1) + { + const auto SubStr = SeqWord.substr(0, SearchLength); + const auto SearchRes = _PUNCTUATION_MAP.find(SubStr); + if (SearchRes != _PUNCTUATION_MAP.end()) + { + const auto SearchR = Vocab.find(SearchRes->second); + if (SearchR != Vocab.end()) + Tokens.emplace_back(SearchR->second); + SeqWord = SeqWord.substr(1); + break; + } + if (Tokens.empty() || Tokens.back() != UNKId) + Tokens.emplace_back(UNKId); + SeqWord = SeqWord.substr(1); + } + } + } + } + else + throw std::exception("NotImplementedError"); + Tokens.emplace_back(Vocab.at(L"[SEP]")); + return Tokens; +} + +std::vector Tokenizer::WordPieceMethod(const std::wstring& Seq, size_t MaxWordLength, TokenizerMethod Method) const +{ + if (Seq.empty()) + return {}; + auto SeqVector = SplitString(Seq, SignRegex); + std::vector Tokens; + Tokens.emplace_back(Vocab.at(L"[CLS]")); + const auto UNKId = Vocab.at(L"[UNK]"); + if (Method == TokenizerMethod::Left) + { + for (auto& SeqWord : SeqVector) + { + bool FirstTime = true; + while (!SeqWord.empty()) + { + if (regex_match(SeqWord.substr(0, 1), ChineseRegex)) + { + const auto SearchResult = Vocab.find(SeqWord.substr(0, 1)); + if (SearchResult != Vocab.end()) + Tokens.emplace_back(SearchResult->second); + else + Tokens.emplace_back(UNKId); + SeqWord = SeqWord.substr(1); + continue; + } + for (size_t SearchLength = min(MaxWordLength, SeqWord.length()); SearchLength > 0; --SearchLength) + { + if (!FirstTime) + { + size_t SubVal = 0; + if (SearchLength > Symbol.length()) + SubVal = Symbol.length(); + const auto SearchResult = Vocab.find(Symbol + SeqWord.substr(0, SearchLength - SubVal)); + if 
(SearchResult != Vocab.end()) + { + Tokens.emplace_back(SearchResult->second); + SeqWord = SeqWord.substr(SearchLength - SubVal); + break; + } + } + const auto SearchResult = Vocab.find(SeqWord.substr(0, SearchLength)); + if (SearchResult != Vocab.end()) + { + Tokens.emplace_back(SearchResult->second); + SeqWord = SeqWord.substr(SearchLength); + if (FirstTime) FirstTime = false; + break; + } + if (SearchLength == 1) + { + const auto SubStr = SeqWord.substr(0, SearchLength); + const auto SearchRes = _PUNCTUATION_MAP.find(SubStr); + if (SearchRes != _PUNCTUATION_MAP.end()) + { + const auto SearchR = Vocab.find(SearchRes->second); + if (SearchR != Vocab.end()) + Tokens.emplace_back(SearchR->second); + SeqWord = SeqWord.substr(1); + break; + } + if (Tokens.empty() || Tokens.back() != UNKId) + Tokens.emplace_back(UNKId); + SeqWord = SeqWord.substr(1); + } + } + } + } + } + else + throw std::exception("NotImplementedError"); + Tokens.emplace_back(Vocab.at(L"[SEP]")); + return Tokens; +} + +std::vector Tokenizer::operator()(const std::wstring& Seq, size_t MaxWordLength, TokenizerMethod Method) const +{ + if (Model == TokenizerModel::WordPiece) + return WordPieceMethod(Seq, MaxWordLength, Method); + return UnigramMethod(Seq, MaxWordLength, Method); +} + +std::vector Tokenizer::SplitString(const std::wstring& _InputRef, const std::wregex & _SignRegex) +{ + if (_InputRef.empty()) + return {}; + std::wstring InputStr = _InputRef; + std::vector TmpStrVec, StrVec; + std::wsmatch MatchedSign; + while (std::regex_search(InputStr, MatchedSign, _SignRegex)) + { + if (MatchedSign.prefix().matched) + TmpStrVec.push_back(MatchedSign.prefix()); + TmpStrVec.push_back(MatchedSign.str()); + InputStr = MatchedSign.suffix(); + } + if (!InputStr.empty()) + TmpStrVec.emplace_back(InputStr); + for(const auto& i : TmpStrVec) + { + std::wsregex_token_iterator TokenIter(i.begin(), i.end(), BlankRegex, -1); + decltype(TokenIter) TokenIterEnd; + for (; TokenIter != TokenIterEnd; ++TokenIter) + if (!TokenIter->str().empty()) + StrVec.push_back(TokenIter->str()); + } + return StrVec; +} + +std::vector Tokenizer::SplitWithPlugin(const std::vector& _Inputs) const +{ + std::vector SeqVec; + for(const auto& Seq : _Inputs) + { + const auto SplitedWords = GetCleaner().GetCleaner().GetSplitWords(Seq); + for (size_t i = 0; i < SplitedWords.Size; ++i) + { + auto TmpString = to_wide_string(SplitedWords.Data[i]); + TmpString = TmpString.substr(0, TmpString.find(L',')); + SeqVec.emplace_back(std::move(TmpString)); + } + } + return SeqVec; +} + +std::wstring NumberToChinese(double Number) +{ + std::wstring StrRtn; + std::wstring InputStr = std::to_wstring(Number); + const size_t PIndex = InputStr.find(L'.'); + std::wstring IntegerStr, FractionStr; + if (PIndex != std::wstring::npos) + { + IntegerStr = InputStr.substr(0, PIndex); + FractionStr = InputStr.substr(PIndex + 1); + while (!FractionStr.empty() && FractionStr.back() == L'0') + FractionStr.pop_back(); + } + else + IntegerStr = std::move(InputStr); + + if (IntegerStr != L"0") + { + size_t MaxIntegerStrLength = IntegerStr.length(); + for (; MaxIntegerStrLength > 0; --MaxIntegerStrLength) + if (IntegerStr[MaxIntegerStrLength - 1] != L'0') + break; + if (MaxIntegerStrLength < 1) + MaxIntegerStrLength = 1; + + const auto DigitNum = IntegerStr.length(); + for (size_t i = 0; i < MaxIntegerStrLength; i++) + { + const auto NumberIndex = IntegerStr[i] - L'0'; + const auto DigitIndex = DigitNum - i - 1; + if (0 == NumberIndex) + { + if ((i > 0 && L'0' == IntegerStr[i - 1]) || i == 
IntegerStr.length() - 1) + continue; + if (DigitIndex >= 4 && 0 == DigitIndex % 4) + StrRtn += ChineseNumberDigit[DigitIndex]; + else + StrRtn += ChineseNumber[NumberIndex]; + } + else + { + StrRtn += ChineseNumber[NumberIndex]; + if (IntegerStr.length() == 2 && IntegerStr[0] == '1' && i == 0) + StrRtn.erase(0); + if (0 == DigitIndex % 4) + StrRtn += ChineseNumberDigit[DigitIndex]; + else + StrRtn += ChineseNumberDigit[DigitIndex % 4]; + } + } + } + else + StrRtn += L"零"; + + if (!FractionStr.empty()) + StrRtn += L"点"; + for(const auto FractionI : FractionStr) + { + const auto NumberIndex = FractionI - L'0'; + StrRtn += ChineseNumber[NumberIndex]; + } + return StrRtn; +} + +std::wstring NumberToJapanese(double Number) +{ + std::wstring StrRtn; + std::wstring InputStr = std::to_wstring(Number); + const size_t PIndex = InputStr.find(L'.'); + std::wstring IntegerStr, FractionStr; + if (PIndex != std::wstring::npos) + { + IntegerStr = InputStr.substr(0, PIndex); + FractionStr = InputStr.substr(PIndex + 1); + while (!FractionStr.empty() && FractionStr.back() == L'0') + FractionStr.pop_back(); + } + else + IntegerStr = std::move(InputStr); + + if (IntegerStr != L"0") + { + size_t MaxIntegerStrLength = IntegerStr.length(); + for (; MaxIntegerStrLength > 0; --MaxIntegerStrLength) + if (IntegerStr[MaxIntegerStrLength - 1] != L'0') + break; + if (MaxIntegerStrLength < 1) + MaxIntegerStrLength = 1; + + const auto DigitNum = IntegerStr.length(); + for (size_t i = 0; i < MaxIntegerStrLength; i++) + { + const auto NumberIndex = IntegerStr[i] - L'0'; + const auto DigitIndex = DigitNum - i - 1; + if (0 == NumberIndex) + { + if ((i > 0 && L'0' == IntegerStr[i - 1]) || i == IntegerStr.length() - 1) + continue; + if (DigitIndex >= 4 && 0 == DigitIndex % 4) + StrRtn += JapaneseNumberDigit[DigitIndex]; + else + StrRtn += JapaneseNumber[NumberIndex]; + } + else + { + StrRtn += JapaneseNumber[NumberIndex]; + if (IntegerStr.length() == 2 && IntegerStr[0] == '1' && i == 0) + StrRtn.erase(0); + if (0 == DigitIndex % 4) + StrRtn += JapaneseNumberDigit[DigitIndex]; + else + StrRtn += JapaneseNumberDigit[DigitIndex % 4]; + } + } + } + else + StrRtn += L"零"; + + if (!FractionStr.empty()) + StrRtn += L"点"; + for (const auto FractionI : FractionStr) + { + const auto NumberIndex = FractionI - L'0'; + StrRtn += JapaneseNumber[NumberIndex]; + } + return StrRtn; +} + +std::wstring ChineseNormalize(const std::wstring& _Input) +{ + std::wstring RtnStr; + const auto StrVec = Tokenizer::SplitString(_Input, NumberRegex); + for(const auto& Str : StrVec) + { + if (std::regex_match(Str, NumberRegex)) + RtnStr += NumberToChinese(_wtof(Str.c_str())); + else + RtnStr += Str; + } + RtnStr = std::regex_replace(RtnStr, std::wregex(L"嗯"), L"恩"); + RtnStr = std::regex_replace(RtnStr, std::wregex(L"呣"), L"母"); + return RtnStr; +} + +std::wstring JapaneseNormalize(const std::wstring& _Input) +{ + std::wstring RtnStr; + const auto StrVec = Tokenizer::SplitString(_Input, NumberRegex); + for (const auto& Str : StrVec) + { + if (std::regex_match(Str, NumberRegex)) + RtnStr += NumberToJapanese(_wtof(Str.c_str())); + else + RtnStr += Str; + } + for (const auto& PunPair : _CURRENCY_MAP) + RtnStr = std::regex_replace(RtnStr, std::wregex(PunPair.first), PunPair.second); + return RtnStr; +} + +std::wstring NormalizeText(const std::wstring& _Input, const std::string& _Language) +{ + if (_Language == "ZH") + return ChineseNormalize(_Input); + if (_Language == "JP") + return JapaneseNormalize(_Input); + return _Input; +} + +MoeVoiceStudioG2PEnd \ No 
newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/G2P/MoeVSG2P.hpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/G2P/MoeVSG2P.hpp new file mode 100644 index 0000000..76551b7 --- /dev/null +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/InferTools/G2P/MoeVSG2P.hpp @@ -0,0 +1,254 @@ +/** + * FileName: MoeVSG2P.hpp + * Note: MoeVoiceStudioCore G2Pֵ䣨TTSã + * + * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) + * + * This file is part of MoeVoiceStudioCore library. + * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the + * GNU Affero General Public License as published by the Free Software Foundation, either version 3 + * of the License, or any later version. + * + * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License along with Foobar. + * If not, see . + * + * date: 2023-11-9 Create +*/ + +#pragma once +#include +#ifdef _WIN32 +#ifndef UNICODE +#define UNICODE +#endif +#include +#endif +#include +#include +#include +#include + +#define MoeVoiceStudioG2PHeader namespace MoeVSG2P { +#define MoeVoiceStudioG2PEnd } + +MoeVoiceStudioG2PHeader + +class MoeVoiceStudioG2PApi +{ +public: + struct SplitData + { + char** Data = nullptr; + size_t Size = 0; + }; + using funTy = const wchar_t* (*)(const wchar_t*, const wchar_t*, const wchar_t*, int64_t); + using freTy = void (*)(); + using vocabFn = void* (*)(const wchar_t*); + using loadFn = void (*)(const wchar_t*); + MoeVoiceStudioG2PApi() = default; + ~MoeVoiceStudioG2PApi(); + char Load(const std::wstring& PluginName); + void unLoad(); + void ReleaseVoc() const + { + if (vocabrel) + vocabrel(); + } + [[nodiscard]] SplitData GetSplitWords(const std::wstring& inputLen) const; + [[nodiscard]] std::wstring functionAPI(const std::wstring& inputLen, const std::wstring& placeholderSymbol, + const std::wstring& extraInfo, int64_t languageID) const; + MoeVoiceStudioG2PApi(const MoeVoiceStudioG2PApi&) = delete; + MoeVoiceStudioG2PApi(MoeVoiceStudioG2PApi&&) = delete; + MoeVoiceStudioG2PApi& operator=(MoeVoiceStudioG2PApi&& move) noexcept; + [[nodiscard]] bool enabled() const; + MoeVoiceStudioG2PApi& operator=(const MoeVoiceStudioG2PApi&) = delete; + void LoadDict(const std::wstring& Path) const; +private: +#ifdef WIN32 + const wchar_t*(*func)(const wchar_t*, const wchar_t*, const wchar_t*, int64_t) = nullptr; + void (*frel)() = nullptr; + void* (*getvocab)(const wchar_t*) = nullptr; + void (*vocabrel)() = nullptr; + void (*loaddic)(const wchar_t*) = nullptr; + HINSTANCE m_hDynLib = nullptr; +#endif +}; + +class MVSDict +{ +public: + MVSDict() = default; + ~MVSDict() = default; + + [[nodiscard]] bool enabled() const + { + return !_Dict.empty(); + } + + void unload() + { + _Dict.clear(); + } + + [[nodiscard]] std::vector DictReplace(const std::vector& input) const; + + [[nodiscard]] std::vector DictReplace(const std::wstring& input, const std::wstring& tPlaceholderSymbol) const; + + [[nodiscard]] std::wstring DictReplaceGetStr(const std::wstring& input, const std::wstring& tPlaceholderSymbol, bool usePlaceholderSymbol = true) const; + + void GetDict(const std::wstring& path); + + [[nodiscard]] std::wstring getPlaceholderSymbol() const + { + return 
PlaceholderSymbol; + } +private: + std::map> _Dict; + std::wstring PlaceholderSymbol = L"|"; +}; + +class MVSCleaner +{ +public: + MVSCleaner() = default; + + ~MVSCleaner() + { + unloadDict(); + unloadG2p(); + } + + void unloadDict() + { + _Dict.unload(); + } + + void unloadG2p() + { + _G2p.unLoad(); + } + + void loadDict(const std::wstring& _path) + { + if (_Dict.enabled()) + unloadDict(); + _Dict.GetDict(_path); + } + + void loadG2p(const std::wstring& _path) + { + if (_G2p.enabled()) + unloadG2p(); + _G2p.Load(_path); + } + + [[nodiscard]] bool G2pEnabled() const + { + return _G2p.enabled(); + } + + [[nodiscard]] bool DictEnabled() const + { + return _Dict.enabled(); + } + + [[nodiscard]] std::wstring G2p(const std::wstring& _text, const std::wstring& placeholderSymbol, + const std::wstring& extraInfo, int64_t languageID) const + { + return _G2p.functionAPI(_text, placeholderSymbol, extraInfo, languageID); + } + + [[nodiscard]] const MoeVoiceStudioG2PApi& GetCleaner() const + { + return _G2p; + } + + [[nodiscard]] auto DictReplace(const std::vector& input) const + { + return _Dict.DictReplace(input); + } + + [[nodiscard]] auto DictReplace(const std::wstring& input, const std::wstring& tPlaceholderSymbol) const + { + return _Dict.DictReplace(input, tPlaceholderSymbol); + } + + [[nodiscard]] auto DictReplaceGetStr(const std::wstring& input, const std::wstring& tPlaceholderSymbol, bool usePlaceholderSymbol = true) const + { + return _Dict.DictReplaceGetStr(input, tPlaceholderSymbol, usePlaceholderSymbol); + } + + [[nodiscard]] std::wstring getPlaceholderSymbol() const + { + return _Dict.getPlaceholderSymbol(); + } + +protected: + MoeVoiceStudioG2PApi _G2p; + MVSDict _Dict; +}; + +class Tokenizer +{ +public: + using TokenizerType = int64_t; + enum class TokenizerMethod + { + Left, + Right + }; + enum class TokenizerModel + { + Unigram, + WordPiece + }; + Tokenizer() = default; + Tokenizer(const std::wstring& _Path) + { + load(_Path); + } + void BondCleaner(MVSCleaner* MCleaner) + { + Cleaner = MCleaner; + } + void load(const std::wstring& _Path); + void loadCleaner(const std::wstring& _Path) const; + void loadDict(const std::wstring& _Path) const; + [[nodiscard]] const MVSCleaner& GetCleaner() const + { + return *Cleaner; + } + const MVSCleaner* operator->() const + { + return Cleaner; + } + [[nodiscard]] std::vector WordPieceMethod(const std::wstring& Seq, size_t MaxWordLength = 25, TokenizerMethod Method = TokenizerMethod::Left) const; + [[nodiscard]] std::vector UnigramMethod(const std::wstring& Seq, size_t MaxWordLength = 25, TokenizerMethod Method = TokenizerMethod::Left) const; + std::vector operator()(const std::wstring& Seq, size_t MaxWordLength = 25, TokenizerMethod Method = TokenizerMethod::Left) const; + [[nodiscard]] std::vector SplitWithPlugin(const std::vector& _Inputs) const; + static std::vector SplitString(const std::wstring& _InputRef, const std::wregex& _SignRegex); +private: + std::unordered_map Vocab; + std::wstring Symbol = L"##"; + TokenizerModel Model = TokenizerModel::WordPiece; + MVSCleaner* Cleaner = nullptr; + bool UseSplit = false; +}; + +MVSCleaner* GetDefCleaner(); + +std::wstring JapaneseNormalize(const std::wstring& _Input); + +std::wstring ChineseNormalize(const std::wstring& _Input); + +std::wstring NormalizeText(const std::wstring& _Input, const std::string& _Language); + +std::wstring NumberToChinese(double Number); + +std::wstring NumberToJapanese(double Number); + +MoeVoiceStudioG2PEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - 
Cmd/Modules/Models/header/DiffSvc.hpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/DiffSvc.hpp
index d370683..ac7056a 100644
--- a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/DiffSvc.hpp
+++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/DiffSvc.hpp
@@ -20,6 +20,7 @@
 */
 #pragma once
+#include
 #include "SVC.hpp"
 MoeVoiceStudioCoreHeader
@@ -31,6 +32,19 @@ class DiffusionSvc : public SingingVoiceConversion
         ExecutionProviders ExecutionProvider_ = ExecutionProviders::CPU,
         unsigned DeviceID_ = 0, unsigned ThreadCount_ = 0);
+
+    /**
+     * \brief Load a DiffSvc model
+     * \param _PathDict Model paths; the keys are ["Hubert", "Hifigan", "Encoder", "DenoiseFn", "NoisePredictor", "AfterProcess", "DiffSvc", "Naive", "Alphas"], of which "DiffSvc", "Naive" and "Alphas" are optional
+     * \param _Config Configuration Json
+     * \param _ProgressCallback Progress bar callback
+     * \param ExecutionProvider_ Provider
+     * \param DeviceID_ GPU device ID
+     * \param ThreadCount_ Thread count
+     */
+    DiffusionSvc(const std::map<std::string, std::wstring>& _PathDict, const MJson& _Config, const ProgressCallback& _ProgressCallback,
+        ExecutionProviders ExecutionProvider_ = ExecutionProviders::CPU,
+        unsigned DeviceID_ = 0, unsigned ThreadCount_ = 0);
+
     ~DiffusionSvc() override;

     void Destory();
diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/ModelBase.hpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/ModelBase.hpp
index 2ff0464..87ceab1 100644
--- a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/ModelBase.hpp
+++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/ModelBase.hpp
@@ -83,13 +83,13 @@ class MoeVoiceStudioModule
     /**
      * \brief Inference from input paths
-     * \param _Paths Paths; multiple paths are separated by line breaks
+     * \param _Datas [Paths (multiple paths separated by line breaks), or the inference text]
      * \param _InferParams Inference parameters
      * \param _SlicerSettings Slicer settings
      * \return Output paths
      */
-    [[nodiscard]] virtual std::vector<std::wstring> Inference(std::wstring& _Paths,
-        const MoeVSProjectSpace::MoeVSSvcParams& _InferParams,
+    [[nodiscard]] virtual std::vector<std::wstring> Inference(std::wstring& _Datas,
+        const MoeVSProjectSpace::MoeVSParams& _InferParams,
         const InferTools::SlicerSettings& _SlicerSettings) const;

     /**
diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/MoeVSProject.hpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/MoeVSProject.hpp
index 2d169b5..5c89834 100644
--- a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/MoeVSProject.hpp
+++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/MoeVSProject.hpp
@@ -118,28 +118,72 @@ namespace MoeVSProjectSpace
         {}
     };

-    struct MoeVSSvcParams
+    struct MoeVSParams
     {
+        //Common
+        float NoiseScale = 0.3f;                  //Noise scale factor 0-10
+        int64_t Seed = 52468;                     //Random seed
+        int64_t SpeakerId = 0;                    //Speaker ID
+        uint64_t SrcSamplingRate = 48000;         //Source sampling rate
+        int64_t SpkCount = 2;                     //Number of speakers in the model
+
+        //SVC
         float IndexRate = 0.f;                    //Index rate 0-1
         float ClusterRate = 0.f;                  //Cluster rate 0-1
-        float NoiseScale = 0.3f;                  //Noise scale 0-10
-        float DDSPNoiseScale = 0.8f;              //DDSP noise scale 0-10
-        int64_t Seed = 52468;                     //Seed
+        float DDSPNoiseScale = 0.8f;              //DDSP noise scale factor 0-10
         float Keys = 0.f;                         //Key shift -64-64
         size_t MeanWindowLength = 2;              //Mean filter window size 1-20
         size_t Pndm = 100;                        //Diffusion speed-up ratio 2-200
         size_t Step = 1000;                       //Total diffusion steps 200-1000
-        std::wstring Sampler = L"Pndm";           //Sampler
+        std::wstring Sampler = L"Pndm";           //Diffusion sampler
         std::wstring F0Method = L"Dio";           //F0 extraction algorithm
-        int64_t SpeakerId = 0;
-        uint64_t SrcSamplingRate = 48000;
-        bool UseShallowDiffusion = false;
-        int64_t SpkCount = 2;
-        //RTInfer
+        bool UseShallowDiffusion = false;         //Use shallow diffusion
+
+        //SVCRTInfer
         int64_t RTSampleSize = 44100;
         int64_t CrossFadeLength = 320;
+
+        //TTS
+        std::vector<float> SpeakerMix;            //Speaker mix ratios
+        float LengthScale = 1.0f;                 //Length (duration) scale factor
+        float DurationPredictorNoiseScale = 0.3f; //Noise scale of the stochastic duration predictor
+        float FactorDpSdp = 0.3f;                 //Mix ratio between the stochastic and the plain duration predictor
+        float GateThreshold = 0.66666f;           //Tacotron2 decoder EOS (gate) threshold
+        int64_t MaxDecodeStep = 2000;             //Tacotron2 maximum decode steps
+        std::vector<std::wstring> EmotionPrompt;  //Emotion prompt tags
+        std::wstring PlaceHolderSymbol = L"|";    //Phoneme separator symbol
+        float RestTime = 0.5f;                    //Rest time; a negative value cuts the audio here and starts a new clip
+        int64_t Language = 0;                     //Language ID
+        std::wstring AdditionalInfo;              //Extra info passed to G2P
+    };
+
+    struct MoeVSTTSSeq
+    {
+        std::wstring SeqStr;
+        std::vector<std::wstring> Seq;            //Phoneme sequence
+        std::vector<int64_t> Tones;               //Tone sequence
+        std::vector<int64_t> Durations;           //Duration sequence
+        std::vector<int64_t> Language;            //Language sequence
+        std::vector<float> SpeakerMix;            //Speaker mix ratios
+
+        std::vector<std::wstring> EmotionPrompt;  //Emotion prompt tags
+        std::wstring PlaceHolderSymbol = L"|";    //Phoneme separator symbol
+        float NoiseScale = 0.3f;                  //Noise scale factor 0-10
+        float LengthScale = 1.0f;                 //Length (duration) scale factor
+        float DurationPredictorNoiseScale = 0.3f; //Noise scale of the stochastic duration predictor
+        float FactorDpSdp = 0.3f;                 //Mix ratio between the stochastic and the plain duration predictor
+        float GateThreshold = 0.66666f;           //Tacotron2 decoder EOS (gate) threshold
+        int64_t MaxDecodeStep = 2000;             //Tacotron2 maximum decode steps
+        int64_t Seed = 52468;                     //Random seed
+        int64_t SpeakerId = 0;                    //Speaker ID
+        float RestTime = 0.5f;                    //Rest time; a negative value cuts the audio here and starts a new clip
+        int64_t TotLang = 0;
+        std::wstring AdditionalInfo;              //Extra info passed to G2P
     };

+    using MoeVSSvcParams = MoeVSParams;
+    using MoeVSTTSParams = MoeVSParams;
+
     struct ParamsOffset
     {
         std::vector OrgAudio;
diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/TTS.hpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/TTS.hpp
new file mode 100644
index 0000000..e36b79b
--- /dev/null
+++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/TTS.hpp
@@ -0,0 +1,151 @@
+#pragma once
+#include
+#include "ModelBase.hpp"
+#include "../../Logger/MoeSSLogger.hpp"
+#include "../../InferTools/G2P/MoeVSG2P.hpp"
+#include "MJson.h"
+
+MoeVoiceStudioCoreHeader
+
+class EmoLoader
+{
+public:
+    static constexpr long startPos = 128;
+    EmoLoader() = default;
+    EmoLoader(const std::wstring& path)
+    {
+        if (emofile)
+            fclose(emofile);
+        emofile = nullptr;
+        _wfopen_s(&emofile, path.c_str(), L"r");
+        if (!emofile)
+            throw std::exception("emoFile not exists");
+    }
+    ~EmoLoader()
+    {
+        if (emofile)
+            fclose(emofile);
+        emofile = nullptr;
+    }
+    void close()
+    {
+        if (emofile)
+            fclose(emofile);
+        emofile = nullptr;
+    }
+    void open(const std::wstring& path)
+    {
+        if (emofile)
+            fclose(emofile);
+        emofile = nullptr;
+        _wfopen_s(&emofile, path.c_str(), L"rb");
+        if (!emofile)
+            throw std::exception("emoFile not exists");
+    }
+    std::vector<float> operator[](long index) const
+    {
+        if (emofile)
+        {
+            fseek(emofile, index * 4096 + startPos, SEEK_SET);
+            char buffer[4096];
+            const auto buf = reinterpret_cast<float*>(buffer);
+            const auto bufread = fread_s(buffer, 4096, 1, 4096, emofile);
+            if (bufread == 4096)
+                return { buf ,buf + 1024 };
+            throw std::exception("emo index out of range");
+        }
+        throw std::exception("emo file not opened");
+    }
+private:
+    FILE* emofile = nullptr;
+};
+
+class TextToSpeech : public MoeVoiceStudioModule
+{
+public:
+    using DurationCallback = std::function<void(std::vector<float>&)>;
+
+    TextToSpeech(const ExecutionProviders& ExecutionProvider_, unsigned DeviceID_, unsigned ThreadCount_ = 0);
+
+    [[nodiscard]] std::vector<MoeVSProjectSpace::MoeVSTTSSeq> GetInputSeqs(const MJson& _Input, const MoeVSProjectSpace::MoeVSParams& _InitParams) const;
+
+    static std::vector<std::vector<bool>> generatePath(float* duration, size_t durationSize, size_t maskSize);
+
+    [[nodiscard]] std::vector<float> GetEmotionVector(const std::vector<std::wstring>& src) const;
+
+    [[nodiscard]] std::vector<std::vector<int16_t>> Inference(const std::wstring& _Seq,
+        const MoeVSProjectSpace::MoeVSParams& _InferParams = MoeVSProjectSpace::MoeVSParams()) const;
+
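+    // Illustrative sketch of the JSON accepted by the MJson overload below (values are hypothetical;
+    // the recognized keys follow GetInputSeqs): an array of objects such as
+    //   [{"Tokens": "Some text", "LanguageID": "ZH", "SpeakerId": 0, "NoiseScale": 0.3,
+    //     "Tones": [...], "Durations": [...], "SpeakerMix": [...], "EmotionPrompt": [...]}]
+    // Any field that is omitted falls back to the corresponding _InitParams value.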
[[nodiscard]] std::vector> Inference(const MJson& _Inputs, + const MoeVSProjectSpace::MoeVSParams& _InferParams = MoeVSProjectSpace::MoeVSParams()) const; + + [[nodiscard]] virtual std::vector> Inference(const std::vector& _Input) const; + + [[nodiscard]] std::vector Inference(std::wstring& _Datas, const MoeVSProjectSpace::MoeVSParams& _InferParams, const InferTools::SlicerSettings& _SlicerSettings) const override; + + [[nodiscard]] static std::vector GetAligments(size_t DstLen, size_t SrcLen); + + [[nodiscard]] std::wstring TextNormalize(const std::wstring& _Input, int64_t LanguageId) const; + + [[nodiscard]] int64_t GetLanguageToneIdx(int64_t _Index) const + { + std::string LanguageSymb; + for(const auto& i : LanguageMap) + if (_Index == i.second) + LanguageSymb = i.first; + if (LanguageSymb.empty()) + return 0; + const auto Iter = LanguageTones.find(LanguageSymb); + if (Iter != LanguageTones.end()) + return Iter->second; + return 0; + } + + static int64_t find_max_idx(const std::vector& inp) + { + int64_t idx = 0; + for (size_t i = 1; i < inp.size(); ++i) + if (inp[i] > inp[idx]) + idx = int64_t(i); + return idx; + } + + ~TextToSpeech() override = default; + + template + void LinearCombination(std::vector& _data, T Value = T(1.0)) const + { + _data.resize(SpeakerCount, 0.f); + if (_data.empty()) + { + _data = std::vector(1, Value); + return; + } + T Sum = T(0.0); + for (const auto& i : _data) + Sum += i; + if (Sum < T(0.0001)) + { + _data = std::vector(_data.size(), T(0.0)); + _data[0] = Value; + return; + } + Sum *= T(Value); + for (auto& i : _data) + i /= Sum; + } +protected: + DurationCallback CustomDurationCallback; + int64_t SpeakerCount = 1; + std::map LanguageMap = { {"ZH", 0}, {"JP", 1}, {"EN", 2} }; + std::map LanguageTones = { {"ZH", 0}, {"JP", 0}, {"EN", 0} }; + std::vector Tokenizers; + MoeVSG2P::MVSCleaner* Cleaner = nullptr; + bool AddBlank = true; + bool Emotion = false; + std::map Symbols; + EmoLoader EmoLoader; + MJson EmoJson; +}; + +MoeVoiceStudioCoreEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/Tacotron.hpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/Tacotron.hpp new file mode 100644 index 0000000..ba7b776 --- /dev/null +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/Tacotron.hpp @@ -0,0 +1,39 @@ +#pragma once +#include "ModelBase.hpp" + +INFERCLASSHEADER + +class Tacotron2 : public TTS +{ +public: + Tacotron2(const MJson&, const callback&, const callback_params&, const DurationCallback&, Device _dev = Device::CPU); + + ~Tacotron2() override; + + std::vector Inference(std::wstring& _inputLens) const override; + + [[nodiscard]] std::vector Inference(const MoeVSProject::TTSParams& _input) const override; + + static void cat(std::vector& tensorA, std::vector& Shape, const MTensor& tensorB) { + const int64 n = Shape[1]; + for (int64 i = n; i > 0; --i) + tensorA.insert(tensorA.begin() + (i * Shape[2]), tensorB.GetTensorData()[i - 1]); + ++Shape[2]; + } +private: + Ort::Session* sessionEncoder = nullptr; + Ort::Session* sessionDecoderIter = nullptr; + Ort::Session* sessionPostNet = nullptr; + Ort::Session* sessionGan = nullptr; + + const std::vector ganIn = { "x" }; + const std::vector ganOut = { "audio" }; + const std::vector inputNodeNamesSessionEncoder = { "sequences","sequence_lengths" }; + const std::vector outputNodeNamesSessionEncoder = { "memory","processed_memory","lens" }; + const std::vector inputNodeNamesSessionDecoderIter = { 
"decoder_input","attention_hidden","attention_cell","decoder_hidden","decoder_cell","attention_weights","attention_weights_cum","attention_context","memory","processed_memory","mask" }; + const std::vector outputNodeNamesSessionDecoderIter = { "decoder_output","gate_prediction","out_attention_hidden","out_attention_cell","out_decoder_hidden","out_decoder_cell","out_attention_weights","out_attention_weights_cum","out_attention_context" }; + const std::vector inputNodeNamesSessionPostNet = { "mel_outputs" }; + const std::vector outputNodeNamesSessionPostNet = { "mel_outputs_postnet" }; +}; + +INFERCLASSEND \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/Vits.hpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/Vits.hpp new file mode 100644 index 0000000..68b84b3 --- /dev/null +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/Vits.hpp @@ -0,0 +1,86 @@ +#pragma once +#include "TTS.hpp" + +MoeVoiceStudioCoreHeader + +class Vits : public TextToSpeech +{ +public: + Vits(const MJson& _Config, const ProgressCallback& _ProgressCallback, + const DurationCallback& _DurationCallback, + ExecutionProviders ExecutionProvider_ = ExecutionProviders::CPU, + unsigned DeviceID_ = 0, unsigned ThreadCount_ = 0); + + Vits(const std::map& _PathDict, const MJson& _Config, const ProgressCallback& _ProgressCallback, + const DurationCallback& _DurationCallback, const std::vector& _BertPaths, + ExecutionProviders ExecutionProvider_ = ExecutionProviders::CPU, + unsigned DeviceID_ = 0, unsigned ThreadCount_ = 0); + + void load(const std::map& _PathDict, + const MJson& _Config, const ProgressCallback& _ProgressCallback, + const DurationCallback& _DurationCallback, const std::vector& _BertPaths = {}); + + ~Vits() override; + + void destory() + { + delete sessionDec; + delete sessionSdp; + delete sessionDp; + delete sessionEnc_p; + delete sessionFlow; + delete sessionEmb; + sessionDec = nullptr; + sessionSdp = nullptr; + sessionEnc_p = nullptr; + sessionFlow = nullptr; + sessionEmb = nullptr; + sessionDp = nullptr; + for (auto& OrtPtr : sessionBert) + { + delete OrtPtr; + OrtPtr = nullptr; + } + sessionBert.clear(); + } + + [[nodiscard]] std::vector> Inference(const std::vector& _Input) const override; +private: + Ort::Session* sessionDec = nullptr; + Ort::Session* sessionSdp = nullptr; + Ort::Session* sessionDp = nullptr; + Ort::Session* sessionEnc_p = nullptr; + Ort::Session* sessionFlow = nullptr; + Ort::Session* sessionEmb = nullptr; + std::vector sessionBert; + std::vector BertNames; + std::string VitsType; + bool UseTone = false; + bool UseBert = false; + bool UseLength = true; + bool UseLanguage = false; + bool EncoderG = false; + + std::vector EncoderInputNames = { "x" }; + const std::vector EncoderOutputNames = { "xout", "m_p", "logs_p", "x_mask" }; + + std::vector SdpInputNames = { "x", "x_mask", "zin" }; + const std::vector SdpOutputNames = { "logw" }; + + std::vector DpInputNames = { "x", "x_mask" }; + const std::vector DpOutputNames = { "logw" }; + + std::vector FlowInputNames = { "z_p", "y_mask" }; + const std::vector FlowOutputNames = { "z" }; + + std::vector DecInputNames = { "z_in" }; + const std::vector DecOutputNames = { "o" }; + + const std::vector EmbiddingInputNames = { "sid" }; + const std::vector EmbiddingOutputNames = { "g" }; + + const std::vector BertInputNames = { "input_ids", "attention_mask", "token_type_ids" }; + const std::vector BertOutputNames = { "last_hidden_state" }; +}; + +MoeVoiceStudioCoreEnd \ No newline at end of file diff 
--git a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/VitsSvc.hpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/VitsSvc.hpp index bc87622..95a3408 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/VitsSvc.hpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/header/VitsSvc.hpp @@ -39,6 +39,10 @@ class VitsSvc : public SingingVoiceConversion ExecutionProviders ExecutionProvider_ = ExecutionProviders::CPU, unsigned DeviceID_ = 0, unsigned ThreadCount_ = 0); + VitsSvc(const std::map& _PathDict, const MJson& _Config, const ProgressCallback& _ProgressCallback, + ExecutionProviders ExecutionProvider_ = ExecutionProviders::CPU, + unsigned DeviceID_ = 0, unsigned ThreadCount_ = 0); + ~VitsSvc() override; void Destory(); diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/DiffSvc.cpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/DiffSvc.cpp index 58cd4ec..951c027 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/DiffSvc.cpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/DiffSvc.cpp @@ -194,6 +194,133 @@ DiffusionSvc::DiffusionSvc(const MJson& _Config, const ProgressCallback& _Progre } } +DiffusionSvc::DiffusionSvc(const std::map& _PathDict, + const MJson& _Config, const ProgressCallback& _ProgressCallback, + ExecutionProviders ExecutionProvider_, unsigned DeviceID_, unsigned ThreadCount_) : + SingingVoiceConversion(ExecutionProvider_, DeviceID_, ThreadCount_) +{ + MoeVSClassName(L"MoeVoiceStudioDiffSingingVoiceConversion"); + + //Check SamplingRate + if (_Config["Rate"].IsNull()) + throw std::exception("[Error] Missing field \"Rate\" (SamplingRate)"); + if (_Config["Rate"].IsInt() || _Config["Rate"].IsInt64()) + _samplingRate = _Config["Rate"].GetInt(); + else + throw std::exception("[Error] Field \"Rate\" (SamplingRate) Must Be Int/Int64"); + + logger.log(L"[Info] Current Sampling Rate is" + std::to_wstring(_samplingRate)); + + if (_Config["MelBins"].IsNull()) + throw std::exception("[Error] Missing field \"MelBins\" (MelBins)"); + if (_Config["MelBins"].IsInt() || _Config["MelBins"].IsInt64()) + melBins = _Config["MelBins"].GetInt(); + else + throw std::exception("[Error] Field \"MelBins\" (MelBins) Must Be Int/Int64"); + + if (!(_Config["Hop"].IsInt() || _Config["Hop"].IsInt64())) + throw std::exception("[Error] Hop Must Be Int"); + HopSize = _Config["Hop"].GetInt(); + + if (HopSize < 1) + throw std::exception("[Error] Hop Must > 0"); + + if (!(_Config["HiddenSize"].IsInt() || _Config["HiddenSize"].IsInt64())) + logger.log(L"[Warn] Missing Field \"HiddenSize\", Use Default Value (256)"); + else + HiddenUnitKDims = _Config["HiddenSize"].GetInt(); + + if (_Config["Characters"].IsArray()) + SpeakerCount = (int64_t)_Config["Characters"].Size(); + + if (_Config["Volume"].IsBool()) + EnableVolume = _Config["Volume"].GetBool(); + else + logger.log(L"[Warn] Missing Field \"Volume\", Use Default Value (False)"); + + if (!_Config["CharaMix"].IsBool()) + logger.log(L"[Warn] Missing Field \"CharaMix\", Use Default Value (False)"); + else + EnableCharaMix = _Config["CharaMix"].GetBool(); + + if (!_Config["Diffusion"].IsBool()) + logger.log(L"[Warn] Missing Field \"Diffusion\", Use Default Value (False)"); + else if (_Config["Diffusion"].GetBool()) + DiffSvcVersion = L"DiffusionSvc"; + + if (_Config["Pndm"].IsInt()) + Pndms = _Config["Pndm"].GetInt(); + + _callback = _ProgressCallback; + + if (_Config["Cluster"].IsString()) + { + const auto clus = to_wide_string(_Config["Cluster"].GetString()); + if (!(_Config["KMeansLength"].IsInt() || 
_Config["KMeansLength"].IsInt64())) + logger.log(L"[Warn] Missing Field \"KMeansLength\", Use Default Value (10000)"); + else + ClusterCenterSize = _Config["KMeansLength"].GetInt(); + try + { + Cluster = MoeVoiceStudioCluster::GetMoeVSCluster(clus, _PathDict.at("Cluster"), HiddenUnitKDims, ClusterCenterSize); + EnableCluster = true; + } + catch (std::exception& e) + { + logger.error(e.what()); + EnableCluster = false; + } + } + + //LoadModels + try + { + logger.log(L"[Info] loading DiffSvc Models"); + hubert = new Ort::Session(*env, _PathDict.at("Hubert").c_str(), *session_options); + nsfHifigan = new Ort::Session(*env, _PathDict.at("Hifigan").c_str(), *session_options); + if (_waccess(_PathDict.at("Encoder").c_str(), 0) != -1) + { + encoder = new Ort::Session(*env, _PathDict.at("Encoder").c_str(), *session_options); + denoise = new Ort::Session(*env, _PathDict.at("DenoiseFn").c_str(), *session_options); + pred = new Ort::Session(*env, _PathDict.at("NoisePredictor").c_str(), *session_options); + after = new Ort::Session(*env, _PathDict.at("AfterProcess").c_str(), *session_options); + if (_waccess(_PathDict.at("Alphas").c_str(), 0) != -1) + alpha = new Ort::Session(*env, _PathDict.at("Alphas").c_str(), *session_options); + } + else + diffSvc = new Ort::Session(*env, _PathDict.at("DiffSvc").c_str(), *session_options); + + if (_waccess(_PathDict.at("Naive").c_str(), 0) != -1) + naive = new Ort::Session(*env, _PathDict.at("Naive").c_str(), *session_options); + + logger.log(L"[Info] DiffSvc Models loaded"); + } + catch (Ort::Exception& _exception) + { + Destory(); + throw std::exception(_exception.what()); + } + + if (_Config["TensorExtractor"].IsString()) + DiffSvcVersion = to_wide_string(_Config["TensorExtractor"].GetString()); + + if (_Config["MaxStep"].IsInt()) + MaxStep = _Config["MaxStep"].GetInt(); + + MoeVSTensorPreprocess::MoeVoiceStudioTensorExtractor::Others _others_param; + _others_param.Memory = *memory_info; + + try + { + _TensorExtractor = GetTensorExtractor(DiffSvcVersion, 48000, _samplingRate, HopSize, EnableCharaMix, EnableVolume, HiddenUnitKDims, SpeakerCount, _others_param); + } + catch (std::exception& e) + { + Destory(); + throw std::exception(e.what()); + } +} + std::vector DiffusionSvc::SliceInference(const MoeVSProjectSpace::MoeVSAudioSlice& _Slice, const MoeVSProjectSpace::MoeVSSvcParams& _InferParams) const { logger.log(L"[Inferring] Inferring \"" + _Slice.Path + L"\", Start!"); diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/ModelBase.cpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/ModelBase.cpp index ba2c70c..9ba842b 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/ModelBase.cpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/ModelBase.cpp @@ -84,8 +84,8 @@ std::vector MoeVoiceStudioModule::GetOpenFileNameMoeVS() #endif } -std::vector MoeVoiceStudioModule::Inference(std::wstring& _Paths, - const MoeVSProjectSpace::MoeVSSvcParams& _InferParams, +std::vector MoeVoiceStudioModule::Inference(std::wstring& _Datas, + const MoeVSProjectSpace::MoeVSParams& _InferParams, const InferTools::SlicerSettings& _SlicerSettings) const { MoeVSNotImplementedError; diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/TTS.cpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/TTS.cpp new file mode 100644 index 0000000..864f8b7 --- /dev/null +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/TTS.cpp @@ -0,0 +1,321 @@ +#include "../header/TTS.hpp" + +MoeVoiceStudioCoreHeader + TextToSpeech::TextToSpeech(const ExecutionProviders& 
ExecutionProvider_, unsigned DeviceID_, unsigned ThreadCount_) : MoeVoiceStudioModule(ExecutionProvider_, DeviceID_, ThreadCount_) +{ + MoeVSClassName(L"MoeVoiceStudioTextToSpeech"); +} + +std::vector TextToSpeech::GetInputSeqs(const MJson& _Input, const MoeVSProjectSpace::MoeVSParams& _InitParams) const +{ + if (!_Input.IsArray()) + throw std::exception("JSON Type Must Be Array"); + const auto _InpArr = _Input.GetArray(); + std::vector _TTSInputSeqs; + _TTSInputSeqs.reserve(_InpArr.size()); + for(const auto& iter : _InpArr) + { + MoeVSProjectSpace::MoeVSTTSSeq _Temp; + const bool TokenFieldIsStr = iter.HasMember("Tokens") && iter["Tokens"].IsString() && !iter["Tokens"].Empty(); + const bool SeqFieldIsStr = iter.HasMember("Seq") && iter["Seq"].IsString() && !iter["Seq"].Empty(); + + if (iter.HasMember("LanguageID") && iter["LanguageID"].IsString() && + LanguageMap.find(iter["LanguageID"].GetString()) != LanguageMap.end()) + _Temp.TotLang = LanguageMap.at(iter["LanguageID"].GetString()); + else + _Temp.TotLang = _InitParams.Language; + + const int64_t FirstToneIdx = GetLanguageToneIdx(_Temp.TotLang); + + if (iter.HasMember("G2PAdditionalInfo") && iter["G2PAdditionalInfo"].IsString() && !iter["G2PAdditionalInfo"].Empty()) + _Temp.AdditionalInfo = to_wide_string(iter["G2PAdditionalInfo"].GetString()); + else + _Temp.AdditionalInfo = _InitParams.AdditionalInfo; + + if (iter.HasMember("PlaceHolderSymbol") && iter["PlaceHolderSymbol"].IsString()) + _Temp.PlaceHolderSymbol = to_wide_string(iter["PlaceHolderSymbol"].GetString()); + else + _Temp.PlaceHolderSymbol = _InitParams.PlaceHolderSymbol; + + if(TokenFieldIsStr && SeqFieldIsStr) + { + _Temp.SeqStr = to_wide_string(iter["Tokens"].GetString()); + auto TempString = to_wide_string(iter["Seq"].GetString()); + if (TempString.find(L"[ph]") == 0) + _Temp.Seq = Cleaner->DictReplace(TempString.substr(4), _Temp.PlaceHolderSymbol); + else + _Temp.Seq = Cleaner->DictReplace(Cleaner->G2p(TempString, _Temp.PlaceHolderSymbol, _Temp.AdditionalInfo, _Temp.TotLang), _Temp.PlaceHolderSymbol); + } + else if (TokenFieldIsStr) + _Temp.SeqStr = to_wide_string(iter["Tokens"].GetString()); + else if(SeqFieldIsStr) + _Temp.SeqStr = to_wide_string(iter["Seq"].GetString()); + else + throw std::exception("You Should Input Tokens To Inference"); + if (iter.HasMember("Seq") && iter["Seq"].IsArray()) + { + const auto SeqObject = iter["Seq"]; + if (!SeqObject.Empty()) + for (const auto& j : SeqObject.GetArray()) + _Temp.Seq.emplace_back(j.IsString() ? to_wide_string(j.GetString()) : std::wstring()); + else if(_Temp.SeqStr.empty()) + throw std::exception("You Should Input Tokens To Inference"); + } + + if(_Temp.SeqStr.empty()) + throw std::exception("You Should Input Tokens To Inference"); + + if (iter.HasMember("Tones") && iter["Tones"].IsArray()) + for (const auto& j : iter["Tones"].GetArray()) + _Temp.Tones.emplace_back(j.IsInt() ? j.GetInt() + FirstToneIdx : 0); + if (iter.HasMember("Durations") && iter["Durations"].IsArray()) + for (const auto& j : iter["Durations"].GetArray()) + _Temp.Durations.emplace_back(j.IsInt() ? j.GetInt() : 0); + if (iter.HasMember("Language") && iter["Language"].IsArray()) + for (const auto& j : iter["Language"].GetArray()) + _Temp.Language.emplace_back(j.IsInt() ? j.GetInt() : (j.IsString() ? LanguageMap.at(j.GetString()) : 0)); + if (iter.HasMember("SpeakerMix") && iter["SpeakerMix"].IsArray()) + for (const auto& j : iter["SpeakerMix"].GetArray()) + _Temp.SpeakerMix.emplace_back(j.IsFloat() ? 
j.GetFloat() : 0.f); + else + _Temp.SpeakerMix = _InitParams.SpeakerMix; + if (iter.HasMember("EmotionPrompt") && iter["EmotionPrompt"].IsArray()) + for (const auto& j : iter["EmotionPrompt"].GetArray()) + _Temp.EmotionPrompt.emplace_back(j.IsString() ? to_wide_string(j.GetString()) : std::wstring()); + else + _Temp.EmotionPrompt = _InitParams.EmotionPrompt; + if (iter.HasMember("NoiseScale") && iter["NoiseScale"].IsFloat()) + _Temp.NoiseScale = iter["NoiseScale"].GetFloat(); + else + _Temp.NoiseScale = _InitParams.NoiseScale; + if (iter.HasMember("LengthScale") && iter["LengthScale"].IsFloat()) + _Temp.LengthScale = iter["LengthScale"].GetFloat(); + else + _Temp.LengthScale = _InitParams.LengthScale; + if (iter.HasMember("RestTime") && iter["RestTime"].IsFloat()) + _Temp.RestTime = iter["RestTime"].GetFloat(); + else + _Temp.RestTime = _InitParams.RestTime; + if (iter.HasMember("DurationPredictorNoiseScale") && iter["DurationPredictorNoiseScale"].IsFloat()) + _Temp.DurationPredictorNoiseScale = iter["DurationPredictorNoiseScale"].GetFloat(); + else + _Temp.DurationPredictorNoiseScale = _InitParams.DurationPredictorNoiseScale; + if (iter.HasMember("FactorDpSdp") && iter["FactorDpSdp"].IsFloat()) + _Temp.FactorDpSdp = iter["FactorDpSdp"].GetFloat(); + else + _Temp.FactorDpSdp = _InitParams.FactorDpSdp; + if (iter.HasMember("GateThreshold") && iter["GateThreshold"].IsFloat()) + _Temp.GateThreshold = iter["GateThreshold"].GetFloat(); + else + _Temp.GateThreshold = _InitParams.GateThreshold; + if (iter.HasMember("MaxDecodeStep") && iter["MaxDecodeStep"].IsFloat()) + _Temp.MaxDecodeStep = iter["MaxDecodeStep"].GetInt(); + else + _Temp.MaxDecodeStep = _InitParams.MaxDecodeStep; + if (iter.HasMember("Seed") && iter["Seed"].IsInt()) + _Temp.Seed = iter["Seed"].GetInt(); + else + _Temp.Seed = _InitParams.Seed; + if (iter.HasMember("SpeakerId") && iter["SpeakerId"].IsInt()) + _Temp.SpeakerId = iter["SpeakerId"].GetInt(); + else + _Temp.SpeakerId = _InitParams.SpeakerId; + + if (_Temp.MaxDecodeStep < 500) _Temp.MaxDecodeStep = 500; + if (_Temp.GateThreshold > 0.98f) _Temp.GateThreshold = 0.98f; + if (_Temp.GateThreshold < 0.2f) _Temp.GateThreshold = 0.2f; + if (_Temp.FactorDpSdp > 1.f) _Temp.FactorDpSdp = 1.f; + if (_Temp.FactorDpSdp < 0.f) _Temp.FactorDpSdp = 0.f; + if (_Temp.DurationPredictorNoiseScale > 10.f) _Temp.DurationPredictorNoiseScale = 10.f; + if (_Temp.DurationPredictorNoiseScale < 0.f) _Temp.DurationPredictorNoiseScale = 0.f; + if (_Temp.RestTime > 30.f) _Temp.RestTime = 30.f; + if (_Temp.LengthScale > 10.f) _Temp.LengthScale = 10.f; + if (_Temp.LengthScale < 0.1f) _Temp.LengthScale = 0.1f; + + if (!_Temp.SeqStr.empty() && _Temp.Seq.empty()) + { + if (_Temp.SeqStr.find(L"[ph]") == 0) + _Temp.Seq = Cleaner->DictReplace(_Temp.SeqStr.substr(4), _Temp.PlaceHolderSymbol); + else + _Temp.Seq = Cleaner->DictReplace(Cleaner->G2p(_Temp.SeqStr, _Temp.PlaceHolderSymbol, _Temp.AdditionalInfo, _Temp.TotLang), _Temp.PlaceHolderSymbol); + } + _TTSInputSeqs.emplace_back(std::move(_Temp)); + } + return _TTSInputSeqs; +} + +std::vector TextToSpeech::GetEmotionVector(const std::vector& src) const +{ + if (src.empty()) + return EmoLoader[0]; + std::vector dst(1024, 0.0); + uint64_t mul = 0; + for(const auto& iter : src) + { + long emoId; + const auto emoStr = to_byte_string(iter); + if (!EmoJson[emoStr].Empty()) + emoId = EmoJson[emoStr].GetInt(); + else + emoId = atoi(emoStr.c_str()); + auto emoVec = EmoLoader[emoId]; + for (size_t i = 0; i < 1024; ++i) + dst[i] = dst[i] + (emoVec[i] - dst[i]) / 
(float)(mul + 1ull); + ++mul; + } + return dst; +} + +std::vector> TextToSpeech::generatePath(float* duration, size_t durationSize, size_t maskSize) +{ + for (size_t i = 1; i < maskSize; ++i) + duration[i] = duration[i - 1] + duration[i]; + std::vector> path(durationSize, std::vector(maskSize, false)); + //const auto path = new float[maskSize * durationSize]; + /* + for (size_t i = 0; i < maskSize; ++i) + for (size_t j = 0; j < durationSize; ++j) + path[i][j] = (j < (size_t)duration[i] ? 1.0f : 0.0f); + for (size_t i = maskSize - 1; i > 0ull; --i) + for (size_t j = 0; j < durationSize; ++j) + path[i][j] -= path[i-1][j]; + */ + auto dur = (size_t)duration[0]; + for (size_t j = 0; j < dur; ++j) + path[j][0] = true; + /* + for (size_t i = maskSize - 1; i > 0ull; --i) + for (size_t j = 0; j < durationSize; ++j) + path[i][j] = (j < (size_t)duration[i] && j >= (size_t)duration[i - 1]); + std::vector> tpath(durationSize, std::vector(maskSize)); + for (size_t i = 0; i < maskSize; ++i) + for (size_t j = 0; j < durationSize; ++j) + tpath[j][i] = path[i][j]; + */ + for (size_t j = maskSize - 1; j > 0ull; --j) + { + dur = (size_t)duration[j]; + for (auto i = (size_t)duration[j - 1]; i < dur && i < durationSize; ++i) + path[i][j] = true; + } + return path; +} + +std::vector> TextToSpeech::Inference(const std::wstring& _Seq, const MoeVSProjectSpace::MoeVSParams& _InferParams) const +{ + if (_Seq.empty()) + return {}; + if (_Seq.find(L"[ph]") != 0 && _Seq[0] == L'[') + return Inference(GetInputSeqs({ to_byte_string(_Seq), true }, _InferParams)); + + std::vector SeqLens; + std::wstring TmpSeq; + for (const auto chari : _Seq) + { + if ((chari == L'\n') || (chari == L'\r')) + { + if (!TmpSeq.empty()) + { + SeqLens.push_back(TmpSeq); + TmpSeq.clear(); + } + continue; + } + TmpSeq += chari; + } + if (!TmpSeq.empty()) + SeqLens.push_back(TmpSeq); + + std::vector InputSeqs; + InputSeqs.reserve(SeqLens.size()); + for(const auto& SeqL : SeqLens) + { + MoeVSProjectSpace::MoeVSTTSSeq TmpSeqData; + if (SeqL.find(L"[ph]") == 0) + TmpSeqData.Seq = Cleaner->DictReplace(SeqL.substr(4), _InferParams.PlaceHolderSymbol); + else + TmpSeqData.Seq = Cleaner->DictReplace(Cleaner->G2p(SeqL, _InferParams.PlaceHolderSymbol, _InferParams.AdditionalInfo, _InferParams.Language), _InferParams.PlaceHolderSymbol); + TmpSeqData.SpeakerMix = _InferParams.SpeakerMix; + TmpSeqData.EmotionPrompt = _InferParams.EmotionPrompt; + TmpSeqData.PlaceHolderSymbol = _InferParams.PlaceHolderSymbol; + TmpSeqData.NoiseScale = _InferParams.NoiseScale; + TmpSeqData.LengthScale = _InferParams.LengthScale; + TmpSeqData.DurationPredictorNoiseScale = _InferParams.DurationPredictorNoiseScale; + TmpSeqData.FactorDpSdp = _InferParams.FactorDpSdp; + TmpSeqData.GateThreshold = _InferParams.GateThreshold; + TmpSeqData.MaxDecodeStep = _InferParams.MaxDecodeStep; + TmpSeqData.Seed = _InferParams.Seed; + TmpSeqData.SpeakerId = _InferParams.SpeakerId; + TmpSeqData.RestTime = _InferParams.RestTime; + InputSeqs.emplace_back(std::move(TmpSeqData)); + } + return Inference(InputSeqs); +} + +std::vector> TextToSpeech::Inference(const MJson& _Inputs, const MoeVSProjectSpace::MoeVSParams& _InferParams) const +{ + return Inference(GetInputSeqs(_Inputs, _InferParams)); +} + +std::vector TextToSpeech::Inference(std::wstring& _Datas, const MoeVSProjectSpace::MoeVSParams& _InferParams, const InferTools::SlicerSettings& _SlicerSettings) const +{ + std::vector AudioFolders; + const auto PCM = Inference(_Datas, _InferParams); + AudioFolders.reserve(PCM.size()); + for (const auto& i : 
PCM) + { + std::wstring OutFolder = GetCurrentFolder() + L"/Outputs/BatchInference"; + if (_waccess((OutFolder + L".wav").c_str(), 0) != -1) + { + for (size_t idx = 0; idx < 99999999; ++idx) + if (_waccess((OutFolder + L" (" + std::to_wstring(idx) + L").wav").c_str(), 0) == -1) + { + OutFolder += L" (" + std::to_wstring(idx) + L").wav"; + break; + } + } + else + OutFolder += L".wav"; + AudioFolders.emplace_back(OutFolder); + InferTools::Wav::WritePCMData(_samplingRate, 1, i, OutFolder); + } + return AudioFolders; +} + +std::vector> TextToSpeech::Inference(const std::vector& _Input) const +{ + MoeVSNotImplementedError; +} + +std::vector TextToSpeech::GetAligments(size_t DstLen, size_t SrcLen) +{ + std::vector mel2ph(DstLen + 1, 0); + + size_t startFrame = 0; + const double ph_durs = static_cast(DstLen) / static_cast(SrcLen); + for (size_t iph = 0; iph < SrcLen; ++iph) + { + const auto endFrame = static_cast(round(static_cast(iph) * ph_durs + ph_durs)); + for (auto j = startFrame; j < endFrame + 1; ++j) + mel2ph[j] = static_cast(iph) + 1; + startFrame = endFrame + 1; + } + return mel2ph; +} + +std::wstring TextToSpeech::TextNormalize(const std::wstring& _Input, int64_t LanguageId) const +{ + auto Iterator = LanguageMap.begin(); + while(Iterator != LanguageMap.end()) + { + if (Iterator->second == LanguageId) + break; + ++Iterator; + } + + if (Iterator != LanguageMap.end()) + return MoeVSG2P::NormalizeText(_Input, Iterator->first); + return _Input; +} + +MoeVoiceStudioCoreEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/Vits.cpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/Vits.cpp new file mode 100644 index 0000000..730455f --- /dev/null +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/Vits.cpp @@ -0,0 +1,699 @@ +#include "../header/Vits.hpp" +#include + +MoeVoiceStudioCoreHeader + +Vits::~Vits() +{ + logger.log(L"[Info] unloading Vits Models"); + destory(); + logger.log(L"[Info] Vits Models unloaded"); +} + +Vits::Vits(const MJson& _Config, const ProgressCallback& _ProgressCallback, + const DurationCallback& _DurationCallback, + ExecutionProviders ExecutionProvider_, + unsigned DeviceID_, unsigned ThreadCount_) : + TextToSpeech(ExecutionProvider_, DeviceID_, ThreadCount_) +{ + //Check Folder + if (_Config["Folder"].IsNull()) + throw std::exception("[Error] Missing field \"folder\" (Model Folder)"); + if (!_Config["Folder"].IsString()) + throw std::exception("[Error] Field \"folder\" (Model Folder) Must Be String"); + const auto _folder = to_wide_string(_Config["Folder"].GetString()); + if (_folder.empty()) + throw std::exception("[Error] Field \"folder\" (Model Folder) Can Not Be Empty"); + const std::wstring _path = GetCurrentFolder() + L"\\Models\\" + _folder + L"\\" + _folder; + + std::map _PathDict; + + if(_Config.HasMember("EmotionalPath") && _Config["EmotionalPath"].IsString()) + { + const auto emoStringload = to_wide_string(_Config["EmotionalPath"].GetString()); + if(!emoStringload.empty()) + { + _PathDict["EmotionalPath"] = GetCurrentFolder() + L"\\emotion\\" + emoStringload + L".npy"; + _PathDict["EmotionalDictPath"] = GetCurrentFolder() + L"\\emotion\\" + emoStringload + L".json"; + } + } + + _PathDict["Decoder"] = _path + L"_dec.onnx"; + _PathDict["StochasticDurationPredictor"] = _path + L"_sdp.onnx"; + _PathDict["DurationPredictor"] = _path + L"_dp.onnx"; + _PathDict["Encoder"] = _path + L"_enc_p.onnx"; + _PathDict["FlowNet"] = _path + L"_flow.onnx"; + _PathDict["Embidding"] = _path + L"_emb.onnx"; + + if 
(_Config.HasMember("Dict") && _Config["Dict"].IsString() && !_Config["Dict"].Empty()) + _PathDict["Dict"] = GetCurrentFolder() + L"/Dict/" + to_wide_string(_Config["Dict"].GetString()) + L".json"; + + std::vector _BertPaths; + if (_Config.HasMember("BertPath") && _Config["BertPath"].IsArray() && !_Config["BertPath"].Empty()) + { + for(const auto& BPH : _Config["BertPath"].GetArray()) + { + const auto BertPath = to_wide_string(BPH.GetString()); + if(!BertPath.empty()) + _BertPaths.emplace_back(GetCurrentFolder() + L"/Bert/" + BertPath); + } + } + + load(_PathDict, _Config, _ProgressCallback, _DurationCallback, _BertPaths); +} + +void Vits::load(const std::map& _PathDict, + const MJson& _Config, const ProgressCallback& _ProgressCallback, + const DurationCallback& _DurationCallback, const std::vector& _BertPaths) +{ + if (_Config["Type"].IsNull()) + throw std::exception("[Error] Missing field \"Type\" (ModelType)"); + if (!_Config["Type"].IsString()) + throw std::exception("[Error] Field \"Type\" (ModelType) Must Be String"); + VitsType = _Config["Type"].GetString(); + if (VitsType == "Pits") + { + UseTone = true; + UseLength = false; + } + if (VitsType == "BertVits") + { + UseLength = false; + UseTone = true; + UseBert = true; + UseLanguage = true; + EncoderG = true; + } + + Cleaner = MoeVSG2P::GetDefCleaner(); + if (_PathDict.find("Dict") != _PathDict.end()) + if (_waccess(_PathDict.at("Dict").c_str(), 0) != -1) + Cleaner->loadDict(_PathDict.at("Dict")); + + if (_Config.HasMember("LanguageMap") && !_Config["LanguageMap"].IsNull()) + for (const auto& CMember : _Config["LanguageMap"].GetMemberArray()) + LanguageMap[CMember.first] = CMember.second.GetInt(); + else + logger.log("[Warn] Field \"LanguageMap\" Is Missing, Use Default Value"); + + if (UseLength) + EncoderInputNames.emplace_back("x_lengths"); + if (UseTone) + EncoderInputNames.emplace_back("t"); + if(Emotion) + EncoderInputNames.emplace_back("emotion"); + if (UseLanguage) + EncoderInputNames.emplace_back("language"); + + //Check SamplingRate + if (_Config["Rate"].IsNull()) + throw std::exception("[Error] Missing field \"Rate\" (SamplingRate)"); + if (_Config["Rate"].IsInt() || _Config["Rate"].IsInt64()) + _samplingRate = _Config["Rate"].GetInt(); + else + throw std::exception("[Error] Field \"Rate\" (SamplingRate) Must Be Int/Int64"); + + logger.log(L"[Info] Current Sampling Rate is" + std::to_wstring(_samplingRate)); + + //Check Symbol + if (!_Config.HasMember("Symbol") || _Config["Symbol"].IsNull()) + throw std::exception("[Error] Missing field \"Symbol\" (PhSymbol)"); + if (_Config.HasMember("AddBlank") && !_Config["AddBlank"].IsNull()) + AddBlank = _Config["AddBlank"].GetBool(); + else + logger.log(L"[Warn] Field \"AddBlank\" Is Missing, Use Default Value"); + + //Load Symbol + int64_t iter = 0; + if (_Config["Symbol"].IsArray()) + { + logger.log(L"[Info] Use Phs"); + if (_Config["Symbol"].Empty()) + throw std::exception("[Error] Field \"Symbol\" (PhSymbol) Can Not Be Empty"); + const auto SymbolArr = _Config["Symbol"].GetArray(); + if (!SymbolArr[0].IsString()) + throw std::exception("[Error] Field \"Symbol\" (PhSymbol) Must Be Array or String"); + for (const auto& it : SymbolArr) + Symbols.insert({ to_wide_string(it.GetString()), iter++ }); + } + else + { + if (!_Config["Symbol"].IsString()) + throw std::exception("[Error] Field \"Symbol\" (PhSymbol) Must Be Array or String"); + logger.log(L"[Info] Use Symbols"); + const std::wstring SymbolsStr = to_wide_string(_Config["Symbol"].GetString()); + if (SymbolsStr.empty()) + throw 
std::exception("[Error] Field \"Symbol\" (PhSymbol) Can Not Be Empty"); + for (size_t i = 0; i < SymbolsStr.length(); ++i) + Symbols.insert({ SymbolsStr.substr(i,1) , iter++ }); + } + + try + { + if (_PathDict.find("EmotionalPath") != _PathDict.end()) + { + const auto EmotionPath = _PathDict.at("EmotionalPath"); + if (!EmotionPath.empty()) + { + logger.log(L"[Info] Loading EmotionVector"); + EmoLoader.open(EmotionPath); + logger.log(L"[Info] EmotionVector Loaded"); + Emotion = true; + } + } + if (_PathDict.find("EmotionalDictPath") != _PathDict.end()) + { + const auto EmotionPath = _PathDict.at("EmotionalDictPath"); + if (!EmotionPath.empty()) + EmoJson = { to_byte_string(EmotionPath).c_str() }; + } + } + catch (std::exception& e) + { + logger.log((std::string("[Warn] EmotionPath Error ") + e.what()).c_str()); + } + + if (_Config.HasMember("Characters") && _Config["Characters"].IsArray()) + SpeakerCount = (int64_t)_Config["Characters"].Size(); + + if(UseLanguage) + { + if (_Config["LanguageMap"].IsNull() || !_Config.HasMember("LanguageMap")) + throw std::exception("[Error] Missing field \"LanguageMap\" (LanguageMap)"); + for(const auto& Item : _Config["LanguageMap"].GetMemberArray()) + { + if (!Item.second.IsArray()) + continue; + const auto LangArr = Item.second.GetArray(); + if (LangArr.size() != 2) + continue; + LanguageMap[Item.first] = LangArr[0].GetInt(); + LanguageTones[Item.first] = LangArr[1].GetInt(); + } + } + + if (UseBert) + { + if (LanguageMap.size() != _BertPaths.size()) + EncoderInputNames.emplace_back("bert"); + else + { + BertNames.reserve(_BertPaths.size() * 2); + for (size_t i = 0; i < _BertPaths.size(); ++i) + BertNames.emplace_back("bert_" + std::to_string(i)); + for(const auto& NameInp : BertNames) + EncoderInputNames.emplace_back(NameInp.data()); + } + for(const auto& Path : _BertPaths) + { + if (_waccess(Path.c_str(), 0) != -1) + { + Ort::Session* SessionBert = nullptr; + try + { + SessionBert = new Ort::Session(*env, (Path + L"/model.onnx").c_str(), *session_options); + } + catch(Ort::Exception& e) + { + logger.log(L"[Warn] " + to_wide_string(e.what())); + delete SessionBert; + SessionBert = nullptr; + } + sessionBert.emplace_back(SessionBert); + if (_waccess((Path + L"/Tokenizer.json").c_str(), 0) != -1) + { + Tokenizers.emplace_back(Path + L"/Tokenizer.json"); + Tokenizers.back().BondCleaner(Cleaner); + } + else if (SessionBert) + throw std::exception("Bert Must Have a Tokenizer"); + } + } + } + + _callback = _ProgressCallback; + CustomDurationCallback = _DurationCallback; + + //LoadModels + try + { + logger.log(L"[Info] loading Vits Models"); + sessionDec = new Ort::Session(*env, _PathDict.at("Decoder").c_str(), *session_options); + sessionEnc_p = new Ort::Session(*env, _PathDict.at("Encoder").c_str(), *session_options); + sessionFlow = new Ort::Session(*env, _PathDict.at("FlowNet").c_str(), *session_options); + + if (_waccess(_PathDict.at("Embidding").c_str(), 0) != -1) + sessionEmb = new Ort::Session(*env, _PathDict.at("Embidding").c_str(), *session_options); + else + sessionEmb = nullptr; + + if (_waccess(_PathDict.at("DurationPredictor").c_str(), 0) != -1) + sessionDp = new Ort::Session(*env, _PathDict.at("DurationPredictor").c_str(), *session_options); + else + sessionDp = nullptr; + + if (_waccess(_PathDict.at("StochasticDurationPredictor").c_str(), 0) != -1) + sessionSdp = new Ort::Session(*env, _PathDict.at("StochasticDurationPredictor").c_str(), *session_options); + else + sessionSdp = nullptr; + + if (!sessionDp && !sessionSdp) + { + destory(); + 
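+ // Neither the "DurationPredictor" nor the "StochasticDurationPredictor" session could be created; Vits requires at least one duration predictor, so model loading is aborted here.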
throw std::exception("You must have a duration predictor"); + } + + logger.log(L"[Info] Vits Models loaded"); + } + catch (Ort::Exception& _exception) + { + destory(); + throw std::exception(_exception.what()); + } + + if (sessionEmb) + { + if(EncoderG) EncoderInputNames.emplace_back("g"); + SdpInputNames.emplace_back("g"); + DpInputNames.emplace_back("g"); + FlowInputNames.emplace_back("g"); + DecInputNames.emplace_back("g"); + } +} + +Vits::Vits(const std::map& _PathDict, + const MJson& _Config, const ProgressCallback& _ProgressCallback, + const DurationCallback& _DurationCallback, const std::vector& _BertPaths, + ExecutionProviders ExecutionProvider_, + unsigned DeviceID_, unsigned ThreadCount_) : + TextToSpeech(ExecutionProvider_, DeviceID_, ThreadCount_) +{ + load(_PathDict, _Config, _ProgressCallback, _DurationCallback, _BertPaths); +} + +std::vector> Vits::Inference(const std::vector& _Input) const +{ + std::vector> PCM; + PCM.reserve(_Input.size()); + std::vector> _Audio(1); + logger.log("[Inference] Vits Inference Begin"); + size_t proc = 0; + _callback(proc, _Input.size()); + for(const auto& Seq : _Input) + { + _callback(proc++, _Input.size()); + if(Seq.Seq.empty()) + continue; + + if (!_Audio[0].empty()) + { + if (Seq.RestTime < 0.f) + { + _Audio[0].insert(_Audio[0].end(), size_t(_samplingRate), 0); + PCM.emplace_back(std::move(_Audio[0])); + _Audio[0] = std::vector(); + } + else + _Audio[0].insert(_Audio[0].end(), size_t(Seq.RestTime * float(_samplingRate)), 0); + } + + std::mt19937 gen(static_cast(Seq.Seed)); + std::normal_distribution FloatRandFn(0.f, 1.f); + std::uniform_int_distribution IntRandFn(0, RAND_MAX); + + std::vector TextSeq; + TextSeq.reserve(Seq.Seq.size() * 4 + 4); + for (const auto& it : Seq.Seq) + { + if (AddBlank) + TextSeq.push_back(0); + if (Symbols.find(it) != Symbols.end()) + TextSeq.push_back(Symbols.at(it)); + else + TextSeq.push_back(int64_t(size_t(IntRandFn(gen)) % Symbols.size())); + } + if (AddBlank) + TextSeq.push_back(0); + int64_t TextSeqLength[] = { (int64_t)TextSeq.size() }; + std::vector EncoderOutputs; + std::vector EncoderInputs; + const int64_t TextSeqShape[2] = { 1,TextSeqLength[0] }; + constexpr int64_t LengthShape[1] = { 1 }; + EncoderInputs.push_back(Ort::Value::CreateTensor( + *memory_info, TextSeq.data(), TextSeqLength[0], TextSeqShape, 2)); + if (UseLength) + EncoderInputs.push_back(Ort::Value::CreateTensor( + *memory_info, TextSeqLength, 1, LengthShape, 1)); + std::vector emoVec; + constexpr int64_t EmotionShape[1] = { 1024 }; + if(Emotion) + { + emoVec = GetEmotionVector(Seq.EmotionPrompt); + EncoderInputs.push_back(Ort::Value::CreateTensor( + *memory_info, emoVec.data(), 1024, EmotionShape, 1)); + } + std::vector ToneIn(TextSeq.size(), 0i64); + if(UseTone) + { + if (ToneIn.size() == Seq.Tones.size()) + ToneIn = Seq.Tones; + else if (AddBlank && ToneIn.size() == Seq.Tones.size() * 2 + 1) + for (size_t i = 1; i < ToneIn.size(); i += 2) + ToneIn[i] = Seq.Tones[i / 2]; + else if (ToneIn.size() * 2 + 1 == Seq.Tones.size()) + for (size_t i = 1; i < Seq.Tones.size(); i += 2) + ToneIn[i / 2] = Seq.Tones[i]; + EncoderInputs.push_back(Ort::Value::CreateTensor( + *memory_info, ToneIn.data(), TextSeqLength[0], TextSeqShape, 2)); + } + std::vector LanguageIn(TextSeq.size(), Seq.TotLang); + if(UseLanguage) + { + if (LanguageIn.size() == Seq.Tones.size()) + LanguageIn = Seq.Tones; + else if (AddBlank && LanguageIn.size() == Seq.Tones.size() * 2 + 1) + for (size_t i = 1; i < LanguageIn.size(); i += 2) + LanguageIn[i] = Seq.Tones[i / 2]; + else if 
(LanguageIn.size() * 2 + 1 == Seq.Tones.size()) + for (size_t i = 1; i < Seq.Tones.size(); i += 2) + LanguageIn[i / 2] = Seq.Tones[i]; + EncoderInputs.push_back(Ort::Value::CreateTensor( + *memory_info, LanguageIn.data(), TextSeqLength[0], TextSeqShape, 2)); + } + std::vector BertVecs(sessionBert.size(), std::vector(1024 * TextSeqLength[0], 0.f)); + int64_t BertShape[2] = { TextSeqLength[0],1024 }; + if(UseBert) + { + for (size_t IndexOfBert = 0; IndexOfBert < sessionBert.size(); ++IndexOfBert) + { + auto& BertData = BertVecs[IndexOfBert]; + if (sessionBert[IndexOfBert] && (IndexOfBert == size_t(Seq.TotLang) || + (IndexOfBert != size_t(Seq.TotLang) && sessionBert.size() == 1))) + { + auto input_ids = Tokenizers[IndexOfBert](TextNormalize(Seq.SeqStr, Seq.TotLang)); + std::vector attention_mask(input_ids.size(), 1), token_type_ids(input_ids.size(), 0); + int64_t AttentionShape[2] = { 1, (int64_t)input_ids.size() }; + std::vector AttentionInput, AttentionOutput; + AttentionInput.emplace_back(Ort::Value::CreateTensor( + *memory_info, input_ids.data(), input_ids.size(), AttentionShape, 2)); + AttentionInput.emplace_back(Ort::Value::CreateTensor( + *memory_info, attention_mask.data(), attention_mask.size(), AttentionShape, 2)); + AttentionInput.emplace_back(Ort::Value::CreateTensor( + *memory_info, token_type_ids.data(), token_type_ids.size(), AttentionShape, 2)); + try + { + AttentionOutput = sessionBert[IndexOfBert]->Run(Ort::RunOptions{ nullptr }, + BertInputNames.data(), + AttentionInput.data(), + 3, + BertOutputNames.data(), + 1); + } + catch (Ort::Exception& e) + { + throw std::exception((std::string("Locate: Bert\n") + e.what()).c_str()); + } + const auto AligmentMartix = GetAligments(BertShape[0], AttentionOutput[0].GetTensorTypeAndShapeInfo().GetShape()[0]); + const auto AttnData = AttentionOutput[0].GetTensorData(); + for (int64_t IndexOfSrcVector = 0; IndexOfSrcVector < TextSeqLength[0]; ++IndexOfSrcVector) + memcpy(BertData.data() + IndexOfSrcVector * 1024, AttnData + AligmentMartix[IndexOfSrcVector] * 1024, 1024 * sizeof(float)); + } + EncoderInputs.emplace_back(Ort::Value::CreateTensor( + *memory_info, BertData.data(), BertData.size(), BertShape, 2)); + } + } + + std::vector GEmbidding; + std::vector GOutShape; + if (sessionEmb) + { + auto SpeakerMixData = Seq.SpeakerMix; + if (!SpeakerMixData.empty() && SpeakerCount > 1) + { + LinearCombination(SpeakerMixData); + int64_t csid = 0; + for (const auto& CharaP : SpeakerMixData) + { + std::vector EmbiddingInput; + std::vector EmbiddingOutput; + if (csid >= SpeakerCount) + break; + if (CharaP < 0.0001f) + { + ++csid; + continue; + } + int64_t Character[1] = { csid }; + EmbiddingInput.push_back(Ort::Value::CreateTensor( + *memory_info, Character, 1, LengthShape, 1)); + try + { + EmbiddingOutput = sessionEmb->Run(Ort::RunOptions{ nullptr }, + EmbiddingInputNames.data(), + EmbiddingInput.data(), + EmbiddingInput.size(), + EmbiddingOutputNames.data(), + EmbiddingOutputNames.size()); + } + catch (Ort::Exception& e) + { + throw std::exception((std::string("Locate: emb\n") + e.what()).c_str()); + } + const auto GOutCount = EmbiddingOutput[0].GetTensorTypeAndShapeInfo().GetElementCount(); + if (GOutShape.empty()) + { + GEmbidding = std::vector(EmbiddingOutput[0].GetTensorData(), EmbiddingOutput[0].GetTensorData() + GOutCount); + GOutShape = EmbiddingOutput[0].GetTensorTypeAndShapeInfo().GetShape(); + GOutShape.emplace_back(1); + for (auto idx : GEmbidding) + idx *= float(CharaP); + } + else + for (size_t i = 0; i < GOutCount; ++i) + 
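+ // Blend multi-speaker embeddings: accumulate this speaker's embedding into GEmbidding, weighted by its mix ratio CharaP.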
GEmbidding[i] += EmbiddingOutput[0].GetTensorData()[i] * float(CharaP); + ++csid; + } + } + else + { + std::vector EmbiddingInput; + std::vector EmbiddingOutput; + int64_t Character[1] = { Seq.SpeakerId }; + EmbiddingInput.push_back(Ort::Value::CreateTensor( + *memory_info, Character, 1, LengthShape, 1)); + try + { + EmbiddingOutput = sessionEmb->Run(Ort::RunOptions{ nullptr }, + EmbiddingInputNames.data(), + EmbiddingInput.data(), + EmbiddingInput.size(), + EmbiddingOutputNames.data(), + EmbiddingOutputNames.size()); + } + catch (Ort::Exception& e) + { + throw std::exception((std::string("Locate: emb\n") + e.what()).c_str()); + } + const auto GOutCount = EmbiddingOutput[0].GetTensorTypeAndShapeInfo().GetElementCount(); + GEmbidding = std::vector(EmbiddingOutput[0].GetTensorData(), EmbiddingOutput[0].GetTensorData() + GOutCount); + GOutShape = EmbiddingOutput[0].GetTensorTypeAndShapeInfo().GetShape(); + GOutShape.emplace_back(1); + } + if (EncoderG) + EncoderInputs.push_back(Ort::Value::CreateTensor(*memory_info, GEmbidding.data(), GEmbidding.size(), GOutShape.data(), 3)); + } + + try + { + EncoderOutputs = sessionEnc_p->Run(Ort::RunOptions{ nullptr }, + EncoderInputNames.data(), + EncoderInputs.data(), + EncoderInputs.size(), + EncoderOutputNames.data(), + EncoderOutputNames.size()); + } + catch (Ort::Exception& e) + { + throw std::exception((std::string("Locate: enc_p\n") + e.what()).c_str()); + } + + std::vector + m_p(EncoderOutputs[1].GetTensorData(), EncoderOutputs[1].GetTensorData() + EncoderOutputs[1].GetTensorTypeAndShapeInfo().GetElementCount()), + logs_p(EncoderOutputs[2].GetTensorData(), EncoderOutputs[2].GetTensorData() + EncoderOutputs[2].GetTensorTypeAndShapeInfo().GetElementCount()), + x_mask(EncoderOutputs[3].GetTensorData(), EncoderOutputs[3].GetTensorData() + EncoderOutputs[3].GetTensorTypeAndShapeInfo().GetElementCount()); + + const auto xshape = EncoderOutputs[0].GetTensorTypeAndShapeInfo().GetShape(); + + std::vector w_ceil(TextSeqLength[0], 1.f); + bool enable_dp = false; + if (Seq.Durations.size() == w_ceil.size() || Seq.Durations.size() == w_ceil.size() / 2) + enable_dp = true; + + const int64_t zinputShape[3] = { xshape[0],2,xshape[2] }; + const int64_t zinputCount = xshape[0] * xshape[2] * 2; + std::vector zinput(zinputCount, 0.0); + for (auto& it : zinput) + it = FloatRandFn(gen) * Seq.DurationPredictorNoiseScale; + std::vector DurationPredictorInput; + DurationPredictorInput.push_back(std::move(EncoderOutputs[0])); + DurationPredictorInput.push_back(std::move(EncoderOutputs[3])); + DurationPredictorInput.push_back(Ort::Value::CreateTensor( + *memory_info, zinput.data(), zinputCount, zinputShape, 3)); + if (sessionEmb) + DurationPredictorInput.push_back(Ort::Value::CreateTensor(*memory_info, GEmbidding.data(), GEmbidding.size(), GOutShape.data(), 3)); + if(sessionSdp) + { + std::vector StochasticDurationPredictorOutput; + try + { + StochasticDurationPredictorOutput = sessionSdp->Run(Ort::RunOptions{ nullptr }, + SdpInputNames.data(), + DurationPredictorInput.data(), + DurationPredictorInput.size(), + SdpOutputNames.data(), + SdpOutputNames.size()); + } + catch (Ort::Exception& e) + { + throw std::exception((std::string("Locate: dp\n") + e.what()).c_str()); + } + const auto w_data = StochasticDurationPredictorOutput[0].GetTensorMutableData(); + const auto w_data_length = StochasticDurationPredictorOutput[0].GetTensorTypeAndShapeInfo().GetElementCount(); + if (w_data_length != w_ceil.size()) + w_ceil.resize(w_data_length, 0.f); + float SdpFactor = 1.f - 
Seq.FactorDpSdp; + if (sessionDp) + for (size_t i = 0; i < w_ceil.size(); ++i) + w_ceil[i] = ceil(exp(w_data[i] * SdpFactor) * x_mask[i] * Seq.LengthScale); + else + for (size_t i = 0; i < w_ceil.size(); ++i) + w_ceil[i] = ceil(exp(w_data[i]) * x_mask[i] * Seq.LengthScale); + } + if (sessionDp) + { + std::vector DurationPredictorOutput; + DurationPredictorInput.erase(DurationPredictorInput.begin() + 2); + try + { + DurationPredictorOutput = sessionDp->Run(Ort::RunOptions{ nullptr }, + DpInputNames.data(), + DurationPredictorInput.data(), + DurationPredictorInput.size(), + DpOutputNames.data(), + DpOutputNames.size()); + } + catch (Ort::Exception& e) + { + throw std::exception((std::string("Locate: dp\n") + e.what()).c_str()); + } + const auto w_data = DurationPredictorOutput[0].GetTensorMutableData(); + const auto w_data_length = DurationPredictorOutput[0].GetTensorTypeAndShapeInfo().GetElementCount(); + if (w_data_length != w_ceil.size()) + w_ceil.resize(w_data_length, 0.f); + if (sessionSdp) + for (size_t i = 0; i < w_ceil.size(); ++i) + w_ceil[i] += ceil(exp(w_data[i] * Seq.FactorDpSdp) * x_mask[i] * Seq.LengthScale); + else + for (size_t i = 0; i < w_ceil.size(); ++i) + w_ceil[i] = ceil(exp(w_data[i]) * x_mask[i] * Seq.LengthScale); + } + if(enable_dp) + { + if (Seq.Durations.size() == TextSeq.size()) + for (size_t i = 0; i < w_ceil.size(); ++i) + w_ceil[i] = float(Seq.Durations[i]); + else if (AddBlank && Seq.Durations.size() == TextSeq.size() / 2ull) + for (size_t i = 0; i < Seq.Durations.size(); ++i) + w_ceil[1 + i * 2] = float(Seq.Durations[i]); + } + CustomDurationCallback(w_ceil); + const auto maskSize = x_mask.size(); + float y_length_f = 0.0; + int64_t y_length; + for (size_t i = 0; i < w_ceil.size(); ++i) + y_length_f += w_ceil[i]; + if (y_length_f < 1.0f) + y_length = 1; + else + y_length = (int64_t)y_length_f; + + auto attn = generatePath(w_ceil.data(), y_length, maskSize); + std::vector logVec(192, std::vector(y_length, 0.0f)); + std::vector mpVec(192, std::vector(y_length, 0.0f)); + std::vector nlogs_pData(192 * y_length); + for (size_t i = 0; i < static_cast(y_length); ++i) + { + for (size_t j = 0; j < 192; ++j) + { + for (size_t k = 0; k < maskSize; k++) + { + if (attn[i][k]) + { + mpVec[j][i] += m_p[j * maskSize + k]; + logVec[j][i] += logs_p[j * maskSize + k]; + } + } + nlogs_pData[j * y_length + i] = mpVec[j][i] + FloatRandFn(gen) * exp(logVec[j][i]) * Seq.NoiseScale; + } + } + std::vector y_mask(y_length, 1.0f); + const int64_t zshape[3] = { 1,192,y_length }; + const int64_t yshape[3] = { 1,1,y_length }; + + std::vector FlowDecInputs, FlowDecOutputs; + + FlowDecInputs.push_back(Ort::Value::CreateTensor( + *memory_info, nlogs_pData.data(), 192 * y_length, zshape, 3)); + FlowDecInputs.push_back(Ort::Value::CreateTensor( + *memory_info, y_mask.data(), y_length, yshape, 3)); + if (sessionEmb) + FlowDecInputs.push_back(Ort::Value::CreateTensor( + *memory_info, GEmbidding.data(), GEmbidding.size(), GOutShape.data(), 3)); + + try + { + FlowDecOutputs = sessionFlow->Run(Ort::RunOptions{ nullptr }, + FlowInputNames.data(), + FlowDecInputs.data(), + FlowDecInputs.size(), + FlowOutputNames.data(), + FlowOutputNames.size()); + } + catch (Ort::Exception& e) + { + throw std::exception((std::string("Locate: dec & flow\n") + e.what()).c_str()); + } + FlowDecInputs[0] = std::move(FlowDecOutputs[0]); + if (sessionEmb) + FlowDecInputs[1] = std::move(FlowDecInputs[2]); + FlowDecInputs.pop_back(); + try + { + + FlowDecOutputs = sessionDec->Run(Ort::RunOptions{ nullptr }, + 
DecInputNames.data(), + FlowDecInputs.data(), + FlowDecInputs.size(), + DecOutputNames.data(), + DecOutputNames.size()); + } + catch (Ort::Exception& e) + { + throw std::exception((std::string("Locate: dec & flow\n") + e.what()).c_str()); + } + const auto shapeOut = FlowDecOutputs[0].GetTensorTypeAndShapeInfo().GetShape(); + const auto outData = FlowDecOutputs[0].GetTensorData(); + for (int bbb = 0; bbb < shapeOut[2]; bbb++) + _Audio[0].emplace_back(static_cast(outData[bbb] * 32768.0f)); + } + if (!_Audio[0].empty()) + { + _Audio[0].insert(_Audio[0].end(), size_t(_samplingRate), 0); + PCM.emplace_back(std::move(_Audio[0])); + } + _callback(proc++, _Input.size()); + logger.log("[Inference] Vits Inference Fin"); + return PCM; +} + +MoeVoiceStudioCoreEnd \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/VitsSvc.cpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/VitsSvc.cpp index 18f22f6..658b0d9 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/VitsSvc.cpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Models/src/VitsSvc.cpp @@ -189,6 +189,148 @@ VitsSvc::VitsSvc(const MJson& _Config, const ProgressCallback& _ProgressCallback } } +VitsSvc::VitsSvc(const std::map& _PathDict, const MJson& _Config, const ProgressCallback& _ProgressCallback, + ExecutionProviders ExecutionProvider_, + unsigned DeviceID_, unsigned ThreadCount_) : + SingingVoiceConversion(ExecutionProvider_, DeviceID_, ThreadCount_) +{ + MoeVSClassName(L"MoeVoiceStudioVitsSingingVoiceConversion"); + + //Check SamplingRate + if (_Config["Rate"].IsNull()) + throw std::exception("[Error] Missing field \"Rate\" (SamplingRate)"); + if (_Config["Rate"].IsInt() || _Config["Rate"].IsInt64()) + _samplingRate = _Config["Rate"].GetInt(); + else + throw std::exception("[Error] Field \"Rate\" (SamplingRate) Must Be Int/Int64"); + + logger.log(L"[Info] Current Sampling Rate is" + std::to_wstring(_samplingRate)); + + if (!_Config["SoVits3"].IsNull() && _Config["SoVits3"].GetBool()) + VitsSvcVersion = L"SoVits3.0"; + else if (!_Config["SoVits2"].IsNull() && _Config["SoVits2"].GetBool()) + VitsSvcVersion = L"SoVits2.0"; + else if (!_Config["SoVits2.0"].IsNull() && _Config["SoVits2.0"].GetBool()) + VitsSvcVersion = L"SoVits2.0"; + else if (!_Config["SoVits3.0"].IsNull() && _Config["SoVits3.0"].GetBool()) + VitsSvcVersion = L"SoVits3.0"; + else if (_Config["Type"].GetString() == std::string("RVC")) + VitsSvcVersion = L"RVC"; + if (!_Config["SoVits4.0V2"].IsNull() && _Config["SoVits4.0V2"].GetBool()) + VitsSvcVersion = L"SoVits4.0-DDSP"; + +#ifdef MOEVSDMLPROVIDER + if (ExecutionProvider_ == ExecutionProviders::DML && VitsSvcVersion == L"SoVits4.0-DDSP") + throw std::exception("[Error] DirectXMl Not Support SoVits4.0V2, Please Use Cuda Or Cpu"); +#endif + + if (!(_Config["Hop"].IsInt() || _Config["Hop"].IsInt64())) + throw std::exception("[Error] Hop Must Exist And Must Be Int"); + HopSize = _Config["Hop"].GetInt(); + + if (!(_Config["HiddenSize"].IsInt() || _Config["HiddenSize"].IsInt64())) + logger.log(L"[Warn] Missing Field \"HiddenSize\", Use Default Value (256)"); + else + HiddenUnitKDims = _Config["HiddenSize"].GetInt(); + + if (!_Config["CharaMix"].IsBool()) + logger.log(L"[Warn] Missing Field \"CharaMix\", Use Default Value (False)"); + else + EnableCharaMix = _Config["CharaMix"].GetBool(); + + if (_Config["Cluster"].IsString()) + { + const auto clus = to_wide_string(_Config["Cluster"].GetString()); + if (!(_Config["KMeansLength"].IsInt() || _Config["KMeansLength"].IsInt64())) + 
logger.log(L"[Warn] Missing Field \"KMeansLength\", Use Default Value (10000)"); + else + ClusterCenterSize = _Config["KMeansLength"].GetInt(); + try + { + Cluster = MoeVoiceStudioCluster::GetMoeVSCluster(clus, _PathDict.at("Cluster"), HiddenUnitKDims, ClusterCenterSize); + EnableCluster = true; + } + catch (std::exception& e) + { + logger.error(e.what()); + EnableCluster = false; + } + } + + if (HopSize < 1) + throw std::exception("[Error] Hop Must > 0"); + + if (_Config["Volume"].IsBool()) + EnableVolume = _Config["Volume"].GetBool(); + else + logger.log(L"[Warn] Missing Field \"Volume\", Use Default Value (False)"); + + if (_Config["Characters"].IsArray()) + SpeakerCount = int64_t(_Config["Characters"].Size()); + + _callback = _ProgressCallback; + + //LoadModels + try + { + logger.log(L"[Info] loading VitsSvcModel Models"); + hubert = new Ort::Session(*env, _PathDict.at("Hubert").c_str(), *session_options); + if (VitsSvcVersion == L"RVC") + VitsSvcModel = new Ort::Session(*env, _PathDict.at("RVC").c_str(), *session_options); + else + VitsSvcModel = new Ort::Session(*env, _PathDict.at("SoVits").c_str(), *session_options); + logger.log(L"[Info] VitsSvcModel Models loaded"); + } + catch (Ort::Exception& _exception) + { + Destory(); + throw std::exception(_exception.what()); + } + + if (VitsSvcModel->GetInputCount() == 4 && VitsSvcVersion != L"SoVits3.0") + VitsSvcVersion = L"SoVits2.0"; + + if (_Config["TensorExtractor"].IsString()) + VitsSvcVersion = to_wide_string(_Config["TensorExtractor"].GetString()); + + if (_Config["ShallowDiffusion"].IsString()) + { + const std::string ShallowDiffusionConf = to_byte_string(GetCurrentFolder()) + "/Models/" + _Config["ShallowDiffusion"].GetString() + ".json"; + try + { + shallow_diffusion = new DiffusionSvc( + _PathDict, + to_byte_string(_PathDict.at("ShallowDiffusionConfig")).c_str(), + [](size_t, size_t) {}, + ExecutionProvider_, + DeviceID_, + ThreadCount_ + ); + stft_operator = new Ort::Session(*env, _PathDict.at("MelOperators").c_str(), *session_options); + } + catch (std::exception& e) + { + delete shallow_diffusion; + shallow_diffusion = nullptr; + delete stft_operator; + stft_operator = nullptr; + logger.error(e.what()); + } + } + + MoeVSTensorPreprocess::MoeVoiceStudioTensorExtractor::Others _others_param; + _others_param.Memory = *memory_info; + try + { + _TensorExtractor = GetTensorExtractor(VitsSvcVersion, 48000, _samplingRate, HopSize, EnableCharaMix, EnableVolume, HiddenUnitKDims, SpeakerCount, _others_param); + } + catch (std::exception& e) + { + Destory(); + throw std::exception(e.what()); + } +} + //已弃用(旧MoeSS的推理函数) #ifdef MOESSDFN std::vector VitsSvc::InferBatch() const diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/Modules.cpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Modules.cpp index 0302b93..085c499 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/Modules.cpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Modules.cpp @@ -72,6 +72,13 @@ namespace MoeVSModuleManager #endif MoeVSRegisterSampler(L"Pndm", PndmSampler); MoeVSRegisterSampler(L"DDim", DDimSampler); + const auto BasicCleanerDir = GetCurrentFolder() + L"/G2P/BasicCleaner.dll"; + if (_waccess(BasicCleanerDir.c_str(), 0) != -1) + { + const auto Cleaner = MoeVSG2P::GetDefCleaner(); + Cleaner->loadG2p(BasicCleanerDir); + Cleaner->GetCleaner().LoadDict(GetCurrentFolder() + L"/G2P"); + } } MoeVoiceStudioCore::SingingVoiceConversion* GetCurSvcModel() diff --git a/MoeVoiceStudioSvc - Core - Cmd/Modules/Modules.hpp b/MoeVoiceStudioSvc - Core - Cmd/Modules/Modules.hpp index 
38ae42d..851c8a6 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/Modules/Modules.hpp +++ b/MoeVoiceStudioSvc - Core - Cmd/Modules/Modules.hpp @@ -22,6 +22,7 @@ #pragma once #include "Models/header/VitsSvc.hpp" #include "Models/header/DiffSvc.hpp" +#include "Models/header/Vits.hpp" namespace MoeVSModuleManager { diff --git a/MoeVoiceStudioSvc - Core - Cmd/MoeVoiceStudioSvc - Core - Cmd.cpp b/MoeVoiceStudioSvc - Core - Cmd/MoeVoiceStudioSvc - Core - Cmd.cpp index c8ca1fc..4fffee9 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/MoeVoiceStudioSvc - Core - Cmd.cpp +++ b/MoeVoiceStudioSvc - Core - Cmd/MoeVoiceStudioSvc - Core - Cmd.cpp @@ -1,4 +1,5 @@ -#ifdef MOEVSONNX +#include +#ifndef MOEVSONNX #include #include #include @@ -39,391 +40,34 @@ std::vector& operator-=(std::vector& left, const std::vector& right) } #endif -namespace RtInferenceSpace +int main() { - class MRecorder - { - public: - MRecorder() = default; - ~MRecorder() - { - if (!hWaveIn) - return; - Stop(); - waveInClose(hWaveIn); - } - void initRecorder(DWORD SamplingRate = 44100) - { - waveform.nSamplesPerSec = SamplingRate; - waveform.wBitsPerSample = 16; - waveform.nChannels = 1; - waveform.cbSize = 0; - waveform.wFormatTag = WAVE_FORMAT_PCM; - waveform.nBlockAlign = (waveform.wBitsPerSample * waveform.nChannels) / 8; - waveform.nAvgBytesPerSec = waveform.nBlockAlign * waveform.nSamplesPerSec; - SamplingRateSrc = SamplingRate; - WaitEvent = CreateEvent(nullptr, 0, 0, nullptr); - waveInOpen(&hWaveIn, WAVE_MAPPER, &waveform, (DWORD_PTR)WaitEvent, 0L, CALLBACK_EVENT); - } - - void setStreamBufferSize(double time) - { - Stop(); - StreamSize = size_t(time * SamplingRateSrc); - timems = DWORD(time * 1000); - timems -= 50; - if (timems < 50) timems = 50; - pcmVector = std::vector(StreamSize * 2); - whdri.lpData = (LPSTR)pcmVector.data(); - whdri.dwBufferLength = DWORD(StreamSize * 2); - whdri.dwBytesRecorded = 0; - whdri.dwUser = 0; - whdri.dwFlags = 0; - whdri.dwLoops = 1; - } - - [[nodiscard]] size_t GetFrameSize() const - { - return StreamSize; - } - - void Start() - { - if (isBegin) - return; - isBegin = true; - std::thread RecoderThread([&]() - { - while(isBegin) - { - whdri.lpData = (LPSTR)pcmVector.data(); - whdri.dwBufferLength = DWORD(StreamSize * 2); - whdri.dwBytesRecorded = 0; - whdri.dwUser = 0; - whdri.dwFlags = 0; - whdri.dwLoops = 1; - waveInPrepareHeader(hWaveIn, &whdri, sizeof(WAVEHDR)); - waveInAddBuffer(hWaveIn, &whdri, sizeof(WAVEHDR)); - waveInStart(hWaveIn); - Sleep(timems); - const size_t nSamples = (size_t)whdri.dwBytesRecorded / 2; - waveInReset(hWaveIn); - std::lock_guard lock(mx); - if(pcmQueue.empty() || pcmQueue.back().size() == StreamSize) - pcmQueue.emplace_back(pcmVector.data(), pcmVector.data() + nSamples); - else - { - auto& BackData = pcmQueue.back(); - if(BackData.size() + nSamples > StreamSize) - { - const auto RealSize = StreamSize - BackData.size(); - BackData.insert(BackData.end(), pcmVector.data(), pcmVector.data() + RealSize); - pcmQueue.emplace_back(pcmVector.data() + RealSize, pcmVector.data() + nSamples); - } - else - BackData.insert(BackData.end(), pcmVector.data(), pcmVector.data() + nSamples); - } - } - }); - RecoderThread.detach(); - } - void Stop() const - { - if(isBegin) - { - waveInStop(hWaveIn); - waveInReset(hWaveIn); - } - } + MoeVSModuleManager::MoeVoiceStudioCoreInitSetup(); - std::vector GetStreamData() - { - std::lock_guard lock(mx); - if (pcmQueue.empty() || pcmQueue[0].size() != StreamSize) - return {}; - auto Stream = std::move(pcmQueue[0]); - pcmQueue.pop_front(); - return 
Stream; - } - private: - DWORD SamplingRateSrc = 44100; - std::vector pcmVector; - std::deque> pcmQueue; - size_t StreamSize = 0; - DWORD timems = 0; - HWAVEIN hWaveIn = nullptr; - WAVEFORMATEX waveform{ WAVE_FORMAT_PCM,1,44100,88200,2,16,0 }; - WAVEHDR whdri{ nullptr,0,0,0,0,0,nullptr,0 }; - HANDLE WaitEvent = nullptr; - bool isBegin = false; - std::mutex mx; - }; - class MPCMPlayer + const MJson Config(to_byte_string(GetCurrentFolder() + L"/Models/HimenoSena.json").c_str()); //Change this to the model config path (relative to the exe) + const MoeVoiceStudioCore::MoeVoiceStudioModule::ProgressCallback ProCallback = [](size_t cur, size_t total) { - public: - MPCMPlayer() = default; - ~MPCMPlayer() - { - if (!hWaveOut) - return; - waveOutClose(hWaveOut); - } - void initPlayer(DWORD SamplingRate = 44100) - { - waveform.nSamplesPerSec = SamplingRate; - waveform.wBitsPerSample = 16; - waveform.nChannels = 1; - waveform.cbSize = 0; - waveform.wFormatTag = WAVE_FORMAT_PCM; - waveform.nBlockAlign = (waveform.wBitsPerSample * waveform.nChannels) / 8; - waveform.nAvgBytesPerSec = waveform.nBlockAlign * waveform.nSamplesPerSec; - WaitEvent = CreateEvent(nullptr, 0, 0, nullptr); - waveOutOpen(&hWaveOut, WAVE_MAPPER, &waveform, (DWORD_PTR)WaitEvent, 0L, CALLBACK_EVENT); - SAMP = SamplingRate; - } - void Play(std::vector& data) - { - whdri.lpData = (LPSTR)data.data(); - whdri.dwBufferLength = DWORD(data.size() * 2); - whdri.dwFlags = 0L; - whdri.dwLoops = 1L; - waveOutPrepareHeader(hWaveOut, &whdri, sizeof(WAVEHDR)); - waveOutWrite(hWaveOut, &whdri, sizeof(WAVEHDR)); - Sleep(DWORD(data.size() * 1000 / size_t(SAMP))); - } - private: - HWAVEOUT hWaveOut = nullptr; - WAVEFORMATEX waveform{ WAVE_FORMAT_PCM,1,44100,88200,2,16,0 }; - WAVEHDR whdri{ nullptr,0,0,0,0,0,nullptr,0 }; - HANDLE WaitEvent = nullptr; - DWORD SAMP = 44100; + std::cout << (double(cur) / double(total) * 100.)
<< "%\n"; }; - - MoeVSProjectSpace::MoeVSSvcParams Params; - short Threshold = 400; - MRecorder RTRecorder; - MPCMPlayer RTPlayer; - std::deque> InputBuffer, OutputBuffer, rawInputBuffer, rawOutputBuffer; - bool RTIsEnabled = false; - size_t crossfade_length = 0; - size_t extra_length = 0; - - void EndRtInference() - { - RTRecorder.Stop(); - RTIsEnabled = false; - InputBuffer.clear(); - OutputBuffer.clear(); - rawInputBuffer.clear(); - rawOutputBuffer.clear(); - } - - void RTInference() + const MoeVoiceStudioCore::TextToSpeech::DurationCallback DurCallback = [](std::vector&) { - if (RTIsEnabled) - { - EndRtInference(); - return; - } - std::wstring error; - RTIsEnabled = true; - crossfade_length = Params.CrossFadeLength; - extra_length = crossfade_length / 4; - std::thread RT_RECORD_THREAD = std::thread([&]() - { - logger.log(L"[RTInference] Recording Thread Start!"); - while (RTIsEnabled) - { - auto PCM = RTRecorder.GetStreamData(); - if(PCM.empty()) - continue; - rawInputBuffer.emplace_back(std::move(PCM)); - - if (rawInputBuffer.size() > 2) - { - std::vector pBuffer; - pBuffer.reserve(rawInputBuffer[1].size() + 4 * crossfade_length); - pBuffer.insert(pBuffer.end(), - rawInputBuffer[0].end() - int64_t(crossfade_length + extra_length), - rawInputBuffer[0].end()); - pBuffer.insert(pBuffer.end(), rawInputBuffer[1].begin(), rawInputBuffer[1].end()); - pBuffer.insert(pBuffer.end(), - rawInputBuffer[2].begin(), - rawInputBuffer[2].begin() + int64_t(crossfade_length + extra_length) + 1000); - InputBuffer.emplace_back(std::move(pBuffer)); - rawInputBuffer.pop_front(); - } - if (rawInputBuffer.size() > 100) - rawInputBuffer.pop_front(); - } - logger.log(L"[RTInference] Recording Thread End!"); - }); - - std::thread RT_INFERENCE_THREAD = std::thread([&]() - { - logger.log(L"[RTInference] Inferencing Thread Start!"); - while (RTIsEnabled) - { - if (!InputBuffer.empty()) - { - try - { - if (MoeVSModuleManager::GetCurSvcModel()) - { - bool zeroVector = true; - for (const auto& i16data : InputBuffer[0]) - { - if (i16data > Threshold * 10) - { - zeroVector = false; - break; - } - } - if (zeroVector) - rawOutputBuffer.emplace_back(std::vector(InputBuffer[0].size(), 0)); - else - rawOutputBuffer.emplace_back(MoeVSModuleManager::GetCurSvcModel()->InferPCMData(InputBuffer[0], (long)MoeVSModuleManager::SamplingRate, Params)); - } - else - rawOutputBuffer.emplace_back(std::move(InputBuffer[0])); - InputBuffer.pop_front(); - } - catch (std::exception& e) - { - logger.error(e.what()); - EndRtInference(); - } - } - if (InputBuffer.size() > 100) - InputBuffer.pop_front(); - } - logger.log(L"[RTInference] Inferencing Thread End!"); - }); - - std::thread RT_OUTPUT_THREAD = std::thread([&]() - { - logger.log(L"[RTInference] OutPut Thread Start!"); - while (RTIsEnabled) - { - if (rawOutputBuffer.size() > 2) - { - std::vector pBuffer( - rawOutputBuffer[1].begin() + (int64_t)(crossfade_length + extra_length), - rawOutputBuffer[1].end() - ); - pBuffer.resize(RTRecorder.GetFrameSize()); - - const auto dataBufr = pBuffer.size() - crossfade_length; - const auto crossBufl = crossfade_length + extra_length + RTRecorder.GetFrameSize(); - const auto crossBufr = extra_length; - - for (size_t i = 0; i < crossfade_length; ++i) - { - const auto crosf1 = (double(i) / double(crossfade_length)); - const auto crosf2 = (1. 
- (double(i) / double(crossfade_length))); - - pBuffer[i] = (int16_t)( - double(pBuffer[i]) * crosf1 + - (double)rawOutputBuffer[0][i + crossBufl] * crosf2 - ); - - pBuffer[i + dataBufr] = (int16_t)( - double(pBuffer[i + dataBufr]) * crosf2 + - (double)rawOutputBuffer[2][i + crossBufr] * crosf1 - ); - } - OutputBuffer.emplace_back(std::move(pBuffer)); - rawOutputBuffer.pop_front(); - } - if (!OutputBuffer.empty()) - { - RTPlayer.Play(OutputBuffer.front()); - OutputBuffer.pop_front(); - } - } - logger.log(L"[RTInference] OutPut Thread End!"); - }); - RTRecorder.Start(); - logger.log(L"[RTInference] Start RTInference!"); - RT_RECORD_THREAD.detach(); - RT_INFERENCE_THREAD.detach(); - RT_OUTPUT_THREAD.detach(); - } -} - -int main() -{ - MoeVSModuleManager::MoeVoiceStudioCoreInitSetup(); - + return; + }; try { - MoeVSModuleManager::LoadSvcModel( - MJson(to_byte_string(GetCurrentFolder() + L"/Models/ShirohaRVC.json").c_str()), - [](size_t cur, size_t total) - { - //std::cout << (double(cur) / double(total) * 100.) << "%\n"; - }, - 0, - 8, - 0 - ); + const MoeVoiceStudioCore::TextToSpeech* VitsTest = dynamic_cast(new MoeVoiceStudioCore::Vits(Config, ProCallback, DurCallback, MoeVoiceStudioCore::MoeVoiceStudioModule::ExecutionProviders::CPU, 8, 0)); + //Replace this with a JSON string or a JSON file path + const auto Voice = VitsTest->Inference(MJson("S:\\VSGIT\\MoeVoiceStudioSvc - Core - Cmd\\x64\\Debug\\test.json")); + //Output + InferTools::Wav::WritePCMData(VitsTest->GetSamplingRate(), 1, Voice[0], L"Test1.wav"); } catch (std::exception& e) { std::cout << e.what(); - return 0; } - RtInferenceSpace::Params.Sampler = L"DDim"; - RtInferenceSpace::Params.Step = 100; - RtInferenceSpace::Params.Pndm = 10; - RtInferenceSpace::Params.F0Method = L"RMVPE"; - RtInferenceSpace::Params.CrossFadeLength = 8000; - RtInferenceSpace::Params.Keys = 8; - - RtInferenceSpace::RTRecorder.initRecorder((DWORD)MoeVSModuleManager::SamplingRate); - RtInferenceSpace::RTRecorder.setStreamBufferSize(0.5); - RtInferenceSpace::RTRecorder.Start(); - RtInferenceSpace::RTPlayer.initPlayer((DWORD)MoeVSModuleManager::SamplingRate); - - RtInferenceSpace::RTInference(); - - while (true); - while (true) - { - auto PCM = RtInferenceSpace::RTRecorder.GetStreamData(); - if (!PCM.empty()) - RtInferenceSpace::RTPlayer.Play(PCM); - } + return 0; } #endif -#include "LibDLVoiceCodec/value.h" -class Class0 : libdlvcodec::Module -{ -public: - Class0(Module* _Parent, const std::string& _Name) : Module(_Parent, _Name) {} -}; - -class ClassA : libdlvcodec::Module -{ -public: - ClassA(Module* _Parent, const std::string& _Name) : Module(_Parent, _Name) {} -private: - RegLayer(Class0, attrC0); -}; - -class ClassB : libdlvcodec::Module -{ -public: - ClassB() : Module(nullptr, "ClassB") {} -private: - RegLayer(ClassA, attrCA); -}; - -int main() -{ - ClassB a; - printf("%d", &a); -} \ No newline at end of file diff --git a/MoeVoiceStudioSvc - Core - Cmd/MoeVoiceStudioSvc - Core - Cmd.vcxproj b/MoeVoiceStudioSvc - Core - Cmd/MoeVoiceStudioSvc - Core - Cmd.vcxproj index 14fe639..b3efd8c 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/MoeVoiceStudioSvc - Core - Cmd.vcxproj +++ b/MoeVoiceStudioSvc - Core - Cmd/MoeVoiceStudioSvc - Core - Cmd.vcxproj @@ -112,6 +112,7 @@ true stdcpp17 4996 + MultiThreadedDebug Console @@ -130,6 +131,7 @@ true stdcpp17 4996 + MultiThreaded Console @@ -156,16 +158,19 @@ + + + @@ -179,6 +184,8 @@ + + @@ -202,17 +209,20 @@ + + + @@ -226,6 +236,9 @@ + + + diff --git a/MoeVoiceStudioSvc - Core - Cmd/MoeVoiceStudioSvc - Core - Cmd.vcxproj.filters b/MoeVoiceStudioSvc -
Core - Cmd/MoeVoiceStudioSvc - Core - Cmd.vcxproj.filters index 9a65b7f..46d0a2c 100644 --- a/MoeVoiceStudioSvc - Core - Cmd/MoeVoiceStudioSvc - Core - Cmd.vcxproj.filters +++ b/MoeVoiceStudioSvc - Core - Cmd/MoeVoiceStudioSvc - Core - Cmd.vcxproj.filters @@ -97,6 +97,24 @@ {9cdc3e22-5d4a-4a08-a1cd-57dd5c657e2c} + + {c016ee4a-744e-431d-8bad-ac18310fb098} + + + {b5a33287-5ba2-45ae-9135-5a8e3da3d93f} + + + {3c32c0a2-b610-47d9-ab2b-ab37e9741903} + + + {2971f642-2c62-4b10-ad90-8cb65bcc99a0} + + + {02644a09-1bb9-48df-9364-1ea5177de68b} + + + {e1a1babf-b512-4926-9870-160cf546636a} + @@ -147,18 +165,6 @@ 源文件\Lib\Json - - 源文件\Modules\Models - - - 源文件\Modules\Models - - - 源文件\Modules\Models - - - 源文件\Modules\Models - 源文件\Modules\Models @@ -222,6 +228,33 @@ 源文件\LibDLVoiceCodec + + 源文件\LibDLVoiceCodec + + + 源文件\Modules\InferTools\Cluster + + + 源文件\Modules\InferTools\G2P + + + 源文件\Modules\Models\SVC + + + 源文件\Modules\Models\SVC + + + 源文件\Modules\Models\SVC + + + 源文件\Modules\Models + + + 源文件\Modules\Models\TTS + + + 源文件\Modules\Models\TTS + @@ -281,15 +314,6 @@ 头文件\Modules\Models - - 头文件\Modules\Models - - - 头文件\Modules\Models - - - 头文件\Modules\Models - 头文件\Modules\Models @@ -356,5 +380,32 @@ 头文件\LibDLVoiceCodec + + 头文件\LibDLVoiceCodec + + + 头文件\Modules\InferTools\Cluster + + + 头文件\Modules\InferTools\G2P + + + 头文件\Modules\Models\TTS + + + 头文件\Modules\Models\TTS + + + 头文件\Modules\Models\TTS + + + 头文件\Modules\Models\SVC + + + 头文件\Modules\Models\SVC + + + 头文件\Modules\Models\SVC + \ No newline at end of file
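
Note on the new VitsSvc path-dictionary constructor added in VitsSvc.cpp: the sample main() in this patch only exercises the TTS (Vits) path, so the sketch below shows how that new overload might be driven from caller code. This is a minimal sketch under assumptions, not part of the patch: it assumes the path map's value type is std::wstring (matching the _PathDict.at("Hubert") / "SoVits" / "RVC" / "Cluster" lookups in the constructor), that VitsSvc lives in the MoeVoiceStudioCore namespace like the other model classes, and that the model file names used here are placeholders.

    #include <map>
    #include <string>
    #include <iostream>
    // Project headers are assumed to provide MJson, GetCurrentFolder(),
    // MoeVoiceStudioCore::VitsSvc and the ExecutionProviders enum.

    static void LoadSoVitsFromPaths(const MJson& Config)
    {
        // Keys mirror the lookups in the new constructor; the file names are hypothetical.
        std::map<std::string, std::wstring> PathDict{
            { "Hubert",  GetCurrentFolder() + L"/Models/hubert.onnx" },
            { "SoVits",  GetCurrentFolder() + L"/Models/sovits.onnx" },  // use key "RVC" instead when Type == "RVC"
            { "Cluster", GetCurrentFolder() + L"/Models/kmeans.npy" }    // only read when "Cluster" is set in the config
        };
        try
        {
            // Parameter order follows the declaration in the patch:
            // provider, DeviceID_, ThreadCount_.
            MoeVoiceStudioCore::VitsSvc Model(
                PathDict, Config,
                [](size_t cur, size_t total) { std::cout << double(cur) / double(total) * 100. << "%\n"; },
                MoeVoiceStudioCore::MoeVoiceStudioModule::ExecutionProviders::CPU,
                0,   // DeviceID_
                8);  // ThreadCount_
            // A real caller would keep Model alive and run inference on it;
            // it is destroyed here at end of scope.
        }
        catch (std::exception& e)
        {
            std::cout << e.what();
        }
    }

The constructor itself still validates "Rate", "Hop" and the optional fields, so the MJson passed in is the same per-model config that the sample main() loads from the /Models folder; only the ONNX file locations move from the config into the path dictionary.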