Skip to content

Commit

Permalink
Compile with CMake
Browse files Browse the repository at this point in the history
  • Loading branch information
vietanhdev committed Jul 25, 2023
1 parent b3b2994 commit c1851ed
Show file tree
Hide file tree
Showing 27 changed files with 208 additions and 110 deletions.
67 changes: 60 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,79 @@ message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
# Add whisper-cpp
add_subdirectory(libs/whisper-cpp)

# Talk with Llama V2
set(TARGET customchar)
add_executable(
# Build CustomChar-core
set(TARGET customchar-core)
add_library(
${TARGET}
customchar/main.cpp
customchar/common.cpp
customchar/llm.cpp
customchar/speech_recognizer.cpp
customchar/voice_synthesizer.cpp
customchar/voice_recorder.cpp
customchar/helpers.cpp

libs/llama-cpp/llama.cpp
libs/whisper-cpp/examples/common.cpp
libs/whisper-cpp/examples/common-sdl.cpp
)
target_include_directories(
${TARGET} PRIVATE
${TARGET} PUBLIC
${SDL2_INCLUDE_DIRS}
libs
.
)
target_link_libraries(${TARGET} PRIVATE ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} whisper)
target_link_libraries(${TARGET} PUBLIC ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} whisper)

# CustomChar - cli
# Command-line front end: only customchar/main.cpp is compiled here; the rest
# of the application code is pulled in by linking against customchar-core.
add_executable(
  customchar-cli
  customchar/main.cpp
)
# PRIVATE: nothing links against this executable, so its dependencies do not
# need to be propagated. The PUBLIC include directories and libraries declared
# on customchar-core are still inherited by customchar-cli.
target_link_libraries(customchar-cli PRIVATE customchar-core)


# Optional GUI front end. The option defaults to ON; configure with
# -DBUILD_GUI=OFF to build only the core library and the CLI.
option(BUILD_GUI "Build GUI" ON)
if (BUILD_GUI)
# Graphics dependencies. All three are REQUIRED, so configuration fails if
# any of them cannot be found.
# NOTE(review): GLEW is only referenced in the Linux branch below - confirm
# it really needs to be REQUIRED on every platform.
find_package(OpenGL REQUIRED)
find_package(GLEW REQUIRED)
find_package(glfw3 REQUIRED)

# ImGui is built from the sources under libs/imgui: the core files plus the
# GLFW and OpenGL3 backends.
# NOTE(review): include_directories() is directory-scoped, so this path is
# added for every target defined after this point, not just the GUI.
include_directories(libs/imgui/include)
set(IMGUI_DIR libs/imgui)
set(IMGUI_SRCS
${IMGUI_DIR}/imgui.cpp
${IMGUI_DIR}/imgui_tables.cpp
${IMGUI_DIR}/imgui_draw.cpp
${IMGUI_DIR}/imgui_widgets.cpp
${IMGUI_DIR}/backends/imgui_impl_glfw.cpp
${IMGUI_DIR}/backends/imgui_impl_opengl3.cpp
)

# Static imgui library; its include directories are PUBLIC so targets that
# link imgui can include the ImGui and backend headers.
add_library(imgui STATIC ${IMGUI_SRCS})
target_include_directories(imgui PUBLIC ${IMGUI_DIR} ${IMGUI_DIR}/backends)

# Per-platform OpenGL/windowing libraries and a platform compile definition.
# NOTE(review): these are attached PUBLIC to ${TARGET}, which is set earlier in
# this file (customchar-core after this change). Anything linking that target,
# including customchar-cli, therefore inherits the GL/GLFW libraries and the
# LINUX/APPLE/WINDOWS definition - confirm this is intended rather than
# attaching them to the imgui or customchar targets.
if (UNIX AND NOT APPLE)
message(STATUS "Building for Linux")
set(LINUX_GL_LIBS GL GLEW)
target_link_libraries(${TARGET} PUBLIC ${LINUX_GL_LIBS} glfw)
target_compile_definitions(${TARGET} PUBLIC LINUX)
elseif (APPLE)
message(STATUS "Building for Mac OS X")
target_link_libraries(${TARGET} PUBLIC "-framework OpenGL" "-framework Cocoa" "-framework IOKit" "-framework CoreVideo" glfw)
target_compile_definitions(${TARGET} PUBLIC APPLE)
# Extra header search paths for macOS (directory-scoped, like the
# include_directories() call above).
include_directories(/usr/local/include /opt/local/include /opt/homebrew/include)
else()
# Any platform that is neither UNIX nor APPLE is treated as Windows.
message(STATUS "Building for Windows")
target_link_libraries(${TARGET} PUBLIC glfw opengl32 imm32)
target_compile_definitions(${TARGET} PUBLIC WINDOWS)
endif()

# GUI executable: the UI entry point plus the chat widgets, linked against the
# core library and imgui.
add_executable(
customchar
customchar/main-ui.cpp
customchar/gui/chat_history.cpp
customchar/gui/chat_message.cpp
)
include_directories(customchar/gui)
target_link_libraries(customchar customchar-core imgui)

endif()
6 changes: 0 additions & 6 deletions customchar-ui/.gitignore

This file was deleted.

10 changes: 0 additions & 10 deletions customchar-ui/README.md

This file was deleted.

119 changes: 119 additions & 0 deletions customchar/character.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#pragma once

#include "customchar/common.h"
#include "customchar/helpers.h"
#include "customchar/llm.h"
#include "customchar/speech_recognizer.h"
#include "customchar/voice_recorder.h"
#include "customchar/voice_synthesizer.h"

#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <memory>
#include <regex>
#include <string>
#include <thread>
#include <utility>
#include <vector>

namespace CC {

/**
 * @brief Voice chat character that ties together a speech recognizer, a voice
 * recorder, a voice synthesizer and an LLM.
 *
 * The constructor builds every component from a CCParams value. run() then
 * loops: sample microphone audio, transcribe it, pass the text to the LLM and
 * speak the answer.
 */
class Character {
 private:
  CCParams params;
  std::shared_ptr<SpeechRecognizer> speech_recognizer;
  std::shared_ptr<VoiceRecorder> voice_recorder;
  std::shared_ptr<VoiceSynthesizer> voice_synthesizer;
  std::shared_ptr<LLM> llm;

 public:
  /**
   * @brief Creates all components and calls eval_model() on the LLM.
   *
   * Members are initialized in declaration order, so `params` is already
   * populated when the other components read their settings from it.
   *
   * @param init_params Configuration for the components; a copy is kept in
   *                    the object.
   */
  Character(CCParams init_params)
      : params(std::move(init_params)),
        speech_recognizer(std::make_shared<SpeechRecognizer>(
            params.sr_model_path, params.language, params.audio_ctx,
            params.n_threads, params.max_tokens, params.translate,
            params.no_timestamps, params.print_special, params.speed_up)),
        voice_recorder(std::make_shared<VoiceRecorder>()),
        voice_synthesizer(std::make_shared<VoiceSynthesizer>()),
        llm(std::make_shared<LLM>(params.llm_model_path, params.path_session,
                                  params.person, params.bot_name)) {
    llm->eval_model();
  }

  /**
   * @brief Runs the listen / answer / speak loop.
   *
   * Blocks until sdl_poll_events() returns false (the original code describes
   * this as handling Ctrl + C).
   */
  void run() {
    // Prompt the user
    printf("Start speaking in the microphone\n");
    printf("%s%s", params.person.c_str(), params.chat_symb.c_str());
    fflush(stdout);

    // Clear audio buffer to avoid processing old audio
    voice_recorder->clear_audio_buffer();

    while (sdl_poll_events()) {
      // Delay between polls
      std::this_thread::sleep_for(std::chrono::milliseconds(100));

      // Sample audio and wait until the recorder reports the user is done
      voice_recorder->sample_audio();
      if (!voice_recorder->finished_talking()) {
        continue;
      }

      // Get recorded audio
      std::vector<float> audio_buff;
      voice_recorder->get_audio(audio_buff);

      // Recognize speech; prob and t_ms are passed to the recognizer but not
      // used afterwards
      float prob = 0.0f;
      int64_t t_ms = 0;
      std::string text_heard =
          speech_recognizer->recognize(audio_buff, prob, t_ms);

      // Tokenize the raw user input
      auto tokens = llm->tokenize(text_heard, false);

      // Skip if nothing was heard
      if (text_heard.empty() || tokens.empty()) {
        printf("Heard nothing, skipping ...\n");
        voice_recorder->clear_audio_buffer();
        continue;
      }

      // Append the new input tokens to the session_tokens vector
      llm->add_tokens_to_current_session(tokens);

      // Print the user input in bold, followed by the bot's prompt prefix
      text_heard.insert(0, 1, ' ');
      text_heard += "\n" + params.bot_name + params.chat_symb;
      printf("%s%s%s", "\033[1m", text_heard.c_str(), "\033[0m");
      fflush(stdout);

      // Tokenize the decorated text (leading space + bot prefix) for the LLM
      std::vector<llama_token> embd = llm->tokenize(text_heard, false);

      // Get answer from LLM
      std::string text_to_speak = llm->get_answer(embd);

      // Speak the answer
      voice_synthesizer->say(text_to_speak);

      // Drop the processed audio before listening again
      voice_recorder->clear_audio_buffer();
    }
  }
};

} // namespace CC
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
18 changes: 16 additions & 2 deletions customchar-ui/main.cpp → customchar/main-ui.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,17 @@ bool handleSend(char* text, std::shared_ptr<ChatHistory> history) {
return true;
}

/**
 * @brief Reports whether the connection settings (the IP_ADDRESS and PORT
 * global variables) are usable.
 *
 * Placeholder implementation: no validation is performed yet, so every
 * configuration is currently reported as valid.
 *
 * @return true Always, until format checking is implemented
 */
bool connectionDataIsValid() {
  // TODO: Check IP_ADDRESS and PORT are valid in format
  const bool is_valid = true;
  return is_valid;
}

/**
* @brief Main ImGUI loop
*/
Expand Down Expand Up @@ -134,7 +145,7 @@ void runImgui(std::shared_ptr<ChatHistory> history) {
// - Remember that in C/C++ if you want to include a backslash \ in a string
// literal you need to write a double backslash \\ !
// io.Fonts->AddFontDefault();
// io.Fonts->AddFontFromFileTTF("BaiJamjuree-Regular.ttf", 16.0f);
// io.Fonts->AddFontFromFileTTF("../../misc/fonts/Roboto-Medium.ttf", 16.0f);
// io.Fonts->AddFontFromFileTTF("../../misc/fonts/Cousine-Regular.ttf", 15.0f);
// io.Fonts->AddFontFromFileTTF("../../misc/fonts/DroidSans.ttf", 16.0f);
// io.Fonts->AddFontFromFileTTF("../../misc/fonts/ProggyTiny.ttf", 10.0f);
Expand Down Expand Up @@ -173,6 +184,7 @@ void runImgui(std::shared_ptr<ChatHistory> history) {
* @brief Shows connection window if not connected, otherwise show
* basic chat window
*/

// Is connected
int TEXTBOX_HEIGHT = ImGui::GetTextLineHeight() * 4;

Expand Down Expand Up @@ -211,6 +223,7 @@ void runImgui(std::shared_ptr<ChatHistory> history) {
ImGui::PopStyleVar();

// Text input area flags
// Capture input from IME (for Asian languages, Windows OS)
ImGuiInputTextFlags input_flags = ImGuiInputTextFlags_EnterReturnsTrue |
ImGuiInputTextFlags_CtrlEnterForNewLine |
ImGuiInputTextFlags_AllowTabInput |
Expand All @@ -230,7 +243,6 @@ void runImgui(std::shared_ptr<ChatHistory> history) {
};

ImGui::End();
break;

// Rendering
ImGui::Render();
Expand All @@ -245,6 +257,8 @@ void runImgui(std::shared_ptr<ChatHistory> history) {
glfwSwapBuffers(window);
}

std::cout << "Main ImGUI loop ended" << std::endl;

ImGui_ImplOpenGL3_Shutdown();
ImGui_ImplGlfw_Shutdown();
ImGui::DestroyContext();
Expand Down
90 changes: 5 additions & 85 deletions customchar/main.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
#include "customchar/character.h"
#include "customchar/common.h"
#include "customchar/helpers.h"
#include "customchar/llm.h"
#include "customchar/speech_recognizer.h"
#include "customchar/voice_recorder.h"
#include "customchar/voice_synthesizer.h"

#include <cassert>
#include <cstdio>
Expand All @@ -27,88 +24,11 @@ int main(int argc, char** argv) {
exit(1);
}

// CC components
SpeechRecognizer speech_recognizer(
params.sr_model_path, params.language, params.audio_ctx, params.n_threads,
params.max_tokens, params.translate, params.no_timestamps,
params.print_special, params.speed_up);
VoiceRecorder voice_recoder;
VoiceSynthesizer voice_synthesizer;
// Create character
Character character(params);

// Load LLM
LLM llm(params.llm_model_path, params.path_session, params.person,
params.bot_name);
llm.eval_model();

// Start talking
printf("Start speaking in the microphone\n");
printf("%s%s", params.person.c_str(), params.chat_symb.c_str());
fflush(stdout);

// Clear audio buffer to avoid processing old audio
voice_recoder.clear_audio_buffer();

std::vector<llama_token> embd;
int n_iter = 0;
bool is_running = true;
while (is_running) {
// Handle Ctrl + C
is_running = sdl_poll_events();
if (!is_running) {
break;
}

// Delay
std::this_thread::sleep_for(std::chrono::milliseconds(100));
float prob = 0.0f;
int64_t t_ms = 0;

// Sample audio
voice_recoder.sample_audio();
if (!voice_recoder.finished_talking()) {
continue;
}

// Get recorded audio
std::vector<float> audio_buff;
voice_recoder.get_audio(audio_buff);

// Recognize speech
std::string text_heard =
speech_recognizer.recognize(audio_buff, prob, t_ms);

// Tokenize user input
auto tokens = llm.tokenize(text_heard, false);

// Skip if nothing was heard
if (text_heard.empty() || tokens.empty()) {
printf("Heard nothing, skipping ...\n");
voice_recoder.clear_audio_buffer();
continue;
}

// Append the new input tokens to the session_tokens vector
llm.add_tokens_to_current_session(tokens);

// Print user input
text_heard.insert(0, 1, ' ');
text_heard += "\n" + params.bot_name + params.chat_symb;
printf("%s%s%s", "\033[1m", text_heard.c_str(), "\033[0m");
fflush(stdout);

// Get answer from LLM
embd = llm.tokenize(text_heard, false);

// Get answer from LLM
std::string text_to_speak = llm.get_answer(embd);

// Play speak
voice_synthesizer.say(text_to_speak);

// Clean up
voice_recoder.clear_audio_buffer();
++n_iter;
}
// Run character
character.run();

return 0;
}
Loading

0 comments on commit c1851ed

Please sign in to comment.