Compile with CMake

nrl-ai · Jul 25, 2023 · c1851ed · c1851ed
1 parent b3b2994
commit c1851ed
Show file tree

Hide file tree

Showing 27 changed files with 208 additions and 110 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -21,26 +21,79 @@ message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
 # Add whisper-cpp
 add_subdirectory(libs/whisper-cpp)
 
-# Talk with Llama V2
-set(TARGET customchar)
-add_executable(
+# Build customchar-core: library target holding the LLM, speech recognition, voice recording and voice synthesis sources
+set(TARGET customchar-core)
+add_library(
     ${TARGET}
-    customchar/main.cpp
     customchar/common.cpp
     customchar/llm.cpp
     customchar/speech_recognizer.cpp
     customchar/voice_synthesizer.cpp
     customchar/voice_recorder.cpp
     customchar/helpers.cpp
-
     libs/llama-cpp/llama.cpp
     libs/whisper-cpp/examples/common.cpp
     libs/whisper-cpp/examples/common-sdl.cpp
 )
 target_include_directories(
-    ${TARGET} PRIVATE
+    ${TARGET} PUBLIC
     ${SDL2_INCLUDE_DIRS}
     libs
     .
 )
-target_link_libraries(${TARGET} PRIVATE ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} whisper)
+target_link_libraries(${TARGET} PUBLIC ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} whisper)
+
+# CustomChar - cli: command-line executable built from main.cpp on top of customchar-core
+add_executable(
+    customchar-cli
+    customchar/main.cpp
+)
+target_link_libraries(customchar-cli PRIVATE customchar-core)
+
+# BUILD_GUI (default ON) additionally builds the ImGui front end "customchar"; it requires OpenGL, GLEW and glfw3.
+option(BUILD_GUI "Build GUI" ON)
+if (BUILD_GUI)
+    find_package(OpenGL REQUIRED)
+    find_package(GLEW REQUIRED)
+    find_package(glfw3 REQUIRED)
+
+    # ImGui sources plus the GLFW and OpenGL3 backends, compiled into one static library.
+    set(IMGUI_DIR libs/imgui)
+    set(IMGUI_SRCS
+        ${IMGUI_DIR}/imgui.cpp
+        ${IMGUI_DIR}/imgui_tables.cpp
+        ${IMGUI_DIR}/imgui_draw.cpp
+        ${IMGUI_DIR}/imgui_widgets.cpp
+        ${IMGUI_DIR}/backends/imgui_impl_glfw.cpp
+        ${IMGUI_DIR}/backends/imgui_impl_opengl3.cpp
+    )
+    add_library(imgui STATIC ${IMGUI_SRCS})
+    target_include_directories(imgui PUBLIC ${IMGUI_DIR}/include ${IMGUI_DIR} ${IMGUI_DIR}/backends)
+
+    # Windowing/GL libraries hang off imgui (PUBLIC) so that only GUI targets link them, not customchar-cli.
+    # The platform macros are still defined on ${TARGET} (customchar-core), as before; TODO confirm who reads them.
+    if (UNIX AND NOT APPLE)
+        message(STATUS "Building for Linux")
+        target_link_libraries(imgui PUBLIC OpenGL::GL GLEW::GLEW glfw)
+        target_compile_definitions(${TARGET} PUBLIC LINUX)
+    elseif (APPLE)
+        message(STATUS "Building for Mac OS X")
+        target_link_libraries(imgui PUBLIC OpenGL::GL "-framework Cocoa" "-framework IOKit" "-framework CoreVideo" glfw)
+        target_compile_definitions(${TARGET} PUBLIC APPLE)
+        target_include_directories(imgui PUBLIC /usr/local/include /opt/local/include /opt/homebrew/include)
+    else()
+        message(STATUS "Building for Windows")
+        target_link_libraries(imgui PUBLIC glfw OpenGL::GL imm32)
+        target_compile_definitions(${TARGET} PUBLIC WINDOWS)
+    endif()
+
+    # GUI executable: main-ui.cpp and the chat widgets, linked against customchar-core and imgui.
+    add_executable(
+        customchar
+        customchar/main-ui.cpp
+        customchar/gui/chat_history.cpp
+        customchar/gui/chat_message.cpp
+    )
+    target_include_directories(customchar PRIVATE customchar/gui)
+    target_link_libraries(customchar PRIVATE customchar-core imgui)
+endif()
diff --git a/customchar-ui/.gitignore b/customchar-ui/.gitignore
diff --git a/customchar-ui/README.md b/customchar-ui/README.md
diff --git a/customchar/character.h b/customchar/character.h
@@ -0,0 +1,134 @@
+#pragma once
+
+#include "customchar/common.h"
+#include "customchar/helpers.h"
+#include "customchar/llm.h"
+#include "customchar/speech_recognizer.h"
+#include "customchar/voice_recorder.h"
+#include "customchar/voice_synthesizer.h"
+
+#include <cassert>
+#include <chrono>
+#include <cstdint>
+#include <cstdio>
+#include <fstream>
+#include <memory>
+#include <regex>
+#include <string>
+#include <thread>
+#include <utility>
+#include <vector>
+
+namespace CC {
+
+/**
+ * @brief Voice-driven chat character.
+ *
+ * Owns a SpeechRecognizer, VoiceRecorder, VoiceSynthesizer and LLM and wires
+ * them into a listen -> transcribe -> answer -> speak loop (see run()).
+ */
+class Character {
+ private:
+  CCParams params;
+  std::shared_ptr<SpeechRecognizer> speech_recognizer;
+  std::shared_ptr<VoiceRecorder> voice_recoder;
+  std::shared_ptr<VoiceSynthesizer> voice_synthesizer;
+  std::shared_ptr<LLM> llm;
+
+ public:
+  /**
+   * @brief Creates all components from the given parameters and calls
+   * LLM::eval_model().
+   *
+   * Members are initialized in declaration order, so `params` is already set
+   * when the component constructors read from it.
+   *
+   * @param init_params Settings for the recognizer and the LLM; kept in
+   * `params` for run().
+   */
+  Character(CCParams init_params)
+      : params(std::move(init_params)),
+        speech_recognizer(std::make_shared<SpeechRecognizer>(
+            params.sr_model_path, params.language, params.audio_ctx,
+            params.n_threads, params.max_tokens, params.translate,
+            params.no_timestamps, params.print_special, params.speed_up)),
+        voice_recoder(std::make_shared<VoiceRecorder>()),
+        voice_synthesizer(std::make_shared<VoiceSynthesizer>()),
+        llm(std::make_shared<LLM>(params.llm_model_path, params.path_session,
+                                  params.person, params.bot_name)) {
+    llm->eval_model();
+  }
+
+  /**
+   * @brief Runs the blocking conversation loop until sdl_poll_events()
+   * returns false.
+   *
+   * Each pass waits 100 ms, samples the microphone and, once the recorder
+   * reports that the speaker has finished, transcribes the audio, asks the LLM
+   * for an answer and speaks it.
+   */
+  void run() {
+    // Prompt the user
+    printf("Start speaking in the microphone\n");
+    printf("%s%s", params.person.c_str(), params.chat_symb.c_str());
+    fflush(stdout);
+
+    // Clear audio buffer to avoid processing old audio
+    voice_recoder->clear_audio_buffer();
+
+    // Stop as soon as event polling reports a quit request (e.g. Ctrl + C)
+    while (sdl_poll_events()) {
+      // Throttle the polling loop
+      std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+      // Sample audio and wait until the speaker has finished talking
+      voice_recoder->sample_audio();
+      if (!voice_recoder->finished_talking()) {
+        continue;
+      }
+
+      // Get recorded audio
+      std::vector<float> audio_buff;
+      voice_recoder->get_audio(audio_buff);
+
+      // Recognize speech (prob and t_ms are not used afterwards)
+      float prob = 0.0f;
+      int64_t t_ms = 0;
+      std::string text_heard =
+          speech_recognizer->recognize(audio_buff, prob, t_ms);
+
+      // Tokenize user input
+      auto tokens = llm->tokenize(text_heard, false);
+
+      // Skip if nothing was heard
+      if (text_heard.empty() || tokens.empty()) {
+        printf("Heard nothing, skipping ...\n");
+        voice_recoder->clear_audio_buffer();
+        continue;
+      }
+
+      // Append the new input tokens to the current LLM session
+      llm->add_tokens_to_current_session(tokens);
+
+      // Echo the user input in bold, followed by the bot's prompt prefix
+      text_heard.insert(0, 1, ' ');
+      text_heard += "\n" + params.bot_name + params.chat_symb;
+      printf("%s%s%s", "\033[1m", text_heard.c_str(), "\033[0m");
+      fflush(stdout);
+
+      // Tokenize the decorated text; these tokens are passed to the LLM
+      std::vector<llama_token> embd = llm->tokenize(text_heard, false);
+
+      // Get answer from LLM
+      std::string text_to_speak = llm->get_answer(embd);
+
+      // Speak the answer
+      voice_synthesizer->say(text_to_speak);
+
+      // Drop the audio that was just processed
+      voice_recoder->clear_audio_buffer();
+    }
+  }
+};
+
+}  // namespace CC
diff --git a/customchar-ui/Makefile → customchar/gui/Makefile b/customchar-ui/Makefile → customchar/gui/Makefile
diff --git a/customchar-ui/chat_history.cpp → customchar/gui/chat_history.cpp b/customchar-ui/chat_history.cpp → customchar/gui/chat_history.cpp
diff --git a/customchar-ui/chat_history.h → customchar/gui/chat_history.h b/customchar-ui/chat_history.h → customchar/gui/chat_history.h
diff --git a/customchar-ui/chat_message.cpp → customchar/gui/chat_message.cpp b/customchar-ui/chat_message.cpp → customchar/gui/chat_message.cpp
diff --git a/customchar-ui/chat_message.h → customchar/gui/chat_message.h b/customchar-ui/chat_message.h → customchar/gui/chat_message.h
diff --git a/customchar-ui/main.cpp → customchar/main-ui.cpp b/customchar-ui/main.cpp → customchar/main-ui.cpp
@@ -62,6 +62,17 @@ bool handleSend(char* text, std::shared_ptr<ChatHistory> history) {
   return true;
 }
 
+/**
+ * @brief Placeholder check for the IP_ADDRESS and PORT global variables
+ *
+ * @return true Always, for now: no validation is implemented yet
+ * @return false Never returned by the current implementation
+ */
+bool connectionDataIsValid() {
+  // TODO: Validate the format of IP_ADDRESS and PORT
+  return true;
+}
+
 /**
  * @brief Main ImGUI loop
  */
@@ -134,7 +145,7 @@ void runImgui(std::shared_ptr<ChatHistory> history) {
   // - Remember that in C/C++ if you want to include a backslash \ in a string
   // literal you need to write a double backslash \\ !
   // io.Fonts->AddFontDefault();
-  // io.Fonts->AddFontFromFileTTF("BaiJamjuree-Regular.ttf", 16.0f);
+  // io.Fonts->AddFontFromFileTTF("../../misc/fonts/Roboto-Medium.ttf", 16.0f);
   // io.Fonts->AddFontFromFileTTF("../../misc/fonts/Cousine-Regular.ttf", 15.0f);
   // io.Fonts->AddFontFromFileTTF("../../misc/fonts/DroidSans.ttf", 16.0f);
   // io.Fonts->AddFontFromFileTTF("../../misc/fonts/ProggyTiny.ttf", 10.0f);
@@ -173,6 +184,7 @@ void runImgui(std::shared_ptr<ChatHistory> history) {
      * @brief Shows connection window if not connected, otherwise show
      * basic chat window
      */
+
     // Is connected
     int TEXTBOX_HEIGHT = ImGui::GetTextLineHeight() * 4;
 
@@ -211,6 +223,7 @@ void runImgui(std::shared_ptr<ChatHistory> history) {
     ImGui::PopStyleVar();
 
     // Text input area flags
+    // Capture input from IME (for Asian languages, Windows OS)
     ImGuiInputTextFlags input_flags = ImGuiInputTextFlags_EnterReturnsTrue |
                                       ImGuiInputTextFlags_CtrlEnterForNewLine |
                                       ImGuiInputTextFlags_AllowTabInput |
@@ -230,7 +243,6 @@ void runImgui(std::shared_ptr<ChatHistory> history) {
     };
 
     ImGui::End();
-    break;
 
     // Rendering
     ImGui::Render();
@@ -245,6 +257,8 @@ void runImgui(std::shared_ptr<ChatHistory> history) {
     glfwSwapBuffers(window);
   }
 
+  std::cout << "Main ImGUI loop ended" << std::endl;
+
   ImGui_ImplOpenGL3_Shutdown();
   ImGui_ImplGlfw_Shutdown();
   ImGui::DestroyContext();

diff --git a/customchar/main.cpp b/customchar/main.cpp
@@ -1,9 +1,6 @@
+#include "customchar/character.h"
 #include "customchar/common.h"
 #include "customchar/helpers.h"
-#include "customchar/llm.h"
-#include "customchar/speech_recognizer.h"
-#include "customchar/voice_recorder.h"
-#include "customchar/voice_synthesizer.h"
 
 #include <cassert>
 #include <cstdio>
@@ -27,88 +24,11 @@ int main(int argc, char** argv) {
     exit(1);
   }
 
-  // CC components
-  SpeechRecognizer speech_recognizer(
-      params.sr_model_path, params.language, params.audio_ctx, params.n_threads,
-      params.max_tokens, params.translate, params.no_timestamps,
-      params.print_special, params.speed_up);
-  VoiceRecorder voice_recoder;
-  VoiceSynthesizer voice_synthesizer;
+  // Create character
+  Character character(params);
 
-  // Load LLM
-  LLM llm(params.llm_model_path, params.path_session, params.person,
-          params.bot_name);
-  llm.eval_model();
-
-  // Start talking
-  printf("Start speaking in the microphone\n");
-  printf("%s%s", params.person.c_str(), params.chat_symb.c_str());
-  fflush(stdout);
-
-  // Clear audio buffer to avoid processing old audio
-  voice_recoder.clear_audio_buffer();
-
-  std::vector<llama_token> embd;
-  int n_iter = 0;
-  bool is_running = true;
-  while (is_running) {
-    // Handle Ctrl + C
-    is_running = sdl_poll_events();
-    if (!is_running) {
-      break;
-    }
-
-    // Delay
-    std::this_thread::sleep_for(std::chrono::milliseconds(100));
-    float prob = 0.0f;
-    int64_t t_ms = 0;
-
-    // Sample audio
-    voice_recoder.sample_audio();
-    if (!voice_recoder.finished_talking()) {
-      continue;
-    }
-
-    // Get recorded audio
-    std::vector<float> audio_buff;
-    voice_recoder.get_audio(audio_buff);
-
-    // Recognize speech
-    std::string text_heard =
-        speech_recognizer.recognize(audio_buff, prob, t_ms);
-
-    // Tokenize user input
-    auto tokens = llm.tokenize(text_heard, false);
-
-    // Skip if nothing was heard
-    if (text_heard.empty() || tokens.empty()) {
-      printf("Heard nothing, skipping ...\n");
-      voice_recoder.clear_audio_buffer();
-      continue;
-    }
-
-    // Append the new input tokens to the session_tokens vector
-    llm.add_tokens_to_current_session(tokens);
-
-    // Print user input
-    text_heard.insert(0, 1, ' ');
-    text_heard += "\n" + params.bot_name + params.chat_symb;
-    printf("%s%s%s", "\033[1m", text_heard.c_str(), "\033[0m");
-    fflush(stdout);
-
-    // Get answer from LLM
-    embd = llm.tokenize(text_heard, false);
-
-    // Get answer from LLM
-    std::string text_to_speak = llm.get_answer(embd);
-
-    // Play speak
-    voice_synthesizer.say(text_to_speak);
-
-    // Clean up
-    voice_recoder.clear_audio_buffer();
-    ++n_iter;
-  }
+  // Run character
+  character.run();
 
   return 0;
 }