diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 51b90ee..cea5c43 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -28,3 +28,4 @@ jobs:
 
           # Run unit tests
           RESOURCES_PATH=test/resources build/sentencepiece_test
+          build/tiktoken_test
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0514387..9d0c865 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -53,4 +53,9 @@ if(TOKENIZERS_BUILD_TEST)
     sentencepiece_test PUBLIC third-party/sentencepiece/src
                               third-party/sentencepiece include GTEST_INCLUDE_PATH)
   target_link_libraries(sentencepiece_test PUBLIC tokenizers gtest_main)
+
+  # tiktoken tests
+  add_executable(tiktoken_test test/test_base64.cpp)
+  target_include_directories(tiktoken_test PUBLIC include GTEST_INCLUDE_PATH)
+  target_link_libraries(tiktoken_test PUBLIC gtest_main)
 endif()
diff --git a/test/test_base64.cpp b/test/test_base64.cpp
new file mode 100644
index 0000000..99c9f79
--- /dev/null
+++ b/test/test_base64.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "base64.h"
+#include "gtest/gtest.h"
+
+namespace tokenizers {
+
+// Decoding the padded base64 text "bGxhbWE=" must succeed and yield "llama".
+TEST(Base64Test, TestDecodeSmoke) {
+  std::string text = "bGxhbWE=";
+  auto result = base64::decode(text);
+  // Fatal assertion: stop here rather than read the value of a failed result.
+  ASSERT_TRUE(result.ok());
+  EXPECT_EQ(result.get(), "llama");
+}
+
+// Decoding an empty string must fail with Error::Base64DecodeFailure.
+TEST(Base64Test, TestDecodeEmptyStringReturnsError) {
+  std::string text = "";
+  auto result = base64::decode(text);
+  EXPECT_FALSE(result.ok());
+  EXPECT_EQ(result.error(), Error::Base64DecodeFailure);
+}
+
+// Decoding the raw text "llama" must fail with Error::Base64DecodeFailure.
+TEST(Base64Test, TestInvalidStringDecodeReturnsError) {
+  std::string text = "llama";
+  auto result = base64::decode(text);
+  EXPECT_FALSE(result.ok());
+  EXPECT_EQ(result.error(), Error::Base64DecodeFailure);
+}
+
+} // namespace tokenizers