From 632fd7ba710ff54b8c6de1220bb47253132b94e5 Mon Sep 17 00:00:00 2001
From: Wang Fenjin
Date: Mon, 25 Nov 2024 15:58:37 +0800
Subject: [PATCH] Fix windows debug crash (#162)

---
 src/simple_tokenizer.cc | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/simple_tokenizer.cc b/src/simple_tokenizer.cc
index 955fe5e..983670e 100644
--- a/src/simple_tokenizer.cc
+++ b/src/simple_tokenizer.cc
@@ -2,11 +2,9 @@
 
 #include
 #include
-#include
-#include
-#include
+#include
+#include
 #include
-#include
 #include
 
 namespace simple_tokenizer {
@@ -22,13 +20,18 @@ PinYin *SimpleTokenizer::get_pinyin() {
 }
 
 static TokenCategory from_char(char c) {
-  if (std::isdigit(c)) {
+  auto uc = static_cast<unsigned char>(c);
+  // ASCII should in 0..127
+  if (uc > 127) {
+    return TokenCategory::OTHER;
+  }
+  if (std::isdigit(uc)) {
     return TokenCategory::DIGIT;
   }
-  if (std::isspace(c) || std::iscntrl(c)) {
+  if (std::isspace(uc) || std::iscntrl(uc)) {
     return TokenCategory::SPACE;
   }
-  if (std::isalpha(c)) {
+  if (std::isalpha(uc)) {
     return TokenCategory::ASCII_ALPHABETIC;
   }
   return TokenCategory::OTHER;