Skip to content

Commit

Permalink
Fix windows debug crash (#162)
Browse files Browse the repository at this point in the history
  • Loading branch information
wangfenjin authored Nov 25, 2024
1 parent b2c59ee commit 632fd7b
Showing 1 changed file with 10 additions and 7 deletions.
17 changes: 10 additions & 7 deletions src/simple_tokenizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@

#include <algorithm>
#include <cctype>
#include <iostream>
#include <memory>
#include <sstream>
#include <cstdlib>
#include <set>
#include <string>
#include <utility>
#include <vector>

namespace simple_tokenizer {
Expand All @@ -22,13 +20,18 @@ PinYin *SimpleTokenizer::get_pinyin() {
}

static TokenCategory from_char(char c) {
if (std::isdigit(c)) {
auto uc = static_cast<unsigned char>(c);
// ASCII is limited to 0..127; larger values are non-ASCII bytes
if (uc > 127) {
return TokenCategory::OTHER;
}
if (std::isdigit(uc)) {
return TokenCategory::DIGIT;
}
if (std::isspace(c) || std::iscntrl(c)) {
if (std::isspace(uc) || std::iscntrl(uc)) {
return TokenCategory::SPACE;
}
if (std::isalpha(c)) {
if (std::isalpha(uc)) {
return TokenCategory::ASCII_ALPHABETIC;
}
return TokenCategory::OTHER;
Expand Down

0 comments on commit 632fd7b

Please sign in to comment.