forked from aseprite/laf
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add base::tok::split_tokens() as a better alternative to split_strings()
- Loading branch information
Showing
2 changed files
with
157 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
// LAF Base Library | ||
// Copyright (c) 2024 Igara Studio S.A. | ||
// Copyright (c) 2020 David Capello | ||
// | ||
// This file is released under the terms of the MIT license. | ||
// Read LICENSE.txt for more information. | ||
// | ||
// Based on https://github.com/dacap/tok | ||
|
||
#ifndef BASE_TOK_H_INCLUDED | ||
#define BASE_TOK_H_INCLUDED | ||
#pragma once | ||
|
||
#include <iterator>
#include <string>
#include <type_traits>
|
||
namespace base { | ||
namespace tok { | ||
|
||
struct ignore_empties { enum { allow_empty = false }; }; | ||
struct include_empties { enum { allow_empty = true }; }; | ||
|
||
template<typename T, typename EmptyPolicy> | ||
class token_iterator { | ||
public: | ||
using iterator_category = std::forward_iterator_tag; | ||
using internal_iterator = typename T::const_iterator; | ||
using char_type = typename T::value_type; | ||
using value_type = typename std::remove_const<T>::type; | ||
using difference_type = typename T::difference_type; | ||
using pointer = T*; | ||
using reference = T&; | ||
using const_reference = const T&; | ||
|
||
token_iterator() = delete; | ||
token_iterator(const token_iterator&) = default; | ||
token_iterator(const internal_iterator& begin, | ||
const internal_iterator& end, | ||
char_type chr) : | ||
begin_(begin), | ||
inter_(begin), | ||
end_(end), | ||
chr_(chr) { | ||
operator++(); // Find first word to fill "str_" field | ||
} | ||
|
||
token_iterator& operator++() { | ||
if constexpr (EmptyPolicy::allow_empty) { | ||
if (inter_ != end_ && *inter_ == chr_) { | ||
++inter_; | ||
} | ||
} | ||
else { | ||
while (inter_ != end_ && *inter_ == chr_) { | ||
++inter_; | ||
} | ||
} | ||
begin_ = inter_; | ||
while (inter_ != end_ && *inter_ != chr_) { | ||
++inter_; | ||
} | ||
str_.assign(begin_, inter_); | ||
return *this; | ||
} | ||
|
||
const_reference operator*() { | ||
return str_; | ||
} | ||
|
||
bool operator!=(const token_iterator& that) const { | ||
return (begin_ != that.end_); | ||
} | ||
|
||
private: | ||
internal_iterator begin_, inter_, end_; | ||
char_type chr_; | ||
value_type str_; | ||
}; | ||
|
||
template<typename T, typename Empties> | ||
class token_range { | ||
public: | ||
using char_type = typename T::value_type; | ||
using iterator = token_iterator<T, Empties>; | ||
|
||
token_range(const T& str, char_type chr) : str_(str), chr_(chr) { } | ||
|
||
iterator begin() const { return iterator(str_.begin(), str_.end(), chr_); } | ||
iterator end() const { return iterator(str_.end(), str_.end(), chr_); } | ||
|
||
private: | ||
const T& str_; | ||
char_type chr_; | ||
}; | ||
|
||
template<typename T> | ||
token_range<T, ignore_empties> | ||
split_tokens(const T& str, | ||
typename T::value_type chr) { | ||
return token_range<T, ignore_empties>(str, chr); | ||
} | ||
|
||
template<typename T> | ||
token_range<T, include_empties> | ||
csv(const T& str, | ||
typename T::value_type chr = ',') { | ||
return token_range<T, include_empties>(str, chr); | ||
} | ||
|
||
} // namespace tok | ||
} // namespace base | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// LAF Base Library | ||
// Copyright (c) 2024 Igara Studio S.A. | ||
// Copyright (c) 2020 David Capello | ||
// | ||
// This file is released under the terms of the MIT license. | ||
// Read LICENSE.txt for more information. | ||
// | ||
// Based on https://github.com/dacap/tok | ||
|
||
#include <gtest/gtest.h> | ||
|
||
#include <iostream> | ||
#include <string> | ||
#include <vector> | ||
|
||
#include "base/tok.h" | ||
|
||
TEST(Tok, SplitTokens) | ||
{ | ||
int i = 0; | ||
auto a_result = std::vector<std::string>{ "This", "is", "a", "phrase.", "Several", "whitespaces", "are", "ignored." }; | ||
std::string a = "This is a phrase. Several whitespaces are ignored."; | ||
for (auto& tok : base::tok::split_tokens(a, ' ')) { | ||
std::cout << "\"" << tok << "\"\n"; | ||
EXPECT_EQ(tok, a_result[i++]); | ||
} | ||
} | ||
|
||
TEST(Tok, Csv) | ||
{ | ||
int i = 0; | ||
auto b_result = std::vector<std::string>{ "In comma", "separated", "", "values", "", "", "empties are included" }; | ||
std::string b = "In comma,separated,,values,,,empties are included"; | ||
for (auto& tok : base::tok::csv(b, ',')) { | ||
std::cout << "\"" << tok << "\"\n"; | ||
EXPECT_EQ(tok, b_result[i++]); | ||
} | ||
} | ||
|
||
int main(int argc, char** argv) | ||
{ | ||
::testing::InitGoogleTest(&argc, argv); | ||
return RUN_ALL_TESTS(); | ||
} |