// LAF Base Library
// Copyright (c) 2024 Igara Studio S.A.
// Copyright (c) 2020 David Capello
//
// This file is released under the terms of the MIT license.
// Read LICENSE.txt for more information.
//
// Based on https://github.com/dacap/tok
//
// base/tok.h -- introduced by the patch "Add base::tok::split_tokens() as a
// better alternative to split_strings()".

#ifndef BASE_TOK_H_INCLUDED
#define BASE_TOK_H_INCLUDED
#pragma once

#include <iterator>
#include <string>
#include <type_traits>

namespace base {
namespace tok {

// Policies selecting what happens to empty tokens (two adjacent
// separators, or a separator at either end of the input).
struct ignore_empties { enum { allow_empty = false }; };
struct include_empties { enum { allow_empty = true }; };

// Iterator that walks the tokens of a [begin, end) character range
// separated by a single character "chr".
//
// Each token is copied into an internal string, and operator*() returns
// a reference to that copy; the reference is overwritten by the next
// operator++().
//
// With ignore_empties, runs of separators are collapsed and
// leading/trailing separators produce nothing. With include_empties,
// every separator delimits a field, so N separators in a non-empty
// input produce N+1 tokens (",a" gives "" and "a"; "a," gives "a" and
// ""). An empty input produces no tokens under either policy.
template<typename T, typename EmptyPolicy>
class token_iterator {
public:
  using iterator_category = std::forward_iterator_tag;
  using internal_iterator = typename T::const_iterator;
  using char_type = typename T::value_type;
  using value_type = typename std::remove_const<T>::type;
  using difference_type = typename T::difference_type;
  using pointer = T*;
  using reference = T&;
  using const_reference = const T&;

  token_iterator() = delete;
  token_iterator(const token_iterator&) = default;
  token_iterator& operator=(const token_iterator&) = default;

  // Creates an iterator positioned on the first token of [begin, end).
  // When begin == end the iterator is created already exhausted, which
  // is how token_range builds its end() sentinel.
  token_iterator(const internal_iterator& begin,
                 const internal_iterator& end,
                 char_type chr) :
    begin_(begin),
    inter_(begin),
    end_(end),
    chr_(chr) {
    if (inter_ == end_)
      finish();
    else
      read_token(); // Find first word to fill "str_" field
  }

  // Advances to the next token. Incrementing an exhausted iterator is a
  // no-op.
  token_iterator& operator++() {
    if (done_)
      return *this;

    // The previous token stopped either at the end of the input or on
    // the separator that terminated it.
    if (inter_ == end_) {
      finish();
      return *this;
    }

    ++inter_; // Step over the separator that closed the previous token
    read_token();
    return *this;
  }

  // Current token; an empty string once the iterator is exhausted.
  const_reference operator*() const {
    return str_;
  }

  // Two iterators are equal when both are exhausted, or when neither is
  // and both are positioned on the same token start.
  bool operator==(const token_iterator& that) const {
    return done_ == that.done_ && (done_ || begin_ == that.begin_);
  }

  bool operator!=(const token_iterator& that) const {
    return !(*this == that);
  }

private:
  // The policies expose "allow_empty" as an unnamed enumerator; convert
  // it once to a real bool so it can be used in "if constexpr".
  static constexpr bool keep_empties = (EmptyPolicy::allow_empty != 0);

  // Reads the token that starts at inter_ into str_ and leaves inter_
  // on the separator that ends it (or on end_).
  void read_token() {
    if constexpr (!keep_empties) {
      // Collapse any run of separators in front of the token.
      while (inter_ != end_ && *inter_ == chr_)
        ++inter_;

      // Only separators were left: there is no further token.
      if (inter_ == end_) {
        finish();
        return;
      }
    }

    begin_ = inter_;
    while (inter_ != end_ && *inter_ != chr_)
      ++inter_;
    str_.assign(begin_, inter_);
  }

  // Marks the iterator as exhausted. A separate flag is needed because
  // with include_empties a valid (empty) last token also starts at end_.
  void finish() {
    begin_ = inter_ = end_;
    str_.assign(end_, end_);
    done_ = true;
  }

  internal_iterator begin_, inter_, end_;
  char_type chr_;
  value_type str_;
  bool done_ = false;
};

// Range adaptor usable in a range-based for loop.
//
// It stores a reference to the string, not a copy: the string passed to
// the constructor must stay alive (and unmodified) for as long as the
// range or any iterator obtained from it is used.
template<typename T, typename EmptyPolicy>
class token_range {
public:
  using char_type = typename T::value_type;
  using iterator = token_iterator<T, EmptyPolicy>;

  token_range(const T& str, char_type chr) : str_(str), chr_(chr) { }

  iterator begin() const { return iterator(str_.begin(), str_.end(), chr_); }
  iterator end() const { return iterator(str_.end(), str_.end(), chr_); }

private:
  const T& str_;
  char_type chr_;
};

// Splits "str" by "chr", skipping empty tokens. Example:
//
//   std::string a = "This is a phrase.   Several whitespaces are ignored.";
//   for (auto& tok : base::tok::split_tokens(a, ' '))
//     ... // "This", "is", "a", "phrase.", "Several", "whitespaces",
//         // "are", "ignored."
//
// "str" must outlive the returned range (do not pass a temporary).
template<typename T>
token_range<T, ignore_empties>
split_tokens(const T& str,
             typename T::value_type chr) {
  return token_range<T, ignore_empties>(str, chr);
}

// Splits "str" by "chr" (a comma by default), keeping empty fields.
// Example:
//
//   std::string b = "In comma,separated,,values,,,empties are included";
//   for (auto& tok : base::tok::csv(b, ','))
//     ... // "In comma", "separated", "", "values", "", "",
//         // "empties are included"
//
// "str" must outlive the returned range (do not pass a temporary).
template<typename T>
token_range<T, include_empties>
csv(const T& str,
    typename T::value_type chr = ',') {
  return token_range<T, include_empties>(str, chr);
}

} // namespace tok
} // namespace base

#endif