Skip to content

Commit

Permalink
Add base::tok::split_tokens() as a better alternative to split_strings()
Browse files Browse the repository at this point in the history
  • Loading branch information
dacap committed Jun 6, 2024
1 parent c82fd98 commit 5de03f6
Show file tree
Hide file tree
Showing 2 changed files with 157 additions and 0 deletions.
113 changes: 113 additions & 0 deletions base/tok.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// LAF Base Library
// Copyright (c) 2024 Igara Studio S.A.
// Copyright (c) 2020 David Capello
//
// This file is released under the terms of the MIT license.
// Read LICENSE.txt for more information.
//
// Based on https://github.com/dacap/tok

#ifndef BASE_TOK_H_INCLUDED
#define BASE_TOK_H_INCLUDED
#pragma once

#include <iterator>
#include <string>
#include <type_traits>

namespace base {
namespace tok {

struct ignore_empties { enum { allow_empty = false }; };
struct include_empties { enum { allow_empty = true }; };

template<typename T, typename EmptyPolicy>
class token_iterator {
public:
using iterator_category = std::forward_iterator_tag;
using internal_iterator = typename T::const_iterator;
using char_type = typename T::value_type;
using value_type = typename std::remove_const<T>::type;
using difference_type = typename T::difference_type;
using pointer = T*;
using reference = T&;
using const_reference = const T&;

token_iterator() = delete;
token_iterator(const token_iterator&) = default;
token_iterator(const internal_iterator& begin,
const internal_iterator& end,
char_type chr) :
begin_(begin),
inter_(begin),
end_(end),
chr_(chr) {
operator++(); // Find first word to fill "str_" field
}

token_iterator& operator++() {
if constexpr (EmptyPolicy::allow_empty) {
if (inter_ != end_ && *inter_ == chr_) {
++inter_;
}
}
else {
while (inter_ != end_ && *inter_ == chr_) {
++inter_;
}
}
begin_ = inter_;
while (inter_ != end_ && *inter_ != chr_) {
++inter_;
}
str_.assign(begin_, inter_);
return *this;
}

const_reference operator*() {
return str_;
}

bool operator!=(const token_iterator& that) const {
return (begin_ != that.end_);
}

private:
internal_iterator begin_, inter_, end_;
char_type chr_;
value_type str_;
};

template<typename T, typename Empties>
class token_range {
public:
using char_type = typename T::value_type;
using iterator = token_iterator<T, Empties>;

token_range(const T& str, char_type chr) : str_(str), chr_(chr) { }

iterator begin() const { return iterator(str_.begin(), str_.end(), chr_); }
iterator end() const { return iterator(str_.end(), str_.end(), chr_); }

private:
const T& str_;
char_type chr_;
};

template<typename T>
token_range<T, ignore_empties>
split_tokens(const T& str,
typename T::value_type chr) {
return token_range<T, ignore_empties>(str, chr);
}

template<typename T>
token_range<T, include_empties>
csv(const T& str,
typename T::value_type chr = ',') {
return token_range<T, include_empties>(str, chr);
}

} // namespace tok
} // namespace base

#endif
44 changes: 44 additions & 0 deletions base/tok_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// LAF Base Library
// Copyright (c) 2024 Igara Studio S.A.
// Copyright (c) 2020 David Capello
//
// This file is released under the terms of the MIT license.
// Read LICENSE.txt for more information.
//
// Based on https://github.com/dacap/tok

#include <gtest/gtest.h>

#include <iostream>
#include <string>
#include <vector>

#include "base/tok.h"

TEST(Tok, SplitTokens)
{
int i = 0;
auto a_result = std::vector<std::string>{ "This", "is", "a", "phrase.", "Several", "whitespaces", "are", "ignored." };
std::string a = "This is a phrase. Several whitespaces are ignored.";
for (auto& tok : base::tok::split_tokens(a, ' ')) {
std::cout << "\"" << tok << "\"\n";
EXPECT_EQ(tok, a_result[i++]);
}
}

TEST(Tok, Csv)
{
int i = 0;
auto b_result = std::vector<std::string>{ "In comma", "separated", "", "values", "", "", "empties are included" };
std::string b = "In comma,separated,,values,,,empties are included";
for (auto& tok : base::tok::csv(b, ',')) {
std::cout << "\"" << tok << "\"\n";
EXPECT_EQ(tok, b_result[i++]);
}
}

int main(int argc, char** argv)
{
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

0 comments on commit 5de03f6

Please sign in to comment.