diff --git a/docs/package.md b/docs/package.md
index 90a51257..b4bf2c12 100644
--- a/docs/package.md
+++ b/docs/package.md
@@ -32,10 +32,10 @@ Basic usage for the language-aware tokenizer:
 from g2p import make_tokenizer
 tokenizer = make_tokenizer("dan")
 for token in tokenizer.tokenize_text("Åh, hvordan har du det, Åbenrå?"):
-    if token["is_word"]:
-        word = token["text"]
+    if token.is_word:
+        word = token.text
     else:
-        interword_punctuation_and_spaces = token["text"]
+        interword_punctuation_and_spaces = token.text
 ```
 
 Note that selecting the tokenizer language is important to make sure punctuation-like letters are handled correctly. For example `:` and `'` are punctuation in English but they will be part of the word tokens in Kanien'kéha (moh):
diff --git a/g2p/__init__.py b/g2p/__init__.py
index d4513d50..e60a2e13 100644
--- a/g2p/__init__.py
+++ b/g2p/__init__.py
@@ -16,10 +16,10 @@
     from g2p import make_tokenizer
     tokenizer = make_tokenizer(lang)
     for token in tokenizer.tokenize_text(input_text):
-        if token["is_word"]:
-            word = token["text"]
+        if token.is_word:
+            word = token.text
         else:
-            interword_punctuation_and_spaces = token["text"]
+            interword_punctuation_and_spaces = token.text
 
     from g2p import get_arpabet_langs
     LANGS, LANG_NAMES = get_arpabet_langs()
@@ -29,7 +29,7 @@
 from typing import Dict, Optional, Tuple, Union
 
 from g2p.exceptions import InvalidLanguageCode, NoPath
-from g2p.shared_types import BaseTokenizer, BaseTransducer
+from g2p.shared_types import BaseTokenizer, BaseTransducer, Token
 
 if sys.version_info < (3, 7):  # pragma: no cover
     sys.exit(
@@ -47,7 +47,7 @@ def make_g2p(  # noqa: C901
     *,
     tokenize: bool = True,
     custom_tokenizer: Optional[BaseTokenizer] = None,
-):
+) -> BaseTransducer:
     """Make a g2p Transducer for mapping text from in_lang to out_lang via the
     shortest path between them.
 
@@ -132,13 +132,13 @@ def make_g2p(  # noqa: C901
     return transducer
 
 
-def tokenize_and_map(tokenizer, transducer, input: str):
+def tokenize_and_map(tokenizer: BaseTokenizer, transducer: BaseTransducer, input: str):
     result = ""
     for token in tokenizer.tokenize_text(input):
-        if token["is_word"]:
-            result += transducer(token["text"]).output_string
+        if token.is_word:
+            result += transducer(token.text).output_string
         else:
-            result += token["text"]
+            result += token.text
     return result
 
 
@@ -213,7 +213,7 @@ def get_arpabet_langs():
     return _langs_cache, _lang_names_cache
 
 
-def make_tokenizer(in_lang=None, out_lang=None, tok_path=None):
+def make_tokenizer(in_lang=None, out_lang=None, tok_path=None) -> BaseTokenizer:
     """Make the tokenizer for input in language in_lang
 
     Logic used when only in_lang is provided:
@@ -234,3 +234,18 @@ def make_tokenizer(in_lang=None, out_lang=None, tok_path=None):
     from g2p.mappings.tokenizer import make_tokenizer as _make_tokenizer
 
     return _make_tokenizer(in_lang, out_lang, tok_path)
+
+
+# Declare what's actually part of g2p's programmatic API.
+# Please don't import anything else from g2p directly.
+__all__ = [
+    "BaseTokenizer",
+    "BaseTransducer",
+    "InvalidLanguageCode",
+    "NoPath",
+    "Token",
+    "get_arpabet_langs",
+    "make_g2p",
+    "make_tokenizer",
+    "tokenize_and_map",
+]
diff --git a/g2p/api_v2.py b/g2p/api_v2.py
index 9ed4551e..680a7f8e 100644
--- a/g2p/api_v2.py
+++ b/g2p/api_v2.py
@@ -300,7 +300,7 @@ def convert_one_writing_or_phonetic_system_to_another(  # noqa: C901
             tokenizer = g2p.make_tokenizer(in_lang)
             tokens = tokenizer.tokenize_text(request.text)
         else:
-            tokens = [{"text": request.text, "is_word": True}]
+            tokens = [g2p.Token(request.text, is_word=True)]
     except NoPath:
         raise HTTPException(
             status_code=400, detail=f"No path from {in_lang} to {out_lang}"
@@ -314,8 +314,8 @@ def convert_one_writing_or_phonetic_system_to_another(  # noqa: C901
     segments: List[Segment] = []
     for token in tokens:
         conversions: List[Conversion] = []
-        if not token["is_word"]:  # non-word, has no in_lang/out_lang
-            tg = TransductionGraph(token["text"])
+        if not token.is_word:  # non-word, has no in_lang/out_lang
+            tg = TransductionGraph(token.text)
             conv = Conversion(substring_alignments=tg.substring_alignments())
             if request.indices:
                 conv.alignments = tg.alignments()
@@ -323,7 +323,7 @@ def convert_one_writing_or_phonetic_system_to_another(  # noqa: C901
                 conv.output_nodes = list(tg.output_string)
             conversions.append(conv)
         else:
-            tg = transducer(token["text"])
+            tg = transducer(token.text)
             if request.compose_from:
                 composed_tiers: List[TransductionGraph] = []
                 for tr, tier in zip(transducer.transducers, tg.tiers):
diff --git a/g2p/mappings/tokenizer.py b/g2p/mappings/tokenizer.py
index d799241b..12b6dd2f 100644
--- a/g2p/mappings/tokenizer.py
+++ b/g2p/mappings/tokenizer.py
@@ -11,10 +11,10 @@
 
 from g2p.exceptions import MappingMissing
 from g2p.log import LOGGER
-from g2p.mappings import Mapping
+from g2p.mappings import Mapping, utils
 from g2p.mappings.langs import LANGS_NETWORK
-from g2p.mappings.utils import get_unicode_category, is_ipa, merge_if_same_label
-from g2p.shared_types import BaseTokenizer
+from g2p.mappings.utils import is_ipa
+from g2p.shared_types import BaseTokenizer, Token
 
 
 class Tokenizer(BaseTokenizer):
@@ -42,23 +42,18 @@ def is_word_character(self, c):
         if self.delim and c == self.delim:
             return True
         assert len(c) <= 1
-        if get_unicode_category(c) in ["letter", "number", "diacritic"]:
+        if utils.get_unicode_category(c) in ["letter", "number", "diacritic"]:
             return True
         return False
 
-    def tokenize_text(self, text):
+    def tokenize_text(self, text: str) -> List[Token]:
         matches = self.tokenize_aux(text)
-        units = [{"text": m, "is_word": self.is_word_character(m)} for m in matches]
+        units = [Token(m, self.is_word_character(m)) for m in matches]
         if self.dot_is_letter:
             for i, unit in enumerate(units):
-                if (
-                    unit["text"] == "."
-                    and i + 1 < len(units)
-                    and units[i + 1]["is_word"]
-                ):
-                    unit["is_word"] = True
-        units = merge_if_same_label(units, "text", "is_word")
-        return units
+                if unit.text == "." and i + 1 < len(units) and units[i + 1].is_word:
+                    unit.is_word = True
+        return utils.merge_same_type_tokens(units)
 
 
 class SpecializedTokenizer(Tokenizer):
@@ -98,6 +93,51 @@ def tokenize_aux(self, text):
         return self.regex.findall(text)
 
 
+class LexiconTokenizer(Tokenizer):
+    """Lexicon-based tokenizer will consider any entry in the lexicon a token,
+    even if it contains punctuation characters. For text not in the lexicon,
+    falls back to the default tokenization.
+    """
+
+    def __init__(self, mapping: Mapping):
+        super().__init__()
+        self.mapping = mapping
+        self.lang = mapping.language_name
+
+    def _recursive_helper(self, tokens: list, output_tokens: list):
+        """Emit the longest prefix found in the lexicon, if any, as a token.
+        If None, emit the first unit as a token.
+        Recursively process the rest of the units.
+        """
+        if not tokens:
+            return
+        if len(tokens) == 1:
+            output_tokens.append(tokens[0])
+            return
+        for i in range(len(tokens), 0, -1):
+            candidate = "".join([u.text for u in tokens[:i]])
+            if utils.find_alignment(self.mapping.alignments, candidate.lower()):
+                output_tokens.append(Token(candidate, True))
+                return self._recursive_helper(tokens[i:], output_tokens)
+        # No prefix found, emit the first unit as a token
+        output_tokens.append(tokens[0])
+        self._recursive_helper(tokens[1:], output_tokens)
+
+    def tokenize_text(self, text: str) -> List[Token]:
+        blocks = re.split(r"(\s+)", text)
+        output_tokens = []
+        for i, block in enumerate(blocks):
+            if i % 2 == 1 and block:
+                output_tokens.append(Token(block, False))
+            else:
+                default_tokens = super().tokenize_text(block)
+                # Split non-word tokens into smaller parts for lexicon lookup
+                candidate_tokens = utils.split_non_word_tokens(default_tokens)
+                self._recursive_helper(candidate_tokens, output_tokens)
+
+        return utils.merge_non_word_tokens(output_tokens)
+
+
 class MultiHopTokenizer(SpecializedTokenizer):
     def __init__(self, mappings: List[Mapping]):
         self.delim = ""
@@ -202,7 +242,10 @@ def make_tokenizer(  # noqa C901
             # Build a one-hop tokenizer
             try:
                 mapping = Mapping.find_mapping(in_lang=in_lang, out_lang=out_lang)
-                self.tokenizers[tokenizer_key] = SpecializedTokenizer(mapping)
+                if mapping.type == utils.MAPPING_TYPE.lexicon:
+                    self.tokenizers[tokenizer_key] = LexiconTokenizer(mapping)
+                else:
+                    self.tokenizers[tokenizer_key] = SpecializedTokenizer(mapping)
             except MappingMissing:
                 self.tokenizers[tokenizer_key] = self.tokenizers[None]
                 LOGGER.warning(
diff --git a/g2p/mappings/utils.py b/g2p/mappings/utils.py
index fcd3e294..7b248dc1 100644
--- a/g2p/mappings/utils.py
+++ b/g2p/mappings/utils.py
@@ -10,7 +10,6 @@
 import unicodedata as ud
 from bisect import bisect_left
 from collections import defaultdict
-from copy import deepcopy
 from enum import Enum
 from pathlib import Path
 from typing import (
@@ -43,6 +42,7 @@
 from g2p import exceptions
 from g2p.log import LOGGER
 from g2p.mappings import langs
+from g2p.shared_types import Token
 
 GEN_DIR = os.path.join(os.path.dirname(langs.__file__), "generated")
 GEN_CONFIG = os.path.join(GEN_DIR, "config-g2p.yaml")
@@ -151,7 +151,7 @@ def normalize(inp: str, norm_form: Union[str, None]):
     if norm_form is None or norm_form == "none":
         return unicode_escape(inp)
     if norm_form not in ["NFC", "NFD", "NFKC", "NFKD"]:
-        raise exceptions.InvalidNormalization(normalize)
+        raise exceptions.InvalidNormalization(norm_form)
     # Sadly mypy doesn't do narrowing to literals properly
     norm_form = cast(Literal["NFC", "NFD", "NFKC", "NFKD"], norm_form)
     normalized = ud.normalize(norm_form, unicode_escape(inp))
@@ -178,8 +178,8 @@ def compose_indices(
     """Compose indices1 + indices2 into direct arcs from the inputs of indices1
     to the outputs of indices 2.
 
-    E.g., [(0,1), (1,4)] composed with [(0,0), (1,2), (1,3), (4,2)] is
-    [(0,2), (0,3), (1,2)]
+    >>> compose_indices([(0,1), (1,4)], [(0,0), (1,2), (1,3), (4,2)])
+    [(0, 2), (0, 3), (1, 2)]
     """
     # for O(1) lookup of arcs leaving indices2
     indices2_as_dict = defaultdict(dict)  # type: ignore
@@ -239,7 +239,7 @@ def normalize_with_indices(
         return normalize_to_NFD_with_indices(inp, norm_form)
     if norm_form in ("none", None):
         return inp, [(i, i) for i in range(len(inp))]
-    raise exceptions.InvalidNormalization(normalize)
+    raise exceptions.InvalidNormalization(norm_form)
 
 
 def unicode_escape(text):
@@ -596,22 +596,76 @@ def ignore_aliases(self, *_args):
         return True
 
 
-def merge_if_same_label(lst_of_dicts, text_key, label_key):
-    results = []
-    current_item = None
-    for dct in lst_of_dicts:
-        if label_key not in dct:
-            dct[label_key] = None
-        if not current_item:
-            current_item = deepcopy(dct)
-        elif dct[label_key] == current_item[label_key]:
-            current_item[text_key] += dct[text_key]
+def merge_same_type_tokens(tokens: List[Token]) -> List[Token]:
+    """Merge tokens that have the same type. Destroys tokens in the process.
+
+    >>> merge_same_type_tokens([Token("test", True), Token("b", True), Token(":", False), Token(",", False)])
+    [Token(text='testb', is_word=True), Token(text=':,', is_word=False)]
+    >>> merge_same_type_tokens([])
+    []
+    """
+    if not tokens:
+        return []
+    merged_tokens = [tokens[0]]
+    for token in tokens[1:]:
+        if token.is_word == merged_tokens[-1].is_word:
+            merged_tokens[-1].text += token.text
+        else:
+            merged_tokens.append(token)
+    return merged_tokens
+
+
+def split_non_word_tokens(tokens: List[Token]) -> List[Token]:
+    """Split non-word units into characters. Reuses the word tokens.
+
+    Generates a maximum of 5 units per non-word token: if the input token is
+    more than 5 non-word characters, the output will be the first two
+    individually, the middle as a block, and the last two individually, because
+    lexicon-based tokenization does not need more granularity than that.
+    This prevents degenerate input like a large number of consecutive punctuation
+    marks from taking quadratic time in lexicon-based tokenization.
+
+    >>> split_non_word_tokens([Token("test", True), Token(":,- ", False), Token("", False)])
+    [Token(text='test', is_word=True), Token(text=':', is_word=False), Token(text=',', is_word=False), Token(text='-', is_word=False), Token(text=' ', is_word=False)]
+    >>> split_non_word_tokens([])
+    []
+    >>> split_non_word_tokens([Token(".,.,.,.", False)])
+    [Token(text='.', is_word=False), Token(text=',', is_word=False), Token(text='.,.', is_word=False), Token(text=',', is_word=False), Token(text='.', is_word=False)]
+    """
+    new_tokens = []
+    for token in tokens:
+        if not token.is_word:
+            text = token.text
+            if len(text) > 5:
+                new_tokens.append(Token(text[0], False))
+                new_tokens.append(Token(text[1], False))
+                new_tokens.append(Token(text[2:-2], False))
+                new_tokens.append(Token(text[-2], False))
+                new_tokens.append(Token(text[-1], False))
+            else:
+                new_tokens.extend([Token(char, False) for char in text])
+        else:
+            new_tokens.append(token)
+    return new_tokens
+
+
+def merge_non_word_tokens(tokens: List[Token]) -> List[Token]:
+    """Merge consecutive non-word units into a single token. Destroys tokens in the process.
+
+    >>> merge_non_word_tokens([Token("test", True), Token(":", False), Token(",", False)])
+    [Token(text='test', is_word=True), Token(text=':,', is_word=False)]
+    >>> merge_non_word_tokens([])
+    []
+    """
+    if not tokens:
+        return tokens
+    merged_tokens = [tokens[0]]
+    for token in tokens[1:]:
+        if not token.is_word and not merged_tokens[-1].is_word:
+            merged_tokens[-1].text += token.text
         else:
-            results.append(current_item)
-            current_item = deepcopy(dct)
-    if current_item:
-        results.append(current_item)
-    return results
+            merged_tokens.append(token)
+    return merged_tokens
 
 
 CATEGORIES = {
diff --git a/g2p/shared_types.py b/g2p/shared_types.py
index 2472d0fb..1296c35f 100644
--- a/g2p/shared_types.py
+++ b/g2p/shared_types.py
@@ -4,6 +4,43 @@
 """
 
 from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import List
+
+from typing_extensions import deprecated
+
+
+@dataclass
+class Token:
+    """A token from the g2p tokenizer."""
+
+    text: str
+    is_word: bool
+
+    @deprecated(
+        "Accessing g2p Token objects as dicts is deprecated since g2p 2.2.0. "
+        "Please use the 'text' and 'is_word' attributes instead.",
+    )
+    def __getitem__(self, key):
+        """For backward compatibility only, allow access as if it were a dict."""
+        if key == "text":
+            return self.text
+        if key == "is_word":
+            return self.is_word
+        raise KeyError(key)
+
+    @deprecated(
+        "Accessing g2p Token objects as dicts is deprecated since g2p 2.2.0. "
+        "Please use the 'text' and 'is_word' attributes instead.",
+    )
+    def __setitem__(self, key, value):
+        """For backward compatibility only, allow setting values as if it were a dict."""
+        if key == "text":
+            self.text = value
+        elif key == "is_word":
+            self.is_word = value
+        else:
+            raise KeyError(key)
 
 
 class BaseTransducer(ABC):
@@ -13,6 +50,21 @@ class BaseTransducer(ABC):
     def __call__(self, to_convert: str):
         """Transduce to_convert."""
 
+    @property
+    @abstractmethod
+    def transducers(self):
+        """A list of BaseTransducer objects for each tier in the transducer."""
+
+    @property
+    @abstractmethod
+    def in_lang(self) -> str:
+        """The input language code of the transducer."""
+
+    @property
+    @abstractmethod
+    def out_lang(self) -> str:
+        """The output language code of the transducer."""
+
 
 class BaseTransductionGraph(ABC):
@@ -27,5 +79,5 @@ class BaseTokenizer(ABC):
     """Base class to typecheck tokenizers without having to import them."""
 
     @abstractmethod
-    def tokenize_text(self, text):
+    def tokenize_text(self, text: str) -> List[Token]:
         """Tokenize text."""
diff --git a/g2p/tests/test_tokenizer.py b/g2p/tests/test_tokenizer.py
index d51f764c..b15b8611 100755
--- a/g2p/tests/test_tokenizer.py
+++ b/g2p/tests/test_tokenizer.py
@@ -17,32 +17,50 @@ def test_tokenize_fra(self):
         tokenizer = tok.make_tokenizer("fra")
         tokens = tokenizer.tokenize_text(input)
         self.assertEqual(len(tokens), 8)
-        self.assertTrue(tokens[0]["is_word"])
-        self.assertEqual(tokens[0]["text"], "ceci")
-        self.assertFalse(tokens[1]["is_word"])
-        self.assertEqual(tokens[1]["text"], " ")
-        self.assertTrue(tokens[2]["is_word"])
-        self.assertEqual(tokens[2]["text"], "était")
-        self.assertFalse(tokens[3]["is_word"])
-        self.assertEqual(tokens[3]["text"], " '")
-        self.assertTrue(tokens[4]["is_word"])
-        self.assertEqual(tokens[4]["text"], "un")
-        self.assertFalse(tokens[5]["is_word"])
-        self.assertEqual(tokens[5]["text"], "' ")
-        self.assertTrue(tokens[6]["is_word"])
-        self.assertEqual(tokens[6]["text"], "test")
-        self.assertFalse(tokens[7]["is_word"])
-        self.assertEqual(tokens[7]["text"], ".")
+        self.assertTrue(tokens[0].is_word)
+        self.assertEqual(tokens[0].text, "ceci")
+        self.assertFalse(tokens[1].is_word)
+        self.assertEqual(tokens[1].text, " ")
+        self.assertTrue(tokens[2].is_word)
+        self.assertEqual(tokens[2].text, "était")
+        self.assertFalse(tokens[3].is_word)
+        self.assertEqual(tokens[3].text, " '")
+        self.assertTrue(tokens[4].is_word)
+        self.assertEqual(tokens[4].text, "un")
+        self.assertFalse(tokens[5].is_word)
+        self.assertEqual(tokens[5].text, "' ")
+        self.assertTrue(tokens[6].is_word)
+        self.assertEqual(tokens[6].text, "test")
+        self.assertFalse(tokens[7].is_word)
+        self.assertEqual(tokens[7].text, ".")
 
     def test_tokenize_eng(self):
         input = "This is éçà test."
         tokenizer = tok.make_tokenizer("eng")
         tokens = tokenizer.tokenize_text(input)
         self.assertEqual(len(tokens), 8)
-        self.assertTrue(tokens[0]["is_word"])
-        self.assertEqual(tokens[0]["text"], "This")
-        self.assertFalse(tokens[1]["is_word"])
-        self.assertEqual(tokens[1]["text"], " ")
+        self.assertTrue(tokens[0].is_word)
+        self.assertEqual(tokens[0].text, "This")
+        self.assertFalse(tokens[1].is_word)
+        self.assertEqual(tokens[1].text, " ")
+
+    def test_lexicon_tokenizer(self):
+        tokenizer = tok.make_tokenizer("eng")
+        tests = [
+            ("It's", ["It's"]),
+            ("'cause", ["'cause"]),
+            ('"\'cause"', ['"', "'cause", '"']),
+            ("aardvark's", ["aardvark", "'s"]),
+            ("'aardvark's'", ["'", "aardvark", "'s", "'"]),
+            ("ten a.m.", ["ten", " ", "a.m."]),
+            ('ten "a.m.,!"', ["ten", ' "', "a.m.", ',!"']),
+            ("all-out war", ["all-out", " ", "war"]),  # all-out is in the lexicon
+            ("all-in: nonsense", ["all", "-", "in", ": ", "nonsense"]),  # all-in is not
+        ]
+        for input_text, expected_tokens in tests:
+            with self.subTest(input_text=input_text):
+                tokens = tokenizer.tokenize_text(input_text)
+                self.assertEqual([x.text for x in tokens], expected_tokens)
 
     def test_tokenize_win(self):
         """win is easy to tokenize because win -> win-ipa exists and has ' in its inventory"""
@@ -52,8 +70,8 @@ def test_tokenize_win(self):
         tokenizer = tok.make_tokenizer("win")
         tokens = tokenizer.tokenize_text(input)
         self.assertEqual(len(tokens), 1)
-        self.assertTrue(tokens[0]["is_word"])
-        self.assertEqual(tokens[0]["text"], "p'ōį̄ą")
+        self.assertTrue(tokens[0].is_word)
+        self.assertEqual(tokens[0].text, "p'ōį̄ą")
 
     def test_tokenize_tce(self):
         """tce is hard to tokenize correctly because we have tce -> tce-equiv -> tce-ipa, and ' is
@@ -71,14 +89,14 @@ def test_tokenize_tce(self):
         tokenizer = tok.make_tokenizer("tce")
         tokens = tokenizer.tokenize_text(input)
         self.assertEqual(len(tokens), 1)
-        self.assertTrue(tokens[0]["is_word"])
-        self.assertEqual(tokens[0]["text"], "ts'nj")
+        self.assertTrue(tokens[0].is_word)
+        self.assertEqual(tokens[0].text, "ts'nj")
 
     def test_tokenize_tce_equiv(self):
         input = "ts'e ts`e ts‘e ts’"
         self.assertEqual(len(tok.make_tokenizer("fra").tokenize_text(input)), 14)
         # tce_tokens = tok.make_tokenizer("tce").tokenize_text(input)
-        # LOGGER.warning([x["text"] for x in tce_tokens])
+        # LOGGER.warning([x.text for x in tce_tokens])
         self.assertEqual(len(tok.make_tokenizer("tce").tokenize_text(input)), 7)
 
     def test_tokenizer_identity_tce(self):
diff --git a/g2p/tests/test_utils.py b/g2p/tests/test_utils.py
index 5d9b003a..44c8ce2d 100755
--- a/g2p/tests/test_utils.py
+++ b/g2p/tests/test_utils.py
@@ -4,9 +4,11 @@
 """
 
 import doctest
+import io
 import os
 import re
 from collections import defaultdict
+from contextlib import redirect_stderr
 from pathlib import Path
 from unittest import TestCase, main
 
@@ -14,9 +16,9 @@
 from pep440 import is_canonical
 
 import g2p
+import g2p.exceptions
 from g2p import get_arpabet_langs
 from g2p._version import VERSION, version_tuple
-from g2p.exceptions import IncorrectFileType, RecursionError
 from g2p.log import LOGGER
 from g2p.mappings import Mapping, utils
 from g2p.mappings.utils import RULE_ORDERING_ENUM, Rule
@@ -60,7 +62,7 @@ def test_abb_expand(self):
         )  # shouldn't allow self-referential abbreviations
         expanded_plain = utils.expand_abbreviations("test", test_dict)
         expanded_bad_plain = utils.expand_abbreviations("test", bad_dict)
-        with self.assertRaises(RecursionError):
+        with self.assertRaises(g2p.exceptions.RecursionError):
             utils.expand_abbreviations("HIGH_VOWELS", bad_dict)
         expanded_non_recursive = utils.expand_abbreviations("HIGH_VOWELS", test_dict)
         expanded_recursive = utils.expand_abbreviations("VOWELS", test_dict)
@@ -156,7 +158,7 @@ def test_escape_special(self):
         )
 
     def test_load_abbs(self):
-        with self.assertRaises(IncorrectFileType):
+        with self.assertRaises(g2p.exceptions.IncorrectFileType):
             utils.load_abbreviations_from_file(
                 os.path.join(PUBLIC_DIR, "mappings", "abbreviations.json")
             )
@@ -212,6 +214,10 @@ def test_generated_mapping(self):
             test_config_added.display_name, "test custom to test-out custom"
         )
 
+    def test_bad_normalization(self):
+        with self.assertRaises(g2p.exceptions.InvalidNormalization):
+            utils.normalize_with_indices("test", "bad")
+
     def test_normalize_to_NFD_with_indices(self):
         # Usefull site to get combining character code points:
         # http://www.alanwood.net/unicode/combining_diacritical_marks.html
@@ -323,6 +329,34 @@ def test_scm_pretend_version_is_up_to_date(self):
             # This is fine, it's only used in development
             pass
 
+    def test_token_class(self):
+        from g2p.shared_types import Token
+
+        t1 = Token("test", True)
+        t2 = Token(":", False)
+
+        f = io.StringIO()
+        with redirect_stderr(f):
+            # Current usage and deprecated usage
+            for t in t1, t2:
+                self.assertEqual(t.text, t["text"])
+                self.assertEqual(t.is_word, t["is_word"])
+            # new way to set
+            t1.text = "test2"
+            t1.is_word = False
+            self.assertEqual(t1.text, "test2")
+            self.assertEqual(t1.is_word, False)
+            # deprecated way to set
+            t1["text"] = "test3"
+            t1["is_word"] = True
+            self.assertEqual(t1.text, "test3")
+            self.assertEqual(t1.is_word, True)
+
+            with self.assertRaises(KeyError):
+                t1["bad_key"] = "test"
+            with self.assertRaises(KeyError):
+                _ = t2["bad_key"]
+
 
 if __name__ == "__main__":
     main()
diff --git a/g2p/transducer/__init__.py b/g2p/transducer/__init__.py
index e02835a9..8a167660 100644
--- a/g2p/transducer/__init__.py
+++ b/g2p/transducer/__init__.py
@@ -1219,11 +1219,11 @@ def __call__(self, to_convert: str):
         tg.clear_debugger()  # clear the meaningless initial debugger
 
         for token in self._tokenizer.tokenize_text(to_convert):
-            if token["is_word"]:
-                word_tg = self._transducer(token["text"])
+            if token.is_word:
+                word_tg = self._transducer(token.text)
                 tg += word_tg
             else:
-                non_word_tg = TransductionGraph(token["text"])
+                non_word_tg = TransductionGraph(token.text)
                 tg += non_word_tg
         return tg
 
@@ -1256,8 +1256,8 @@ def check(self, tg: TransductionGraph, shallow=False, display_warnings=False):
         # by step. I don't like this solution, but I don't see how to get around it.
         result = True
         for token in self._tokenizer.tokenize_text(tg.input_string):
-            if token["is_word"] and not self._transducer.check(
-                self._transducer(token["text"]),
+            if token.is_word and not self._transducer.check(
+                self._transducer(token.text),
                 shallow,
                 display_warnings=display_warnings,
             ):
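
A minimal usage sketch of the attribute-based `Token` API introduced by this patch (not the patch itself; it assumes this branch of g2p and its bundled `eng` lexicon mapping are installed, and borrows sample strings from the tests above):

```python
from g2p import make_tokenizer

# Tokens are now dataclasses with .text and .is_word attributes.
tokenizer = make_tokenizer("eng")
for token in tokenizer.tokenize_text("ten a.m., give or take"):
    if token.is_word:
        print("word:", token.text)
    else:
        print("separator:", repr(token.text))

# Dict-style access still works through the deprecated __getitem__/__setitem__
# shims, so older calling code keeps running while it migrates.
first = tokenizer.tokenize_text("hello")[0]
assert first["text"] == first.text
assert first["is_word"] == first.is_word
```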
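And a sketch of how the names declared in `__all__` fit together end to end; the `eng` to `eng-arpabet` path and the lexicon coverage of the sample words are assumptions based on g2p's usual ARPABET support and the tests above, not something this patch adds:

```python
from g2p import make_g2p, make_tokenizer, tokenize_and_map

# A plain (non-tokenizing) transducer pairs with tokenize_and_map(), which
# converts word tokens and passes punctuation and spaces through unchanged.
transducer = make_g2p("eng", "eng-arpabet", tokenize=False)
tokenizer = make_tokenizer("eng")  # lexicon-aware: keeps "all-out" and "a.m." whole
print(tokenize_and_map(tokenizer, transducer, "all-out war at ten a.m."))
```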