-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Color and font information for chars, words and boxes (#39)
The information originates in chars, but is pushed to words, lines and boxes in case the values don't differ.
- Loading branch information
1 parent
518ead3
commit 62086d0
Showing
6 changed files
with
200 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
"""Test color and font style extraction.""" | ||
|
||
import libpdf | ||
from tests.conftest import PDF_COLOR_STYLE | ||
|
||
|
||
def test_colors_0() -> None:
    """Check the textbox color of the "Color in Text and Heading" chapter.

    Every chapter carrying exactly that title must report ``(1, 0, 0)`` as
    the ``ncolor`` of its textbox; chapters with other titles are skipped.
    """
    pdf = libpdf.load(PDF_COLOR_STYLE)
    assert pdf is not None
    assert pdf.flattened.chapters

    for chapter in pdf.flattened.chapters:
        if chapter.title != "Color in Text and Heading":
            continue
        assert chapter.textbox.ncolor == (1, 0, 0)
|
||
|
||
def test_colors_1() -> None:
    """Check paragraph colors inside the "HorizontalLine" chapter.

    A paragraph containing "Paragraph text is blue" must have the textbox
    color ``(0, 0, 1)``; a paragraph containing "This chapter is for" must
    have ``(0, 0, 0)``.  Non-paragraph content is ignored.
    """
    pdf = libpdf.load(PDF_COLOR_STYLE)
    assert pdf is not None
    assert pdf.flattened.chapters

    for chapter in pdf.flattened.chapters:
        if chapter.title != "HorizontalLine":
            continue
        for content in chapter.content:
            # Only paragraphs are inspected; other content is never touched.
            if content.type != "paragraph":
                continue
            paragraph_text = content.textbox.text
            if "Paragraph text is blue" in paragraph_text:
                assert content.textbox.ncolor == (0, 0, 1)
            if "This chapter is for" in paragraph_text:
                assert content.textbox.ncolor == (0, 0, 0)
|
||
|
||
def test_colors_2() -> None:
    """Check box-level colors in the two horizontal-box chapters.

    Paragraphs of the "HorizontalBox" chapter must have the textbox color
    ``(0, 1, 0)``.  Paragraphs of the "UncoloredHorizontalbox" chapter must
    have no textbox color, while each of their lines must still carry one.
    """
    pdf = libpdf.load(PDF_COLOR_STYLE)
    assert pdf is not None
    assert pdf.flattened.chapters

    for chapter in pdf.flattened.chapters:
        title = chapter.title
        if title not in ("HorizontalBox", "UncoloredHorizontalbox"):
            continue
        uniformly_colored = title == "HorizontalBox"

        for content in chapter.content:
            if content.type != "paragraph":
                continue
            if uniformly_colored:
                assert content.textbox.ncolor == (0, 1, 0)
            else:
                # The box reports no color, but its individual lines do.
                assert content.textbox.ncolor is None
                for line in content.textbox.lines:
                    assert line.ncolor is not None
|
||
|
||
def test_colors_3() -> None:
    """Check per-word colors of the "This line has no color" paragraph.

    Looks at chapters whose title contains "Words".  The matching paragraph
    must have no textbox color, while selected words in it must carry one of
    the colors listed for them below.
    """
    # Acceptable colors per word text; words not listed here are not checked.
    allowed_colors = {
        "has": ((0, 0, 1),),
        "color": ((0, 1, 0), (0, 0, 0)),
        "changes": ((1, 0, 0),),
        "words": ((0, 0, 1),),
    }

    pdf = libpdf.load(PDF_COLOR_STYLE)
    assert pdf is not None
    assert pdf.flattened.chapters

    for chapter in pdf.flattened.chapters:
        if "Words" not in chapter.title:
            continue
        for content in chapter.content:
            if content.type != "paragraph":
                continue
            if "This line has no color" not in content.textbox.text:
                continue
            assert content.textbox.ncolor is None

            for word in content.textbox.words:
                if word.text in allowed_colors:
                    assert word.ncolor in allowed_colors[word.text]
|
||
|
||
def test_colors_4() -> None:
    """Check that the "This words have no color" content is uncolored.

    Looks at chapters whose title contains "Words".  The matching content
    must have no textbox color, and each of its words must either have no
    color or the color ``(0, 0, 0)``.
    """
    pdf = libpdf.load(PDF_COLOR_STYLE)
    assert pdf is not None
    assert pdf.flattened.chapters

    for chapter in pdf.flattened.chapters:
        if "Words" not in chapter.title:
            continue
        for content in chapter.content:
            if "This words have no color" not in content.textbox.text:
                continue
            assert content.textbox.ncolor is None

            for word in content.textbox.words:
                assert word.ncolor in (None, (0, 0, 0))
|
||
|
||
def test_colors_5() -> None:
    """Check per-word colors of the "These words are printed" content.

    Looks at chapters whose title contains "Words".  The matching content
    must have no textbox color, while the words listed below must have
    exactly the color given for them.
    """
    # Expected color per word text; words not listed here are not checked.
    expected_colors = {
        "words": (0, 1, 0),
        "but": (0, 1, 0),
        "printed": (0, 0, 1),
        "background": (1, 0, 0),
    }

    pdf = libpdf.load(PDF_COLOR_STYLE)
    assert pdf is not None
    assert pdf.flattened.chapters

    for chapter in pdf.flattened.chapters:
        if "Words" not in chapter.title:
            continue
        for content in chapter.content:
            if "These words are printed" not in content.textbox.text:
                continue
            assert content.textbox.ncolor is None

            for word in content.textbox.words:
                expected = expected_colors.get(word.text)
                if expected is not None:
                    assert word.ncolor == expected
|
||
|
||
def test_colors_6() -> None:
    """Test that bold and italic words are reflected in the font name.

    Looks at chapters whose title contains "Styled Text":

    * in content containing "bold text format", only the word "bold" may
      use a font whose name contains "Bold";
    * in content containing "italic text format", only the word "italic"
      may use a font whose name contains "Italic";
    * content containing "underline text format" is not checked.
    """
    objects = libpdf.load(PDF_COLOR_STYLE)
    assert objects is not None
    assert objects.flattened.chapters

    for chapter in objects.flattened.chapters:
        if "Styled Text" in chapter.title:
            for content in chapter.content:
                if "bold text format" in content.textbox.text:
                    for word in content.textbox.words:
                        if word.text == "bold":
                            assert "Bold" in word.fontname
                        else:
                            assert "Bold" not in word.fontname
                elif "italic text format" in content.textbox.text:
                    # Iterate over this content's own words. Without this
                    # loop, ``word`` would be whatever the bold branch left
                    # behind (or undefined if that branch never ran).
                    for word in content.textbox.words:
                        if word.text == "italic":
                            assert "Italic" in word.fontname
                        else:
                            assert "Italic" not in word.fontname
                elif "underline text format" in content.textbox.text:
                    # Underlining seems to be extracted as a rect, so there
                    # is no font attribute to check here.
                    pass