From 9941099b80dee1db6bfc747535b9d822b0fb0617 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sat, 23 Apr 2022 14:50:17 +0200 Subject: [PATCH] TST: Newlines in text extraction (#807) --- Resources/crazyones.txt | 19 ++++++++++++++++++- Tests/test_workflows.py | 5 ++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/Resources/crazyones.txt b/Resources/crazyones.txt index 468a57e90..ed75e0aae 100644 --- a/Resources/crazyones.txt +++ b/Resources/crazyones.txt @@ -1 +1,18 @@ - The Cr azy Ones Octob er 14, 1998 Heres to the crazy ones. The mis˝ts. The reb els. The troublemak ers. The round p egs in the square holes. The ones who see things di˙eren tly . Theyre not fond of rules. And they ha v e no resp ect for the status quo. Y ou can quote them, disagree with them, glorify or vilify them. Ab out the only thing y ou cant do is ignore them. Because they c hange things. They in v en t. They imagine. They heal. They explore. They create. They inspire. They push the h uman race forw ard. Ma yb e they ha v e to b e crazy . Ho w else can y ou stare at an empt y can v as and see a w ork of art? Or sit in silence and hear a song thats nev er b een written? Or gaze at a red planet and see a lab oratory on wheels? W e mak e to ols for these kinds of p eople. While some see them as the crazy ones, w e see genius. Because the p eople who are crazy enough to think they can c hange the w orld, are the ones who do. \ No newline at end of file + The Cr azy Ones + Octob er 14, 1998 + Heres to the crazy ones. The mis˝ts. The reb els. The troublemak ers. + The round p egs in the square holes. + The ones who see things di˙eren tly . Theyre not fond of rules. And + they ha v e no resp ect for the status quo. Y ou can quote them, + disagree with them, glorify or vilify them. + Ab out the only thing y ou cant do is ignore them. Because they c hange + things. They in v en t. They imagine. They heal. They explore. They + create. They inspire. They push the h uman race forw ard. + Ma yb e they ha v e to b e crazy . + Ho w else can y ou stare at an empt y can v as and see a w ork of art? Or + sit in silence and hear a song thats nev er b een written? Or gaze at + a red planet and see a lab oratory on wheels? + W e mak e to ols for these kinds of p eople. + While some see them as the crazy ones, w e see genius. Because the + p eople who are crazy enough to think they can c hange the w orld, + are the ones who do. diff --git a/Tests/test_workflows.py b/Tests/test_workflows.py index eb313fa8c..d1d5c15e5 100644 --- a/Tests/test_workflows.py +++ b/Tests/test_workflows.py @@ -31,9 +31,12 @@ def test_PdfReaderFileLoad(): with open(os.path.join(RESOURCE_ROOT, "crazyones.txt"), "rb") as pdftext_file: pdftext = pdftext_file.read() - text = page.extractText().replace("\n", "").encode("utf-8") + text = page.extractText().encode("utf-8") # Compare the text of the PDF to a known source + for expected_line, actual_line in zip(text.split(b"\n"), pdftext.split(b"\n")): + assert expected_line == actual_line + assert text == pdftext, ( "PDF extracted text differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n" % (pdftext, text)