Skip to content

Commit

Permalink
allow more flexibility for OCR variations in a PDFParser test
Browse files Browse the repository at this point in the history
  • Loading branch information
tballison committed May 25, 2018
1 parent 7e3e34c commit ac73693
Showing 1 changed file with 7 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1252,11 +1252,14 @@ public void testEmbeddedDocsWithOCROnly() throws Exception {
assertContains("Needle", xmlResult.xml);
if (! strategy.equals(PDFParserConfig.OCR_STRATEGY.NO_OCR)) {
// Tesseract may sometimes see the t in haystack as a !...
String div = "<div class=\"ocr\">pdf_hays";
if (xmlResult.xml.contains(div+"!ack")) {
assertContains(div+"!ack", xmlResult.xml);
//or it might see dehayslack...
//TODO: figure out how to make this test less hacky
String div = "<div class=\"ocr\">";
if (xmlResult.xml.contains(div+"pdf_hays!ack")) {
} else if (xmlResult.xml.contains(div+"pdf_haystack")) {
} else if (xmlResult.xml.contains(div+"dehayslack")) {
} else {
assertContains(div+"tack", xmlResult.xml);
fail("couldn't find acceptable variants of haystack");
}
} else {
assertNotContained("<div class=\"ocr\">pdf_haystack", xmlResult.xml);
Expand Down

0 comments on commit ac73693

Please sign in to comment.