Skip to content

Commit

Permalink
smallish bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom committed Apr 5, 2013
1 parent ebfd7de commit a79c658
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 9 deletions.
16 changes: 11 additions & 5 deletions ocrolib/lstm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import common as ocrolib
import pdb
from pylab import *
import sys
Expand Down Expand Up @@ -368,7 +369,8 @@ def forward(self,xs):
ni,ns,na = self.dims
assert len(xs[0])==ni
n = len(xs)
if n>len(self.gi): raise RangeError("input too large")
if n>len(self.gi):
raise ocrolib.RecognitionError("input too large for LSTM model")
self.last_n = n
self.reset(n)
for t in range(n):
Expand Down Expand Up @@ -398,7 +400,8 @@ def forward(self,xs):
def backward(self,deltas):
"""Perform backward propagation of deltas."""
n = len(deltas)
if n>len(self.gi): raise RangeError("input too large")
if n>len(self.gi):
raise ocrolib.RecognitionError("input too large")
assert n==self.last_n
ni,ns,na = self.dims
for t in reversed(range(n)):
Expand Down Expand Up @@ -595,7 +598,8 @@ def log_mul(x,y):

def log_add(x,y):
"Perform addition in the log domain."
return where(abs(x-y)>10,maximum(x,y),log(exp(x-y)+1)+y)
#return where(abs(x-y)>10,maximum(x,y),log(exp(x-y)+1)+y)
return where(abs(x-y)>10,maximum(x,y),log(exp(clip(x-y,-20,20))+1)+y)

def forward_algorithm(match,skip=-5.0):
"""Apply the forward algorithm to an array of log state
Expand Down Expand Up @@ -631,9 +635,11 @@ def ctc_align_targets(outputs,targets,threshold=100.0,verbose=0,debug=0,lo=1e-5)
assert not isnan(lmatch).any()
both = forwardbackward(lmatch)
epath = exp(both-amax(both))
epath /= sum(epath,axis=0)[newaxis,:]
l = sum(epath,axis=0)[newaxis,:]
epath /= where(l==0.0,1e-9,l)
aligned = maximum(lo,dot(epath,targets))
aligned /= sum(aligned,axis=1)[:,newaxis]
l = sum(aligned,axis=1)[:,newaxis]
aligned /= where(l==0.0,1e-9,l)
if debug:
subplot(413); imshow(epath.T,cmap=cm.hot,interpolation='nearest')
subplot(414); imshow(aligned.T,cmap=cm.hot,interpolation='nearest')
Expand Down
2 changes: 1 addition & 1 deletion ocropus-gtedit
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ p_org.add_argument('files',nargs='+')

p_html = subparsers.add_parser("html",help="generate html")
p_html.add_argument('-d','--debug',action="store_true")
p_html.add_argument('-o','--output',default="correct.html")
p_html.add_argument('-o','--output',default="correction.html")
p_html.add_argument('-x','--extension',default='.txt')
p_html.add_argument('-f','--fontsize',default=14,type=int)
p_html.add_argument('-H','--height',default=24,type=int)
Expand Down
6 changes: 4 additions & 2 deletions ocropus-hocr
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,13 @@ dirs = [ocrolib.allsplitext(name)[0] for name in args.files]
xhfiles = python.sum([glob.glob(d+"/??????.xheight") for d in dirs],[])
if len(xhfiles)>5:
xheights = [float(ocrolib.read_text(f)) for f in xhfiles]
median_xheight = median(xheights)
if len(xheights)>0:
median_xheight = median(xheights)
else:
lfiles = python.sum([glob.glob(d+"/??????.bin.png") for d in dirs],[])
pyrandom.shuffle(lfiles)
median_xheight = 0.5*median([imread(f).shape[0] for f in lfiles[:100]])
if len(lfiles)>0:
median_xheight = 0.5*median([imread(f).shape[0] for f in lfiles[:100]])
E("median_xheight",median_xheight)

P(hocr.header())
Expand Down
3 changes: 3 additions & 0 deletions ocropus-rpred
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ def process1(arg):
def safe_process1(arg):
try:
return process1(arg)
except ocrolib.RecognitionError as e:
print e
return None
except:
traceback.print_exc()
return None
Expand Down
4 changes: 3 additions & 1 deletion ocropus-rtrain
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import os.path
import glob
import ocrolib
import argparse
import lstm
import scipy
import matplotlib
import numpy
Expand Down Expand Up @@ -218,6 +217,9 @@ for trial in range(start,args.ntrain):
else:
assert "dew.png" in fname,"input must already be dewarped"

if line.size<10 or amax(line)==amin(line):
print "EMPTY-INPUT"
continue
line = line * 1.0/amax(line)
line = amax(line)-line
line = line.T
Expand Down

0 comments on commit a79c658

Please sign in to comment.