3 # Copyright(c)2008-2010 Internet Archive. Software license AGPL version 3.
5 # This file is part of BookReader.
7 # BookReader is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
12 # BookReader is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU Affero General Public License for more details.
17 # You should have received a copy of the GNU Affero General Public License
18 # along with BookReader. If not, see <http://www.gnu.org/licenses/>.
20 # The BookReader source is hosted at http://github.com/openlibrary/bookreader/
23 # Watch out for blank lines (empty <LINE></LINE> elements).
25 from lxml import etree
# Main script body: read one page of a DjVu XML file and print a JavaScript
# callback invocation carrying that page's text blocks and their rectangles.
# NOTE(review): this view of the script has gaps and no indentation -- loop
# headers, several `if` bodies and some assignments (e.g. `path`, `left`,
# `bottom`, `minWordsInBlock`, `maxWordsInBlock`) are not visible here, so the
# nesting described in the comments below is inferred; confirm against the
# full file.
#
# Command-line arguments: argv[2] is the page index, argv[3] the callback name.
34 pageNum = int(sys.argv[2])
35 callback = sys.argv[3]
# Only accept paths shaped like /<1-2 digits>/items/<anything>_djvu.xml
# (what happens on a mismatch is not visible here).
# NOTE(review): the pattern is not a raw string and the '.' before 'xml' is
# unescaped, so any character is accepted in that position.
37 if not re.match('^/\d{1,2}/items/.+_djvu.xml$', path):
# Anything other than 'ttsNextPageCB' is replaced with 'ttsStartCB', so only
# those two names can reach the JavaScript printed at the end of the script.
40 if ('ttsNextPageCB' != callback):
41 callback = 'ttsStartCB'
# Parse the XML document and collect every OBJECT element; pageNum is used
# below as an index into this list.
43 tree = etree.parse(path)
45 objects = tree.findall('//OBJECT')
47 #print 'got %s objects' % len(objects)
# Guard against a page index past the last OBJECT (the action taken is not
# visible here).
# NOTE(review): only the upper bound is checked in the visible code; a
# negative pageNum would index from the end of the list.
49 if pageNum > (len(objects)-1):
52 page = objects[pageNum]
# All LINE elements anywhere beneath the selected page.
54 lines = page.findall('.//LINE')
56 #print 'got %s .//lines' % len(lines)
# Presumably inside a loop over `lines` (header not visible): gather the WORD
# elements of the current line.
73 words = line.findall('.//WORD')
75 #print 'at start of line, rects ='
83 #print 'got text ' + text
# Presumably inside a loop over `words` (header not visible): the 'coords'
# attribute is a comma-separated list, converted here to integers.
85 coords = word.get('coords').split(',') #l,b,r,t
86 coords = map(int, coords)
# Compare this word's coordinates with the running left/bottom values; the
# bodies of these checks are not visible -- presumably they grow the current
# line rectangle (TODO confirm). The values are already ints after map(), so
# the extra int() call is redundant.
88 if int(coords[0]) < left:
91 if coords[1] > bottom:
# Accumulate the word's text into the current block, separated by spaces.
100 block += word.text + ' '
# A word ending in '.' closes the block once it holds more than
# minWordsInBlock words.
103 if text.endswith('.') and (numWords>minWordsInBlock):
104 #print 'end of block with numWords=%d' % numWords
105 #print 'block = ' + block
# Record the rectangle built so far, then store the block as
# [text, rect, rect, ...]: the stripped text is inserted at index 0 of rects.
107 rects.append([left, bottom, right, top])
109 #textBlocks.append(block.strip())
110 rects.insert(0, block.strip())
111 textBlocks.append(rects)
# Presumably at the end of each line (TODO confirm): a line that contributed
# at least one word adds its rectangle to the current block.
122 if numWordsInLine > 0:
123 rects.append([left, bottom, right, top])
# A block that has grown past maxWordsInBlock is flushed even without a
# trailing '.'.
125 if numWords>maxWordsInBlock:
126 #textBlocks.append(block.strip())
127 rects.insert(0, block.strip())
128 textBlocks.append(rects)
133 #print 'at end of line, rects ='
# Flush of the remaining block; the condition guarding it (if any) is not
# visible here.
137 #textBlocks.append(block.strip())
138 rects.insert(0, block.strip())
139 textBlocks.append(rects)
# Emit the result as a JavaScript call: br.<callback>(<JSON of textBlocks>);
# (Python 2 print statement).
141 print 'br.%s(%s);' % (callback, json.dumps(textBlocks))