<?php
/*
Builds metadata about a book on the Internet Archive in json(p) format so that the book
can be accessed by other software including the Internet Archive BookReader.

Michael Ang <http://github.com/mangtronix>

Copyright (c) 2008-2010 Internet Archive. Software license AGPL version 3.

This file is part of BookReader.

    BookReader is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    BookReader is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
*/
27 class BookReaderMeta {
/**
 * Builds the metadata structure for a book (to be encoded as JSON).
 *
 * Reads three XML documents from the item directory: "{id}_files.xml" (to
 * locate the image stack), the scan data (either "{bookId}_scandata.xml",
 * "scandata.zip" or a bare "scandata.xml") and "{id}_meta.xml".
 *
 * @param string $id       Item identifier; must be the last component of $itemPath.
 * @param string $itemPath Item directory, of the form "/<digits>/items/<id>".
 * @param string $bookId   Sub-item (book) prefix; falls back to $id when empty.
 * @param string $server   Host name used when building preview/image URLs.
 * @return array Associative array of book metadata (title, page sizes, leaf
 *               numbers, image stack location, preview/cover URLs, ...).
 * @throws Exception via BRFatal() when an argument is missing or invalid, or
 *                   when a required file is missing or cannot be parsed.
 */
function buildMetadata($id, $itemPath, $bookId, $server) {
    $response = array();

    // A book without its own sub-item prefix shares the item identifier.
    if (!$bookId) {
        $bookId = $id;
    }
    $subItemPath = $itemPath . '/' . $bookId;

    if ("" == $id) {
        $this->BRFatal("No identifier specified!");
    }

    if ("" == $itemPath) {
        $this->BRFatal("No itemPath specified!");
    }

    if ("" == $server) {
        $this->BRFatal("No server specified!");
    }

    // The id is quoted so regex metacharacters in it are matched literally,
    // and \z (rather than $) refuses a trailing newline in the path.
    if (!preg_match('|^/\d+/items/' . preg_quote($id, '|') . '\z|', $itemPath)) {
        $this->BRFatal("Bad id!");
    }

    // XXX check here that subitem is okay

    $filesDataFile = "$itemPath/{$id}_files.xml";
    if (!file_exists($filesDataFile)) {
        $this->BRFatal("File metadata not found!");
    }
    $filesData = simplexml_load_file($filesDataFile);
    if ($filesData === false) {
        $this->BRFatal("File metadata could not be parsed!");
    }

    $imageStackInfo = $this->findImageStack($bookId, $filesData);
    if ($imageStackInfo['imageFormat'] == 'unknown') {
        $this->BRFatal('Couldn\'t find image stack');
    }

    $imageFormat = $imageStackInfo['imageFormat'];
    $archiveFormat = $imageStackInfo['archiveFormat'];
    $imageStackFile = $itemPath . "/" . $imageStackInfo['imageStackFile'];

    if ("unknown" == $archiveFormat) {
        $this->BRFatal("Unknown archive format");
    }

    // Scan data can live in one of three places; try them in order.
    $scanDataFile = "{$subItemPath}_scandata.xml";
    $scanDataZip = "$itemPath/scandata.zip";
    if (file_exists($scanDataFile)) {
        $scanData = simplexml_load_file($scanDataFile);
    } else if (file_exists($scanDataZip)) {
        $cmd = 'unzip -p ' . escapeshellarg($scanDataZip) . ' scandata.xml';
        $output = array(); // exec() appends, so start from a known-empty list
        exec($cmd, $output, $retval);
        if ($retval != 0) {
            $this->BRFatal("Could not unzip ScanData!");
        }

        $dump = join("\n", $output);
        $scanData = simplexml_load_string($dump);
    } else if (file_exists("$itemPath/scandata.xml")) {
        // For e.g. Scribe v.0 books!
        $scanData = simplexml_load_file("$itemPath/scandata.xml");
    } else {
        $this->BRFatal("ScanData file not found!");
    }
    if ($scanData === false) {
        $this->BRFatal("ScanData could not be parsed!");
    }

    $metaDataFile = "$itemPath/{$id}_meta.xml";
    if (!file_exists($metaDataFile)) {
        $this->BRFatal("MetaData file not found!");
    }
    $metaData = simplexml_load_file($metaDataFile);
    if ($metaData === false) {
        $this->BRFatal("MetaData could not be parsed!");
    }

    // One pass over the pages collects the title leaf, the cover leafs and
    // the per-page arrays. The arrays map accessible page index numbers to
    // width, height, scanned leaf numbers and page number strings
    // (NB: page number strings may not be unique).
    $titleLeaf = '';
    $coverLeafs = array();
    $pageWidths = array();
    $pageHeights = array();
    $leafNums = array();
    $pageNums = array();
    foreach ($scanData->pageData->page as $page) {
        $pageType = (string) $page->pageType;

        // Only the first title page encountered is recorded.
        if (('' === $titleLeaf) && (("Title Page" === $pageType) || ("Title" === $pageType))) {
            $titleLeaf = (string) $page['leafNum'];
        }

        if (('Cover' === $pageType) || ('Cover Page' === $pageType)) {
            $coverLeafs[] = intval((string) $page['leafNum']);
        }

        if ($this->shouldAddPage($page)) {
            $pageWidths[] = intval((string) $page->cropBox->w);
            $pageHeights[] = intval((string) $page->cropBox->h);
            $leafNums[] = intval((string) $page['leafNum']);
            $pageNums[] = (string) $page->pageNumber;
        }
    }

    // Load some values from meta.xml
    $pageProgression = 'lr'; // default
    $declaredProgression = (string) $metaData->{'page-progression'};
    if ('' !== $declaredProgression) {
        $pageProgression = $declaredProgression;
    }

    // General metadata
    $response['title'] = (string) $metaData->title; // XXX renamed
    $response['numPages'] = count($pageNums); // XXX renamed
    if ('' != $titleLeaf) {
        $response['titleLeaf'] = $titleLeaf; // XXX change to titleIndex - do leaf mapping here
        $titleIndex = $this->indexForLeaf($titleLeaf, $leafNums);
        if ($titleIndex !== NULL) {
            $response['titleIndex'] = intval($titleIndex);
        }
    }
    $response['url'] = "http://www.archive.org/details/$id";
    $response['pageProgression'] = $pageProgression;
    $response['pageWidths'] = $pageWidths;
    $response['pageHeights'] = $pageHeights;
    $response['pageNums'] = $pageNums;

    // Internet Archive specific
    $response['itemId'] = $id; // XXX renamed
    $response['bookId'] = $bookId; // XXX renamed
    $response['itemPath'] = $itemPath;
    $response['zip'] = $imageStackFile;
    $response['server'] = $server;
    $response['imageFormat'] = $imageFormat;
    $response['archiveFormat'] = $archiveFormat;
    $response['leafNums'] = $leafNums;
    $response['previewImage'] = $this->previewURL('preview', $response);

    // URL to title image
    if ('' != $titleLeaf) {
        $response['titleImage'] = $this->previewURL('title', $response);
    }

    if (count($coverLeafs) > 0) {
        $coverIndices = array();
        $coverImages = array();
        foreach ($coverLeafs as $leafNum) {
            // NULL is stored when a cover leaf is not among the accessible pages.
            $coverIndices[] = $this->indexForLeaf($leafNum, $leafNums);
            // $$$ TODO use preview API once it supports multiple covers
            // scale/rotate are passed explicitly because imageURL() declares them.
            $coverImages[] = $this->imageURL($leafNum, $response, null, null);
        }

        $response['coverIndices'] = $coverIndices;
        $response['coverImages'] = $coverImages;
    }

    return $response;
}
/**
 * Sends the metadata to the client as JSON, or as JSONP when the request
 * carries a "callback" parameter.
 *
 * @param mixed $metadata Value passed to json_encode().
 * @return void
 * @throws Exception via BRFatal() when the callback is not a valid identifier.
 */
function emitResponse($metadata) {
    // The parameter is optional; read it without raising an undefined-index notice.
    $callback = isset($_REQUEST['callback']) ? $_REQUEST['callback'] : null;
    $isJsonp = (bool) $callback;

    $contentType = 'application/json'; // default
    if ($isJsonp) {
        // The callback is echoed into the response, so it must be a plain identifier.
        if (!is_string($callback) || !$this->isValidCallback($callback)) {
            $this->BRFatal("Invalid callback");
        }
        $contentType = 'text/javascript'; // JSONP is not JSON
    }

    header('Content-type: ' . $contentType . ';charset=UTF-8');
    header('Access-Control-Allow-Origin: *'); // allow cross-origin requests

    if ($isJsonp) {
        print $callback . '( ';
    }
    print json_encode($metadata);
    if ($isJsonp) {
        print ' )';
    }
}
/**
 * Aborts processing by raising an exception with a "Metadata error: " prefix.
 *
 * @param string $string Human-readable description of the failure.
 * @return void This function never returns normally.
 * @throws Exception Always.
 */
function BRFatal($string) {
    // $$$ TODO log error
    throw new Exception("Metadata error: $string");
}
/**
 * Returns true if a page should be added, based on its information in the
 * scan data.
 *
 * A page is excluded only when it carries an addToAccessFormats element whose
 * trimmed, case-insensitive value is "false". With no such element the page
 * is assumed to be wanted.
 *
 * @param SimpleXMLElement $page A page element from the scan data.
 * @return bool
 */
function shouldAddPage($page) {
    if (!isset($page->addToAccessFormats)) {
        return true;
    }

    $flag = strtolower(trim((string) $page->addToAccessFormats));

    return "false" !== $flag;
}
/**
 * Locates the processed single-page image stack for a sub-item.
 *
 * Walks the file entries in listing order and returns the first one whose
 * name starts with "{subPrefix}_{imageFormat}" and whose format string reads
 * "Single Page Processed <image format> <archive format>".
 *
 * @param string           $subPrefix Sub-item prefix the file name must start with.
 * @param SimpleXMLElement $filesData Loaded files XML; its "file" children are scanned.
 * @return array Keys 'imageFormat', 'archiveFormat' and 'imageStackFile'
 *               (all strings); every value is 'unknown' when nothing matches.
 */
function findImageStack($subPrefix, $filesData) {
    // Format label as it appears in the XML => file extension.
    $imageFormats = array('JP2' => 'jp2', 'TIFF' => 'tif', 'JPEG' => 'jpg');
    $archiveFormats = array('ZIP' => 'zip', 'Tar' => 'tar');
    $imageGroup = implode('|', array_keys($imageFormats));
    $archiveGroup = implode('|', array_keys($archiveFormats));
    // $$$ Currently only return processed images
    // "(Processed)" is itself a capture group, so the image format lands in
    // $matches[2] and the archive format in $matches[3].
    $imageStackRegex = "/Single Page (Processed) ({$imageGroup}) ({$archiveGroup})/";

    foreach ($filesData->file as $file) {
        $fileName = (string) $file['name'];

        // subprefix must match the beginning of the file name
        if (strpos($fileName, $subPrefix) !== 0) {
            continue;
        }
        if (!preg_match($imageStackRegex, (string) $file->format, $matches)) {
            continue;
        }

        // Make sure we have a regular image stack
        $imageFormat = $imageFormats[$matches[2]];
        if (strpos($fileName, $subPrefix . '_' . $imageFormat) === 0) {
            return array('imageFormat' => $imageFormat,
                         'archiveFormat' => $archiveFormats[$matches[3]],
                         'imageStackFile' => $fileName);
        }
    }

    return array('imageFormat' => 'unknown', 'archiveFormat' => 'unknown', 'imageStackFile' => 'unknown');
}
/**
 * Checks that a JSONP callback name is a plain identifier: a letter, "_" or
 * "$" followed by letters, digits, "_" or "$".
 *
 * @param mixed $identifier Candidate callback name.
 * @return bool True only for a string that matches in full.
 */
function isValidCallback($identifier) {
    if (!is_string($identifier)) {
        return false;
    }

    // \z anchors at the very end of the subject; a plain "$" would also
    // accept a name followed by a trailing newline.
    $pattern = '/^[a-zA-Z_$][a-zA-Z0-9_$]*\z/';

    return preg_match($pattern, $identifier) === 1;
}
/**
 * Finds the position of a leaf number within a list of leaf numbers.
 *
 * @param mixed $leafNum  Leaf number; anything whose string form is numeric
 *                        (int, numeric string, XML node).
 * @param array $leafNums List of leaf numbers to search.
 * @return int|string|null Key of the first matching entry, or NULL when the
 *                         leaf is absent or $leafNum is not numeric.
 */
function indexForLeaf($leafNum, $leafNums) {
    $needle = (string) $leafNum;
    if (!is_numeric($needle)) {
        return NULL;
    }

    // Compare as integers with a strict search so that no loose string/number
    // juggling can produce a false match.
    $key = array_search(intval($needle), array_map('intval', $leafNums), true);
    if ($key === FALSE) {
        return NULL;
    }

    return $key;
}
/**
 * Returns the leaf number stored at a given page index.
 *
 * @param int|string $index    Key into $leafNums.
 * @param array      $leafNums List of leaf numbers.
 * @return mixed The entry at $index, or NULL when there is no such entry.
 */
function leafForIndex($index, $leafNums) {
    // $$$ todo change to instance variables
    return isset($leafNums[$index]) ? $leafNums[$index] : NULL;
}
/**
 * Builds the URL of a single page image inside the book's image stack.
 *
 * "Under the hood", non-public, dynamically changing (achtung!) image URLs currently look like:
 * http://{server}/BookReader/BookReaderImages.php?zip={zipPath}&file={filePath}&scale={scale}&rotate={rotate}
 * e.g. http://ia311213.us.archive.org/BookReader/BookReaderImages.php?zip=/0/items/coloritsapplicat00andriala/coloritsapplicat00andriala_jp2.zip&file=coloritsapplicat00andriala_jp2/coloritsapplicat00andriala_0009.jp2&scale=8&rotate=0
 *
 * @param mixed $leafNum  Leaf number of the page.
 * @param array $metadata Must provide 'bookId', 'imageFormat', 'server' and 'zip'.
 * @param mixed $scale    Optional scale value; omitted from the URL when NULL.
 * @param mixed $rotate   Optional rotation value; omitted from the URL when NULL.
 * @return string
 */
function imageURL($leafNum, $metadata, $scale = null, $rotate = null) {
    $filePath = $this->imageFilePath($leafNum, $metadata['bookId'], $metadata['imageFormat']);
    $url = 'http://' . $metadata['server'] . '/BookReader/BookReaderImages.php?zip=' . $metadata['zip'] . '&file=' . $filePath;

    // isset() tests the argument itself; defined() would look up a constant
    // named after its value and so never succeed here.
    if (isset($scale)) {
        $url .= '&scale=' . $scale;
    }
    if (isset($rotate)) {
        $url .= '&rotate=' . $rotate;
    }

    return $url;
}
/**
 * Builds the URL of a preview image served by BookReaderPreview.php.
 *
 * $$$ move inside BookReaderPreview
 *
 * @param string $page     Which preview to request (this file passes
 *                         'preview' and 'title').
 * @param array  $metadata Must provide 'itemId', 'bookId', 'itemPath' and 'server'.
 * @return string URL whose query string is built with http_build_query().
 */
function previewURL($page, $metadata) {
    $query = array(
        'id' => $metadata['itemId'],
        'bookId' => $metadata['bookId'],
        'itemPath' => $metadata['itemPath'],
        'server' => $metadata['server'],
        'page' => $page,
    );

    return 'http://' . $metadata['server'] . '/BookReader/BookReaderPreview.php?' . http_build_query($query, '', '&');
}
/**
 * Returns the path of a page image inside the image stack archive, in the
 * form "{bookId}_{format}/{bookId}_{leaf}.{format}" with the leaf number
 * zero-padded to at least four digits.
 *
 * @param mixed  $leafNum Leaf number; converted with intval().
 * @param string $bookId  Sub-item (book) prefix.
 * @param string $format  Image file extension, e.g. "jp2".
 * @return string
 */
function imageFilePath($leafNum, $bookId, $format) {
    return sprintf("%s_%s/%s_%04d.%s", $bookId, $format, $bookId, intval($leafNum), $format);
}
/**
 * Handles a metadata request: builds the metadata for the requested book and
 * emits it as the HTTP response.
 *
 * @param array $requestEnv Must provide 'itemId', 'itemPath', 'bookId' and 'server'.
 * @return void
 * @throws Exception Propagated from buildMetadata() / emitResponse().
 */
function processRequest($requestEnv) {
    $id = $requestEnv['itemId']; // XXX renamed
    $itemPath = $requestEnv['itemPath'];
    $bookId = $requestEnv['bookId']; // XXX renamed
    $server = $requestEnv['server'];

    // Check if we're on a dev vhost and point to JSIA in the user's public_html on the datanode
    // $$$ TODO consolidate this logic
    $requestUri = isset($_SERVER["REQUEST_URI"]) ? $_SERVER["REQUEST_URI"] : '';
    foreach (array('/~mang', '/~testflip') as $homeDir) {
        if (strpos($requestUri, $homeDir) === 0) { // Serving out of home dir
            $server .= ':80' . $homeDir;
            break;
        }
    }

    $this->emitResponse( $this->buildMetadata($id, $itemPath, $bookId, $server) );
}