BookReaderIA/datanode/BookReaderJSIA.php

   1 <?
   2 /*
   3 Copyright(c)2008 Internet Archive. Software license AGPL version 3.
   4
   5 This file is part of BookReader.
   6
   7     BookReader is free software: you can redistribute it and/or modify
   8     it under the terms of the GNU Affero General Public License as published by
   9     the Free Software Foundation, either version 3 of the License, or
  10     (at your option) any later version.
  11
  12     BookReader is distributed in the hope that it will be useful,
  13     but WITHOUT ANY WARRANTY; without even the implied warranty of
  14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15     GNU Affero General Public License for more details.
  16
  17     You should have received a copy of the GNU Affero General Public License
  18     along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 $id = $_REQUEST['id'];
  22 $itemPath = $_REQUEST['itemPath'];
  23 $subPrefix = $_REQUEST['subPrefix'];
  24 $server = $_REQUEST['server'];
  25
  26 // $$$mang this code has been refactored into BookReaderMeta.inc.php for use e.g. by
  27 //         BookReaderPreview.php and BookReaderImages.php.  The code below should be
  28 //         taken out and replaced by calls into BookReaderMeta
  29
  30 // Check if we're on a dev vhost and point to JSIA in the user's public_html on the datanode
  31
  32 // $$$ TODO consolidate this logic
  33 if (strpos($_SERVER["REQUEST_URI"], "/~mang") === 0) { // Serving out of home dir
  34     $server .= ':80/~mang';
  35 } else if (strpos($_SERVER["REQUEST_URI"], "/~rkumar") === 0) { // Serving out of home dir
  36     $server .= ':80/~rkumar';
  37 } else if (strpos($_SERVER["REQUEST_URI"], "/~testflip") === 0) { // Serving out of home dir
  38     $server .= ':80/~testflip';
  39 }
  40
  41 if (! $subPrefix) {
  42     $subPrefix = $id;
  43 }
  44 $subItemPath = $itemPath . '/' . $subPrefix;
  45
  46 if ("" == $id) {
  47     BRFatal("No identifier specified!");
  48 }
  49
  50 if ("" == $itemPath) {
  51     BRFatal("No itemPath specified!");
  52 }
  53
  54 if ("" == $server) {
  55     BRFatal("No server specified!");
  56 }
  57
  58 if (!preg_match("|^/\d+/items/{$id}$|", $itemPath)) {
  59     BRFatal("Bad id!");
  60 }
  61
  62 // XXX check here that subitem is okay
  63
  64 $filesDataFile = "$itemPath/${id}_files.xml";
  65
  66 if (file_exists($filesDataFile)) {
  67     $filesData = simplexml_load_file("$itemPath/${id}_files.xml");
  68 } else {
  69     BRfatal("File metadata not found!");
  70 }
  71
  72 $imageStackInfo = findImageStack($subPrefix, $filesData);
  73 if ($imageStackInfo['imageFormat'] == 'unknown') {
  74     BRfatal('Couldn\'t find image stack');
  75 }
  76
  77 $imageFormat = $imageStackInfo['imageFormat'];
  78 $archiveFormat = $imageStackInfo['archiveFormat'];
  79 $imageStackFile = $itemPath . "/" . $imageStackInfo['imageStackFile'];
  80
  81 if ("unknown" == $imageFormat) {
  82   BRfatal("Unknown image format");
  83 }
  84
  85 if ("unknown" == $archiveFormat) {
  86   BRfatal("Unknown archive format");
  87 }
  88
  89
  90 $scanDataFile = "${subItemPath}_scandata.xml";
  91 $scanDataZip  = "$itemPath/scandata.zip";
  92 if (file_exists($scanDataFile)) {
  93     $scanData = simplexml_load_file($scanDataFile);
  94 } else if (file_exists($scanDataZip)) {
  95     $cmd  = 'unzip -p ' . escapeshellarg($scanDataZip) . ' scandata.xml';
  96     exec($cmd, $output, $retval);
  97     if ($retval != 0) BRFatal("Could not unzip ScanData!");
  98
  99     $dump = join("\n", $output);
 100     $scanData = simplexml_load_string($dump);
 101 } else if (file_exists("$itemPath/scandata.xml")) {
 102     // For e.g. Scribe v.0 books!
 103     $scanData = simplexml_load_file("$itemPath/scandata.xml");
 104 } else {
 105     BRFatal("ScanData file not found!");
 106 }
 107
 108 $metaDataFile = "$itemPath/{$id}_meta.xml";
 109 if (!file_exists($metaDataFile)) {
 110     BRFatal("MetaData file not found!");
 111 }
 112
 113
 114 $metaData = simplexml_load_file($metaDataFile);
 115
 116 //$firstLeaf = $scanData->pageData->page[0]['leafNum'];
 117 ?>
 118
 119 br = new BookReader();
 120
 121 <?
 122 /* Output title leaf if marked */
 123 $titleLeaf = '';
 124 foreach ($scanData->pageData->page as $page) {
 125     if (("Title Page" == $page->pageType) || ("Title" == $page->pageType)) {
 126         $titleLeaf = "{$page['leafNum']}";
 127         break;
 128     }
 129 }
 130
 131 if ('' != $titleLeaf) {
 132     printf("br.titleLeaf = %d;\n", $titleLeaf);
 133 }
 134 ?>
 135
 136 br.getPageWidth = function(index) {
 137     return this.pageW[index];
 138 }
 139
 140 br.getPageHeight = function(index) {
 141     return this.pageH[index];
 142 }
 143
 144 // Returns true if page image is available rotated
 145 br.canRotatePage = function(index) {
 146     return 'jp2' == this.imageFormat; // Assume single format for now
 147 }
 148
 149 // reduce defaults to 1 (no reduction)
 150 // rotate defaults to 0 (no rotation)
 151 br.getPageURI = function(index, reduce, rotate) {
 152     var _reduce;
 153     var _rotate;
 154
 155     if ('undefined' == typeof(reduce)) {
 156         _reduce = 1;
 157     } else {
 158         _reduce = reduce;
 159     }
 160     if ('undefined' == typeof(rotate)) {
 161         _rotate = 0;
 162     } else {
 163         _rotate = rotate;
 164     }
 165
 166     var file = this._getPageFile(index);
 167
 168     // $$$ add more image stack formats here
 169     return 'http://'+this.server+'/BookReader/BookReaderImages.php?zip='+this.zip+'&file='+file+'&scale='+_reduce+'&rotate='+_rotate;
 170 }
 171
 172 br._getPageFile = function(index) {
 173     var leafStr = '0000';
 174     var imgStr = this.leafMap[index].toString();
 175     var re = new RegExp("0{"+imgStr.length+"}$");
 176
 177     var insideZipPrefix = this.subPrefix.match('[^/]+$');
 178     var file = insideZipPrefix + '_' + this.imageFormat + '/' + insideZipPrefix + '_' + leafStr.replace(re, imgStr) + '.' + this.imageFormat;
 179
 180     return file;
 181 }
 182
 183 br.getPageSide = function(index) {
 184     //assume the book starts with a cover (right-hand leaf)
 185     //we should really get handside from scandata.xml
 186
 187     <? // Use special function if we should infer the page sides based off the title page index
 188     if (preg_match('/goog$/', $id) && ('' != $titleLeaf)) {
 189     ?>
 190     // assume page side based on title pagex
 191     var titleIndex = br.leafNumToIndex(br.titleLeaf);
 192     // assume title page is RHS
 193     var delta = titleIndex - index;
 194     if (0 == (delta & 0x1)) {
 195         // even delta
 196         return 'R';
 197     } else {
 198         return 'L';
 199     }
 200     <?
 201     }
 202     ?>
 203
 204     // $$$ we should get this from scandata instead of assuming the accessible
 205     //     leafs are contiguous
 206     if ('rl' != this.pageProgression) {
 207         // If pageProgression is not set RTL we assume it is LTR
 208         if (0 == (index & 0x1)) {
 209             // Even-numbered page
 210             return 'R';
 211         } else {
 212             // Odd-numbered page
 213             return 'L';
 214         }
 215     } else {
 216         // RTL
 217         if (0 == (index & 0x1)) {
 218             return 'L';
 219         } else {
 220             return 'R';
 221         }
 222     }
 223 }
 224
 225 br.getPageNum = function(index) {
 226     var pageNum = this.pageNums[index];
 227     if (pageNum) {
 228         return pageNum;
 229     } else {
 230         return 'n' + index;
 231     }
 232 }
 233
 234 // Single images in the Internet Archive scandata.xml metadata are (somewhat incorrectly)
 235 // given a "leaf" number.  Some of these images from the scanning process should not
 236 // be displayed in the BookReader (for example colour calibration cards).  Since some
 237 // of the scanned images will not be displayed in the BookReader (those marked with
 238 // addToAccessFormats false in the scandata.xml) leaf numbers and BookReader page
 239 // indexes are generally not the same.  This function returns the BookReader page
 240 // index given a scanned leaf number.
 241 //
 242 // This function is used, for example, to map between search results (that use the
 243 // leaf numbers) and the displayed pages in the BookReader.
 244 br.leafNumToIndex = function(leafNum) {
 245     for (var index = 0; index < this.leafMap.length; index++) {
 246         if (this.leafMap[index] == leafNum) {
 247             return index;
 248         }
 249     }
 250
 251     return null;
 252 }
 253
 254 // This function returns the left and right indices for the user-visible
 255 // spread that contains the given index.  The return values may be
 256 // null if there is no facing page or the index is invalid.
 257 br.getSpreadIndices = function(pindex) {
 258     // $$$ we could make a separate function for the RTL case and
 259     //      only bind it if necessary instead of always checking
 260     // $$$ we currently assume there are no gaps
 261
 262     var spreadIndices = [null, null];
 263     if ('rl' == this.pageProgression) {
 264         // Right to Left
 265         if (this.getPageSide(pindex) == 'R') {
 266             spreadIndices[1] = pindex;
 267             spreadIndices[0] = pindex + 1;
 268         } else {
 269             // Given index was LHS
 270             spreadIndices[0] = pindex;
 271             spreadIndices[1] = pindex - 1;
 272         }
 273     } else {
 274         // Left to right
 275         if (this.getPageSide(pindex) == 'L') {
 276             spreadIndices[0] = pindex;
 277             spreadIndices[1] = pindex + 1;
 278         } else {
 279             // Given index was RHS
 280             spreadIndices[1] = pindex;
 281             spreadIndices[0] = pindex - 1;
 282         }
 283     }
 284
 285     //console.log("   index %d mapped to spread %d,%d", pindex, spreadIndices[0], spreadIndices[1]);
 286
 287     return spreadIndices;
 288 }
 289
 290 // Remove the page number assertions for all but the highest index page with
 291 // a given assertion.  Ensures there is only a single page "{pagenum}"
 292 // e.g. the last page asserted as page 5 retains that assertion.
 293 br.uniquifyPageNums = function() {
 294     var seen = {};
 295
 296     for (var i = br.pageNums.length - 1; i--; i >= 0) {
 297         var pageNum = br.pageNums[i];
 298         if ( !seen[pageNum] ) {
 299             seen[pageNum] = true;
 300         } else {
 301             br.pageNums[i] = null;
 302         }
 303     }
 304
 305 }
 306
 307 br.cleanupMetadata = function() {
 308     br.uniquifyPageNums();
 309 }
 310
 311 // getEmbedURL
 312 //________
 313 // Returns a URL for an embedded version of the current book
 314 br.getEmbedURL = function() {
 315     // We could generate a URL hash fragment here but for now we just leave at defaults
 316     var url = 'http://' + window.location.host + '/stream/'+this.bookId;
 317     if (this.subPrefix != this.bookId) { // Only include if needed
 318         url += '/' + this.subPrefix;
 319     }
 320     url += '?ui=embed';
 321     return url;
 322 }
 323
 324 // getEmbedCode
 325 //________
 326 // Returns the embed code HTML fragment suitable for copy and paste
 327 br.getEmbedCode = function() {
 328     return "<iframe src='" + this.getEmbedURL() + "' width='480px' height='430px'></iframe>";
 329 }
 330
 331 // getOpenLibraryRecord
 332 br.getOpenLibraryRecord = function(callback) {
 333     // Try looking up by ocaid first, then by source_record
 334
 335     var jsonURL = 'http://openlibrary.org/query.json?type=/type/edition&*=&ocaid=' + br.bookId;
 336     $.ajax({
 337         url: jsonURL,
 338         success: function(data) {
 339             if (data && data.length > 0) {
 340                 callback(br, data[0]);
 341             } else {
 342                 // try sourceid
 343                 jsonURL = 'http://openlibrary.org/query.json?type=/type/edition&*=&source_records=ia:' + br.bookId;
 344                 $.ajax({
 345                     url: jsonURL,
 346                     success: function(data) {
 347                         if (data && data.length > 0) {
 348                             callback(br, data[0]);
 349                         }
 350                     },
 351                     dataType: 'jsonp'
 352                 });
 353             }
 354         },
 355         dataType: 'jsonp'
 356     });
 357 }
 358
 359 br.pageW =  [
 360             <?
 361             $i=0;
 362             foreach ($scanData->pageData->page as $page) {
 363                 if (shouldAddPage($page)) {
 364                     if(0 != $i) echo ",";   //stupid IE
 365                     echo "{$page->cropBox->w}";
 366                     $i++;
 367                 }
 368             }
 369             ?>
 370             ];
 371
 372 br.pageH =  [
 373             <?
 374             $totalHeight = 0;
 375             $i=0;
 376             foreach ($scanData->pageData->page as $page) {
 377                 if (shouldAddPage($page)) {
 378                     if(0 != $i) echo ",";   //stupid IE
 379                     echo "{$page->cropBox->h}";
 380                     $totalHeight += intval($page->cropBox->h/4) + 10;
 381                     $i++;
 382                 }
 383             }
 384             ?>
 385             ];
 386 br.leafMap = [
 387             <?
 388             $i=0;
 389             foreach ($scanData->pageData->page as $page) {
 390                 if (shouldAddPage($page)) {
 391                     if(0 != $i) echo ",";   //stupid IE
 392                     echo "{$page['leafNum']}";
 393                     $i++;
 394                 }
 395             }
 396             ?>
 397             ];
 398
 399 br.pageNums = [
 400             <?
 401             $i=0;
 402             foreach ($scanData->pageData->page as $page) {
 403                 if (shouldAddPage($page)) {
 404                     if(0 != $i) echo ",";   //stupid IE
 405                     if (array_key_exists('pageNumber', $page) && ('' != $page->pageNumber)) {
 406                         echo "'{$page->pageNumber}'";
 407                     } else {
 408                         echo "null";
 409                     }
 410                     $i++;
 411                 }
 412             }
 413             ?>
 414             ];
 415
 416
 417 br.numLeafs = br.pageW.length;
 418
 419 br.bookId   = '<?echo $id;?>';
 420 br.zip      = '<?echo $imageStackFile;?>';
 421 br.subPrefix = '<?echo $subPrefix;?>';
 422 br.server   = '<?echo $server;?>';
 423 br.bookTitle= '<?echo preg_replace("/\'/", "\\'", $metaData->title);?>';
 424 br.bookPath = '<?echo $subItemPath;?>';
 425 br.bookUrl  = '<?echo "http://www.archive.org/details/$id";?>';
 426 br.imageFormat = '<?echo $imageFormat;?>';
 427 br.archiveFormat = '<?echo $archiveFormat;?>';
 428
 429 <?
 430
 431 # Load some values from meta.xml
 432 if ('' != $metaData->{'page-progression'}) {
 433   echo "br.pageProgression = '" . $metaData->{"page-progression"} . "';";
 434 } else {
 435   // Assume page progression is Left To Right
 436   echo "br.pageProgression = 'lr';";
 437 }
 438
 439 # Special cases
 440 if ('bandersnatchhsye00scarrich' == $id) {
 441     echo "br.mode     = 2;\n";
 442     echo "br.auto     = true;\n";
 443 }
 444
 445 ?>
 446
 447 // Check for config object
 448 // $$$ change this to use the newer params object
 449 if (typeof(brConfig) != 'undefined') {
 450     if (typeof(brConfig["ui"]) != 'undefined') {
 451         br.ui = brConfig["ui"];
 452     }
 453
 454     if (brConfig['mode'] == 1) {
 455         br.mode = 1;
 456         if (typeof(brConfig['reduce'] != 'undefined')) {
 457             br.reduce = brConfig['reduce'];
 458         }
 459     } else if (brConfig['mode'] == 2) {
 460         br.mode = 2;
 461
 462 <?
 463         //$$$mang hack to override request for 2up for books with attribution page
 464         //   as first page until we can display that page in 2up
 465         $needle = 'goog';
 466         if (strrpos($id, $needle) === strlen($id)-strlen($needle)) {
 467             print "// override for books with attribution page\n";
 468             print "br.mode = 1;\n";
 469         }
 470 ?>
 471     }
 472 } // brConfig
 473
 474 br.cleanupMetadata();
 475 br.init();
 476
 477 <?
 478
 479
 480 function BRFatal($string) {
 481     // $$$ TODO log error
 482     echo "alert('$string')\n";
 483     die(-1);
 484 }
 485
 486 // Returns true if a page should be added based on it's information in
 487 // the metadata
 488 function shouldAddPage($page) {
 489     // Return false only if the page is marked addToAccessFormats false.
 490     // If there is no assertion we assume it should be added.
 491     if (isset($page->addToAccessFormats)) {
 492         if ("false" == strtolower(trim($page->addToAccessFormats))) {
 493             return false;
 494         }
 495     }
 496
 497     return true;
 498 }
 499
 500 // Returns { 'imageFormat' => , 'archiveFormat' => '} given a sub-item prefix and loaded xml data
 501 function findImageStack($subPrefix, $filesData) {
 502
 503     // $$$ The order of the image formats determines which will be returned first
 504     $imageFormats = array('JP2' => 'jp2', 'TIFF' => 'tif', 'JPEG' => 'jpg');
 505     $archiveFormats = array('ZIP' => 'zip', 'Tar' => 'tar');
 506     $imageGroup = implode('|', array_keys($imageFormats));
 507     $archiveGroup = implode('|', array_keys($archiveFormats));
 508     // $$$ Currently only return processed images
 509     $imageStackRegex = "/Single Page (Processed) (${imageGroup}) (${archiveGroup})/";
 510
 511     foreach ($filesData->file as $file) {
 512         if (strpos($file['name'], $subPrefix) === 0) { // subprefix matches beginning
 513             if (preg_match($imageStackRegex, $file->format, $matches)) {
 514
 515                 // Make sure we have a regular image stack
 516                 $imageFormat = $imageFormats[$matches[2]];
 517                 if (strpos($file['name'], $subPrefix . '_' . $imageFormat) === 0) {
 518                     return array('imageFormat' => $imageFormat,
 519                                  'archiveFormat' => $archiveFormats[$matches[3]],
 520                                  'imageStackFile' => $file['name']);
 521                 }
 522             }
 523         }
 524     }
 525
 526     return array('imageFormat' => 'unknown', 'archiveFormat' => 'unknown', 'imageStackFile' => 'unknown');
 527
 528 }
 529
 530 ?>