BookReaderIA/datanode/BookReaderJSIA.php

   1 <?
   2 /*
   3 Copyright(c)2008 Internet Archive. Software license AGPL version 3.
   4
   5 This file is part of BookReader.
   6
   7     BookReader is free software: you can redistribute it and/or modify
   8     it under the terms of the GNU Affero General Public License as published by
   9     the Free Software Foundation, either version 3 of the License, or
  10     (at your option) any later version.
  11
  12     BookReader is distributed in the hope that it will be useful,
  13     but WITHOUT ANY WARRANTY; without even the implied warranty of
  14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15     GNU Affero General Public License for more details.
  16
  17     You should have received a copy of the GNU Affero General Public License
  18     along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 header('Content-Type: application/javascript');
  22
  23 $id = $_REQUEST['id'];
  24 $itemPath = $_REQUEST['itemPath'];
  25 $subPrefix = $_REQUEST['subPrefix'];
  26 $server = $_REQUEST['server'];
  27
  28 // $$$mang this code has been refactored into BookReaderMeta.inc.php for use e.g. by
  29 //         BookReaderPreview.php and BookReaderImages.php.  The code below should be
  30 //         taken out and replaced by calls into BookReaderMeta
  31
  32 // Check if we're on a dev vhost and point to JSIA in the user's public_html on the datanode
  33
  34 // $$$ TODO consolidate this logic
  35 if (strpos($_SERVER["REQUEST_URI"], "/~mang") === 0) { // Serving out of home dir
  36     $server .= ':80/~mang';
  37 } else if (strpos($_SERVER["REQUEST_URI"], "/~rkumar") === 0) { // Serving out of home dir
  38     $server .= ':80/~rkumar';
  39 } else if (strpos($_SERVER["REQUEST_URI"], "/~testflip") === 0) { // Serving out of home dir
  40     $server .= ':80/~testflip';
  41 }
  42
  43 if (! $subPrefix) {
  44     $subPrefix = $id;
  45 }
  46 $subItemPath = $itemPath . '/' . $subPrefix;
  47
  48 if ("" == $id) {
  49     BRFatal("No identifier specified!");
  50 }
  51
  52 if ("" == $itemPath) {
  53     BRFatal("No itemPath specified!");
  54 }
  55
  56 if ("" == $server) {
  57     BRFatal("No server specified!");
  58 }
  59
  60 if (!preg_match("|^/\d+/items/{$id}$|", $itemPath)) {
  61     BRFatal("Bad id!");
  62 }
  63
  64 // XXX check here that subitem is okay
  65
  66 $filesDataFile = "$itemPath/${id}_files.xml";
  67
  68 if (file_exists($filesDataFile)) {
  69     $filesData = simplexml_load_file("$itemPath/${id}_files.xml");
  70 } else {
  71     BRfatal("File metadata not found!");
  72 }
  73
  74 $imageStackInfo = findImageStack($subPrefix, $filesData);
  75 if ($imageStackInfo['imageFormat'] == 'unknown') {
  76     BRfatal('Couldn\'t find image stack');
  77 }
  78
  79 $imageFormat = $imageStackInfo['imageFormat'];
  80 $archiveFormat = $imageStackInfo['archiveFormat'];
  81 $imageStackFile = $itemPath . "/" . $imageStackInfo['imageStackFile'];
  82
  83 if ("unknown" == $imageFormat) {
  84   BRfatal("Unknown image format");
  85 }
  86
  87 if ("unknown" == $archiveFormat) {
  88   BRfatal("Unknown archive format");
  89 }
  90
  91
  92 $scanDataFile = "${subItemPath}_scandata.xml";
  93 $scanDataZip  = "$itemPath/scandata.zip";
  94 if (file_exists($scanDataFile)) {
  95     $scanData = simplexml_load_file($scanDataFile);
  96 } else if (file_exists($scanDataZip)) {
  97     $cmd  = 'unzip -p ' . escapeshellarg($scanDataZip) . ' scandata.xml';
  98     exec($cmd, $output, $retval);
  99     if ($retval != 0) BRFatal("Could not unzip ScanData!");
 100
 101     $dump = join("\n", $output);
 102     $scanData = simplexml_load_string($dump);
 103 } else if (file_exists("$itemPath/scandata.xml")) {
 104     // For e.g. Scribe v.0 books!
 105     $scanData = simplexml_load_file("$itemPath/scandata.xml");
 106 } else {
 107     BRFatal("ScanData file not found!");
 108 }
 109
 110 $metaDataFile = "$itemPath/{$id}_meta.xml";
 111 if (!file_exists($metaDataFile)) {
 112     BRFatal("MetaData file not found!");
 113 }
 114
 115
 116 $metaData = simplexml_load_file($metaDataFile);
 117
 118 //$firstLeaf = $scanData->pageData->page[0]['leafNum'];
 119 ?>
 120
 121 br = new BookReader();
 122
 123 <?
 124 /* Output title leaf if marked */
 125 $titleLeaf = '';
 126 foreach ($scanData->pageData->page as $page) {
 127     if (("Title Page" == $page->pageType) || ("Title" == $page->pageType)) {
 128         $titleLeaf = "{$page['leafNum']}";
 129         break;
 130     }
 131 }
 132
 133 if ('' != $titleLeaf) {
 134     printf("br.titleLeaf = %d;\n", $titleLeaf);
 135 }
 136 ?>
 137
 138 br.getPageWidth = function(index) {
 139     return this.pageW[index];
 140 }
 141
 142 br.getPageHeight = function(index) {
 143     return this.pageH[index];
 144 }
 145
 146 // Returns true if page image is available rotated
 147 br.canRotatePage = function(index) {
 148     return 'jp2' == this.imageFormat; // Assume single format for now
 149 }
 150
 151 // reduce defaults to 1 (no reduction)
 152 // rotate defaults to 0 (no rotation)
 153 br.getPageURI = function(index, reduce, rotate) {
 154     var _reduce;
 155     var _rotate;
 156
 157     if ('undefined' == typeof(reduce)) {
 158         _reduce = 1;
 159     } else {
 160         _reduce = reduce;
 161     }
 162     if ('undefined' == typeof(rotate)) {
 163         _rotate = 0;
 164     } else {
 165         _rotate = rotate;
 166     }
 167
 168     var file = this._getPageFile(index);
 169
 170     // $$$ add more image stack formats here
 171     return 'http://'+this.server+'/BookReader/BookReaderImages.php?zip='+this.zip+'&file='+file+'&scale='+_reduce+'&rotate='+_rotate;
 172 }
 173
 174 br._getPageFile = function(index) {
 175     var leafStr = '0000';
 176     var imgStr = this.leafMap[index].toString();
 177     var re = new RegExp("0{"+imgStr.length+"}$");
 178
 179     var insideZipPrefix = this.subPrefix.match('[^/]+$');
 180     var file = insideZipPrefix + '_' + this.imageFormat + '/' + insideZipPrefix + '_' + leafStr.replace(re, imgStr) + '.' + this.imageFormat;
 181
 182     return file;
 183 }
 184
 185 br.getPageSide = function(index) {
 186     //assume the book starts with a cover (right-hand leaf)
 187     //we should really get handside from scandata.xml
 188
 189     <? // Use special function if we should infer the page sides based off the title page index
 190     if (preg_match('/goog$/', $id) && ('' != $titleLeaf)) {
 191     ?>
 192     // assume page side based on title pagex
 193     var titleIndex = br.leafNumToIndex(br.titleLeaf);
 194     // assume title page is RHS
 195     var delta = titleIndex - index;
 196     if (0 == (delta & 0x1)) {
 197         // even delta
 198         return 'R';
 199     } else {
 200         return 'L';
 201     }
 202     <?
 203     }
 204     ?>
 205
 206     // $$$ we should get this from scandata instead of assuming the accessible
 207     //     leafs are contiguous
 208     if ('rl' != this.pageProgression) {
 209         // If pageProgression is not set RTL we assume it is LTR
 210         if (0 == (index & 0x1)) {
 211             // Even-numbered page
 212             return 'R';
 213         } else {
 214             // Odd-numbered page
 215             return 'L';
 216         }
 217     } else {
 218         // RTL
 219         if (0 == (index & 0x1)) {
 220             return 'L';
 221         } else {
 222             return 'R';
 223         }
 224     }
 225 }
 226
 227 br.getPageNum = function(index) {
 228     var pageNum = this.pageNums[index];
 229     if (pageNum) {
 230         return pageNum;
 231     } else {
 232         return 'n' + index;
 233     }
 234 }
 235
 236 // Single images in the Internet Archive scandata.xml metadata are (somewhat incorrectly)
 237 // given a "leaf" number.  Some of these images from the scanning process should not
 238 // be displayed in the BookReader (for example colour calibration cards).  Since some
 239 // of the scanned images will not be displayed in the BookReader (those marked with
 240 // addToAccessFormats false in the scandata.xml) leaf numbers and BookReader page
 241 // indexes are generally not the same.  This function returns the BookReader page
 242 // index given a scanned leaf number.
 243 //
 244 // This function is used, for example, to map between search results (that use the
 245 // leaf numbers) and the displayed pages in the BookReader.
 246 br.leafNumToIndex = function(leafNum) {
 247     for (var index = 0; index < this.leafMap.length; index++) {
 248         if (this.leafMap[index] == leafNum) {
 249             return index;
 250         }
 251     }
 252
 253     return null;
 254 }
 255
 256 // This function returns the left and right indices for the user-visible
 257 // spread that contains the given index.  The return values may be
 258 // null if there is no facing page or the index is invalid.
 259 br.getSpreadIndices = function(pindex) {
 260     // $$$ we could make a separate function for the RTL case and
 261     //      only bind it if necessary instead of always checking
 262     // $$$ we currently assume there are no gaps
 263
 264     var spreadIndices = [null, null];
 265     if ('rl' == this.pageProgression) {
 266         // Right to Left
 267         if (this.getPageSide(pindex) == 'R') {
 268             spreadIndices[1] = pindex;
 269             spreadIndices[0] = pindex + 1;
 270         } else {
 271             // Given index was LHS
 272             spreadIndices[0] = pindex;
 273             spreadIndices[1] = pindex - 1;
 274         }
 275     } else {
 276         // Left to right
 277         if (this.getPageSide(pindex) == 'L') {
 278             spreadIndices[0] = pindex;
 279             spreadIndices[1] = pindex + 1;
 280         } else {
 281             // Given index was RHS
 282             spreadIndices[1] = pindex;
 283             spreadIndices[0] = pindex - 1;
 284         }
 285     }
 286
 287     //console.log("   index %d mapped to spread %d,%d", pindex, spreadIndices[0], spreadIndices[1]);
 288
 289     return spreadIndices;
 290 }
 291
 292 // Remove the page number assertions for all but the highest index page with
 293 // a given assertion.  Ensures there is only a single page "{pagenum}"
 294 // e.g. the last page asserted as page 5 retains that assertion.
 295 br.uniquifyPageNums = function() {
 296     var seen = {};
 297
 298     for (var i = br.pageNums.length - 1; i--; i >= 0) {
 299         var pageNum = br.pageNums[i];
 300         if ( !seen[pageNum] ) {
 301             seen[pageNum] = true;
 302         } else {
 303             br.pageNums[i] = null;
 304         }
 305     }
 306
 307 }
 308
 309 br.cleanupMetadata = function() {
 310     br.uniquifyPageNums();
 311 }
 312
 313 // getEmbedURL
 314 //________
 315 // Returns a URL for an embedded version of the current book
 316 br.getEmbedURL = function() {
 317     // We could generate a URL hash fragment here but for now we just leave at defaults
 318     var url = 'http://' + window.location.host + '/stream/'+this.bookId;
 319     if (this.subPrefix != this.bookId) { // Only include if needed
 320         url += '/' + this.subPrefix;
 321     }
 322     url += '?ui=embed';
 323     return url;
 324 }
 325
 326 // getEmbedCode
 327 //________
 328 // Returns the embed code HTML fragment suitable for copy and paste
 329 br.getEmbedCode = function() {
 330     return "<iframe src='" + this.getEmbedURL() + "' width='480px' height='430px'></iframe>";
 331 }
 332
 333 // getOpenLibraryRecord
 334 br.getOpenLibraryRecord = function(callback) {
 335     // Try looking up by ocaid first, then by source_record
 336
 337     var jsonURL = 'http://openlibrary.org/query.json?type=/type/edition&*=&ocaid=' + br.bookId;
 338     $.ajax({
 339         url: jsonURL,
 340         success: function(data) {
 341             if (data && data.length > 0) {
 342                 callback(br, data[0]);
 343             } else {
 344                 // try sourceid
 345                 jsonURL = 'http://openlibrary.org/query.json?type=/type/edition&*=&source_records=ia:' + br.bookId;
 346                 $.ajax({
 347                     url: jsonURL,
 348                     success: function(data) {
 349                         if (data && data.length > 0) {
 350                             callback(br, data[0]);
 351                         }
 352                     },
 353                     dataType: 'jsonp'
 354                 });
 355             }
 356         },
 357         dataType: 'jsonp'
 358     });
 359 }
 360
 361 br.pageW =  [
 362             <?
 363             $i=0;
 364             foreach ($scanData->pageData->page as $page) {
 365                 if (shouldAddPage($page)) {
 366                     if(0 != $i) echo ",";   //stupid IE
 367                     echo "{$page->cropBox->w}";
 368                     $i++;
 369                 }
 370             }
 371             ?>
 372             ];
 373
 374 br.pageH =  [
 375             <?
 376             $totalHeight = 0;
 377             $i=0;
 378             foreach ($scanData->pageData->page as $page) {
 379                 if (shouldAddPage($page)) {
 380                     if(0 != $i) echo ",";   //stupid IE
 381                     echo "{$page->cropBox->h}";
 382                     $totalHeight += intval($page->cropBox->h/4) + 10;
 383                     $i++;
 384                 }
 385             }
 386             ?>
 387             ];
 388 br.leafMap = [
 389             <?
 390             $i=0;
 391             foreach ($scanData->pageData->page as $page) {
 392                 if (shouldAddPage($page)) {
 393                     if(0 != $i) echo ",";   //stupid IE
 394                     echo "{$page['leafNum']}";
 395                     $i++;
 396                 }
 397             }
 398             ?>
 399             ];
 400
 401 br.pageNums = [
 402             <?
 403             $i=0;
 404             foreach ($scanData->pageData->page as $page) {
 405                 if (shouldAddPage($page)) {
 406                     if(0 != $i) echo ",";   //stupid IE
 407                     if (array_key_exists('pageNumber', $page) && ('' != $page->pageNumber)) {
 408                         echo "'{$page->pageNumber}'";
 409                     } else {
 410                         echo "null";
 411                     }
 412                     $i++;
 413                 }
 414             }
 415             ?>
 416             ];
 417
 418
 419 br.numLeafs = br.pageW.length;
 420
 421 br.bookId   = '<?echo $id;?>';
 422 br.zip      = '<?echo $imageStackFile;?>';
 423 br.subPrefix = '<?echo $subPrefix;?>';
 424 br.server   = '<?echo $server;?>';
 425 br.bookTitle= '<?echo preg_replace("/\'/", "\\'", $metaData->title);?>';
 426 br.bookPath = '<?echo $subItemPath;?>';
 427 br.bookUrl  = '<?echo "http://www.archive.org/details/$id";?>';
 428 br.imageFormat = '<?echo $imageFormat;?>';
 429 br.archiveFormat = '<?echo $archiveFormat;?>';
 430
 431 <?
 432
 433 # Load some values from meta.xml
 434 if ('' != $metaData->{'page-progression'}) {
 435   echo "br.pageProgression = '" . $metaData->{"page-progression"} . "';";
 436 } else {
 437   // Assume page progression is Left To Right
 438   echo "br.pageProgression = 'lr';";
 439 }
 440
 441 # Special cases
 442 if ('bandersnatchhsye00scarrich' == $id) {
 443     echo "br.mode     = 2;\n";
 444     echo "br.auto     = true;\n";
 445 }
 446
 447 ?>
 448
 449 // Check for config object
 450 // $$$ change this to use the newer params object
 451 if (typeof(brConfig) != 'undefined') {
 452     if (typeof(brConfig["ui"]) != 'undefined') {
 453         br.ui = brConfig["ui"];
 454     }
 455
 456     if (brConfig['mode'] == 1) {
 457         br.mode = 1;
 458         if (typeof(brConfig['reduce'] != 'undefined')) {
 459             br.reduce = brConfig['reduce'];
 460         }
 461     } else if (brConfig['mode'] == 2) {
 462         br.mode = 2;
 463
 464 <?
 465         //$$$mang hack to override request for 2up for books with attribution page
 466         //   as first page until we can display that page in 2up
 467         $needle = 'goog';
 468         if (strrpos($id, $needle) === strlen($id)-strlen($needle)) {
 469             print "// override for books with attribution page\n";
 470             print "br.mode = 1;\n";
 471         }
 472 ?>
 473     }
 474 } // brConfig
 475
 476 br.cleanupMetadata();
 477 br.init();
 478
 479 <?
 480
 481
 482 function BRFatal($string) {
 483     // $$$ TODO log error
 484     echo "alert('$string')\n";
 485     die(-1);
 486 }
 487
 488 // Returns true if a page should be added based on it's information in
 489 // the metadata
 490 function shouldAddPage($page) {
 491     // Return false only if the page is marked addToAccessFormats false.
 492     // If there is no assertion we assume it should be added.
 493     if (isset($page->addToAccessFormats)) {
 494         if ("false" == strtolower(trim($page->addToAccessFormats))) {
 495             return false;
 496         }
 497     }
 498
 499     return true;
 500 }
 501
 502 // Returns { 'imageFormat' => , 'archiveFormat' => '} given a sub-item prefix and loaded xml data
 503 function findImageStack($subPrefix, $filesData) {
 504
 505     // $$$ The order of the image formats determines which will be returned first
 506     $imageFormats = array('JP2' => 'jp2', 'TIFF' => 'tif', 'JPEG' => 'jpg');
 507     $archiveFormats = array('ZIP' => 'zip', 'Tar' => 'tar');
 508     $imageGroup = implode('|', array_keys($imageFormats));
 509     $archiveGroup = implode('|', array_keys($archiveFormats));
 510     // $$$ Currently only return processed images
 511     $imageStackRegex = "/Single Page (Processed) (${imageGroup}) (${archiveGroup})/";
 512
 513     foreach ($filesData->file as $file) {
 514         if (strpos($file['name'], $subPrefix) === 0) { // subprefix matches beginning
 515             if (preg_match($imageStackRegex, $file->format, $matches)) {
 516
 517                 // Make sure we have a regular image stack
 518                 $imageFormat = $imageFormats[$matches[2]];
 519                 if (strpos($file['name'], $subPrefix . '_' . $imageFormat) === 0) {
 520                     return array('imageFormat' => $imageFormat,
 521                                  'archiveFormat' => $archiveFormats[$matches[3]],
 522                                  'imageStackFile' => $file['name']);
 523                 }
 524             }
 525         }
 526     }
 527
 528     return array('imageFormat' => 'unknown', 'archiveFormat' => 'unknown', 'imageStackFile' => 'unknown');
 529
 530 }
 531
 532 ?>