BookReaderIA/datanode/BookReaderJSIA.php

   1 <?
   2 /*
   3 Copyright(c)2008 Internet Archive. Software license AGPL version 3.
   4
   5 This file is part of BookReader.
   6
   7     BookReader is free software: you can redistribute it and/or modify
   8     it under the terms of the GNU Affero General Public License as published by
   9     the Free Software Foundation, either version 3 of the License, or
  10     (at your option) any later version.
  11
  12     BookReader is distributed in the hope that it will be useful,
  13     but WITHOUT ANY WARRANTY; without even the implied warranty of
  14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15     GNU Affero General Public License for more details.
  16
  17     You should have received a copy of the GNU Affero General Public License
  18     along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 header('Content-Type: application/javascript');
  22
  23 $id = $_REQUEST['id'];
  24 $itemPath = $_REQUEST['itemPath'];
  25 $subPrefix = $_REQUEST['subPrefix'];
  26 $server = $_REQUEST['server'];
  27
  28 // $$$mang this code has been refactored into BookReaderMeta.inc.php for use e.g. by
  29 //         BookReaderPreview.php and BookReaderImages.php.  The code below should be
  30 //         taken out and replaced by calls into BookReaderMeta
  31
  32 // Check if we're on a dev vhost and point to JSIA in the user's public_html on the datanode
  33
  34 // $$$ TODO consolidate this logic
  35 if (strpos($_SERVER["REQUEST_URI"], "/~mang") === 0) { // Serving out of home dir
  36     $server .= ':80/~mang';
  37 } else if (strpos($_SERVER["REQUEST_URI"], "/~rkumar") === 0) { // Serving out of home dir
  38     $server .= ':80/~rkumar';
  39 } else if (strpos($_SERVER["REQUEST_URI"], "/~testflip") === 0) { // Serving out of home dir
  40     $server .= ':80/~testflip';
  41 } else if (strpos($_SERVER["REQUEST_URI"], "/~mccabe") === 0) { // Serving out of home dir
  42     $server .= ':80/~mccabe';
  43 }
  44
  45 if (! $subPrefix) {
  46     $subPrefix = $id;
  47 }
  48 $subItemPath = $itemPath . '/' . $subPrefix;
  49
  50 if ("" == $id) {
  51     BRFatal("No identifier specified!");
  52 }
  53
  54 if ("" == $itemPath) {
  55     BRFatal("No itemPath specified!");
  56 }
  57
  58 if ("" == $server) {
  59     BRFatal("No server specified!");
  60 }
  61
  62 if (!preg_match("|^/\d+/items/{$id}$|", $itemPath)) {
  63     BRFatal("Bad id!");
  64 }
  65
  66 // XXX check here that subitem is okay
  67
  68 $filesDataFile = "$itemPath/${id}_files.xml";
  69
  70 if (file_exists($filesDataFile)) {
  71     $filesData = simplexml_load_file("$itemPath/${id}_files.xml");
  72 } else {
  73     BRfatal("File metadata not found!");
  74 }
  75
  76 $imageStackInfo = findImageStack($subPrefix, $filesData);
  77 if ($imageStackInfo['imageFormat'] == 'unknown') {
  78     BRfatal('Couldn\'t find image stack');
  79 }
  80
  81 $imageFormat = $imageStackInfo['imageFormat'];
  82 $archiveFormat = $imageStackInfo['archiveFormat'];
  83 $imageStackFile = $itemPath . "/" . $imageStackInfo['imageStackFile'];
  84
  85 if ("unknown" == $imageFormat) {
  86   BRfatal("Unknown image format");
  87 }
  88
  89 if ("unknown" == $archiveFormat) {
  90   BRfatal("Unknown archive format");
  91 }
  92
  93
  94 $scanDataFile = "${subItemPath}_scandata.xml";
  95 $scanDataZip  = "$itemPath/scandata.zip";
  96 if (file_exists($scanDataFile)) {
  97     $scanData = simplexml_load_file($scanDataFile);
  98 } else if (file_exists($scanDataZip)) {
  99     $cmd  = 'unzip -p ' . escapeshellarg($scanDataZip) . ' scandata.xml';
 100     exec($cmd, $output, $retval);
 101     if ($retval != 0) BRFatal("Could not unzip ScanData!");
 102
 103     $dump = join("\n", $output);
 104     $scanData = simplexml_load_string($dump);
 105 } else if (file_exists("$itemPath/scandata.xml")) {
 106     // For e.g. Scribe v.0 books!
 107     $scanData = simplexml_load_file("$itemPath/scandata.xml");
 108 } else {
 109     BRFatal("ScanData file not found!");
 110 }
 111
 112 $metaDataFile = "$itemPath/{$id}_meta.xml";
 113 if (!file_exists($metaDataFile)) {
 114     BRFatal("MetaData file not found!");
 115 }
 116
 117
 118 $metaData = simplexml_load_file($metaDataFile);
 119
 120 //$firstLeaf = $scanData->pageData->page[0]['leafNum'];
 121 ?>
 122
 123 br = new BookReader();
 124
 125 <?
 126 /* Output title leaf if marked */
 127 $titleLeaf = '';
 128 foreach ($scanData->pageData->page as $page) {
 129     if (("Title Page" == $page->pageType) || ("Title" == $page->pageType)) {
 130         $titleLeaf = "{$page['leafNum']}";
 131         break;
 132     }
 133 }
 134
 135 if ('' != $titleLeaf) {
 136     printf("br.titleLeaf = %d;\n", $titleLeaf);
 137 }
 138 ?>
 139
 140 br.getPageWidth = function(index) {
 141     return this.pageW[index];
 142 }
 143
 144 br.getPageHeight = function(index) {
 145     return this.pageH[index];
 146 }
 147
 148 // Returns true if page image is available rotated
 149 br.canRotatePage = function(index) {
 150     return 'jp2' == this.imageFormat; // Assume single format for now
 151 }
 152
 153 // reduce defaults to 1 (no reduction)
 154 // rotate defaults to 0 (no rotation)
 155 br.getPageURI = function(index, reduce, rotate) {
 156     var _reduce;
 157     var _rotate;
 158
 159     if ('undefined' == typeof(reduce)) {
 160         _reduce = 1;
 161     } else {
 162         _reduce = reduce;
 163     }
 164     if ('undefined' == typeof(rotate)) {
 165         _rotate = 0;
 166     } else {
 167         _rotate = rotate;
 168     }
 169
 170     var file = this._getPageFile(index);
 171
 172     // $$$ add more image stack formats here
 173     return 'http://'+this.server+'/BookReader/BookReaderImages.php?zip='+this.zip+'&file='+file+'&scale='+_reduce+'&rotate='+_rotate;
 174 }
 175
 176 br._getPageFile = function(index) {
 177     var leafStr = '0000';
 178     var imgStr = this.leafMap[index].toString();
 179     var re = new RegExp("0{"+imgStr.length+"}$");
 180
 181     var insideZipPrefix = this.subPrefix.match('[^/]+$');
 182     var file = insideZipPrefix + '_' + this.imageFormat + '/' + insideZipPrefix + '_' + leafStr.replace(re, imgStr) + '.' + this.imageFormat;
 183
 184     return file;
 185 }
 186
 187 br.getPageSide = function(index) {
 188     //assume the book starts with a cover (right-hand leaf)
 189     //we should really get handside from scandata.xml
 190
 191     <? // Use special function if we should infer the page sides based off the title page index
 192     if (preg_match('/goog$/', $id) && ('' != $titleLeaf)) {
 193     ?>
 194     // assume page side based on title pagex
 195     var titleIndex = br.leafNumToIndex(br.titleLeaf);
 196     // assume title page is RHS
 197     var delta = titleIndex - index;
 198     if (0 == (delta & 0x1)) {
 199         // even delta
 200         return 'R';
 201     } else {
 202         return 'L';
 203     }
 204     <?
 205     }
 206     ?>
 207
 208     // $$$ we should get this from scandata instead of assuming the accessible
 209     //     leafs are contiguous
 210     if ('rl' != this.pageProgression) {
 211         // If pageProgression is not set RTL we assume it is LTR
 212         if (0 == (index & 0x1)) {
 213             // Even-numbered page
 214             return 'R';
 215         } else {
 216             // Odd-numbered page
 217             return 'L';
 218         }
 219     } else {
 220         // RTL
 221         if (0 == (index & 0x1)) {
 222             return 'L';
 223         } else {
 224             return 'R';
 225         }
 226     }
 227 }
 228
 229 br.getPageNum = function(index) {
 230     var pageNum = this.pageNums[index];
 231     if (pageNum) {
 232         return pageNum;
 233     } else {
 234         return 'n' + index;
 235     }
 236 }
 237
 238 // Single images in the Internet Archive scandata.xml metadata are (somewhat incorrectly)
 239 // given a "leaf" number.  Some of these images from the scanning process should not
 240 // be displayed in the BookReader (for example colour calibration cards).  Since some
 241 // of the scanned images will not be displayed in the BookReader (those marked with
 242 // addToAccessFormats false in the scandata.xml) leaf numbers and BookReader page
 243 // indexes are generally not the same.  This function returns the BookReader page
 244 // index given a scanned leaf number.
 245 //
 246 // This function is used, for example, to map between search results (that use the
 247 // leaf numbers) and the displayed pages in the BookReader.
 248 br.leafNumToIndex = function(leafNum) {
 249     for (var index = 0; index < this.leafMap.length; index++) {
 250         if (this.leafMap[index] == leafNum) {
 251             return index;
 252         }
 253     }
 254
 255     return null;
 256 }
 257
 258 // This function returns the left and right indices for the user-visible
 259 // spread that contains the given index.  The return values may be
 260 // null if there is no facing page or the index is invalid.
 261 br.getSpreadIndices = function(pindex) {
 262     // $$$ we could make a separate function for the RTL case and
 263     //      only bind it if necessary instead of always checking
 264     // $$$ we currently assume there are no gaps
 265
 266     var spreadIndices = [null, null];
 267     if ('rl' == this.pageProgression) {
 268         // Right to Left
 269         if (this.getPageSide(pindex) == 'R') {
 270             spreadIndices[1] = pindex;
 271             spreadIndices[0] = pindex + 1;
 272         } else {
 273             // Given index was LHS
 274             spreadIndices[0] = pindex;
 275             spreadIndices[1] = pindex - 1;
 276         }
 277     } else {
 278         // Left to right
 279         if (this.getPageSide(pindex) == 'L') {
 280             spreadIndices[0] = pindex;
 281             spreadIndices[1] = pindex + 1;
 282         } else {
 283             // Given index was RHS
 284             spreadIndices[1] = pindex;
 285             spreadIndices[0] = pindex - 1;
 286         }
 287     }
 288
 289     //console.log("   index %d mapped to spread %d,%d", pindex, spreadIndices[0], spreadIndices[1]);
 290
 291     return spreadIndices;
 292 }
 293
 294 // Remove the page number assertions for all but the highest index page with
 295 // a given assertion.  Ensures there is only a single page "{pagenum}"
 296 // e.g. the last page asserted as page 5 retains that assertion.
 297 br.uniquifyPageNums = function() {
 298     var seen = {};
 299
 300     for (var i = br.pageNums.length - 1; i--; i >= 0) {
 301         var pageNum = br.pageNums[i];
 302         if ( !seen[pageNum] ) {
 303             seen[pageNum] = true;
 304         } else {
 305             br.pageNums[i] = null;
 306         }
 307     }
 308
 309 }
 310
 311 br.cleanupMetadata = function() {
 312     br.uniquifyPageNums();
 313 }
 314
 315 // getEmbedURL
 316 //________
 317 // Returns a URL for an embedded version of the current book
 318 br.getEmbedURL = function() {
 319     // We could generate a URL hash fragment here but for now we just leave at defaults
 320     var url = 'http://' + window.location.host + '/stream/'+this.bookId;
 321     if (this.subPrefix != this.bookId) { // Only include if needed
 322         url += '/' + this.subPrefix;
 323     }
 324     url += '?ui=embed';
 325     return url;
 326 }
 327
 328 // getEmbedCode
 329 //________
 330 // Returns the embed code HTML fragment suitable for copy and paste
 331 br.getEmbedCode = function() {
 332     return "<iframe src='" + this.getEmbedURL() + "' width='480px' height='430px'></iframe>";
 333 }
 334
 335 br.pageW =  [
 336             <?
 337             $i=0;
 338             foreach ($scanData->pageData->page as $page) {
 339                 if (shouldAddPage($page)) {
 340                     if(0 != $i) echo ",";   //stupid IE
 341                     echo "{$page->cropBox->w}";
 342                     $i++;
 343                 }
 344             }
 345             ?>
 346             ];
 347
 348 br.pageH =  [
 349             <?
 350             $totalHeight = 0;
 351             $i=0;
 352             foreach ($scanData->pageData->page as $page) {
 353                 if (shouldAddPage($page)) {
 354                     if(0 != $i) echo ",";   //stupid IE
 355                     echo "{$page->cropBox->h}";
 356                     $totalHeight += intval($page->cropBox->h/4) + 10;
 357                     $i++;
 358                 }
 359             }
 360             ?>
 361             ];
 362 br.leafMap = [
 363             <?
 364             $i=0;
 365             foreach ($scanData->pageData->page as $page) {
 366                 if (shouldAddPage($page)) {
 367                     if(0 != $i) echo ",";   //stupid IE
 368                     echo "{$page['leafNum']}";
 369                     $i++;
 370                 }
 371             }
 372             ?>
 373             ];
 374
 375 br.pageNums = [
 376             <?
 377             $i=0;
 378             foreach ($scanData->pageData->page as $page) {
 379                 if (shouldAddPage($page)) {
 380                     if(0 != $i) echo ",";   //stupid IE
 381                     if (array_key_exists('pageNumber', $page) && ('' != $page->pageNumber)) {
 382                         echo "'{$page->pageNumber}'";
 383                     } else {
 384                         echo "null";
 385                     }
 386                     $i++;
 387                 }
 388             }
 389             ?>
 390             ];
 391
 392
 393 br.numLeafs = br.pageW.length;
 394
 395 br.bookId   = '<?echo $id;?>';
 396 br.zip      = '<?echo $imageStackFile;?>';
 397 br.subPrefix = '<?echo $subPrefix;?>';
 398 br.server   = '<?echo $server;?>';
 399 br.bookTitle= '<?echo preg_replace("/\'/", "\\'", $metaData->title);?>';
 400 br.bookPath = '<?echo $subItemPath;?>';
 401 br.bookUrl  = '<?echo "http://www.archive.org/details/$id";?>';
 402 br.imageFormat = '<?echo $imageFormat;?>';
 403 br.archiveFormat = '<?echo $archiveFormat;?>';
 404
 405 <?
 406
 407 # Load some values from meta.xml
 408 if ('' != $metaData->{'page-progression'}) {
 409   echo "br.pageProgression = '" . $metaData->{"page-progression"} . "';";
 410 } else {
 411   // Assume page progression is Left To Right
 412   echo "br.pageProgression = 'lr';";
 413 }
 414
 415 # Special cases
 416 if ('bandersnatchhsye00scarrich' == $id) {
 417     echo "br.mode     = 2;\n";
 418     echo "br.auto     = true;\n";
 419 }
 420
 421 ?>
 422
 423 // Check for config object
 424 // $$$ change this to use the newer params object
 425 if (typeof(brConfig) != 'undefined') {
 426     if (typeof(brConfig["ui"]) != 'undefined') {
 427         br.ui = brConfig["ui"];
 428     }
 429
 430     if (brConfig['mode'] == 1) {
 431         br.mode = 1;
 432         if (typeof(brConfig['reduce'] != 'undefined')) {
 433             br.reduce = brConfig['reduce'];
 434         }
 435     } else if (brConfig['mode'] == 2) {
 436         br.mode = 2;
 437
 438 <?
 439         //$$$mang hack to override request for 2up for books with attribution page
 440         //   as first page until we can display that page in 2up
 441         $needle = 'goog';
 442         if (strrpos($id, $needle) === strlen($id)-strlen($needle)) {
 443             print "// override for books with attribution page\n";
 444             print "br.mode = 1;\n";
 445         }
 446 ?>
 447     }
 448 } // brConfig
 449
 450 br.cleanupMetadata();
 451 br.init();
 452
 453 <?
 454
 455
 456 function BRFatal($string) {
 457     // $$$ TODO log error
 458     echo "alert('$string')\n";
 459     die(-1);
 460 }
 461
 462 // Returns true if a page should be added based on it's information in
 463 // the metadata
 464 function shouldAddPage($page) {
 465     // Return false only if the page is marked addToAccessFormats false.
 466     // If there is no assertion we assume it should be added.
 467     if (isset($page->addToAccessFormats)) {
 468         if ("false" == strtolower(trim($page->addToAccessFormats))) {
 469             return false;
 470         }
 471     }
 472
 473     return true;
 474 }
 475
 476 // Returns { 'imageFormat' => , 'archiveFormat' => '} given a sub-item prefix and loaded xml data
 477 function findImageStack($subPrefix, $filesData) {
 478
 479     // $$$ The order of the image formats determines which will be returned first
 480     $imageFormats = array('JP2' => 'jp2', 'TIFF' => 'tif', 'JPEG' => 'jpg');
 481     $archiveFormats = array('ZIP' => 'zip', 'Tar' => 'tar');
 482     $imageGroup = implode('|', array_keys($imageFormats));
 483     $archiveGroup = implode('|', array_keys($archiveFormats));
 484     // $$$ Currently only return processed images
 485     $imageStackRegex = "/Single Page (Processed) (${imageGroup}) (${archiveGroup})/";
 486
 487     foreach ($filesData->file as $file) {
 488         if (strpos($file['name'], $subPrefix) === 0) { // subprefix matches beginning
 489             if (preg_match($imageStackRegex, $file->format, $matches)) {
 490
 491                 // Make sure we have a regular image stack
 492                 $imageFormat = $imageFormats[$matches[2]];
 493                 if (strpos($file['name'], $subPrefix . '_' . $imageFormat) === 0) {
 494                     return array('imageFormat' => $imageFormat,
 495                                  'archiveFormat' => $archiveFormats[$matches[3]],
 496                                  'imageStackFile' => $file['name']);
 497                 }
 498             }
 499         }
 500     }
 501
 502     return array('imageFormat' => 'unknown', 'archiveFormat' => 'unknown', 'imageStackFile' => 'unknown');
 503
 504 }
 505
 506 ?>