BookReaderIA/datanode/BookReaderJSIA.php

   1 <?
   2 /*
   3 Copyright(c)2008 Internet Archive. Software license AGPL version 3.
   4
   5 This file is part of BookReader.
   6
   7     BookReader is free software: you can redistribute it and/or modify
   8     it under the terms of the GNU Affero General Public License as published by
   9     the Free Software Foundation, either version 3 of the License, or
  10     (at your option) any later version.
  11
  12     BookReader is distributed in the hope that it will be useful,
  13     but WITHOUT ANY WARRANTY; without even the implied warranty of
  14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15     GNU Affero General Public License for more details.
  16
  17     You should have received a copy of the GNU Affero General Public License
  18     along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 $id = $_REQUEST['id'];
  22 $itemPath = $_REQUEST['itemPath'];
  23 $subPrefix = $_REQUEST['subPrefix'];
  24 $server = $_REQUEST['server'];
  25
  26 // Check if we're on a dev vhost and point to JSIA in the user's public_html on the datanode
  27 // $$$ TODO consolidate this logic
  28 if (strpos($_SERVER["REQUEST_URI"], "/~mang") === 0) { // Serving out of home dir
  29     $server .= ':80/~mang';
  30 } else if (strpos($_SERVER["REQUEST_URI"], "/~testflip") === 0) { // Serving out of home dir
  31     $server .= ':80/~testflip';
  32 }
  33
  34 if (! $subPrefix) {
  35     $subPrefix = $id;
  36 }
  37 $subItemPath = $itemPath . '/' . $subPrefix;
  38
  39 if ("" == $id) {
  40     BRFatal("No identifier specified!");
  41 }
  42
  43 if ("" == $itemPath) {
  44     BRFatal("No itemPath specified!");
  45 }
  46
  47 if ("" == $server) {
  48     BRFatal("No server specified!");
  49 }
  50
  51 if (!preg_match("|^/\d+/items/{$id}$|", $itemPath)) {
  52     BRFatal("Bad id!");
  53 }
  54
  55 // XXX check here that subitem is okay
  56
  57 $filesDataFile = "$itemPath/${id}_files.xml";
  58
  59 if (file_exists($filesDataFile)) {
  60     $filesData = simplexml_load_file("$itemPath/${id}_files.xml");
  61 } else {
  62     BRfatal("File metadata not found!");
  63 }
  64
  65 $imageStackInfo = findImageStack($subPrefix, $filesData);
  66 if ($imageStackInfo['imageFormat'] == 'unknown') {
  67     BRfatal('Couldn\'t find image stack');
  68 }
  69
  70 $imageFormat = $imageStackInfo['imageFormat'];
  71 $archiveFormat = $imageStackInfo['archiveFormat'];
  72 $imageStackFile = $itemPath . "/" . $imageStackInfo['imageStackFile'];
  73
  74 if ("unknown" == $imageFormat) {
  75   BRfatal("Unknown image format");
  76 }
  77
  78 if ("unknown" == $archiveFormat) {
  79   BRfatal("Unknown archive format");
  80 }
  81
  82
  83 $scanDataFile = "${subItemPath}_scandata.xml";
  84 $scanDataZip  = "$itemPath/scandata.zip";
  85 if (file_exists($scanDataFile)) {
  86     $scanData = simplexml_load_file($scanDataFile);
  87 } else if (file_exists($scanDataZip)) {
  88     $cmd  = 'unzip -p ' . escapeshellarg($scanDataZip) . ' scandata.xml';
  89     exec($cmd, $output, $retval);
  90     if ($retval != 0) BRFatal("Could not unzip ScanData!");
  91
  92     $dump = join("\n", $output);
  93     $scanData = simplexml_load_string($dump);
  94 } else if (file_exists("$itemPath/scandata.xml")) {
  95     // For e.g. Scribe v.0 books!
  96     $scanData = simplexml_load_file("$itemPath/scandata.xml");
  97 } else {
  98     BRFatal("ScanData file not found!");
  99 }
 100
 101 $metaDataFile = "$itemPath/{$id}_meta.xml";
 102 if (!file_exists($metaDataFile)) {
 103     BRFatal("MetaData file not found!");
 104 }
 105
 106
 107 $metaData = simplexml_load_file($metaDataFile);
 108
 109 //$firstLeaf = $scanData->pageData->page[0]['leafNum'];
 110 ?>
 111
 112 br = new BookReader();
 113
 114 <?
 115 /* Output title leaf if marked */
 116 $titleLeaf = '';
 117 foreach ($scanData->pageData->page as $page) {
 118     if (("Title Page" == $page->pageType) || ("Title" == $page->pageType)) {
 119         $titleLeaf = "{$page['leafNum']}";
 120         break;
 121     }
 122 }
 123
 124 if ('' != $titleLeaf) {
 125     printf("br.titleLeaf = %d;\n", $titleLeaf);
 126 }
 127 ?>
 128
 129 br.getPageWidth = function(index) {
 130     return this.pageW[index];
 131 }
 132
 133 br.getPageHeight = function(index) {
 134     return this.pageH[index];
 135 }
 136
 137 // Returns true if page image is available rotated
 138 br.canRotatePage = function(index) {
 139     return 'jp2' == this.imageFormat; // Assume single format for now
 140 }
 141
 142 // reduce defaults to 1 (no reduction)
 143 // rotate defaults to 0 (no rotation)
 144 br.getPageURI = function(index, reduce, rotate) {
 145     var _reduce;
 146     var _rotate;
 147
 148     if ('undefined' == typeof(reduce)) {
 149         _reduce = 1;
 150     } else {
 151         _reduce = reduce;
 152     }
 153     if ('undefined' == typeof(rotate)) {
 154         _rotate = 0;
 155     } else {
 156         _rotate = rotate;
 157     }
 158
 159     var file = this._getPageFile(index);
 160
 161     // $$$ add more image stack formats here
 162     return 'http://'+this.server+'/BookReader/BookReaderImages.php?zip='+this.zip+'&file='+file+'&scale='+_reduce+'&rotate='+_rotate;
 163 }
 164
 165 br._getPageFile = function(index) {
 166     var leafStr = '0000';
 167     var imgStr = this.leafMap[index].toString();
 168     var re = new RegExp("0{"+imgStr.length+"}$");
 169
 170     var insideZipPrefix = this.subPrefix.match('[^/]+$');
 171     var file = insideZipPrefix + '_' + this.imageFormat + '/' + insideZipPrefix + '_' + leafStr.replace(re, imgStr) + '.' + this.imageFormat;
 172
 173     return file;
 174 }
 175
 176 br.getPageSide = function(index) {
 177     //assume the book starts with a cover (right-hand leaf)
 178     //we should really get handside from scandata.xml
 179
 180     <? // Use special function if we should infer the page sides based off the title page index
 181     if (preg_match('/goog$/', $id) && ('' != $titleLeaf)) {
 182     ?>
 183     // assume page side based on title pagex
 184     var titleIndex = br.leafNumToIndex(br.titleLeaf);
 185     // assume title page is RHS
 186     var delta = titleIndex - index;
 187     if (0 == (delta & 0x1)) {
 188         // even delta
 189         return 'R';
 190     } else {
 191         return 'L';
 192     }
 193     <?
 194     }
 195     ?>
 196
 197     // $$$ we should get this from scandata instead of assuming the accessible
 198     //     leafs are contiguous
 199     if ('rl' != this.pageProgression) {
 200         // If pageProgression is not set RTL we assume it is LTR
 201         if (0 == (index & 0x1)) {
 202             // Even-numbered page
 203             return 'R';
 204         } else {
 205             // Odd-numbered page
 206             return 'L';
 207         }
 208     } else {
 209         // RTL
 210         if (0 == (index & 0x1)) {
 211             return 'L';
 212         } else {
 213             return 'R';
 214         }
 215     }
 216 }
 217
 218 br.getPageNum = function(index) {
 219     var pageNum = this.pageNums[index];
 220     if (pageNum) {
 221         return pageNum;
 222     } else {
 223         return 'n' + index;
 224     }
 225 }
 226
 227 // Single images in the Internet Archive scandata.xml metadata are (somewhat incorrectly)
 228 // given a "leaf" number.  Some of these images from the scanning process should not
 229 // be displayed in the BookReader (for example colour calibration cards).  Since some
 230 // of the scanned images will not be displayed in the BookReader (those marked with
 231 // addToAccessFormats false in the scandata.xml) leaf numbers and BookReader page
 232 // indexes are generally not the same.  This function returns the BookReader page
 233 // index given a scanned leaf number.
 234 //
 235 // This function is used, for example, to map between search results (that use the
 236 // leaf numbers) and the displayed pages in the BookReader.
 237 br.leafNumToIndex = function(leafNum) {
 238     for (var index = 0; index < this.leafMap.length; index++) {
 239         if (this.leafMap[index] == leafNum) {
 240             return index;
 241         }
 242     }
 243
 244     return null;
 245 }
 246
 247 // This function returns the left and right indices for the user-visible
 248 // spread that contains the given index.  The return values may be
 249 // null if there is no facing page or the index is invalid.
 250 br.getSpreadIndices = function(pindex) {
 251     // $$$ we could make a separate function for the RTL case and
 252     //      only bind it if necessary instead of always checking
 253     // $$$ we currently assume there are no gaps
 254
 255     var spreadIndices = [null, null];
 256     if ('rl' == this.pageProgression) {
 257         // Right to Left
 258         if (this.getPageSide(pindex) == 'R') {
 259             spreadIndices[1] = pindex;
 260             spreadIndices[0] = pindex + 1;
 261         } else {
 262             // Given index was LHS
 263             spreadIndices[0] = pindex;
 264             spreadIndices[1] = pindex - 1;
 265         }
 266     } else {
 267         // Left to right
 268         if (this.getPageSide(pindex) == 'L') {
 269             spreadIndices[0] = pindex;
 270             spreadIndices[1] = pindex + 1;
 271         } else {
 272             // Given index was RHS
 273             spreadIndices[1] = pindex;
 274             spreadIndices[0] = pindex - 1;
 275         }
 276     }
 277
 278     //console.log("   index %d mapped to spread %d,%d", pindex, spreadIndices[0], spreadIndices[1]);
 279
 280     return spreadIndices;
 281 }
 282
 283 // Remove the page number assertions for all but the highest index page with
 284 // a given assertion.  Ensures there is only a single page "{pagenum}"
 285 // e.g. the last page asserted as page 5 retains that assertion.
 286 br.uniquifyPageNums = function() {
 287     var seen = {};
 288
 289     for (var i = br.pageNums.length - 1; i--; i >= 0) {
 290         var pageNum = br.pageNums[i];
 291         if ( !seen[pageNum] ) {
 292             seen[pageNum] = true;
 293         } else {
 294             br.pageNums[i] = null;
 295         }
 296     }
 297
 298 }
 299
 300 br.cleanupMetadata = function() {
 301     br.uniquifyPageNums();
 302 }
 303
 304 // getEmbedURL
 305 //________
 306 // Returns a URL for an embedded version of the current book
 307 br.getEmbedURL = function() {
 308     // We could generate a URL hash fragment here but for now we just leave at defaults
 309     var url = 'http://' + window.location.host + '/stream/'+this.bookId;
 310     if (this.subPrefix != this.bookId) { // Only include if needed
 311         url += '/' + this.subPrefix;
 312     }
 313     url += '?ui=embed';
 314     return url;
 315 }
 316
 317 // getEmbedCode
 318 //________
 319 // Returns the embed code HTML fragment suitable for copy and paste
 320 br.getEmbedCode = function() {
 321     return "<iframe src='" + this.getEmbedURL() + "' width='480px' height='430px'></iframe>";
 322 }
 323
 324 br.pageW =  [
 325             <?
 326             $i=0;
 327             foreach ($scanData->pageData->page as $page) {
 328                 if (shouldAddPage($page)) {
 329                     if(0 != $i) echo ",";   //stupid IE
 330                     echo "{$page->cropBox->w}";
 331                     $i++;
 332                 }
 333             }
 334             ?>
 335             ];
 336
 337 br.pageH =  [
 338             <?
 339             $totalHeight = 0;
 340             $i=0;
 341             foreach ($scanData->pageData->page as $page) {
 342                 if (shouldAddPage($page)) {
 343                     if(0 != $i) echo ",";   //stupid IE
 344                     echo "{$page->cropBox->h}";
 345                     $totalHeight += intval($page->cropBox->h/4) + 10;
 346                     $i++;
 347                 }
 348             }
 349             ?>
 350             ];
 351 br.leafMap = [
 352             <?
 353             $i=0;
 354             foreach ($scanData->pageData->page as $page) {
 355                 if (shouldAddPage($page)) {
 356                     if(0 != $i) echo ",";   //stupid IE
 357                     echo "{$page['leafNum']}";
 358                     $i++;
 359                 }
 360             }
 361             ?>
 362             ];
 363
 364 br.pageNums = [
 365             <?
 366             $i=0;
 367             foreach ($scanData->pageData->page as $page) {
 368                 if (shouldAddPage($page)) {
 369                     if(0 != $i) echo ",";   //stupid IE
 370                     if (array_key_exists('pageNumber', $page) && ('' != $page->pageNumber)) {
 371                         echo "'{$page->pageNumber}'";
 372                     } else {
 373                         echo "null";
 374                     }
 375                     $i++;
 376                 }
 377             }
 378             ?>
 379             ];
 380
 381
 382 br.numLeafs = br.pageW.length;
 383
 384 br.bookId   = '<?echo $id;?>';
 385 br.zip      = '<?echo $imageStackFile;?>';
 386 br.subPrefix = '<?echo $subPrefix;?>';
 387 br.server   = '<?echo $server;?>';
 388 br.bookTitle= '<?echo preg_replace("/\'/", "\\'", $metaData->title);?>';
 389 br.bookPath = '<?echo $subItemPath;?>';
 390 br.bookUrl  = '<?echo "http://www.archive.org/details/$id";?>';
 391 br.imageFormat = '<?echo $imageFormat;?>';
 392 br.archiveFormat = '<?echo $archiveFormat;?>';
 393
 394 <?
 395
 396 # Load some values from meta.xml
 397 if ('' != $metaData->{'page-progression'}) {
 398   echo "br.pageProgression = '" . $metaData->{"page-progression"} . "';";
 399 } else {
 400   // Assume page progression is Left To Right
 401   echo "br.pageProgression = 'lr';";
 402 }
 403
 404 # Special cases
 405 if ('bandersnatchhsye00scarrich' == $id) {
 406     echo "br.mode     = 2;\n";
 407     echo "br.auto     = true;\n";
 408 }
 409
 410 ?>
 411
 412 // Check for config object
 413 // $$$ change this to use the newer params object
 414 if (typeof(brConfig) != 'undefined') {
 415     if (typeof(brConfig["ui"]) != 'undefined') {
 416         br.ui = brConfig["ui"];
 417     }
 418
 419     if (brConfig['mode'] == 1) {
 420         br.mode = 1;
 421         if (typeof(brConfig['reduce'] != 'undefined')) {
 422             br.reduce = brConfig['reduce'];
 423         }
 424     } else if (brConfig['mode'] == 2) {
 425         br.mode = 2;
 426
 427 <?
 428         //$$$mang hack to override request for 2up for books with attribution page
 429         //   as first page until we can display that page in 2up
 430         $needle = 'goog';
 431         if (strrpos($id, $needle) === strlen($id)-strlen($needle)) {
 432             print "// override for books with attribution page\n";
 433             print "br.mode = 1;\n";
 434         }
 435 ?>
 436     }
 437 } // brConfig
 438
 439 br.cleanupMetadata();
 440 br.init();
 441
 442 <?
 443
 444
 445 function BRFatal($string) {
 446     // $$$ TODO log error
 447     echo "alert('$string')\n";
 448     die(-1);
 449 }
 450
 451 // Returns true if a page should be added based on it's information in
 452 // the metadata
 453 function shouldAddPage($page) {
 454     // Return false only if the page is marked addToAccessFormats false.
 455     // If there is no assertion we assume it should be added.
 456     if (isset($page->addToAccessFormats)) {
 457         if ("false" == strtolower(trim($page->addToAccessFormats))) {
 458             return false;
 459         }
 460     }
 461
 462     return true;
 463 }
 464
 465 // Returns { 'imageFormat' => , 'archiveFormat' => '} given a sub-item prefix and loaded xml data
 466 function findImageStack($subPrefix, $filesData) {
 467
 468     // $$$ The order of the image formats determines which will be returned first
 469     $imageFormats = array('JP2' => 'jp2', 'TIFF' => 'tif', 'JPEG' => 'jpg');
 470     $archiveFormats = array('ZIP' => 'zip', 'Tar' => 'tar');
 471     $imageGroup = implode('|', array_keys($imageFormats));
 472     $archiveGroup = implode('|', array_keys($archiveFormats));
 473     // $$$ Currently only return processed images
 474     $imageStackRegex = "/Single Page (Processed) (${imageGroup}) (${archiveGroup})/";
 475
 476     foreach ($filesData->file as $file) {
 477         if (strpos($file['name'], $subPrefix) === 0) { // subprefix matches beginning
 478             if (preg_match($imageStackRegex, $file->format, $matches)) {
 479
 480                 // Make sure we have a regular image stack
 481                 $imageFormat = $imageFormats[$matches[2]];
 482                 if (strpos($file['name'], $subPrefix . '_' . $imageFormat) === 0) {
 483                     return array('imageFormat' => $imageFormat,
 484                                  'archiveFormat' => $archiveFormats[$matches[3]],
 485                                  'imageStackFile' => $file['name']);
 486                 }
 487             }
 488         }
 489     }
 490
 491     return array('imageFormat' => 'unknown', 'archiveFormat' => 'unknown', 'imageStackFile' => 'unknown');
 492
 493 }
 494
 495 ?>