GnuBookIA/datanode/GnuBookJSIA.php

   1 <?
   2 /*
   3 Copyright(c)2008 Internet Archive. Software license AGPL version 3.
   4
   5 This file is part of GnuBook.
   6
   7     GnuBook is free software: you can redistribute it and/or modify
   8     it under the terms of the GNU Affero General Public License as published by
   9     the Free Software Foundation, either version 3 of the License, or
  10     (at your option) any later version.
  11
  12     GnuBook is distributed in the hope that it will be useful,
  13     but WITHOUT ANY WARRANTY; without even the implied warranty of
  14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15     GNU Affero General Public License for more details.
  16
  17     You should have received a copy of the GNU Affero General Public License
  18     along with GnuBook.  If not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21 $id = $_REQUEST['id'];
  22 $itemPath = $_REQUEST['itemPath'];
  23 $subPrefix = $_REQUEST['subPrefix'];
  24 $server = $_REQUEST['server'];
  25
  26 // Check if we're on a dev vhost and point to JSIA in the user's public_html on the datanode
  27 // $$$ TODO consolidate this logic
  28 if (strpos($_SERVER["REQUEST_URI"], "/~mang") === 0) { // Serving out of home dir
  29     $server .= ':80/~mang';
  30 } else if (strpos($_SERVER["REQUEST_URI"], "/~testflip") === 0) { // Serving out of home dir
  31     $server .= ':80/~testflip';
  32 }
  33
  34 if ($subPrefix) {
  35     $subItemPath = $itemPath . '/' . $subPrefix;
  36 } else {
  37     $subItemPath = $itemPath . '/' . $id;
  38 }
  39
  40 if ("" == $id) {
  41     GBFatal("No identifier specified!");
  42 }
  43
  44 if ("" == $itemPath) {
  45     GBFatal("No itemPath specified!");
  46 }
  47
  48 if ("" == $server) {
  49     GBFatal("No server specified!");
  50 }
  51
  52 if (!preg_match("|^/[0-3]/items/{$id}$|", $itemPath)) {
  53     GBFatal("Bad id!");
  54 }
  55
  56 // XXX check here that subitem is okay
  57
  58 $imageFormat = 'unknown';
  59 $zipFile = "${subItemPath}_jp2.zip";
  60
  61 if (file_exists($zipFile)) {
  62     $imageFormat = 'jp2';
  63 } else {
  64   $zipFile = "${subItemPath}_tif.zip";
  65   if (file_exists($zipFile)) {
  66     $imageFormat = 'tif';
  67   }
  68 }
  69
  70 if ("unknown" == $imageFormat) {
  71   GBfatal("Unknown image format");
  72 }
  73
  74 $scanDataFile = "${subItemPath}_scandata.xml";
  75 $scanDataZip  = "$itemPath/scandata.zip";
  76 if (file_exists($scanDataFile)) {
  77     $scanData = simplexml_load_file($scanDataFile);
  78 } else if (file_exists($scanDataZip)) {
  79     $cmd  = 'unzip -p ' . escapeshellarg($scanDataZip) . ' scandata.xml';
  80     exec($cmd, $output, $retval);
  81     if ($retval != 0) GBFatal("Could not unzip ScanData!");
  82
  83     $dump = join("\n", $output);
  84     $scanData = simplexml_load_string($dump);
  85 } else if (file_exists("$itemPath/scandata.xml")) {
  86     // For e.g. Scribe v.0 books!
  87     $scanData = simplexml_load_file("$itemPath/scandata.xml");
  88 } else {
  89     GBFatal("ScanData file not found!");
  90 }
  91
  92 $metaDataFile = "$itemPath/{$id}_meta.xml";
  93 if (!file_exists($metaDataFile)) {
  94     GBFatal("MetaData file not found!");
  95 }
  96
  97
  98 $metaData = simplexml_load_file($metaDataFile);
  99
 100 //$firstLeaf = $scanData->pageData->page[0]['leafNum'];
 101 ?>
 102
 103 gb = new GnuBook();
 104
 105 <?
 106 /* Output title leaf if marked */
 107 $titleLeaf = '';
 108 foreach ($scanData->pageData->page as $page) {
 109     if (("Title Page" == $page->pageType) || ("Title" == $page->pageType)) {
 110         $titleLeaf = "{$page['leafNum']}";
 111         break;
 112     }
 113 }
 114
 115 if ('' != $titleLeaf) {
 116     printf("gb.titleLeaf = %d;\n", $titleLeaf);
 117 }
 118 ?>
 119
 120 gb.getPageWidth = function(index) {
 121     //return parseInt(this.pageW[index]/this.reduce);
 122     return this.pageW[index];
 123 }
 124
 125 gb.getPageHeight = function(index) {
 126     //return parseInt(this.pageH[index]/this.reduce);
 127     return this.pageH[index];
 128 }
 129
 130 gb.getPageURI = function(index) {
 131     var leafStr = '0000';
 132     var imgStr = this.leafMap[index].toString();
 133     var re = new RegExp("0{"+imgStr.length+"}$");
 134
 135     var insideZipPrefix = this.subPrefix.match('[^/]+$');
 136     var file = insideZipPrefix + '_' + this.imageFormat + '/' + insideZipPrefix + '_' + leafStr.replace(re, imgStr) + '.' + this.imageFormat;
 137
 138     // $$$ add more image stack formats here
 139     if (1==this.mode) {
 140         var url = 'http://'+this.server+'/GnuBook/GnuBookImages.php?zip='+this.zip+'&file='+file+'&scale='+this.reduce;
 141     } else {
 142         var ratio = this.getPageHeight(index) / this.twoPage.height;
 143         var scale;
 144         // $$$ we make an assumption here that the scales are available pow2 (like kakadu)
 145         if (ratio <= 2) {
 146             scale = 1;
 147         } else if (ratio <= 4) {
 148             scale = 2;
 149         } else if (ratio <= 8) {
 150             scale = 4;
 151         } else if (ratio <= 16) {
 152             scale = 8;
 153         } else  if (ratio <= 32) {
 154             scale = 16;
 155         } else {
 156             scale = 32;
 157         }
 158
 159         var url = 'http://'+this.server+'/GnuBook/GnuBookImages.php?zip='+this.zip+'&file='+file+'&scale='+scale;
 160
 161     }
 162     return url;
 163 }
 164
 165 gb.getPageSide = function(index) {
 166     //assume the book starts with a cover (right-hand leaf)
 167     //we should really get handside from scandata.xml
 168
 169     <? // Use special function if we should infer the page sides based off the title page index
 170     if (preg_match('/goog$/', $id) && ('' != $titleLeaf)) {
 171     ?>
 172     // assume page side based on title page
 173     var titleIndex = gb.leafNumToIndex(gb.titleLeaf);
 174     // assume title page is RHS
 175     var delta = titleIndex - index;
 176     if (0 == (delta & 0x1)) {
 177         // even delta
 178         return 'R';
 179     } else {
 180         return 'L';
 181     }
 182     <?
 183     }
 184     ?>
 185
 186     // $$$ we should get this from scandata instead of assuming the accessible
 187     //     leafs are contiguous
 188     if ('rl' != this.pageProgression) {
 189         // If pageProgression is not set RTL we assume it is LTR
 190         if (0 == (index & 0x1)) {
 191             // Even-numbered page
 192             return 'R';
 193         } else {
 194             // Odd-numbered page
 195             return 'L';
 196         }
 197     } else {
 198         // RTL
 199         if (0 == (index & 0x1)) {
 200             return 'L';
 201         } else {
 202             return 'R';
 203         }
 204     }
 205 }
 206
 207 gb.getPageNum = function(index) {
 208     var pageNum = this.pageNums[index];
 209     if (pageNum) {
 210         return pageNum;
 211     } else {
 212         return 'n' + index;
 213     }
 214 }
 215
 216 gb.leafNumToIndex = function(leafNum) {
 217     var index = jQuery.inArray(leafNum, this.leafMap);
 218     if (-1 == index) {
 219         return null;
 220     } else {
 221         return index;
 222     }
 223 }
 224
 225 // This function returns the left and right indices for the user-visible
 226 // spread that contains the given index.  The return values may be
 227 // null if there is no facing page or the index is invalid.
 228 gb.getSpreadIndices = function(pindex) {
 229     // $$$ we could make a separate function for the RTL case and
 230     //      only bind it if necessary instead of always checking
 231     // $$$ we currently assume there are no gaps
 232
 233     var spreadIndices = [null, null];
 234     if ('rl' == this.pageProgression) {
 235         // Right to Left
 236         if (this.getPageSide(pindex) == 'R') {
 237             spreadIndices[1] = pindex;
 238             spreadIndices[0] = pindex + 1;
 239         } else {
 240             // Given index was LHS
 241             spreadIndices[0] = pindex;
 242             spreadIndices[1] = pindex - 1;
 243         }
 244     } else {
 245         // Left to right
 246         if (this.getPageSide(pindex) == 'L') {
 247             spreadIndices[0] = pindex;
 248             spreadIndices[1] = pindex + 1;
 249         } else {
 250             // Given index was RHS
 251             spreadIndices[1] = pindex;
 252             spreadIndices[0] = pindex - 1;
 253         }
 254     }
 255
 256     //console.log("   index %d mapped to spread %d,%d", pindex, spreadIndices[0], spreadIndices[1]);
 257
 258     return spreadIndices;
 259 }
 260
 261 // Remove the page number assertions for all but the highest index page with
 262 // a given assertion.  Ensures there is only a single page "{pagenum}"
 263 // e.g. the last page asserted as page 5 retains that assertion.
 264 gb.uniquifyPageNums = function() {
 265     var seen = {};
 266
 267     for (var i = gb.pageNums.length - 1; i--; i >= 0) {
 268         var pageNum = gb.pageNums[i];
 269         if ( !seen[pageNum] ) {
 270             seen[pageNum] = true;
 271         } else {
 272             gb.pageNums[i] = null;
 273         }
 274     }
 275
 276 }
 277
 278 gb.cleanupMetadata = function() {
 279     gb.uniquifyPageNums();
 280 }
 281
 282 gb.pageW =              [
 283             <?
 284             $i=0;
 285             foreach ($scanData->pageData->page as $page) {
 286                 if (shouldAddPage($page)) {
 287                     if(0 != $i) echo ",";   //stupid IE
 288                     echo "{$page->cropBox->w}";
 289                     $i++;
 290                 }
 291             }
 292             ?>
 293             ];
 294
 295 gb.pageH =              [
 296             <?
 297             $totalHeight = 0;
 298             $i=0;
 299             foreach ($scanData->pageData->page as $page) {
 300                 if (shouldAddPage($page)) {
 301                     if(0 != $i) echo ",";   //stupid IE
 302                     echo "{$page->cropBox->h}";
 303                     $totalHeight += intval($page->cropBox->h/4) + 10;
 304                     $i++;
 305                 }
 306             }
 307             ?>
 308             ];
 309 gb.leafMap = [
 310             <?
 311             $i=0;
 312             foreach ($scanData->pageData->page as $page) {
 313                 if (shouldAddPage($page)) {
 314                     if(0 != $i) echo ",";   //stupid IE
 315                     echo "{$page['leafNum']}";
 316                     $i++;
 317                 }
 318             }
 319             ?>
 320             ];
 321
 322 gb.pageNums = [
 323             <?
 324             $i=0;
 325             foreach ($scanData->pageData->page as $page) {
 326                 if (shouldAddPage($page)) {
 327                     if(0 != $i) echo ",";   //stupid IE
 328                     if (array_key_exists('pageNumber', $page) && ('' != $page->pageNumber)) {
 329                         echo "'{$page->pageNumber}'";
 330                     } else {
 331                         echo "null";
 332                     }
 333                     $i++;
 334                 }
 335             }
 336             ?>
 337             ];
 338
 339
 340 gb.numLeafs = gb.pageW.length;
 341
 342 gb.bookId   = '<?echo $id;?>';
 343 gb.zip      = '<?echo $zipFile;?>';
 344 gb.subPrefix = '<?echo $subPrefix;?>';
 345 gb.server   = '<?echo $server;?>';
 346 gb.bookTitle= '<?echo preg_replace("/\'/", "\\'", $metaData->title);?>';
 347 gb.bookPath = '<?echo $subItemPath;?>';
 348 gb.bookUrl  = '<?echo "http://www.archive.org/details/$id";?>';
 349 gb.imageFormat = '<?echo $imageFormat;?>';
 350
 351 <?
 352
 353 # Load some values from meta.xml
 354 if ('' != $metaData->{'page-progression'}) {
 355   echo "gb.pageProgression = '" . $metaData->{"page-progression"} . "';";
 356 } else {
 357   // Assume page progression is Left To Right
 358   echo "gb.pageProgression = 'lr';";
 359 }
 360
 361 # Special cases
 362 if ('bandersnatchhsye00scarrich' == $id) {
 363     echo "gb.mode     = 2;\n";
 364     echo "gb.auto     = true;\n";
 365 }
 366
 367 ?>
 368
 369 // Check for config object
 370 // $$$ change this to use the newer params object
 371 if (typeof(gbConfig) != 'undefined') {
 372     if (typeof(gbConfig["ui"]) != 'undefined') {
 373         gb.ui = gbConfig["ui"];
 374     }
 375
 376     if (gbConfig['mode'] == 1) {
 377         gb.mode = 1;
 378         if (typeof(gbConfig['reduce'] != 'undefined')) {
 379             gb.reduce = gbConfig['reduce'];
 380         }
 381     } else if (gbConfig['mode'] == 2) {
 382         gb.mode = 2;
 383
 384 <?
 385         //$$$mang hack to override request for 2up for books with attribution page
 386         //   as first page until we can display that page in 2up
 387         $needle = 'goog';
 388         if (strrpos($id, $needle) === strlen($id)-strlen($needle)) {
 389             print "// override for books with attribution page\n";
 390             print "gb.mode = 1;\n";
 391         }
 392 ?>
 393     }
 394 } // gbConfig
 395
 396 gb.cleanupMetadata();
 397 gb.init();
 398
 399 <?
 400
 401
 402 function GBFatal($string) {
 403     echo "alert('$string')\n";
 404     die(-1);
 405 }
 406
 407 // Returns true if a page should be added based on it's information in
 408 // the metadata
 409 function shouldAddPage($page) {
 410     // Return false only if the page is marked addToAccessFormats false.
 411     // If there is no assertion we assume it should be added.
 412     if (isset($page->addToAccessFormats)) {
 413         if ("false" == strtolower(trim($page->addToAccessFormats))) {
 414             return false;
 415         }
 416     }
 417
 418     return true;
 419 }
 420
 421 ?>