BookReaderIA/inc/BookReader.inc

   1 <?
   2
   3 /*
   4  * Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
   5  *
   6  * This file is part of BookReader.  The full source code can be found at GitHub:
   7  * http://github.com/openlibrary/bookreader
   8  *
   9  * Note: Edits to this file must pass through github.  To submit a patch to this
  10  *       file please contact mang via http://github.com/mangtronix or mang at archive dot org
  11  *       Direct changes to this file may get clobbered when the code is synchronized
  12  *       from github.
  13  */
  14
  15 class BookReader
  16 {
  17
  18
  19   // Operators recognized in BookReader download URLs
  20   public static $downloadOperators = array('page');
  21
  /**
   * Decides whether the item holds a displayable book for the given prefix.
   *
   * A "book" is an image stack plus scandata:
   *   1. Old items may have scandata.xml or scandata.zip and itemid_{imageformat}.{zip,tar}
   *   2. Newer items may have multiple {arbitraryname}_scandata.xml and
   *      {arbitraryname}_{imageformat}.{zip,tar}
   *
   * File locations returned by $item->getFiles() are compared after removing the
   * leading "{mainDir}/" portion. Scandata has to equal "{prefix}_scandata.xml",
   * while the image stack only has to END with "{prefix}_(jp2|tif|jpg).(zip|tar)".
   * NOTE(review): the image stack pattern is not anchored at the start, so a file
   * whose name merely ends with the prefix also counts -- confirm this is intended.
   *
   * $$$ TODO add support for jpg and tar stacks
   * https://bugs.edge.launchpad.net/gnubook/+bug/323003
   * https://bugs.edge.launchpad.net/gnubook/+bug/385397
   *
   * @param object $item             Object exposing getFiles() and metadataGrabber->mainDir
   * @param string $prefix           Book prefix (typically the item identifier)
   * @param bool   $checkOldScandata When true, scandata.xml / scandata.zip also count as scandata
   * @return bool True only when both scandata and an image stack were found
   */
  public static function canDisplay($item, $prefix, $checkOldScandata = false)
  {
    $scandataName = $prefix . "_scandata.xml";
    $stackPattern = '@' . preg_quote($prefix, '@') . '_(jp2|tif|jpg)\.(zip|tar)$@';

    // Locations are keyed by full path; skip "{mainDir}/" to get the name inside the item
    $skip = strlen($item->metadataGrabber->mainDir . '/');

    $hasScandata = false;
    $hasImageStack = false;

    foreach ($item->getFiles() as $location => $fileInfo) {
        $name = substr($location, $skip);

        $isLegacyScandata = $checkOldScandata
            && ($name == 'scandata.xml' || $name == 'scandata.zip');
        if ($isLegacyScandata || $name == $scandataName) {
            $hasScandata = true;
        }

        if (preg_match($stackPattern, $name)) {
            $hasImageStack = true;
        }
    }

    return $hasScandata && $hasImageStack;
  }
  65
  /**
   * Finds the prefix to use for the book given the part of the URL trailing after /stream/
   *
   * When the URL portion contains "{something}/{rest}", the prefix is {rest}: everything
   * after the slash up to the end or the first '?' or '&'
   * (e.g. itemid/{match/these/parts}?foo=bar). Otherwise the prefix is the first run of
   * characters that are not '/', '&' or '?', i.e. the item identifier.
   *
   * $$$ Currently swallows the rest of the URL.
   *     If we want to support e.g. /stream/itemid/subdir/prefix/page/23 will need to adjust.
   *
   * @param string $urlPortion Part of the URL following /stream/
   * @return string|false The prefix, or false when the URL portion holds nothing but
   *                      '/', '&' and '?' characters (or is empty)
   */
  public static function findPrefix($urlPortion)
  {
    // Sub prefix first: whenever this pattern matches, the identifier pattern matches too
    if (preg_match('#[^/&?]+/([^&?]+)#', $urlPortion, $subPrefixMatch)) {
        return $subPrefixMatch[1];
    }

    // No sub prefix -- fall back to the item identifier
    if (preg_match('#[^/&?]+#', $urlPortion, $identifierMatch)) {
        return $identifierMatch[0];
    }

    return false;
  }
  86
  /**
   * Emits the complete BookReader HTML page for an item, then ends the script.
   *
   * The page is written directly to the output and the method finishes with exit,
   * so nothing after a call to draw() runs. When $identifier is empty an error
   * message is printed and the script stops with die(-1) instead.
   *
   * Every dynamic value written into the markup is HTML-escaped, since the title,
   * identifier and sub-prefix are not guaranteed to be markup-safe. Entities that
   * are already present in a value are left alone (double_encode is off) so that
   * callers passing pre-escaped titles keep seeing the same output.
   *
   * $$$ would be cleaner to use different templates instead of the uiMode param
   *
   * @param string $server     Host handed to getDevHost() and jsMetadataURL()
   * @param string $mainDir    Item directory handed to jsMetadataURL()
   * @param string $identifier Item identifier; must not be empty
   * @param string $subPrefix  Optional prefix to display a book inside an item (e.g. if does not match identifier)
   * @param string $title      Page title; 'BookReader' is used when empty
   * @param mixed  $coverLeaf  Not used by this method; kept so existing callers keep working
   * @param string $titleStart Not used by this method; kept so existing callers keep working
   * @param string $uiMode     'embed' and 'touch' each add a stylesheet; 'embed' also selects
   *                           mode 1 with the embed UI, any other value selects mode 2
   * @return void Does not return; the script is terminated
   */
  public static function draw($server, $mainDir, $identifier, $subPrefix, $title,
                              $coverLeaf=null, $titleStart='Internet Archive', $uiMode='full')
  {
    if ("" == $identifier) {
        echo "No identifier specified!";
        die(-1);
    }

    // Set title to default if not set
    if (!$title) {
        $title = 'BookReader';
    }

    // manually update with Launchpad version number at each checkin so that browsers
    // do not use old cached version
    // see https://bugs.launchpad.net/gnubook/+bug/330748
    $version = "r28";

    if (BookReader::getDevHost($server)) {
        // On dev host - add time to force reload
        // If debugging on IE, remove this line otherwise breakpoints will be invalid after reload
        $version .= '_' . time();
    }

    $metaURL = BookReader::jsMetadataURL($server, $identifier, $mainDir, $subPrefix);
    $metaURL .= "&version=" . $version;
    $locateURL = BookReader::jsLocateURL($identifier, $subPrefix);
    $coverThumb = 'http://www.archive.org/download/' . $identifier . '/'. $subPrefix . '/page/cover_w114.jpg';
    // startup-up-image must be exactly 320x460
    //$startupImage = 'http://www.archive.org/download/' . $identifier . '/'. $subPrefix . '/page/cover_w512.jpg';

    // HTML-escaped copies of everything echoed into the page. ENT_SUBSTITUTE (PHP 5.4+)
    // keeps a value with invalid UTF-8 from collapsing to an empty string.
    $escapeFlags = defined('ENT_SUBSTITUTE') ? (ENT_QUOTES | ENT_SUBSTITUTE) : ENT_QUOTES;
    $titleHtml = htmlspecialchars($title, $escapeFlags, 'UTF-8', false);
    $identifierHtml = htmlspecialchars($identifier, $escapeFlags, 'UTF-8', false);
    $identifierQueryHtml = htmlspecialchars(rawurlencode($identifier), $escapeFlags, 'UTF-8', false);
    $coverThumbHtml = htmlspecialchars($coverThumb, $escapeFlags, 'UTF-8', false);
    $metaURLHtml = htmlspecialchars($metaURL, $escapeFlags, 'UTF-8', false);
    $locateURLHtml = htmlspecialchars($locateURL, $escapeFlags, 'UTF-8', false);
    $versionHtml = htmlspecialchars($version, $escapeFlags, 'UTF-8', false);

    // The User-Agent header is optional; treat a missing one as a non-mobile browser
    $userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
    $isMobile = (bool) preg_match("/mobile/i", $userAgent);

    // NOTE(review): the body background colour below is written as '##939598' (double '#'),
    // which is not a valid CSS colour. It is left as-is because correcting it would change
    // how the page renders -- confirm the intended colour before fixing.
?>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
    <meta name="viewport" content="width=device-width, maximum-scale=1.0" />
    <meta name="apple-mobile-web-app-capable" content="yes" />
    <meta name="apple-mobile-web-app-status-bar-style" content="black" />
    <link rel="apple-touch-icon" href="<?php echo $coverThumbHtml; ?>" />
    <title><?php echo $titleHtml; ?></title>
<!--[if lte IE 6]>
    <meta http-equiv="refresh" content="2; URL=/bookreader/browserunsupported.php?id=<?php echo $identifierQueryHtml; ?>">
<![endif]-->
    <link rel="stylesheet" type="text/css" href="/bookreader/BookReader.css?v=<?php echo $versionHtml; ?>">
<?php if ($uiMode == "embed") { ?>
    <link rel="stylesheet" type="text/css" href="/bookreader/BookReaderEmbed.css?v=<?php echo $versionHtml; ?>">
<?php } elseif ($uiMode == "touch") { ?>
    <link rel="stylesheet" type="text/css" href="/bookreader/touch/BookReaderTouch.css?v=<?php echo $versionHtml; ?>">
<?php } /* uiMode */ ?>
    <script src="/includes/jquery-1.4.2.min.js" type="text/javascript"></script>
    <script type="text/javascript" src="/bookreader/jquery-ui-1.8.5.custom.min.js?v=<?php echo $versionHtml; ?>"></script>
    <script type="text/javascript" src="http://www.archive.org/includes/analytics.js?v=2"></script>
    <script type="text/javascript" src="/bookreader/dragscrollable.js?v=<?php echo $versionHtml; ?>"></script>
    <script type="text/javascript" src="/bookreader/jquery.colorbox-min.js"></script>
    <script type="text/javascript" src="/bookreader/jquery.ui.ipad.js"></script>
     <!-- THIS ALLOWS BEAUTYTIPS TO WORK ON IE -->
        <!--[if lt IE 9]>
        <script type="text/javascript" src="/includes/excanvas.compiled.js"></script>
        <![endif]-->
    <script type="text/javascript" src="/bookreader/jquery.bt.min.js"></script>
    <script type="text/javascript" src="/bookreader/BookReader.js?v=<?php echo $versionHtml; ?>"></script>
<?php if (!$isMobile) { ?>
    <script type="text/javascript" src="/bookreader/soundmanager/soundmanager2-ia.js?v=<?php echo $versionHtml; ?>"></script>
    <script>
        soundManager.debugMode = false;
        soundManager.url = '/bookreader/soundmanager/swf/';
        soundManager.useHTML5Audio = true;
        soundManager.flashVersion = 9; //flash 8 version of swf is buggy when calling play() on a sound that is still loading
    </script>
<?php } /* mobile user agent */ ?>
</head>
<body style="background-color: ##939598;">

<div id="BookReader">
    Internet Archive BookReader - <?php echo $titleHtml; ?>
    <br/>

    <noscript>
    <p>
        The BookReader requires JavaScript to be enabled. Please check that your browser supports JavaScript and that it is enabled in the browser settings.  You can also try one of the <a href="http://www.archive.org/details/<?php echo $identifierHtml; ?>"> other formats of the book</a>.
    </p>
    </noscript>
</div>

<script type="text/javascript">
  // Set some config variables -- $$$ NB: Config object format has not been finalized
  var brConfig = {};
<?php if ($uiMode == 'embed') { ?>
  brConfig["mode"] = 1;
  brConfig["ui"] = "embed";
<?php } else { ?>
  brConfig["mode"] = 2;
<?php } ?>
</script>
<!-- The script included below is dynamically generated JavaScript that includes the book metadata and page image access functions.
     The ia{number}.us.archive.org server referenced below can and does change, so this URL should NOT be used for permanent access.  -->
<script type="text/javascript" src="<?php echo $metaURLHtml; ?>"></script>
<!-- This URL will find the item and redirect to the correct server.  Remove the line above and use the URL below for stable access. -->
<!-- <script type="text/javascript" src="<?php echo $locateURLHtml; ?>"></script> -->

<script type="text/javascript">
    // Usage stats
    if(window.archive_analytics) { window.archive_analytics.values['bookreader'] = 'open'};
</script>
  <?php
    exit;
  }
 209
 210   // Returns the user part of dev host from URL, or null
 211   public static function getDevHost($server)
 212   {
 213       if (preg_match("/^www-(\w+)/", $_SERVER["SERVER_NAME"], $match)) {
 214         return $match[1];
 215       }
 216
 217       return null;
 218   }
 219
 220
 221   public static function serverBaseURL($server)
 222   {
 223       // Check if we're on a dev vhost and point to JSIA in the user's public_html
 224       // on the datanode
 225       // $$$ the remapping isn't totally automatic yet and requires user to
 226       //     ln -s ~/petabox/www/datanode/BookReader ~/public_html/BookReader
 227       //     so we enable it only for known hosts
 228       $devhost = BookReader::getDevHost($server);
 229       $devhosts = array('mang', 'testflip', 'rkumar');
 230       if (in_array($devhost, $devhosts)) {
 231         $server = $server . "/~" . $devhost;
 232       }
 233       return $server;
 234   }
 235
 236
 237   public static function jsMetadataURL($server, $identifier, $mainDir, $subPrefix = '')
 238   {
 239     $serverBaseURL = BookReader::serverBaseURL($server);
 240
 241     $params = array( 'id' => $identifier, 'itemPath' => $mainDir, 'server' => $server );
 242     if ($subPrefix) {
 243         $params['subPrefix'] = $subPrefix;
 244     }
 245
 246     $keys = array_keys($params);
 247     $lastParam = end($keys);
 248     $url = "http://{$serverBaseURL}/BookReader/BookReaderJSIA.php?";
 249     foreach($params as $param=>$value) {
 250         $url .= $param . '=' . $value;
 251         if ($param != $lastParam) {
 252             $url .= '&';
 253         }
 254     }
 255
 256     return $url;
 257   }
 258
 259   // This returns a URL that finds the item then returns a redirect to BookReaderJSIA.php
 260   // on the item's server.
 261   public static function jsLocateURL($identifier, $subPrefix = '')
 262   {
 263     $locateURL = 'http://www.archive.org/bookreader/BookReaderJSLocate.php?id=' . $identifier;
 264     if ($subPrefix) {
 265         $locateURL .= '&subPrefix=' . $subPrefix;
 266     }
 267     return $locateURL;
 268   }
 269
 270   // Return the URL for the requested /download/$path, or null
 271   public static function getURL($path, $item) {
 272     // $path should look like {itemId}/{operator}/{filename}
 273     // Other operators may be added
 274
 275     $urlParts = BookReader::parsePath($path);
 276
 277     // Check for non-handled cases
 278     $required = array('identifier', 'operator', 'operand');
 279     foreach ($required as $key) {
 280         if (!array_key_exists($key, $urlParts)) {
 281             return null;
 282         }
 283     }
 284
 285     $identifier = $urlParts['identifier'];
 286     $operator = $urlParts['operator'];
 287     $filename = $urlParts['operand'];
 288     $subPrefix = $urlParts['subPrefix'];
 289
 290     $serverBaseURL = BookReader::serverBaseURL($item->getServer());
 291
 292     // Baseline query params
 293     $query = array(
 294         'id' => $identifier,
 295         'itemPath' => $item->getMainDir(),
 296         'server' => $serverBaseURL
 297     );
 298     if ($subPrefix) {
 299         $query['subPrefix'] = $subPrefix;
 300     }
 301
 302     switch ($operator) {
 303         case 'page':
 304
 305             // Look for old-style preview request - e.g. {identifier}_cover.jpg
 306             if (preg_match('/^(.*)_((cover|title|preview).*)/', $filename, $matches) === 1) {
 307                 // Serve preview image
 308                 $page = $matches[2];
 309                 $query['page'] = $page;
 310                 return 'http://' . $serverBaseURL . '/BookReader/BookReaderPreview.php?' . http_build_query($query, '', '&');
 311             }
 312
 313             // New-style preview request - e.g. cover_thumb.jpg
 314             if (preg_match('/^(cover|title|preview)/', $filename, $matches) === 1) {
 315                 $query['page'] = $filename;
 316                 return 'http://' . $serverBaseURL . '/BookReader/BookReaderPreview.php?' . http_build_query($query, '', '&');
 317             }
 318
 319             // Asking for a non-preview page
 320             $query['page'] = $filename;
 321             return 'http://' . $serverBaseURL . '/BookReader/BookReaderImages.php?' . http_build_query($query, '', '&');
 322
 323         default:
 324             // Unknown operator
 325             return null;
 326     }
 327
 328     return null; // was not handled
 329   }
 330
 331   public static function browserFromUserAgent($userAgent) {
 332       $browserPatterns = array(
 333           'ipad' => '/iPad/',
 334           'iphone' => '/iPhone/', // Also cover iPod Touch
 335           'android' => '/Android/',
 336       );
 337
 338       foreach ($browserPatterns as $browser => $pattern) {
 339           if (preg_match($pattern, $userAgent)) {
 340               return $browser;
 341           }
 342       }
 343       return null;
 344   }
 345
 346
 347   // $$$ Ideally we will not rely on user agent, but for the moment we do
 348   public static function paramsFromUserAgent($userAgent) {
 349       // $$$ using 'embed' here for devices with assumed small screens -- really should just use CSS3 media queries
 350       $browserParams = array(
 351           'ipad' => array( 'ui' => 'touch' ),
 352           'iphone' => array( 'ui' => 'embed', 'mode' => '1up' ),
 353           'android' => array( 'ui' => 'embed', 'mode' => '1up' ),
 354       );
 355
 356       $browser = BookReader::browserFromUserAgent($userAgent);
 357       if ($browser) {
 358           return $browserParams[$browser];
 359       }
 360       return array();
 361   }
 362
 363   public static function parsePath($path) {
 364     // Parse the BookReader path and return the parts
 365     // e.g. itemid/some/sub/dir/page/cover.jpg -> array( 'identifier' => 'itemid', 'subPrefix' => 'some/sub/dir',
 366     //            'operator' => 'page', 'filename' => 'cover.jpg')
 367
 368     $parts = array();
 369
 370     // Pull off query, e.g. ?foo=bar
 371     if (preg_match('#(.*?)(\?.*)#', $path, $matches) === 1) {
 372         $parts['query'] = $matches[2];
 373         $path = $matches[1];
 374     }
 375
 376     // Pull off identifier
 377     if (preg_match('#[^/&?]+#', $path, $matches) === 0) {
 378         // no match
 379         return $parts;
 380     }
 381     $parts['identifier'] = $matches[0];
 382     $path = substr($path, strlen($matches[0]));
 383
 384     // Look for operators
 385     // The sub-prefix can be arbitrary, so we match up until the first operator
 386     $operators = '(' . join('|', self::$downloadOperators) . ')';
 387     $pattern = '#(?P<subPrefix>.*?)/(?P<operator>' . $operators . ')/(?P<operand>.*)#';
 388     if (preg_match($pattern, $path, $matches) === 1) {
 389         $parts['subPrefix'] = substr($matches['subPrefix'], 1); // remove leading '/'
 390         $parts['operator'] = $matches['operator'];
 391         $parts['operand'] = $matches['operand'];
 392     } else {
 393         $parts['subPrefix'] = $path;
 394     }
 395
 396     return $parts;
 397   }
 398
 399 }
 400
 401 ?>