BookReaderIA/inc/BookReader.inc

   1 <?
   2
   3 /*
   4  * Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
   5  *
   6  * This file is part of BookReader.  The full source code can be found at GitHub:
   7  * http://github.com/openlibrary/bookreader
   8  *
   9  * Note: Edits to this file must pass through github.  To submit a patch to this
  10  *       file please contact mang via http://github.com/mangtronix or mang at archive dot org
  11  *       Direct changes to this file may get clobbered when the code is synchronized
  12  *       from github.
  13  */
  14
  15 class BookReader
  16 {
  17
  18   // Operators recognized in BookReader download URLs
  19   public static $downloadOperators = array('page');
  20
  21   // Returns true if can display the book in item with a given prefix (typically the item identifier)
  22   public static function canDisplay($item, $prefix, $checkOldScandata = false)
  23   {
  24
  25     // A "book" is an image stack and scandata.
  26     // 1. Old items may have scandata.xml or scandata.zip and itemid_{imageformat}.{zip,tar}
  27     // 2. Newer items may have multiple {arbitraryname}_scandata.xml and {arbitraryname}_{imageformat}.{zip,tar}
  28
  29     $foundScandata = false;
  30     $foundImageStack = false;
  31
  32     $targetScandata = $prefix . "_scandata.xml";
  33
  34     // $$$ TODO add support for jpg and tar stacks
  35     // https://bugs.edge.launchpad.net/gnubook/+bug/323003
  36     // https://bugs.edge.launchpad.net/gnubook/+bug/385397
  37     $imageFormatRegex = '@' . preg_quote($prefix, '@') . '_(jp2|tif|jpg)\.(zip|tar)$@';
  38
  39     $baseLength = strlen($item->metadataGrabber->mainDir . '/');
  40     foreach ($item->getFiles() as $location => $fileInfo) {
  41         $filename = substr($location, $baseLength);
  42
  43         if ($checkOldScandata) {
  44             if ($filename == 'scandata.xml' || $filename == 'scandata.zip') {
  45                 $foundScandata = $filename;
  46             }
  47         }
  48
  49         if ($filename == $targetScandata) {
  50             $foundScandata = $filename;
  51         }
  52
  53         if (preg_match($imageFormatRegex, $filename)) {
  54             $foundImageStack = $filename;
  55         }
  56     }
  57
  58     if ($foundScandata && $foundImageStack) {
  59         return true;
  60     }
  61
  62     return false;
  63   }
  64
  65   // Finds the prefix to use for the book given the part of the URL trailing after /stream/
  66   public static function findPrefix($urlPortion)
  67   {
  68     if (!preg_match('#[^/&?]+#', $urlPortion, $matches)) {
  69         // URL portion was empty or started with /, &, or ? -- no item identifier
  70         return false;
  71     }
  72
  73     $prefix = $matches[0]; // item identifier
  74
  75     // $$$ Currently swallows the rest of the URL.
  76     //     If we want to support e.g. /stream/itemid/subdir/prefix/page/23 will need to adjust.
  77     if (preg_match('#[^/&?]+/([^&?]+)#', $urlPortion, $matches)) {
  78         // Match is everything after item identifier and slash, up to end or ? or &
  79         // e.g. itemid/{match/these/parts}?foo=bar
  80         $prefix = $matches[1]; // sub prefix --
  81     }
  82
  83     return $prefix;
  84   }
  85
  86   // $$$ would be cleaner to use different templates instead of the uiMode param
  87   //
  88   // @param subprefix Optional prefix to display a book inside an item (e.g. if does not match identifier)
  89   public static function draw($server, $mainDir, $identifier, $subPrefix, $title,
  90                               $coverLeaf=null, $titleStart='Internet Archive', $uiMode='full')
  91   {
  92     // Set title to default if not set
  93     if (!$title) {
  94         $title = 'BookReader';
  95     }
  96
  97     $id = $identifier;
  98
  99     // manually update with Launchpad version number at each checkin so that browsers
 100     // do not use old cached version
 101     // see https://bugs.launchpad.net/gnubook/+bug/330748
 102     $version = "r28";
 103
 104     if (BookReader::getDevHost($server)) {
 105         // on dev host - add time to force reload
 106         $version .= '_' . time();
 107     }
 108
 109     if ("" == $id) {
 110         echo "No identifier specified!";
 111         die(-1);
 112     }
 113
 114     $metaURL = BookReader::jsMetadataURL($server, $identifier, $mainDir, $subPrefix);
 115
 116 ?>
 117 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 118 <html>
 119 <head>
 120     <meta name="viewport" content="width=device-width, maximum-scale=1.0" />
 121     <meta name="apple-mobile-web-app-capable" content="yes" />
 122     <title><? echo $title; ?></title>
 123 <!--[if lte IE 6]>
 124     <meta http-equiv="refresh" content="2; URL=/bookreader/browserunsupported.php?id=<? echo($id); ?>">
 125 <![endif]-->
 126     <link rel="stylesheet" type="text/css" href="/bookreader/BookReader.css?v=<? echo($version); ?>">
 127 <? if ($uiMode == "embed") { ?>
 128     <link rel="stylesheet" type="text/css" href="/bookreader/BookReaderEmbed.css?v=<? echo($version); ?>">
 129 <? } elseif ($uiMode == "touch") { ?>
 130     <link rel="stylesheet" type="text/css" href="/bookreader/touch/BookReaderTouch.css?v=<? echo($version); ?>">
 131 <? } /* uiMode */ ?>
 132     <script src="/includes/jquery-1.4.2.min.js" type="text/javascript"></script>
 133     <script type="text/javascript" src="/bookreader/jquery-ui-1.8.5.custom.min.js?v=<? echo($version); ?>"></script>
 134     <script type="text/javascript" src="http://www.archive.org/includes/analytics.js?v=2"></script>
 135     <script type="text/javascript" src="/bookreader/dragscrollable.js?v=<? echo($version); ?>"></script>
 136     <script type="text/javascript" src="/bookreader/jquery.colorbox-min.js"></script>
 137      <!-- THIS ALLOWS BEAUTYTIPS TO WORK ON IE -->
 138         <!--[if lt IE 9]>
 139         <script type="text/javascript" src="excanvas.compiled.js"></script>
 140         <![endif]-->
 141     <script type="text/javascript" src="/bookreader/jquery.bt.min.js"></script>
 142     <script type="text/javascript" src="/bookreader/BookReader.js?v=<? echo($version); ?>"></script>
 143     <script type="text/javascript" src="/bookreader/soundmanager/soundmanager2.js?v=<? echo($version); ?>"></script>
 144     <script>
 145         soundManager.debugMode = false;
 146         soundManager.url = '/bookreader/soundmanager/swf/';
 147         soundManager.useHTML5Audio = true;
 148         soundManager.flashVersion = 9; //flash 8 version of swf is buggy when calling play() on a sound that is still loading
 149     </script>
 150 </head>
 151 <body style="background-color: ##939598;">
 152
 153 <?
 154 /*
 155 // <? if ($uiMode == 'full') { ?>
 156 // <div id="BookReader" style="left:10px; right:200px; top:10px; bottom:2em;">Internet Archive BookReader <noscript>requires JavaScript to be enabled.</noscript></div>
 157 // <? } else { ?>
 158 // <div id="BookReader" style="left:0; right:0; top:0; bottom:0; border:0">Internet Archive Bookreader <noscript>requires JavaScript to be enabled.</noscript></div>
 159 // <? } ?>
 160 */
 161 ?>
 162
 163 <div id="BookReader">Internet Archive BookReader <noscript>requires JavaScript to be enabled.</noscript></div>
 164
 165 <script type="text/javascript">
 166   // Set some config variables -- $$$ NB: Config object format has not been finalized
 167   var brConfig = {};
 168 <? if ($uiMode == 'embed') { ?>
 169   brConfig["mode"] = 1;
 170   brConfig["reduce"] = 8;
 171   brConfig["ui"] = "embed";
 172 <? } else { ?>
 173   brConfig["mode"] = 2;
 174 <? } ?>
 175 </script>
 176 <!-- The script included below is dynamically generated JavaScript that includes the book metadata and page image access functions -->
 177 <script type="text/javascript" src="<? echo($metaURL); ?>"></script>
 178
 179 <script type="text/javascript">
 180     // $$$ hack to workaround sizing bug when starting in two-up mode
 181     $(document).ready(function() {
 182         $(window).trigger('resize');
 183     });
 184
 185     // Usage stats
 186     if(window.archive_analytics) { window.archive_analytics.values['bookreader'] = 'open'}; </script>
 187 </script>
 188   <?
 189     exit;
 190   }
 191
 192   // Returns the user part of dev host from URL, or null
 193   public static function getDevHost($server)
 194   {
 195       if (preg_match("/^www-(\w+)/", $_SERVER["SERVER_NAME"], $match)) {
 196         return $match[1];
 197       }
 198
 199       return null;
 200   }
 201
 202
 203   public static function serverBaseURL($server)
 204   {
 205       // Check if we're on a dev vhost and point to JSIA in the user's public_html
 206       // on the datanode
 207       // $$$ the remapping isn't totally automatic yet and requires user to
 208       //     ln -s ~/petabox/www/datanode/BookReader ~/public_html/BookReader
 209       //     so we enable it only for known hosts
 210       $devhost = BookReader::getDevHost($server);
 211       $devhosts = array('mang', 'testflip', 'rkumar');
 212       if (in_array($devhost, $devhosts)) {
 213         $server = $server . "/~" . $devhost;
 214       }
 215       return $server;
 216   }
 217
 218
 219   public static function jsMetadataURL($server, $identifier, $mainDir, $subPrefix = '')
 220   {
 221     $serverBaseURL = BookReader::serverBaseURL($server);
 222
 223     $params = array( 'id' => $identifier, 'itemPath' => $mainDir, 'server' => $server );
 224     if ($subPrefix) {
 225         $params['subPrefix'] = $subPrefix;
 226     }
 227
 228     $keys = array_keys($params);
 229     $lastParam = end($keys);
 230     $url = "http://{$serverBaseURL}/BookReader/BookReaderJSIA.php?";
 231     foreach($params as $param=>$value) {
 232         $url .= $param . '=' . $value;
 233         if ($param != $lastParam) {
 234             $url .= '&';
 235         }
 236     }
 237
 238     return $url;
 239   }
 240
 241   // Return the URL for the requested /download/$path, or null
 242   public static function getURL($path, $item) {
 243     // $path should look like {itemId}/{operator}/{filename}
 244     // Other operators may be added
 245
 246     $urlParts = BookReader::parsePath($path);
 247
 248     // Check for non-handled cases
 249     $required = array('identifier', 'operator', 'operand');
 250     foreach ($required as $key) {
 251         if (!array_key_exists($key, $urlParts)) {
 252             return null;
 253         }
 254     }
 255
 256     $identifier = $urlParts['identifier'];
 257     $operator = $urlParts['operator'];
 258     $filename = $urlParts['operand'];
 259     $subPrefix = $urlParts['subPrefix'];
 260
 261     $serverBaseURL = BookReader::serverBaseURL($item->getServer());
 262
 263     // Baseline query params
 264     $query = array(
 265         'id' => $identifier,
 266         'itemPath' => $item->getMainDir(),
 267         'server' => $serverBaseURL
 268     );
 269     if ($subPrefix) {
 270         $query['subPrefix'] = $subPrefix;
 271     }
 272
 273     switch ($operator) {
 274         case 'page':
 275
 276             // Look for old-style preview request - e.g. {identifier}_cover.jpg
 277             if (preg_match('/^(.*)_((cover|title|preview).*)/', $filename, $matches) === 1) {
 278                 // Serve preview image
 279                 $page = $matches[2];
 280                 $query['page'] = $page;
 281                 return 'http://' . $serverBaseURL . '/BookReader/BookReaderPreview.php?' . http_build_query($query, '', '&');
 282             }
 283
 284             // New-style preview request - e.g. cover_thumb.jpg
 285             if (preg_match('/^(cover|title|preview)/', $filename, $matches) === 1) {
 286                 $query['page'] = $filename;
 287                 return 'http://' . $serverBaseURL . '/BookReader/BookReaderPreview.php?' . http_build_query($query, '', '&');
 288             }
 289
 290             // Asking for a non-preview page
 291             $query['page'] = $filename;
 292             return 'http://' . $serverBaseURL . '/BookReader/BookReaderImages.php?' . http_build_query($query, '', '&');
 293
 294         default:
 295             // Unknown operator
 296             return null;
 297     }
 298
 299     return null; // was not handled
 300   }
 301
 302   public static function browserFromUserAgent($userAgent) {
 303       $browserPatterns = array(
 304           'ipad' => '/iPad/',
 305           'iphone' => '/iPhone/', // Also cover iPod Touch
 306           'android' => '/Android/',
 307       );
 308
 309       foreach ($browserPatterns as $browser => $pattern) {
 310           if (preg_match($pattern, $userAgent)) {
 311               return $browser;
 312           }
 313       }
 314       return null;
 315   }
 316
 317
 318   // $$$ Ideally we will not rely on user agent, but for the moment we do
 319   public static function paramsFromUserAgent($userAgent) {
 320       // $$$ using 'embed' here for devices with assumed small screens -- really should just use CSS3 media queries
 321       $browserParams = array(
 322           'ipad' => array( 'ui' => 'touch' ),
 323           'iphone' => array( 'ui' => 'embed', 'mode' => '1up' ),
 324           'android' => array( 'ui' => 'embed', 'mode' => '1up' ),
 325       );
 326
 327       $browser = BookReader::browserFromUserAgent($userAgent);
 328       if ($browser) {
 329           return $browserParams[$browser];
 330       }
 331       return array();
 332   }
 333
 334   public static function parsePath($path) {
 335     // Parse the BookReader path and return the parts
 336     // e.g. itemid/some/sub/dir/page/cover.jpg -> array( 'identifier' => 'itemid', 'subPrefix' => 'some/sub/dir',
 337     //            'operator' => 'page', 'filename' => 'cover.jpg')
 338
 339     $parts = array();
 340
 341     // Pull off query, e.g. ?foo=bar
 342     if (preg_match('#(.*?)(\?.*)#', $path, $matches) === 1) {
 343         $parts['query'] = $matches[2];
 344         $path = $matches[1];
 345     }
 346
 347     // Pull off identifier
 348     if (preg_match('#[^/&?]+#', $path, $matches) === 0) {
 349         // no match
 350         return $parts;
 351     }
 352     $parts['identifier'] = $matches[0];
 353     $path = substr($path, strlen($matches[0]));
 354
 355     // Look for operators
 356     // The sub-prefix can be arbitrary, so we match up until the first operator
 357     $operators = '(' . join('|', self::$downloadOperators) . ')';
 358     $pattern = '#(?P<subPrefix>.*?)/(?P<operator>' . $operators . ')/(?P<operand>.*)#';
 359     if (preg_match($pattern, $path, $matches) === 1) {
 360         $parts['subPrefix'] = substr($matches['subPrefix'], 1); // remove leading '/'
 361         $parts['operator'] = $matches['operator'];
 362         $parts['operand'] = $matches['operand'];
 363     } else {
 364         $parts['subPrefix'] = $path;
 365     }
 366
 367     return $parts;
 368   }
 369
 370 }
 371
 372 ?>