BookReaderIA/inc/BookReader.inc

   1 <?
   2
   3 /*
   4  *
   5
   6 /*
   7  * Note: Edits to this file must pass through github.  To submit a patch to this
   8  *       file please contact mang at archive dot org or http://github.com/mangtronix
   9  *       Direct changes to this file may get clobbered when the code is synchronized
  10  *       from github.
  11  */
  12
  13 class BookReader
  14 {
  15
  16   // Operators recognized in BookReader download URLs
  17   public static $downloadOperators = array('page');
  18
  19   // Returns true if can display the book in item with a given prefix (typically the item identifier)
  20   public static function canDisplay($item, $prefix, $checkOldScandata = false)
  21   {
  22
  23     // A "book" is an image stack and scandata.
  24     // 1. Old items may have scandata.xml or scandata.zip and itemid_{imageformat}.{zip,tar}
  25     // 2. Newer items may have multiple {arbitraryname}_scandata.xml and {arbitraryname}_{imageformat}.{zip,tar}
  26
  27     $foundScandata = false;
  28     $foundImageStack = false;
  29
  30     $targetScandata = $prefix . "_scandata.xml";
  31
  32     // $$$ TODO add support for jpg and tar stacks
  33     // https://bugs.edge.launchpad.net/gnubook/+bug/323003
  34     // https://bugs.edge.launchpad.net/gnubook/+bug/385397
  35     $imageFormatRegex = '@' . preg_quote($prefix, '@') . '_(jp2|tif|jpg)\.(zip|tar)$@';
  36
  37     $baseLength = strlen($item->metadataGrabber->mainDir . '/');
  38     foreach ($item->getFiles() as $location => $fileInfo) {
  39         $filename = substr($location, $baseLength);
  40
  41         if ($checkOldScandata) {
  42             if ($filename == 'scandata.xml' || $filename == 'scandata.zip') {
  43                 $foundScandata = $filename;
  44             }
  45         }
  46
  47         if ($filename == $targetScandata) {
  48             $foundScandata = $filename;
  49         }
  50
  51         if (preg_match($imageFormatRegex, $filename)) {
  52             $foundImageStack = $filename;
  53         }
  54     }
  55
  56     if ($foundScandata && $foundImageStack) {
  57         return true;
  58     }
  59
  60     return false;
  61   }
  62
  63   // Finds the prefix to use for the book given the part of the URL trailing after /stream/
  64   public static function findPrefix($urlPortion)
  65   {
  66     if (!preg_match('#[^/&?]+#', $urlPortion, $matches)) {
  67         // URL portion was empty or started with /, &, or ? -- no item identifier
  68         return false;
  69     }
  70
  71     $prefix = $matches[0]; // item identifier
  72
  73     // $$$ Currently swallows the rest of the URL.
  74     //     If we want to support e.g. /stream/itemid/subdir/prefix/page/23 will need to adjust.
  75     if (preg_match('#[^/&?]+/([^&?]+)#', $urlPortion, $matches)) {
  76         // Match is everything after item identifier and slash, up to end or ? or &
  77         // e.g. itemid/{match/these/parts}?foo=bar
  78         $prefix = $matches[1]; // sub prefix --
  79     }
  80
  81     return $prefix;
  82   }
  83
  84   // $$$ would be cleaner to use different templates instead of the uiMode param
  85   //
  86   // @param subprefix Optional prefix to display a book inside an item (e.g. if does not match identifier)
  87   public static function draw($server, $mainDir, $identifier, $subPrefix, $title,
  88                               $coverLeaf=null, $titleStart='Internet Archive', $uiMode='full')
  89   {
  90     // Set title to default if not set
  91     if (!$title) {
  92         $title = 'BookReader';
  93     }
  94
  95     $id = $identifier;
  96
  97     // manually update with Launchpad version number at each checkin so that browsers
  98     // do not use old cached version
  99     // see https://bugs.launchpad.net/gnubook/+bug/330748
 100     $version = "imageurls";
 101
 102     if (BookReader::getDevHost($server)) {
 103         // on dev host - add time to force reload
 104         $version .= '_' . time();
 105     }
 106
 107     if ("" == $id) {
 108         echo "No identifier specified!";
 109         die(-1);
 110     }
 111
 112     $metaURL = BookReader::jsMetadataURL($server, $identifier, $mainDir, $subPrefix);
 113
 114 ?>
 115 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 116 <html>
 117 <head>
 118     <meta name="viewport" content="width=device-width, maximum-scale=1.0" />
 119     <meta name="apple-mobile-web-app-capable" content="yes" />
 120     <title><? echo $title; ?></title>
 121 <!--[if lte IE 6]>
 122     <meta http-equiv="refresh" content="2; URL=/bookreader/browserunsupported.php?id=<? echo($id); ?>">
 123 <![endif]-->
 124     <link rel="stylesheet" type="text/css" href="/bookreader/BookReader.css?v=<? echo($version); ?>">
 125 <? if ($uiMode == "embed") { ?>
 126     <link rel="stylesheet" type="text/css" href="/bookreader/BookReaderEmbed.css?v=<? echo($version); ?>">
 127 <? } elseif ($uiMode == "touch") { ?>
 128     <link rel="stylesheet" type="text/css" href="/bookreader/touch/BookReaderTouch.css?v=<? echo($version); ?>">
 129 <? } /* uiMode */ ?>
 130     <script src="/includes/jquery-1.4.2.min.js" type="text/javascript"></script>
 131     <script type="text/javascript" src="/bookreader/jquery-ui-1.8.1.custom.min.js?v=<? echo($version); ?>"></script>
 132     <script type="text/javascript" src="/bookreader/dragscrollable.js?v=<? echo($version); ?>"></script>
 133     <script type="text/javascript" src="/bookreader/BookReader.js?v=<? echo($version); ?>"></script>
 134     <script type="text/javascript" src="/bookreader/soundmanager/soundmanager2.js?v=<? echo($version); ?>"></script>
 135     <script>
 136         soundManager.debugMode = false;
 137         soundManager.url = '/bookreader/soundmanager/swf/';
 138         soundManager.useHTML5Audio = true;
 139     </script>
 140 </head>
 141 <body style="background-color: #FFFFFF;">
 142
 143 <? if ($uiMode == 'full') { ?>
 144 <div id="BookReader" style="left:10px; right:200px; top:10px; bottom:2em;">Internet Archive BookReader <noscript>requires JavaScript to be enabled.</noscript></div>
 145 <? } else { ?>
 146 <div id="BookReader" style="left:0; right:0; top:0; bottom:0; border:0">Internet Archive Bookreader <noscript>requires JavaScript to be enabled.</noscript></div>
 147 <? } /* uiMode*/ ?>
 148
 149 <script type="text/javascript">
 150   // Set some config variables -- $$$ NB: Config object format has not been finalized
 151   var brConfig = {};
 152 <? if ($uiMode == 'embed') { ?>
 153   brConfig["mode"] = 1;
 154   brConfig["reduce"] = 8;
 155   brConfig["ui"] = "embed";
 156 <? } else { ?>
 157   brConfig["mode"] = 2;
 158 <? } ?>
 159 </script>
 160 <!-- The script included below is dynamically generated JavaScript that includes the book metadata and page image access functions -->
 161 <script type="text/javascript" src="<? echo($metaURL); ?>"></script>
 162
 163 <? if ($uiMode == 'full') { ?>
 164 <div id="BookReaderSearch" style="width:190px; right:0px; top:10px; bottom:2em;">
 165     <form action='javascript:' onsubmit="br.search($('#BookReaderSearchBox').val());">
 166         <p style="display: inline">
 167             <input id="BookReaderSearchBox" type="text" size="20" value="search..." onfocus="if('search...'==this.value)this.value='';" /><input type="submit" value="go" />
 168         </p>
 169     </form>
 170     <div id="BookReaderSearchResults">
 171         Search results
 172     </div>
 173 </div>
 174
 175
 176 <div id="BRfooter">
 177     <div class="BRlogotype">
 178         <a href="http://archive.org/" class="BRblack">Internet Archive</a>
 179     </div>
 180     <div class="BRnavlinks">
 181         <!-- <a class="BRblack" href="http://openlibrary.org/dev/docs/bookreader">About the Bookreader</a> | -->
 182         <a class="BRblack" href="http://www.archive.org/about/faqs.php#Report_Item">Content Problems</a> |
 183         <a class="BRblack" href="https://bugs.launchpad.net/bookreader/+filebug">Report Bugs</a> |
 184         <a class="BRblack" href="http://www.archive.org/details/texts">Texts Collection</a> |
 185         <a class="BRblack" href="http://www.archive.org/about/contact.php">Contact Us</a>
 186     </div>
 187 </div>
 188 <? } /* uiMode */ ?>
 189
 190 <script type="text/javascript">
 191     // $$$ hack to workaround sizing bug when starting in two-up mode
 192     $(document).ready(function() {
 193         $(window).trigger('resize');
 194     });
 195 </script>
 196   <?
 197     exit;
 198   }
 199
 200   // Returns the user part of dev host from URL, or null
 201   public static function getDevHost($server)
 202   {
 203       if (preg_match("/^www-(\w+)/", $_SERVER["SERVER_NAME"], $match)) {
 204         return $match[1];
 205       }
 206
 207       return null;
 208   }
 209
 210
 211   public static function serverBaseURL($server)
 212   {
 213       // Check if we're on a dev vhost and point to JSIA in the user's public_html
 214       // on the datanode
 215       // $$$ the remapping isn't totally automatic yet and requires user to
 216       //     ln -s ~/petabox/www/datanode/BookReader ~/public_html/BookReader
 217       //     so we enable it only for known hosts
 218       $devhost = BookReader::getDevHost($server);
 219       $devhosts = array('mang', 'testflip', 'rkumar');
 220       if (in_array($devhost, $devhosts)) {
 221         $server = $server . "/~" . $devhost;
 222       }
 223       return $server;
 224   }
 225
 226
 227   public static function jsMetadataURL($server, $identifier, $mainDir, $subPrefix = '')
 228   {
 229     $serverBaseURL = BookReader::serverBaseURL($server);
 230
 231     $params = array( 'id' => $identifier, 'itemPath' => $mainDir, 'server' => $server );
 232     if ($subPrefix) {
 233         $params['subPrefix'] = $subPrefix;
 234     }
 235
 236     $keys = array_keys($params);
 237     $lastParam = end($keys);
 238     $url = "http://{$serverBaseURL}/BookReader/BookReaderJSIA.php?";
 239     foreach($params as $param=>$value) {
 240         $url .= $param . '=' . $value;
 241         if ($param != $lastParam) {
 242             $url .= '&';
 243         }
 244     }
 245
 246     return $url;
 247   }
 248
 249   // Return the URL for the requested /download/$path, or null
 250   public static function getURL($path, $item) {
 251     // $path should look like {itemId}/{operator}/{filename}
 252     // Other operators may be added
 253
 254     $urlParts = BookReader::parsePath($path);
 255
 256     // Check for non-handled cases
 257     $required = array('identifier', 'operator', 'operand');
 258     foreach ($required as $key) {
 259         if (!array_key_exists($key, $urlParts)) {
 260             return null;
 261         }
 262     }
 263
 264     $identifier = $urlParts['identifier'];
 265     $operator = $urlParts['operator'];
 266     $filename = $urlParts['operand'];
 267     $subPrefix = $urlParts['subPrefix'];
 268
 269     $serverBaseURL = BookReader::serverBaseURL($item->getServer());
 270
 271     // Baseline query params
 272     $query = array(
 273         'id' => $identifier,
 274         'itemPath' => $item->getMainDir(),
 275         'server' => $serverBaseURL
 276     );
 277     if ($subPrefix) {
 278         $query['subPrefix'] = $subPrefix;
 279     }
 280
 281     switch ($operator) {
 282         case 'page':
 283
 284             // Look for old-style preview request - e.g. {identifier}_cover.jpg
 285             if (preg_match('/^(.*)_((cover|title|preview).*)/', $filename, $matches) === 1) {
 286                 // Serve preview image
 287                 $page = $matches[2];
 288                 $query['page'] = $page;
 289                 return 'http://' . $serverBaseURL . '/BookReader/BookReaderPreview.php?' . http_build_query($query, '', '&');
 290             }
 291
 292             // New-style preview request - e.g. cover_thumb.jpg
 293             if (preg_match('/^(cover|title|preview)/', $filename, $matches) === 1) {
 294                 $query['page'] = $filename;
 295                 return 'http://' . $serverBaseURL . '/BookReader/BookReaderPreview.php?' . http_build_query($query, '', '&');
 296             }
 297
 298             // Asking for a non-preview page
 299             $query['page'] = $filename;
 300             return 'http://' . $serverBaseURL . '/BookReader/BookReaderImages.php?' . http_build_query($query, '', '&');
 301
 302         default:
 303             // Unknown operator
 304             return null;
 305     }
 306
 307     return null; // was not handled
 308   }
 309
 310   public static function browserFromUserAgent($userAgent) {
 311       $browserPatterns = array(
 312           'ipad' => '/iPad/',
 313           'iphone' => '/iPhone/', // Also cover iPod Touch
 314           'android' => '/Android/',
 315       );
 316
 317       foreach ($browserPatterns as $browser => $pattern) {
 318           if (preg_match($pattern, $userAgent)) {
 319               return $browser;
 320           }
 321       }
 322       return null;
 323   }
 324
 325
 326   // $$$ Ideally we will not rely on user agent, but for the moment we do
 327   public static function paramsFromUserAgent($userAgent) {
 328       // $$$ using 'embed' here for devices with assumed small screens -- really should just use CSS3 media queries
 329       $browserParams = array(
 330           'ipad' => array( 'ui' => 'touch' ),
 331           'iphone' => array( 'ui' => 'embed', 'mode' => '1up' ),
 332           'android' => array( 'ui' => 'embed', 'mode' => '1up' ),
 333       );
 334
 335       $browser = BookReader::browserFromUserAgent($userAgent);
 336       if ($browser) {
 337           return $browserParams[$browser];
 338       }
 339       return array();
 340   }
 341
 342   public static function parsePath($path) {
 343     // Parse the BookReader path and return the parts
 344     // e.g. itemid/some/sub/dir/page/cover.jpg -> array( 'identifier' => 'itemid', 'subPrefix' => 'some/sub/dir',
 345     //            'operator' => 'page', 'filename' => 'cover.jpg')
 346
 347     $parts = array();
 348
 349     // Pull off query, e.g. ?foo=bar
 350     if (preg_match('#(.*?)(\?.*)#', $path, $matches) === 1) {
 351         $parts['query'] = $matches[2];
 352         $path = $matches[1];
 353     }
 354
 355     // Pull off identifier
 356     if (preg_match('#[^/&?]+#', $path, $matches) === 0) {
 357         // no match
 358         return $parts;
 359     }
 360     $parts['identifier'] = $matches[0];
 361     $path = substr($path, strlen($matches[0]));
 362
 363     // Look for operators
 364     // The sub-prefix can be arbitrary, so we match up until the first operator
 365     $operators = '(' . join('|', self::$downloadOperators) . ')';
 366     $pattern = '#(?P<subPrefix>.*?)/(?P<operator>' . $operators . ')/(?P<operand>.*)#';
 367     if (preg_match($pattern, $path, $matches) === 1) {
 368         $parts['subPrefix'] = substr($matches['subPrefix'], 1); // remove leading '/'
 369         $parts['operator'] = $matches['operator'];
 370         $parts['operand'] = $matches['operand'];
 371     } else {
 372         $parts['subPrefix'] = $path;
 373     }
 374
 375     return $parts;
 376   }
 377
 378 }
 379
 380 ?>