Merge branch 'newui' of https://github.com/rajbot/bookreader into newui
[bookreader.git] / BookReaderIA / datanode / BookReaderImages.inc.php
index d0e2c79..74b7b13 100644 (file)
@@ -28,7 +28,7 @@ require_once("BookReaderMeta.inc.php");
 
 class BookReaderImages
 {
-    public $MIMES = array('gif' => 'image/gif',
+    public static $MIMES = array('gif' => 'image/gif',
                    'jp2' => 'image/jp2',
                    'jpg' => 'image/jpeg',
                    'jpeg' => 'image/jpeg',
@@ -36,7 +36,7 @@ class BookReaderImages
                    'tif' => 'image/tiff',
                    'tiff' => 'image/tiff');
                    
-    public $EXTENSIONS = array('gif' => 'gif',
+    public static $EXTENSIONS = array('gif' => 'gif',
                         'jp2' => 'jp2',
                         'jpeg' => 'jpeg',
                         'jpg' => 'jpeg',
@@ -45,17 +45,31 @@ class BookReaderImages
                         'tiff' => 'tiff');
     
     // Width when generating thumbnails
-    public $imageSizes = array(
+    public static $imageSizes = array(
         'thumb' => 100,
-        'small' => 240,
-        'medium' => 500,
-        'large' => 1024,
+        'small' => 256,
+        'medium' => 512,
+        'large' => 2048,
+    );
+
+    // Keys in the image permalink urls, e.g. http://www.archive.org/download/itemid/page/cover_{keyval}_{keyval}.jpg
+    public static $imageUrlKeys = array(
+        //'r' => 'reduce', // pow of 2 reduction
+        's' => 'scale', // $$$ scale is downscaling factor in BookReaderImages but most people call this "reduce"
+        'region' => 'region',
+        'tile' => 'tile',
+        'w' => 'width',
+        'h' => 'height',
+        'rotate' => 'rotate'
     );
     
     // Paths to command-line tools
     var $exiftool = '/petabox/sw/books/exiftool/exiftool';
     var $kduExpand = '/petabox/sw/bin/kdu_expand';
     
+    // Names of temporary files, to be cleaned up at exit
+    var $tempFiles = array();
+    
     /*
      * Serve an image request that requires looking up the book metadata
      *
@@ -73,7 +87,7 @@ class BookReaderImages
         try {
             $metadata = $brm->buildMetadata($_REQUEST['id'], $_REQUEST['itemPath'], $_REQUEST['subPrefix'], $_REQUEST['server']);
         } catch (Exception $e) {
-            $this->BRfatal($e->getMessage);
+            $this->BRfatal($e->getMessage());
         }
         
         $page = $_REQUEST['page'];
@@ -93,22 +107,28 @@ class BookReaderImages
 
         $basePage = $pageInfo['type'];
         
+        $leaf = null;
         switch ($basePage) {
+        
             case 'title':
                 if (! array_key_exists('titleIndex', $metadata)) {
                     $this->BRfatal("No title page asserted in book");
                 }
                 $imageIndex = $metadata['titleIndex'];
                 break;
-                
+            
+            /* Old 'cover' behaviour where it would show cover 0 if it exists or return 404.
+               Could be re-added as cover0, cover1, etc
             case 'cover':
                 if (! array_key_exists('coverIndices', $metadata)) {
                     $this->BRfatal("No cover asserted in book");
                 }
                 $imageIndex = $metadata['coverIndices'][0]; // $$$ TODO add support for other covers
                 break;
-                
+            */
+            
             case 'preview':
+            case 'cover': // Show our best guess if cover is requested
                 // Preference is:
                 //   Cover page if book was published >= 1950
                 //   Title page
@@ -151,6 +171,11 @@ class BookReaderImages
                 $imageIndex = $index;
                 break;
                 
+            case 'leaf':
+                // Leaf explicitly specified
+                $leaf = $pageInfo['value'];
+                break;
+                
             default:
                 // Shouldn't be possible
                 $this->BRfatal("Unrecognized page type requested");
@@ -158,13 +183,23 @@ class BookReaderImages
                 
         }
         
-        $leaf = $brm->leafForIndex($imageIndex, $metadata['leafNums']);
+        if (is_null($leaf)) {
+            // Leaf was not explicitly set -- look it up
+            $leaf = $brm->leafForIndex($imageIndex, $metadata['leafNums']);
+        }
         
         $requestEnv = array(
             'zip' => $metadata['zip'],
             'file' => $brm->imageFilePath($leaf, $metadata['subPrefix'], $metadata['imageFormat']),
-            'ext' => 'jpg',
+            'ext' => 'jpg', // XXX should pass through ext
         );
+        
+        // remove non-passthrough keys from pageInfo
+        unset($pageInfo['type']);
+        unset($pageInfo['value']);
+        
+        // add pageinfo to request
+        $requestEnv = array_merge($pageInfo, $requestEnv);
 
         // Return image data - will check privs        
         $this->serveRequest($requestEnv);
@@ -185,6 +220,7 @@ class BookReaderImages
      * Clean up temporary files
      */
      function serveRequest($requestEnv) {
+     
         // Process some of the request parameters
         $zipPath  = $requestEnv['zip'];
         $file     = $requestEnv['file'];
@@ -249,7 +285,15 @@ class BookReaderImages
         $jpegOptions = '-quality 75';
         
         // The pbmreduce reduction factor produces an image with dimension 1/n
-        // The kakadu reduction factor produceds an image with dimension 1/(2^n)
+        // The kakadu reduction factor produces an image with dimension 1/(2^n)
+        
+        // We interpret the requested size and scale, look at the image format, and determine
+        // the actual scaling of the image returned to the client.  We generally apply the
+        // largest power-of-2 reduction whose result is still larger than the requested size,
+        // in order to reduce image processing load on our cluster.  The client should then
+        // scale the image down to its final needed size.
+        
+        // Set scale from height or width if set
         if (isset($requestEnv['height'])) {
             $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']);
             $scale = pow(2, $powReduce);
@@ -258,13 +302,9 @@ class BookReaderImages
             $scale = pow(2, $powReduce);
 
         } else {
-            // $$$ could be cleaner
-            // Provide next smaller power of two reduction
-            $scale = $requestEnv['scale'];
-            if (!$scale) {
-                $scale = 1;
-            }
-            if (array_key_exists($scale, $this->imageSizes)) {
+            // Set scale from named size (e.g. 'large') if set
+            $size = $requestEnv['size'];
+            if ( $size && array_key_exists($size, self::$imageSizes)) {
                 $srcRatio = floatval($imageInfo['width']) / floatval($imageInfo['height']);
                 if ($srcRatio > 1) {
                     // wide
@@ -272,17 +312,26 @@ class BookReaderImages
                 } else {
                     $dimension = 'height';
                 }
-                $powReduce = $this->nearestPow2Reduce($this->imageSizes[$scale], $imageInfo[$dimension]);
+                $powReduce = $this->nearestPow2Reduce(self::$imageSizes[$size], $imageInfo[$dimension]);
+                $scale = pow(2, $powReduce);
+                
             } else {
+                // No named size - use explicit scale, if given
+                $scale = $requestEnv['scale'];
+                if (!$scale) {
+                    $scale = 1;
+                }
                 $powReduce = $this->nearestPow2ForScale($scale);
-            }
-            $scale = pow(2, $powReduce);
+                // ensure integer scale
+                $scale = pow(2, $powReduce);
+            }            
         }
         
         // Override depending on source image format
         // $$$ consider doing a 302 here instead, to make better use of the browser cache
         // Limit scaling for 1-bit images.  See https://bugs.edge.launchpad.net/bookreader/+bug/486011
         if (1 == $imageInfo['bits']) {
+            
             if ($scale > 1) {
                 $scale /= 2;
                 $powReduce -= 1;
@@ -336,32 +385,37 @@ class BookReaderImages
         
         $filenameForClient = $this->filenameForClient($file, $ext);
         
-        $headers = array('Content-type: '. $MIMES[$ext], // XXX is nginx swallowing this?
+        $headers = array('Content-type: '. self::$MIMES[$ext],
                          'Cache-Control: max-age=15552000',
                          'Content-disposition: inline; filename=' . $filenameForClient);
                           
         
         $errorMessage = '';
+        
         if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
             // $$$ automated reporting
             trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);
             
             // Try some content-specific recovery
-            $recovered = false;    
+            $recovered = false;
             if ($imageInfo['type'] == 'jp2') {
                 $records = $this->getJp2Records($zipPath, $file);
-                if ($powReduce > intval($records['Clevels'])) {
-                    $powReduce = $records['Clevels'];
-                    $reduce = pow(2, $powReduce);
+                if (array_key_exists('Clevels', $records)) {
+                    $maxReduce = intval($records['Clevels']);
+                    trigger_error("BookReader using max reduce $maxReduce from jp2 records");
                 } else {
-                    $reduce = 1;
-                    $powReduce = 0;
+                    $maxReduce = 0;
                 }
-                 
+                
+                $powReduce = min($powReduce, $maxReduce);
+                $reduce = pow(2, $powReduce);
+                
                 $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
+                trigger_error('BookReader rerunning with new cmd: ' . $cmd, E_USER_WARNING);
                 if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
                     $recovered = true;
                 } else {
+                    $this->cleanup();
                     trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
                 }
             }
@@ -371,9 +425,7 @@ class BookReaderImages
             }
         }
         
-        if (isset($tempFile)) {
-            unlink($tempFile);
-        }
+        $this->cleanup();
     }    
     
     function getUnarchiveCommand($archivePath, $file)
@@ -408,8 +460,8 @@ class BookReaderImages
     function imageExtensionToType($extension)
     {
         
-        if (array_key_exists($extension, $this->EXTENSIONS)) {
-            return $this->EXTENSIONS[$extension];
+        if (array_key_exists($extension, self::$EXTENSIONS)) {
+            return self::$EXTENSIONS[$extension];
         } else {
             $this->BRfatal('Unknown image extension');
         }            
@@ -543,6 +595,7 @@ class BookReaderImages
                 // We use the BookReaderTiff prefix to give a hint in case things don't
                 // get cleaned up.
                 $tempFile = tempnam("/tmp", "BookReaderTiff");
+                array_push($this->tempFiles, $tempFile);
             
                 // $$$ look at bit depth when reducing
                 $decompressCmd = 
@@ -605,15 +658,34 @@ class BookReaderImages
             $read = array($stdout, $stderr);
             $write = NULL;
             $except = NULL;
+            
             $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout
             if (false === $numChanged) {
                 // select failed
                 $errorMessage = 'Select failed';
                 $retVal = false;
-            }
-            if ($read[0] == $stdout && (1 == $numChanged)) {
-                // Got output first on stdout (only)
-                // $$$ make sure we get all stdout
+                error_log('BookReader select failed!');
+            } else {            
+                if (in_array($stderr, $read)) {
+                    // Either content in stderr, or stderr is closed (could read 0 bytes)
+                    $error = stream_get_contents($stderr);
+                    if ($error) {
+                    
+                        $errorMessage = $error;
+                        $retVal = false;
+                        
+                        fclose($stderr);
+                        fclose($stdout);
+                        fclose($stdin);
+                        
+                        // It is important that you close any pipes before calling
+                        // proc_close in order to avoid a deadlock
+                        proc_close($process);
+                        return $retVal;             
+                    }
+                }
+                
                 $output = fopen('php://output', 'w');
                 foreach($headers as $header) {
                     header($header);
@@ -621,11 +693,6 @@ class BookReaderImages
                 stream_copy_to_stream($pipes[1], $output);
                 fclose($output); // okay since tied to special php://output
                 $retVal = true;
-            } else {
-                // Got output on stderr
-                // $$$ make sure we get all stderr
-                $errorMessage = stream_get_contents($stderr);
-                $retVal = false;
             }
     
             fclose($stderr);
@@ -645,10 +712,12 @@ class BookReaderImages
     }
     
     function BRfatal($string) {
+        $this->cleanup();
         throw new Exception("Image error: $string");
     }
     
     // Returns true if using a power node
+    // XXX change to "on red box" - not working for new Xeon
     function onPowerNode() {
         exec("lspci | fgrep -c Realtek", $output, $return);
         if ("0" != $output[0]) {
@@ -675,6 +744,9 @@ class BookReaderImages
     }
     
     function checkPrivs($filename) {
+        // $$$ we assume here that requests for the title, cover or preview
+        //     come in via BookReaderPreview.php which will be re-run with
+        //     privileges after we return the 403
         if (!is_readable($filename)) {
             header('HTTP/1.1 403 Forbidden');
             exit(0);
@@ -713,8 +785,12 @@ class BookReaderImages
      */
     function parsePageRequest($pageRequest, $bookPrefix) {
     
+        // Will hold parsed results
         $pageInfo = array();
         
+        // Normalize
+        $pageRequest = strtolower($pageRequest);
+        
         // Pull off extension
         if (preg_match('#(.*)\.([^.]+)$#', $pageRequest, $matches) === 1) {
             $pageRequest = $matches[1];
@@ -745,7 +821,8 @@ class BookReaderImages
             'n' => 'num',
             'cover' => 'single',
             'preview' => 'single',
-            'title' => 'single'
+            'title' => 'single',
+            'leaf' => 'num'
         );
         
         // Look for known page types
@@ -771,24 +848,40 @@ class BookReaderImages
         
         // Look for other known parts
         foreach ($parts as $part) {
-            $start = substr($part, 0, 1);
+            if ( array_key_exists($part, self::$imageSizes) ) {
+                $pageInfo['size'] = $part;
+                continue;
+            }
+        
+            // Key must be alpha, value must start with digit and contain digits, alpha, ',' or '.'
+            // Should prevent injection of strange values into the redirect to datanode
+            if ( preg_match('#^([a-z]+)(\d[a-z0-9,.]*)#', $part, $matches) === 0) {
+                // Not recognized
+                continue;
+            }
             
-            switch ($start) {
-                case 't':
-                    $pageInfo['size'] = $start;
-                    break;
-                case 'r':
-                    $pageInfo['reduce'] = substr($part, 0);
-                    break;
-                default:
-                    // Unrecognized... just let it pass
-                    break;
+            $key = $matches[1];
+            $value = $matches[2];
+            
+            if ( array_key_exists($key, self::$imageUrlKeys) ) {
+                $pageInfo[self::$imageUrlKeys[$key]] = $value;
+                continue;
             }
+            
+            // If we get here, the part was unrecognized (no action taken)
         }
         
         return $pageInfo;
     }
     
+    // Clean up temporary files and resources
+    function cleanup() {
+        foreach($this->tempFiles as $tempFile) {
+            unlink($tempFile);
+        }
+        $this->tempFiles = array();
+    }
+    
 }
 
 ?>
\ No newline at end of file