Calculate region in source image
[bookreader.git] / BookReaderIA / datanode / BookReaderImages.inc.php
index b553f8f..9957b40 100644 (file)
@@ -24,9 +24,11 @@ the MIME type is "image/jpeg".
     along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
 */
 
+require_once("BookReaderMeta.inc.php");
+
 class BookReaderImages
 {
-    public $MIMES = array('gif' => 'image/gif',
+    public static $MIMES = array('gif' => 'image/gif',
                    'jp2' => 'image/jp2',
                    'jpg' => 'image/jpeg',
                    'jpeg' => 'image/jpeg',
@@ -34,7 +36,7 @@ class BookReaderImages
                    'tif' => 'image/tiff',
                    'tiff' => 'image/tiff');
                    
-    public $EXTENSIONS = array('gif' => 'gif',
+    public static $EXTENSIONS = array('gif' => 'gif',
                         'jp2' => 'jp2',
                         'jpeg' => 'jpeg',
                         'jpg' => 'jpeg',
@@ -43,17 +45,33 @@ class BookReaderImages
                         'tiff' => 'tiff');
     
     // Width when generating thumbnails
-    public $imageSizes = array(
+    public static $imageSizes = array(
         'thumb' => 100,
-        'small' => 240,
-        'medium' => 500,
-        'large' => 1024,
+        'small' => 256,
+        'medium' => 512,
+        'large' => 2048,
+    );
+
+    // Keys in the image permalink urls, e.g. http://www.archive.org/download/itemid/page/cover_{keyval}_{keyval}.jpg
+    public static $imageUrlKeys = array(
+        //'r' => 'reduce', // pow of 2 reduction
+        's' => 'scale', // $$$ scale is downscaling factor in BookReaderImages but most people call this "reduce"
+        'region' => 'region',
+        'tile' => 'tile',
+        'w' => 'width',
+        'h' => 'height',
+        'x' => 'x',
+        'y' => 'y',
+        'rotate' => 'rotate'
     );
     
     // Paths to command-line tools
     var $exiftool = '/petabox/sw/books/exiftool/exiftool';
     var $kduExpand = '/petabox/sw/bin/kdu_expand';
     
+    // Name of temporary files, to be cleaned at exit
+    var $tempFiles = array();
+    
     /*
      * Serve an image request that requires looking up the book metadata
      *
@@ -71,7 +89,7 @@ class BookReaderImages
         try {
             $metadata = $brm->buildMetadata($_REQUEST['id'], $_REQUEST['itemPath'], $_REQUEST['subPrefix'], $_REQUEST['server']);
         } catch (Exception $e) {
-            $this->BRfatal($e->getMessage);
+            $this->BRfatal($e->getMessage());
         }
         
         $page = $_REQUEST['page'];
@@ -79,32 +97,41 @@ class BookReaderImages
         // Index of image to return
         $imageIndex = null;
 
-        // XXX deal with subPrefix
-        $pageInfo = $this->parsePageRequest($page);
-
-        // Parse requested page for page type, size and format options
-        if (preg_match('#^([^_]+)#', $page, $matches) === 0) {
-            // Unrecognized page specifier
-            $this->BRfatal('Unrecognized page specifier');
+        // deal with subPrefix
+        if ($_REQUEST['subPrefix']) {
+            $parts = explode('/', $_REQUEST['subPrefix']);
+            $bookId = $parts[count($parts) - 1 ];
+        } else {
+            $bookId = $_REQUEST['id'];
         }
-        $basePage = $matches[1];
         
+        $pageInfo = $this->parsePageRequest($page, $bookId);
+
+        $basePage = $pageInfo['type'];
+        
+        $leaf = null;
+        $region = null;
         switch ($basePage) {
+        
             case 'title':
                 if (! array_key_exists('titleIndex', $metadata)) {
                     $this->BRfatal("No title page asserted in book");
                 }
                 $imageIndex = $metadata['titleIndex'];
                 break;
-                
+            
+            /* Old 'cover' behaviour where it would show cover 0 if it exists or return 404.
+               Could be re-added as cover0, cover1, etc
             case 'cover':
                 if (! array_key_exists('coverIndices', $metadata)) {
                     $this->BRfatal("No cover asserted in book");
                 }
                 $imageIndex = $metadata['coverIndices'][0]; // $$$ TODO add support for other covers
                 break;
-                
+            */
+            
             case 'preview':
+            case 'cover': // Show our best guess if cover is requested
                 // Preference is:
                 //   Cover page if book was published >= 1950
                 //   Title page
@@ -130,20 +157,52 @@ class BookReaderImages
                 $imageIndex = 0;
                 break;
                 
+            case 'n':
+                // Accessible index page
+                $imageIndex = intval($pageInfo['value']);
+                break;
+                
+            case 'page':
+                // Named page
+                $index = array_search($pageInfo['value'], $metadata['pageNums']);
+                if ($index === FALSE) {
+                    // Not found
+                    $this->BRfatal("Page not found");
+                    break;
+                }
+                
+                $imageIndex = $index;
+                break;
+                
+            case 'leaf':
+                // Leaf explicitly specified
+                $leaf = $pageInfo['value'];
+                break;
+                                
             default:
                 // Shouldn't be possible
-                $this->BRfatal("Couldn't find page");
+                $this->BRfatal("Unrecognized page type requested");
                 break;
                 
         }
         
-        $leaf = $brm->leafForIndex($imageIndex, $metadata['leafNums']);
+        if (is_null($leaf)) {
+            // Leaf was not explicitly set -- look it up
+            $leaf = $brm->leafForIndex($imageIndex, $metadata['leafNums']);
+        }
         
         $requestEnv = array(
             'zip' => $metadata['zip'],
             'file' => $brm->imageFilePath($leaf, $metadata['subPrefix'], $metadata['imageFormat']),
-            'ext' => 'jpg',
+            'ext' => 'jpg', // XXX should pass through ext
         );
+        
+        // remove non-passthrough keys from pageInfo
+        unset($pageInfo['type']);
+        unset($pageInfo['value']);
+        
+        // add pageinfo to request
+        $requestEnv = array_merge($pageInfo, $requestEnv);
 
         // Return image data - will check privs        
         $this->serveRequest($requestEnv);
@@ -164,6 +223,7 @@ class BookReaderImages
      * Clean up temporary files
      */
      function serveRequest($requestEnv) {
+     
         // Process some of the request parameters
         $zipPath  = $requestEnv['zip'];
         $file     = $requestEnv['file'];
@@ -194,6 +254,20 @@ class BookReaderImages
         // Get the image size and depth
         $imageInfo = $this->getImageInfo($zipPath, $file);
         
+        $region = array();
+        foreach (array('x', 'y', 'w', 'h') as $key) {
+            if (array_key_exists($key, $requestEnv)) {
+                $region[$key] = $requestEnv[$key];
+            }
+        }
+        $regionDimensions = $this->getRegionDimensions($imageInfo, $region);    
+        
+        /* $$$ remove
+        print_r($imageInfo);
+        print_r($region);
+        print_r($regionDimensions);
+        */
+        
         // Output json if requested
         if ('json' == $ext) {
             // $$$ we should determine the output size first based on requested scale
@@ -228,7 +302,15 @@ class BookReaderImages
         $jpegOptions = '-quality 75';
         
         // The pbmreduce reduction factor produces an image with dimension 1/n
-        // The kakadu reduction factor produceds an image with dimension 1/(2^n)
+        // The kakadu reduction factor produces an image with dimension 1/(2^n)
+        
+        // We interpret the requested size and scale, look at image format, and determine the
+        // actual scaling to be returned to the client.  We generally return the largest
+        // power of 2 reduction that is larger than the requested size in order to reduce
+        // image processing load on our cluster.  The client should then scale to their final
+        // needed size.
+        
+        // Set scale from height or width if set
         if (isset($requestEnv['height'])) {
             $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']);
             $scale = pow(2, $powReduce);
@@ -237,13 +319,9 @@ class BookReaderImages
             $scale = pow(2, $powReduce);
 
         } else {
-            // $$$ could be cleaner
-            // Provide next smaller power of two reduction
-            $scale = $requestEnv['scale'];
-            if (!$scale) {
-                $scale = 1;
-            }
-            if (array_key_exists($scale, $this->imageSizes)) {
+            // Set scale from named size (e.g. 'large') if set
+            $size = $requestEnv['size'];
+            if ( $size && array_key_exists($size, self::$imageSizes)) {
                 $srcRatio = floatval($imageInfo['width']) / floatval($imageInfo['height']);
                 if ($srcRatio > 1) {
                     // wide
@@ -251,17 +329,26 @@ class BookReaderImages
                 } else {
                     $dimension = 'height';
                 }
-                $powReduce = $this->nearestPow2Reduce($this->imageSizes[$scale], $imageInfo[$dimension]);
+                $powReduce = $this->nearestPow2Reduce(self::$imageSizes[$size], $imageInfo[$dimension]);
+                $scale = pow(2, $powReduce);
+                
             } else {
+                // No named size - use explicit scale, if given
+                $scale = $requestEnv['scale'];
+                if (!$scale) {
+                    $scale = 1;
+                }
                 $powReduce = $this->nearestPow2ForScale($scale);
-            }
-            $scale = pow(2, $powReduce);
+                // ensure integer scale
+                $scale = pow(2, $powReduce);
+            }            
         }
         
         // Override depending on source image format
         // $$$ consider doing a 302 here instead, to make better use of the browser cache
         // Limit scaling for 1-bit images.  See https://bugs.edge.launchpad.net/bookreader/+bug/486011
         if (1 == $imageInfo['bits']) {
+            
             if ($scale > 1) {
                 $scale /= 2;
                 $powReduce -= 1;
@@ -315,32 +402,37 @@ class BookReaderImages
         
         $filenameForClient = $this->filenameForClient($file, $ext);
         
-        $headers = array('Content-type: '. $MIMES[$ext], // XXX is nginx swallowing this?
+        $headers = array('Content-type: '. self::$MIMES[$ext],
                          'Cache-Control: max-age=15552000',
                          'Content-disposition: inline; filename=' . $filenameForClient);
                           
         
         $errorMessage = '';
+        
         if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
             // $$$ automated reporting
             trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);
             
             // Try some content-specific recovery
-            $recovered = false;    
+            $recovered = false;
             if ($imageInfo['type'] == 'jp2') {
                 $records = $this->getJp2Records($zipPath, $file);
-                if ($powReduce > intval($records['Clevels'])) {
-                    $powReduce = $records['Clevels'];
-                    $reduce = pow(2, $powReduce);
+                if (array_key_exists('Clevels', $records)) {
+                    $maxReduce = intval($records['Clevels']);
+                    trigger_error("BookReader using max reduce $maxReduce from jp2 records");
                 } else {
-                    $reduce = 1;
-                    $powReduce = 0;
+                    $maxReduce = 0;
                 }
-                 
+                
+                $powReduce = min($powReduce, $maxReduce);
+                $reduce = pow(2, $powReduce);
+                
                 $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
+                trigger_error('BookReader rerunning with new cmd: ' . $cmd, E_USER_WARNING);
                 if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
                     $recovered = true;
                 } else {
+                    $this->cleanup();
                     trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
                 }
             }
@@ -350,9 +442,7 @@ class BookReaderImages
             }
         }
         
-        if (isset($tempFile)) {
-            unlink($tempFile);
-        }
+        $this->cleanup();
     }    
     
     function getUnarchiveCommand($archivePath, $file)
@@ -387,8 +477,8 @@ class BookReaderImages
     function imageExtensionToType($extension)
     {
         
-        if (array_key_exists($extension, $this->EXTENSIONS)) {
-            return $this->EXTENSIONS[$extension];
+        if (array_key_exists($extension, self::$EXTENSIONS)) {
+            return self::$EXTENSIONS[$extension];
         } else {
             $this->BRfatal('Unknown image extension');
         }            
@@ -522,6 +612,7 @@ class BookReaderImages
                 // We use the BookReaderTiff prefix to give a hint in case things don't
                 // get cleaned up.
                 $tempFile = tempnam("/tmp", "BookReaderTiff");
+                array_push($this->tempFiles, $tempFile);
             
                 // $$$ look at bit depth when reducing
                 $decompressCmd = 
@@ -584,15 +675,34 @@ class BookReaderImages
             $read = array($stdout, $stderr);
             $write = NULL;
             $except = NULL;
+            
             $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout
             if (false === $numChanged) {
                 // select failed
                 $errorMessage = 'Select failed';
                 $retVal = false;
-            }
-            if ($read[0] == $stdout && (1 == $numChanged)) {
-                // Got output first on stdout (only)
-                // $$$ make sure we get all stdout
+                error_log('BookReader select failed!');
+            } else {            
+                if (in_array($stderr, $read)) {
+                    // Either content in stderr, or stderr is closed (could read 0 bytes)
+                    $error = stream_get_contents($stderr);
+                    if ($error) {
+                    
+                        $errorMessage = $error;
+                        $retVal = false;
+                        
+                        fclose($stderr);
+                        fclose($stdout);
+                        fclose($stdin);
+                        
+                        // It is important that you close any pipes before calling
+                        // proc_close in order to avoid a deadlock
+                        proc_close($process);
+                        return $retVal;             
+                    }
+                }
+                
                 $output = fopen('php://output', 'w');
                 foreach($headers as $header) {
                     header($header);
@@ -600,11 +710,6 @@ class BookReaderImages
                 stream_copy_to_stream($pipes[1], $output);
                 fclose($output); // okay since tied to special php://output
                 $retVal = true;
-            } else {
-                // Got output on stderr
-                // $$$ make sure we get all stderr
-                $errorMessage = stream_get_contents($stderr);
-                $retVal = false;
             }
     
             fclose($stderr);
@@ -624,10 +729,12 @@ class BookReaderImages
     }
     
     function BRfatal($string) {
+        $this->cleanup();
         throw new Exception("Image error: $string");
     }
     
     // Returns true if using a power node
+    // XXX change to "on red box" - not working for new Xeon
     function onPowerNode() {
         exec("lspci | fgrep -c Realtek", $output, $return);
         if ("0" != $output[0]) {
@@ -654,6 +761,9 @@ class BookReaderImages
     }
     
     function checkPrivs($filename) {
+        // $$$ we assume here that requests for the title, cover or preview
+        //     come in via BookReaderPreview.php which will be re-run with
+        //     privileges after we return the 403
         if (!is_readable($filename)) {
             header('HTTP/1.1 403 Forbidden');
             exit(0);
@@ -692,8 +802,12 @@ class BookReaderImages
      */
     function parsePageRequest($pageRequest, $bookPrefix) {
     
+        // Will hold parsed results
         $pageInfo = array();
         
+        // Normalize
+        $pageRequest = strtolower($pageRequest);
+        
         // Pull off extension
         if (preg_match('#(.*)\.([^.]+)$#', $pageRequest, $matches) === 1) {
             $pageRequest = $matches[1];
@@ -724,7 +838,8 @@ class BookReaderImages
             'n' => 'num',
             'cover' => 'single',
             'preview' => 'single',
-            'title' => 'single'
+            'title' => 'single',
+            'leaf' => 'num'
         );
         
         // Look for known page types
@@ -750,24 +865,94 @@ class BookReaderImages
         
         // Look for other known parts
         foreach ($parts as $part) {
-            $start = substr($part, 0, 1);
+            if ( array_key_exists($part, self::$imageSizes) ) {
+                $pageInfo['size'] = $part;
+                continue;
+            }
+        
+            // Key must be alpha, value must start with digit and contain digits, alpha, ',' or '.'
+            // Should prevent injection of strange values into the redirect to datanode
+            if ( preg_match('#^([a-z]+)(\d[a-z0-9,.]*)#', $part, $matches) === 0) {
+                // Not recognized
+                continue;
+            }
             
-            switch ($start) {
-                case 't':
-                    $pageInfo['size'] = $start;
-                    break;
-                case 'r':
-                    $pageInfo['reduce'] = substr($part, 0);
-                    break;
-                default:
-                    // Unrecognized... just let it pass
-                    break;
+            $key = $matches[1];
+            $value = $matches[2];
+            
+            if ( array_key_exists($key, self::$imageUrlKeys) ) {
+                $pageInfo[self::$imageUrlKeys[$key]] = $value;
+                continue;
             }
+            
+            // If we hit here, was unrecognized (no action)
         }
         
         return $pageInfo;
     }
     
+    function getRegionDimensions($sourceDimensions, $regionDimensions) {
+        // Resolve the requested region ('x', 'y', 'w', 'h'; each optional, converted by intAmount())
+        // against $sourceDimensions ('width', 'height'), in terms of the full resolution image.
+        // Returns array('x' => xOffset, 'y' => yOffset, 'w' => width, 'h' => height), clipped to fit the source.
+        // Offset bounds use max(0, ...) so a source under two pixels cannot produce a negative offset.
+        $sourceX = 0;
+        if (array_key_exists('x', $regionDimensions)) {
+            $sourceX = $this->intAmount($regionDimensions['x'], $sourceDimensions['width']);
+        }
+        $sourceX = $this->clamp(0, max(0, $sourceDimensions['width'] - 2), $sourceX); // Allow at least one pixel
+
+        $sourceY = 0;
+        if (array_key_exists('y', $regionDimensions)) {
+            $sourceY = $this->intAmount($regionDimensions['y'], $sourceDimensions['height']);
+        }
+        $sourceY = $this->clamp(0, max(0, $sourceDimensions['height'] - 2), $sourceY); // Allow at least one pixel
+
+        $sourceWidth = $sourceDimensions['width'] - $sourceX; // Default: everything right of the offset
+        if (array_key_exists('w', $regionDimensions)) {
+            $sourceWidth = $this->intAmount($regionDimensions['w'], $sourceDimensions['width']);
+        }
+        $sourceWidth = $this->clamp(1, max(1, $sourceDimensions['width'] - $sourceX), $sourceWidth);
+
+        $sourceHeight = $sourceDimensions['height'] - $sourceY; // Default: everything below the offset
+        if (array_key_exists('h', $regionDimensions)) {
+            $sourceHeight = $this->intAmount($regionDimensions['h'], $sourceDimensions['height']);
+        }
+        $sourceHeight = $this->clamp(1, max(1, $sourceDimensions['height'] - $sourceY), $sourceHeight);
+
+        return array('x' => $sourceX, 'y' => $sourceY, 'w' => $sourceWidth, 'h' => $sourceHeight);
+    }
+    
+    function intAmount($stringValue, $maximum) {
+        // Returns integer amount for string like "5" (5 units) or "0.5" (50% of $maximum).
+        // A value without a decimal point is taken as a plain integer.
+        if (strpos($stringValue, '.') === false) {
+            return intval($stringValue);
+        }
+        // Adding 0.5 before truncating rounds non-negative results to the nearest unit;
+        // intval() makes the fractional case return an int like the branch above.
+        return intval(floatval($stringValue) * $maximum + 0.5);
+    }
+    
+    function clamp($minValue, $maxValue, $observedValue) {
+        // Restrict $observedValue to the range $minValue..$maxValue.
+        // The lower bound is checked before the upper bound.
+        $belowRange = $observedValue < $minValue;
+        $aboveRange = $observedValue > $maxValue;
+
+        if ($belowRange) {
+            return $minValue;
+        }
+        return $aboveRange ? $maxValue : $observedValue;
+    }
+    
+    // Delete every temporary file recorded in $this->tempFiles, oldest first, then empty the list
+    function cleanup() {
+        while (count($this->tempFiles) > 0) {
+            unlink(array_shift($this->tempFiles));
+        }
+        $this->tempFiles = array();
+    }
+    
 }
 
 ?>
\ No newline at end of file