X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=BookReaderIA%2Fdatanode%2FBookReaderImages.inc.php;h=512cd6e67fdacf3724b0ca98b584604950d7356f;hb=2b57cdeb7b3fdbe80cb9277ba698f2fe0ad6556a;hp=493399995d3329d72c511302b39be665d2768fa2;hpb=9a9f84690fc774ddb0230b31cd238ecb092fd105;p=bookreader.git diff --git a/BookReaderIA/datanode/BookReaderImages.inc.php b/BookReaderIA/datanode/BookReaderImages.inc.php index 4933999..512cd6e 100644 --- a/BookReaderIA/datanode/BookReaderImages.inc.php +++ b/BookReaderIA/datanode/BookReaderImages.inc.php @@ -28,7 +28,7 @@ require_once("BookReaderMeta.inc.php"); class BookReaderImages { - public $MIMES = array('gif' => 'image/gif', + public static $MIMES = array('gif' => 'image/gif', 'jp2' => 'image/jp2', 'jpg' => 'image/jpeg', 'jpeg' => 'image/jpeg', @@ -36,7 +36,7 @@ class BookReaderImages 'tif' => 'image/tiff', 'tiff' => 'image/tiff'); - public $EXTENSIONS = array('gif' => 'gif', + public static $EXTENSIONS = array('gif' => 'gif', 'jp2' => 'jp2', 'jpeg' => 'jpeg', 'jpg' => 'jpeg', @@ -45,17 +45,33 @@ class BookReaderImages 'tiff' => 'tiff'); // Width when generating thumbnails - public $imageSizes = array( + public static $imageSizes = array( 'thumb' => 100, - 'small' => 240, - 'medium' => 500, - 'large' => 1024, + 'small' => 256, + 'medium' => 512, + 'large' => 2048, + ); + + // Keys in the image permalink urls, e.g. http://www.archive.org/download/itemid/page/cover_{keyval}_{keyval}.jpg + public static $imageUrlKeys = array( + //'r' => 'reduce', // pow of 2 reduction + 's' => 'scale', // $$$ scale is downscaling factor in BookReaderImages but most people call this "reduce" + 'region' => 'region', + 'tile' => 'tile', + 'w' => 'width', + 'h' => 'height', + 'x' => 'x', + 'y' => 'y', + 'rot' => 'rotate' ); // Paths to command-line tools var $exiftool = '/petabox/sw/books/exiftool/exiftool'; var $kduExpand = '/petabox/sw/bin/kdu_expand'; + // Name of temporary files, to be cleaned at exit + var $tempFiles = array(); + /* * Serve an image request that requires looking up the book metadata * @@ -73,7 +89,7 @@ class BookReaderImages try { $metadata = $brm->buildMetadata($_REQUEST['id'], $_REQUEST['itemPath'], $_REQUEST['subPrefix'], $_REQUEST['server']); } catch (Exception $e) { - $this->BRfatal($e->getMessage); + $this->BRfatal($e->getMessage()); } $page = $_REQUEST['page']; @@ -83,7 +99,7 @@ class BookReaderImages // deal with subPrefix if ($_REQUEST['subPrefix']) { - $parts = split('/', $_REQUEST['subPrefix']); + $parts = explode('/', $_REQUEST['subPrefix']); $bookId = $parts[count($parts) - 1 ]; } else { $bookId = $_REQUEST['id']; @@ -93,22 +109,29 @@ class BookReaderImages $basePage = $pageInfo['type']; + $leaf = null; + $region = null; switch ($basePage) { + case 'title': if (! array_key_exists('titleIndex', $metadata)) { $this->BRfatal("No title page asserted in book"); } $imageIndex = $metadata['titleIndex']; break; - + + /* Old 'cover' behaviour where it would show cover 0 if it exists or return 404. + Could be re-added as cover0, cover1, etc case 'cover': if (! array_key_exists('coverIndices', $metadata)) { $this->BRfatal("No cover asserted in book"); } $imageIndex = $metadata['coverIndices'][0]; // $$$ TODO add support for other covers break; - + */ + case 'preview': + case 'cover': // Show our best guess if cover is requested // Preference is: // Cover page if book was published >= 1950 // Title page @@ -151,6 +174,11 @@ class BookReaderImages $imageIndex = $index; break; + case 'leaf': + // Leaf explicitly specified + $leaf = $pageInfo['value']; + break; + default: // Shouldn't be possible $this->BRfatal("Unrecognized page type requested"); @@ -158,18 +186,23 @@ class BookReaderImages } - $leaf = $brm->leafForIndex($imageIndex, $metadata['leafNums']); + if (is_null($leaf)) { + // Leaf was not explicitly set -- look it up + $leaf = $brm->leafForIndex($imageIndex, $metadata['leafNums']); + } $requestEnv = array( 'zip' => $metadata['zip'], 'file' => $brm->imageFilePath($leaf, $metadata['subPrefix'], $metadata['imageFormat']), - 'ext' => 'jpg', + 'ext' => 'jpg', // XXX should pass through ext ); - if ($pageInfo['reduce']) { - $requestEnv['reduce'] = $pageInfo['reduce']; - } - // $$$ handle scale, other sizes, rotation, etc + // remove non-passthrough keys from pageInfo + unset($pageInfo['type']); + unset($pageInfo['value']); + + // add pageinfo to request + $requestEnv = array_merge($pageInfo, $requestEnv); // Return image data - will check privs $this->serveRequest($requestEnv); @@ -190,6 +223,7 @@ class BookReaderImages * Clean up temporary files */ function serveRequest($requestEnv) { + // Process some of the request parameters $zipPath = $requestEnv['zip']; $file = $requestEnv['file']; @@ -219,7 +253,7 @@ class BookReaderImages // Get the image size and depth $imageInfo = $this->getImageInfo($zipPath, $file); - + // Output json if requested if ('json' == $ext) { // $$$ we should determine the output size first based on requested scale @@ -254,22 +288,37 @@ class BookReaderImages $jpegOptions = '-quality 75'; // The pbmreduce reduction factor produces an image with dimension 1/n - // The kakadu reduction factor produceds an image with dimension 1/(2^n) - if (isset($requestEnv['height'])) { - $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']); - $scale = pow(2, $powReduce); - } else if (isset($requestEnv['width'])) { - $powReduce = $this->nearestPow2Reduce($requestEnv['width'], $imageInfo['width']); - $scale = pow(2, $powReduce); + // The kakadu reduction factor produces an image with dimension 1/(2^n) + + // We interpret the requested size and scale, look at image format, and determine the + // actual scaling to be returned to the client. We generally return the largest + // power of 2 reduction that is larger than the requested size in order to reduce + // image processing load on our cluster. The client should then scale to their final + // needed size. + + // Sizing logic: + // If a named size is provided, we size the full image to that size + // If x or y is set, we interpret the supplied width/height as the size of image region to crop to + // If x and y are not set and both width and height are set, we size the full image "within" the width/height + // If x and y are not set and only one of width and height are set, we size the full image to that width or height + // If none of the above apply, we use the whole image + + // Crop region, if empty whole image is used + $region = array(); - } else { - // $$$ could be cleaner - // Provide next smaller power of two reduction + // Initialize scale + $scale = 1; + if (isset($requestEnv['scale'])) { $scale = $requestEnv['scale']; - if (!$scale) { - $scale = 1; - } - if (array_key_exists($scale, $this->imageSizes)) { + } + $powReduce = $this->nearestPow2ForScale($scale); + // ensure integer scale + $scale = pow(2, $powReduce); + + if ( isset($requestEnv['size']) ) { + // Set scale from named size (e.g. 'large') if set + $size = $requestEnv['size']; + if ( $size && array_key_exists($size, self::$imageSizes)) { $srcRatio = floatval($imageInfo['width']) / floatval($imageInfo['height']); if ($srcRatio > 1) { // wide @@ -277,17 +326,62 @@ class BookReaderImages } else { $dimension = 'height'; } - $powReduce = $this->nearestPow2Reduce($this->imageSizes[$scale], $imageInfo[$dimension]); + $powReduce = $this->nearestPow2Reduce(self::$imageSizes[$size], $imageInfo[$dimension]); + $scale = pow(2, $powReduce); + } + + } else if ( isset($requestEnv['x']) || isset($requestEnv['y']) ) { + // x,y is crop region origin, width,height is size of crop region + foreach (array('x', 'y', 'width', 'height') as $key) { + if (array_key_exists($key, $requestEnv)) { + $region[$key] = $requestEnv[$key]; + } + } + + } else if ( isset($requestEnv['width']) && isset($requestEnv['height']) ) { + // proportional scaling within requested width/height + $srcAspect = floatval($imageInfo['width']) / floatval($imageInfo['height']); + $fitAspect = floatval($requestEnv['width']) / floatval($requestEnv['height']); + + if ($srcAspect > $fitAspect) { + // Source image is wide compared to fit + $powReduce = $this->nearestPow2Reduce($requestEnv['width'], $imageInfo['width']); } else { - $powReduce = $this->nearestPow2ForScale($scale); + $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']); } + $scale = pow(2, $poweReduce); + + } else if ( isset($requestEnv['width']) ) { + // Fit within width + $powReduce = $this->nearestPow2Reduce($requestEnv['width'], $imageInfo['width']); + $scale = pow(2, $powReduce); + + } else if ( isset($requestEnv['height'])) { + // Fit within height + $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']); $scale = pow(2, $powReduce); } + + $regionDimensions = $this->getRegionDimensions($imageInfo, $region); + + /* + print('imageInfo'); + print_r($imageInfo); + print('region'); + print_r($region); + print('regionDimensions'); + print_r($regionDimensions); + print('asFloat'); + print_r($this->getRegionDimensionsAsFloat($imageInfo, $region)); + die(-1); + */ + // Override depending on source image format // $$$ consider doing a 302 here instead, to make better use of the browser cache // Limit scaling for 1-bit images. See https://bugs.edge.launchpad.net/bookreader/+bug/486011 if (1 == $imageInfo['bits']) { + if ($scale > 1) { $scale /= 2; $powReduce -= 1; @@ -309,8 +403,8 @@ class BookReaderImages $unzipCmd = $this->getUnarchiveCommand($zipPath, $file); - $decompressCmd = $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink); - + $decompressCmd = $this->getDecompressCmd($imageInfo, $powReduce, $rotate, $scale, $region, $stdoutLink); + // Non-integer scaling is currently disabled on the cluster // if (isset($_REQUEST['height'])) { // $cmd .= " | pnmscale -height {$_REQUEST['height']} "; @@ -341,32 +435,37 @@ class BookReaderImages $filenameForClient = $this->filenameForClient($file, $ext); - $headers = array('Content-type: '. $MIMES[$ext], // XXX is nginx swallowing this? + $headers = array('Content-type: '. self::$MIMES[$ext], 'Cache-Control: max-age=15552000', 'Content-disposition: inline; filename=' . $filenameForClient); $errorMessage = ''; + if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest // $$$ automated reporting trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING); // Try some content-specific recovery - $recovered = false; + $recovered = false; if ($imageInfo['type'] == 'jp2') { $records = $this->getJp2Records($zipPath, $file); - if ($powReduce > intval($records['Clevels'])) { - $powReduce = $records['Clevels']; - $reduce = pow(2, $powReduce); + if (array_key_exists('Clevels', $records)) { + $maxReduce = intval($records['Clevels']); + trigger_error("BookReader using max reduce $maxReduce from jp2 records"); } else { - $reduce = 1; - $powReduce = 0; + $maxReduce = 0; } - - $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd; + + $powReduce = min($powReduce, $maxReduce); + $reduce = pow(2, $powReduce); + + $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo, $powReduce, $rotate, $scale, $region, $stdoutLink) . $compressCmd; + trigger_error('BookReader rerunning with new cmd: ' . $cmd, E_USER_WARNING); if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest $recovered = true; } else { + $this->cleanup(); trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING); } } @@ -376,9 +475,7 @@ class BookReaderImages } } - if (isset($tempFile)) { - unlink($tempFile); - } + $this->cleanup(); } function getUnarchiveCommand($archivePath, $file) @@ -413,8 +510,8 @@ class BookReaderImages function imageExtensionToType($extension) { - if (array_key_exists($extension, $this->EXTENSIONS)) { - return $this->EXTENSIONS[$extension]; + if (array_key_exists($extension, self::$EXTENSIONS)) { + return self::$EXTENSIONS[$extension]; } else { $this->BRfatal('Unknown image extension'); } @@ -530,24 +627,27 @@ class BookReaderImages echo $jsonOutput; } - function getDecompressCmd($imageType, $powReduce, $rotate, $scale, $stdoutLink) { + function getDecompressCmd($srcInfo, $powReduce, $rotate, $scale, $region, $stdoutLink) { - switch ($imageType) { + switch ($srcInfo['type']) { case 'jp2': + $regionAsFloat = $this->getRegionDimensionsAsFloat($srcInfo, $region); + $regionString = sprintf("{%f,%f},{%f,%f}", $regionAsFloat['y'], $regionAsFloat['x'], $regionAsFloat['h'], $regionAsFloat['w']); $decompressCmd = - " | " . $this->kduExpand . " -no_seek -quiet -reduce $powReduce -rotate $rotate -i /dev/stdin -o " . $stdoutLink; + " | " . $this->kduExpand . " -no_seek -quiet -reduce $powReduce -rotate $rotate -region $regionString -i /dev/stdin -o " . $stdoutLink; if ($this->decompressToBmp) { // We suppress output since bmptopnm always outputs on stderr $decompressCmd .= ' | (bmptopnm 2>/dev/null)'; } - break; - + break; +/* case 'tiff': // We need to create a temporary file for tifftopnm since it cannot // work on a pipe (the file must be seekable). // We use the BookReaderTiff prefix to give a hint in case things don't // get cleaned up. $tempFile = tempnam("/tmp", "BookReaderTiff"); + array_push($this->tempFiles, $tempFile); // $$$ look at bit depth when reducing $decompressCmd = @@ -561,6 +661,25 @@ class BookReaderImages case 'png': $decompressCmd = ' | ( pngtopnm 2>/dev/null ) ' . $this->reduceCommand($scale); break; +*/ + + // Formats handled by ImageMagick + case 'tiff': + case 'jpeg': + case 'png': + $region = $this->getRegionDimensions($srcInfo, $region); + $regionString = sprintf('[%dx%d+%d+%d]', $region['w'], $region['h'], $region['x'], $region['y']); + + // The argument to ImageMagick's scale command is a "geometry". We pass in the new width/height + $scaleString = ' -scale ' . sprintf("%dx%d", $region['w'] / $scale, $region['h'] / $scale); + + $rotateString = ''; + if ($rotate && $rotate != '0') { + $rotateString = ' -rotate ' . $rotate; // was previously checked to be a known value + } + + $decompressCmd = ' | convert -' . $regionString . $scaleString . $rotateString . ' pnm:-'; + break; default: $this->BRfatal('Unknown image type: ' . $imageType); @@ -569,6 +688,7 @@ class BookReaderImages return $decompressCmd; } + // If the command has its initial output on stdout the headers will be emitted followed // by the stdout output. If initial output is on stderr an error message will be // returned. @@ -610,15 +730,34 @@ class BookReaderImages $read = array($stdout, $stderr); $write = NULL; $except = NULL; + $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout if (false === $numChanged) { // select failed $errorMessage = 'Select failed'; $retVal = false; - } - if ($read[0] == $stdout && (1 == $numChanged)) { - // Got output first on stdout (only) - // $$$ make sure we get all stdout + error_log('BookReader select failed!'); + } else { + if (in_array($stderr, $read)) { + // Either content in stderr, or stderr is closed (could read 0 bytes) + $error = stream_get_contents($stderr); + if ($error) { + + $errorMessage = $error; + $retVal = false; + + fclose($stderr); + fclose($stdout); + fclose($stdin); + + // It is important that you close any pipes before calling + // proc_close in order to avoid a deadlock + proc_close($process); + return $retVal; + + } + } + $output = fopen('php://output', 'w'); foreach($headers as $header) { header($header); @@ -626,11 +765,6 @@ class BookReaderImages stream_copy_to_stream($pipes[1], $output); fclose($output); // okay since tied to special php://output $retVal = true; - } else { - // Got output on stderr - // $$$ make sure we get all stderr - $errorMessage = stream_get_contents($stderr); - $retVal = false; } fclose($stderr); @@ -650,10 +784,12 @@ class BookReaderImages } function BRfatal($string) { + $this->cleanup(); throw new Exception("Image error: $string"); } // Returns true if using a power node + // XXX change to "on red box" - not working for new Xeon function onPowerNode() { exec("lspci | fgrep -c Realtek", $output, $return); if ("0" != $output[0]) { @@ -680,6 +816,9 @@ class BookReaderImages } function checkPrivs($filename) { + // $$$ we assume here that requests for the title, cover or preview + // come in via BookReaderPreview.php which will be re-run with + // privileges after we return the 403 if (!is_readable($filename)) { header('HTTP/1.1 403 Forbidden'); exit(0); @@ -754,20 +893,8 @@ class BookReaderImages 'n' => 'num', 'cover' => 'single', 'preview' => 'single', - 'title' => 'single' - ); - - $sizes = array( - 'large', 'thumb', 'medium', 'small', 'orig' - ); - - $keys = array( - 'r' => 'reduce', - 's' => 'scale', - 'region' => 'region', - 'tile' => 'tile', - 'w' => 'width', - 'h' => 'height' + 'title' => 'single', + 'leaf' => 'num' ); // Look for known page types @@ -793,7 +920,7 @@ class BookReaderImages // Look for other known parts foreach ($parts as $part) { - if ( in_array($part, $sizes) ) { + if ( array_key_exists($part, self::$imageSizes) ) { $pageInfo['size'] = $part; continue; } @@ -808,8 +935,8 @@ class BookReaderImages $key = $matches[1]; $value = $matches[2]; - if ( array_key_exists($key, $keys) ) { - $pageInfo[$keys[$key]] = $value; + if ( array_key_exists($key, self::$imageUrlKeys) ) { + $pageInfo[self::$imageUrlKeys[$key]] = $value; continue; } @@ -819,6 +946,111 @@ class BookReaderImages return $pageInfo; } + function getRegionDimensions($sourceDimensions, $regionDimensions) { + // Return region dimensions as { 'x' => xOffset, 'y' => yOffset, 'w' => width, 'h' => height } + // in terms of full resolution image. + // Note: this will clip the returned dimensions to fit within the source image + + $sourceX = 0; + if (array_key_exists('x', $regionDimensions)) { + $sourceX = $this->intAmount($regionDimensions['x'], $sourceDimensions['width']); + } + $sourceX = $this->clamp(0, $sourceDimensions['width'] - 2, $sourceX); // Allow at least one pixel + + $sourceY = 0; + if (array_key_exists('y', $regionDimensions)) { + $sourceY = $this->intAmount($regionDimensions['y'], $sourceDimensions['height']); + } + $sourceY = $this->clamp(0, $sourceDimensions['height'] - 2, $sourceY); // Allow at least one pixel + + $sourceWidth = $sourceDimensions['width'] - $sourceX; + if (array_key_exists('width', $regionDimensions)) { + $sourceWidth = $this->intAmount($regionDimensions['width'], $sourceDimensions['width']); + } + $sourceWidth = $this->clamp(1, max(1, $sourceDimensions['width'] - $sourceX), $sourceWidth); + + $sourceHeight = $sourceDimensions['height'] - $sourceY; + if (array_key_exists('height', $regionDimensions)) { + $sourceHeight = $this->intAmount($regionDimensions['height'], $sourceDimensions['height']); + } + $sourceHeight = $this->clamp(1, max(1, $sourceDimensions['height'] - $sourceY), $sourceHeight); + + return array('x' => $sourceX, 'y' => $sourceY, 'w' => $sourceWidth, 'h' => $sourceHeight); + } + + function getRegionDimensionsAsFloat($sourceDimensions, $regionDimensions) { + // Return region dimensions as { 'x' => xOffset, 'y' => yOffset, 'w' => width, 'h' => height } + // in terms of full resolution image. + // Note: this will clip the returned dimensions to fit within the source image + + $sourceX = 0; + if (array_key_exists('x', $regionDimensions)) { + $sourceX = $this->floatAmount($regionDimensions['x'], $sourceDimensions['width']); + } + $sourceX = $this->clamp(0.0, 1.0, $sourceX); + + $sourceY = 0; + if (array_key_exists('y', $regionDimensions)) { + $sourceY = $this->floatAmount($regionDimensions['y'], $sourceDimensions['height']); + } + $sourceY = $this->clamp(0.0, 1.0, $sourceY); + + $sourceWidth = 1 - $sourceX; + if (array_key_exists('width', $regionDimensions)) { + $sourceWidth = $this->floatAmount($regionDimensions['width'], $sourceDimensions['width']); + } + $sourceWidth = $this->clamp(0.0, 1.0, $sourceWidth); + + $sourceHeight = 1 - $sourceY; + if (array_key_exists('height', $regionDimensions)) { + $sourceHeight = $this->floatAmount($regionDimensions['height'], $sourceDimensions['height']); + } + $sourceHeight = $this->clamp(0.0, 1.0, $sourceHeight); + + return array('x' => $sourceX, 'y' => $sourceY, 'w' => $sourceWidth, 'h' => $sourceHeight); + } + + function intAmount($stringValue, $maximum) { + // Returns integer amount for string like "5" (5 units) or "0.5" (50%) + if (strpos($stringValue, '.') === false) { + // No decimal, assume int + return intval($stringValue); + } + + return floatval($stringValue) * $maximum + 0.5; + } + + function floatAmount($stringValue, $maximum) { + // Returns float amount (0.0 to 1.0) for string like "0.4" (40%) or "4" (40% if max is 10) + if (strpos($stringValue, ".") === false) { + // No decimal, assume int value out of maximum + return floatval($stringValue) / $maximum; + } + + // Given float - just pass through + return floatval($stringValue); + } + + function clamp($minValue, $maxValue, $observedValue) { + if ($observedValue < $minValue) { + return $minValue; + } + + if ($observedValue > $maxValue) { + return $maxValue; + } + + return $observedValue; + } + + // Clean up temporary files and resources + function cleanup() { + foreach($this->tempFiles as $tempFile) { + unlink($tempFile); + } + $this->tempFiles = array(); + } + } ?> \ No newline at end of file