4 Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
6 This file is part of BookReader. The full source code can be found at GitHub:
7 http://github.com/openlibrary/bookreader
9 The canonical short name of an image type is the same as in the MIME type.
10 For example both .jpeg and .jpg are considered to have type "jpeg" since
11 the MIME type is "image/jpeg".
13 BookReader is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 BookReader is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
23 You should have received a copy of the GNU Affero General Public License
24 along with BookReader. If not, see <http://www.gnu.org/licenses/>.
27 require_once("BookReaderMeta.inc.php");
29 class BookReaderImages
31 public static $MIMES = array('gif' => 'image/gif',
33 'jpg' => 'image/jpeg',
34 'jpeg' => 'image/jpeg',
36 'tif' => 'image/tiff',
37 'tiff' => 'image/tiff');
39 public static $EXTENSIONS = array('gif' => 'gif',
47 // Width when generating thumbnails
48 public static $imageSizes = array(
55 // Keys in the image permalink urls, e.g. http://www.archive.org/download/itemid/page/cover_{keyval}_{keyval}.jpg
56 public static $imageUrlKeys = array(
57 //'r' => 'reduce', // pow of 2 reduction
58 's' => 'scale', // $$$ scale is downscaling factor in BookReaderImages but most people call this "reduce"
66 // Paths to command-line tools
67 var $exiftool = '/petabox/sw/books/exiftool/exiftool';
68 var $kduExpand = '/petabox/sw/bin/kdu_expand';
70 // Name of temporary files, to be cleaned at exit
71 var $tempFiles = array();
74 * Serve an image request that requires looking up the book metadata
78 * - Parse the requested page (e.g. cover_t.jpg, n5_r4.jpg) to determine which page type,
79 * size and format (etc) is being requested
80 * - Determine the leaf number corresponding to the page
81 * - Determine scaling values
82 * - Serve image request now that all information has been gathered
85 function serveLookupRequest($requestEnv) {
86 $brm = new BookReaderMeta();
88 $metadata = $brm->buildMetadata($_REQUEST['id'], $_REQUEST['itemPath'], $_REQUEST['subPrefix'], $_REQUEST['server']);
89 } catch (Exception $e) {
90 $this->BRfatal($e->getMessage());
93 $page = $_REQUEST['page'];
95 // Index of image to return
98 // deal with subPrefix
99 if ($_REQUEST['subPrefix']) {
100 $parts = explode('/', $_REQUEST['subPrefix']);
101 $bookId = $parts[count($parts) - 1 ];
103 $bookId = $_REQUEST['id'];
106 $pageInfo = $this->parsePageRequest($page, $bookId);
108 $basePage = $pageInfo['type'];
114 if (! array_key_exists('titleIndex', $metadata)) {
115 $this->BRfatal("No title page asserted in book");
117 $imageIndex = $metadata['titleIndex'];
120 /* Old 'cover' behaviour where it would show cover 0 if it exists or return 404.
121 Could be re-added as cover0, cover1, etc
123 if (! array_key_exists('coverIndices', $metadata)) {
124 $this->BRfatal("No cover asserted in book");
126 $imageIndex = $metadata['coverIndices'][0]; // $$$ TODO add support for other covers
131 case 'cover': // Show our best guess if cover is requested
133 // Cover page if book was published >= 1950
138 if ( array_key_exists('date', $metadata) && array_key_exists('coverIndices', $metadata) ) {
139 if ($brm->parseYear($metadata['date']) >= 1950) {
140 $imageIndex = $metadata['coverIndices'][0];
144 if (array_key_exists('titleIndex', $metadata)) {
145 $imageIndex = $metadata['titleIndex'];
148 if (array_key_exists('coverIndices', $metadata)) {
149 $imageIndex = $metadata['coverIndices'][0];
158 // Accessible index page
159 $imageIndex = intval($pageInfo['value']);
164 $index = array_search($pageInfo['value'], $metadata['pageNums']);
165 if ($index === FALSE) {
167 $this->BRfatal("Page not found");
171 $imageIndex = $index;
175 // Leaf explicitly specified
176 $leaf = $pageInfo['value'];
180 // Shouldn't be possible
181 $this->BRfatal("Unrecognized page type requested");
186 if (is_null($leaf)) {
187 // Leaf was not explicitly set -- look it up
188 $leaf = $brm->leafForIndex($imageIndex, $metadata['leafNums']);
192 'zip' => $metadata['zip'],
193 'file' => $brm->imageFilePath($leaf, $metadata['subPrefix'], $metadata['imageFormat']),
194 'ext' => 'jpg', // XXX should pass through ext
197 // remove non-passthrough keys from pageInfo
198 unset($pageInfo['type']);
199 unset($pageInfo['value']);
201 // add pageinfo to request
202 $requestEnv = array_merge($pageInfo, $requestEnv);
204 // Return image data - will check privs
205 $this->serveRequest($requestEnv);
210 * Returns a page image when all parameters such as the image stack location are
215 * Get info about requested image (input)
216 * Get info about requested output format
217 * Determine processing parameters
220 * Clean up temporary files
222 function serveRequest($requestEnv) {
224 // Process some of the request parameters
225 $zipPath = $requestEnv['zip'];
226 $file = $requestEnv['file'];
228 $ext = $requestEnv['ext'];
233 if (isset($requestEnv['callback'])) {
234 // validate callback is valid JS identifier (only)
235 $callback = $requestEnv['callback'];
236 $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/';
237 if (! preg_match($identifierPatt, $callback)) {
238 $this->BRfatal('Invalid callback');
244 if ( !file_exists($zipPath) ) {
245 $this->BRfatal('Image stack does not exist at ' . $zipPath);
247 // Make sure the image stack is readable - return 403 if not
248 $this->checkPrivs($zipPath);
251 // Get the image size and depth
252 $imageInfo = $this->getImageInfo($zipPath, $file);
254 // Output json if requested
255 if ('json' == $ext) {
256 // $$$ we should determine the output size first based on requested scale
257 $this->outputJSON($imageInfo, $callback); // $$$ move to BookReaderRequest
261 // Unfortunately kakadu requires us to know a priori if the
262 // output file should be .ppm or .pgm. By decompressing to
263 // .bmp kakadu will write a file we can consistently turn into
264 // .pnm. Really kakadu should support .pnm as the file output
265 // extension and automatically write ppm or pgm format as
267 $this->decompressToBmp = true; // $$$ shouldn't be necessary if we use file info to determine output format
268 if ($this->decompressToBmp) {
269 $stdoutLink = '/tmp/stdout.bmp';
271 $stdoutLink = '/tmp/stdout.ppm';
274 $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
276 // Rotate is currently only supported for jp2 since it does not add server load
277 $allowedRotations = array("0", "90", "180", "270");
278 $rotate = $requestEnv['rotate'];
279 if ( !in_array($rotate, $allowedRotations) ) {
283 // Image conversion options
285 $jpegOptions = '-quality 75';
287 // The pbmreduce reduction factor produces an image with dimension 1/n
288 // The kakadu reduction factor produces an image with dimension 1/(2^n)
290 // We interpret the requested size and scale, look at image format, and determine the
291 // actual scaling to be returned to the client. We generally return the largest
292 // power of 2 reduction that is larger than the requested size in order to reduce
293 // image processing load on our cluster. The client should then scale to their final
296 // Set scale from height or width if set
297 if (isset($requestEnv['height'])) {
298 $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']);
299 $scale = pow(2, $powReduce);
300 } else if (isset($requestEnv['width'])) {
301 $powReduce = $this->nearestPow2Reduce($requestEnv['width'], $imageInfo['width']);
302 $scale = pow(2, $powReduce);
305 // Set scale from named size (e.g. 'large') if set
306 $size = $requestEnv['size'];
307 if ( $size && array_key_exists($size, self::$imageSizes)) {
308 $srcRatio = floatval($imageInfo['width']) / floatval($imageInfo['height']);
311 $dimension = 'width';
313 $dimension = 'height';
315 $powReduce = $this->nearestPow2Reduce(self::$imageSizes[$size], $imageInfo[$dimension]);
316 $scale = pow(2, $powReduce);
319 // No named size - use explicit scale, if given
320 $scale = $requestEnv['scale'];
324 $powReduce = $this->nearestPow2ForScale($scale);
325 // ensure integer scale
326 $scale = pow(2, $powReduce);
330 // Override depending on source image format
331 // $$$ consider doing a 302 here instead, to make better use of the browser cache
332 // Limit scaling for 1-bit images. See https://bugs.edge.launchpad.net/bookreader/+bug/486011
333 if (1 == $imageInfo['bits']) {
339 // Hard limit so there are some black pixels to use!
347 if (!file_exists($stdoutLink))
349 system('ln -s /dev/stdout ' . $stdoutLink);
352 putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');
354 $unzipCmd = $this->getUnarchiveCommand($zipPath, $file);
356 $decompressCmd = $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink);
358 // Non-integer scaling is currently disabled on the cluster
359 // if (isset($_REQUEST['height'])) {
360 // $cmd .= " | pnmscale -height {$_REQUEST['height']} ";
365 $compressCmd = ' | pnmtopng ' . $pngOptions;
371 $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
372 $ext = 'jpeg'; // for matching below
377 if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
378 // Just pass through original data if same format and size
381 $cmd = $unzipCmd . $decompressCmd . $compressCmd;
386 $filenameForClient = $this->filenameForClient($file, $ext);
388 $headers = array('Content-type: '. self::$MIMES[$ext],
389 'Cache-Control: max-age=15552000',
390 'Content-disposition: inline; filename=' . $filenameForClient);
395 if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
396 // $$$ automated reporting
397 trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);
399 // Try some content-specific recovery
401 if ($imageInfo['type'] == 'jp2') {
402 $records = $this->getJp2Records($zipPath, $file);
403 if (array_key_exists('Clevels', $records)) {
404 $maxReduce = intval($records['Clevels']);
405 trigger_error("BookReader using max reduce $maxReduce from jp2 records");
410 $powReduce = min($powReduce, $maxReduce);
411 $reduce = pow(2, $powReduce);
413 $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
414 trigger_error('BookReader rerunning with new cmd: ' . $cmd, E_USER_WARNING);
415 if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
419 trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
424 $this->BRfatal('Problem processing image - command failed');
/**
 * Build the shell command fragment that writes one member of an image
 * stack to stdout.
 *
 * The archive format is chosen from the suffix of $archivePath (compared
 * case-insensitively): "zip" stacks are read with unzip and "tar" stacks
 * with 7z. Both the archive path and the member name are shell-escaped.
 * Any other suffix, or a path without a suffix, is reported via BRfatal().
 *
 * @param string $archivePath path of the image stack archive
 * @param string $file        path of the wanted member inside the archive
 * @return string command fragment that emits the member on stdout
 */
function getUnarchiveCommand($archivePath, $file)
{
    $hasSuffix = preg_match('/\.([^\.]+)$/', strtolower($archivePath), $found);

    if (!$hasSuffix) {
        $this->BRfatal('Bad image stack path');
    } else {
        $kind = $found[1];
        $quotedArchive = escapeshellarg($archivePath);
        $quotedMember = escapeshellarg($file);

        if ($kind == 'zip') {
            return 'unzip -p ' . $quotedArchive . ' ' . $quotedMember;
        }
        if ($kind == 'tar') {
            // 7z is noisy on stderr, so that stream is discarded
            return ' ( 7z e -so ' . $quotedArchive . ' ' . $quotedMember . ' 2>/dev/null ) ';
        }
        $this->BRfatal('Incompatible archive format');
    }

    // Only reached if BRfatal() returns instead of throwing
    $this->BRfatal('Bad image stack path or archive format');
}
/**
 * Returns the image type associated with the file extension.
 *
 * The lookup is done in the class-level $EXTENSIONS table; an extension
 * that is not a key of that table is reported via BRfatal().
 *
 * @param string $extension file extension to look up
 * @return string|null the image type for the extension
 */
function imageExtensionToType($extension)
{
    if (!array_key_exists($extension, self::$EXTENSIONS)) {
        $this->BRfatal('Unknown image extension');
        return null;
    }

    return self::$EXTENSIONS[$extension];
}
/**
 * Get the image information. The returned associative array fields will
 * vary depending on the image type. The basic keys are width, height, type
 * and bits.
 *
 * All formats are currently inspected through the EXIF reader, which is
 * fast. The former per-type dispatch that followed the return statement was
 * unreachable (and called its helpers without $this), so it has been removed.
 *
 * @param string $zipPath path of the image stack archive
 * @param string $file    path of the image inside the archive
 * @return array image information as produced by getImageInfoFromExif()
 */
function getImageInfo($zipPath, $file)
{
    return $this->getImageInfoFromExif($zipPath, $file);
}
/**
 * Get the records of a JP2 as returned by kdu_expand.
 *
 * The image is streamed out of the archive into kdu_expand with
 * "-record /dev/stdout"; every output line of the form "key=value" becomes
 * one entry of the result. Lines without "=" are ignored.
 *
 * @param string $zipPath path of the image stack archive
 * @param string $file    path of the JP2 inside the archive
 * @return array map of record name to record value (both strings)
 */
function getJp2Records($zipPath, $file)
{
    $extract = $this->getUnarchiveCommand($zipPath, $file);
    $cmd = $extract . ' | ' . $this->kduExpand
        . ' -no_seek -quiet -i /dev/stdin -record /dev/stdout';
    exec($cmd, $lines);

    $records = array();
    foreach ($lines as $line) {
        // Split on the first "=" only so values may themselves contain "="
        $pair = explode("=", $line, 2);
        if (count($pair) != 1) {
            $records[$pair[0]] = $pair[1];
        }
    }

    return $records;
}
/**
 * Get the image width, height and depth using the EXIF information.
 *
 * The image is streamed out of the archive into exiftool, whose "-S"
 * output ("Tag: value" per line) is parsed into a tag map.
 *
 * @param string $zipPath path of the image stack archive
 * @param string $file    path of the image inside the archive
 * @return array with keys 'width', 'height', 'bits' (ints) and 'type'
 *               (lower-cased exiftool FileType)
 * @throws Exception via BRfatal() when a basic tag is missing or the file
 *                   type is not supported
 */
function getImageInfoFromExif($zipPath, $file)
{
    // We look for all the possible tags of interest then act on the
    // ones presumed present based on the file type
    $tagsToGet = ' -ImageWidth -ImageHeight -FileType'        // all formats
                 . ' -BitsPerComponent -ColorSpace'           // jp2
                 . ' -BitDepth'                               // png
                 . ' -BitsPerSample';                         // tiff

    $cmd = $this->getUnarchiveCommand($zipPath, $file)
        . ' | '. $this->exiftool . ' -S -fast' . $tagsToGet . ' -';
    exec($cmd, $output);

    $tags = array();
    foreach ($output as $line) {
        // Split on the first separator only, and skip lines that have none,
        // so values containing ": " stay intact and no index is undefined
        $keyValue = explode(": ", $line, 2);
        if (count($keyValue) < 2) {
            continue;
        }
        $tags[$keyValue[0]] = $keyValue[1];
    }

    // Without these the caller cannot compute any scaling
    foreach (array("ImageWidth", "ImageHeight", "FileType") as $required) {
        if (!array_key_exists($required, $tags)) {
            $this->BRfatal("Missing $required for file $file in $zipPath");
        }
    }

    $width = intval($tags["ImageWidth"]);
    $height = intval($tags["ImageHeight"]);
    $type = strtolower($tags["FileType"]);

    // Name of the tag carrying the bit depth for each supported type
    $bitsTagForType = array(
        "jp2" => "BitsPerComponent",
        "tiff" => "BitsPerSample",
        "png" => "BitDepth",
    );

    $bits = 0;
    if ("jpeg" === $type) {
        // No depth tag is requested for JPEG; assume 8 bits per sample
        $bits = 8;
    } else if (array_key_exists($type, $bitsTagForType)) {
        $bitsTag = $bitsTagForType[$type];
        if (array_key_exists($bitsTag, $tags)) {
            $bits = intval($tags[$bitsTag]);
        }
    } else {
        $this->BRfatal("Unsupported image type $type for file $file in $zipPath");
    }

    return array('width' => $width, 'height' => $height,
        'bits' => $bits, 'type' => $type);
}
/**
 * Output JSON given the imageInfo associative array.
 *
 * Sends a text/plain content type header and echoes the JSON encoding of
 * $imageInfo. When $callback is truthy the JSON is wrapped as a JSONP call:
 * callback(json);
 *
 * @param array       $imageInfo data to encode
 * @param string|null $callback  JSONP callback name, or a falsy value for plain JSON
 */
function outputJSON($imageInfo, $callback)
{
    header('Content-type: text/plain');

    $payload = json_encode($imageInfo);
    echo $callback ? $callback . '(' . $payload . ');' : $payload;
}
/**
 * Build the shell pipeline stage that turns the extracted image into PNM.
 *
 * - jp2:  kdu_expand reads stdin, applies -reduce $powReduce and
 *         -rotate $rotate and writes to $stdoutLink; when
 *         $this->decompressToBmp is set the result is piped through bmptopnm.
 * - tiff: the stream is first written to a temporary file (registered in
 *         $this->tempFiles), then converted with tifftopnm and reduced.
 * - jpeg / png: converted with jpegtopnm / pngtopnm and reduced.
 *
 * Any other type is reported via BRfatal().
 *
 * @param string $imageType  'jp2', 'tiff', 'jpeg' or 'png'
 * @param int    $powReduce  value passed to kdu_expand -reduce (jp2 only)
 * @param string $rotate     value passed to kdu_expand -rotate (jp2 only)
 * @param int    $scale      factor handed to reduceCommand() (non-jp2 only)
 * @param string $stdoutLink output path handed to kdu_expand -o
 * @return string command fragment to append after the unarchive command
 */
function getDecompressCmd($imageType, $powReduce, $rotate, $scale, $stdoutLink) {

    switch ($imageType) {
        case 'jp2':
            $pipeline = " | " . $this->kduExpand
                . " -no_seek -quiet -reduce " . $powReduce
                . " -rotate " . $rotate
                . " -i /dev/stdin -o " . $stdoutLink;
            if ($this->decompressToBmp) {
                // We suppress output since bmptopnm always outputs on stderr
                $pipeline .= ' | (bmptopnm 2>/dev/null)';
            }
            break;

        case 'tiff':
            // tifftopnm cannot work on a pipe (the file must be seekable), so
            // the stream goes through a temporary file. The BookReaderTiff
            // prefix gives a hint about the file's origin.
            $scratch = tempnam("/tmp", "BookReaderTiff");
            $this->tempFiles[] = $scratch;

            // $$$ look at bit depth when reducing
            $pipeline = ' > ' . $scratch . ' ; tifftopnm ' . $scratch . ' 2>/dev/null'
                . $this->reduceCommand($scale);
            break;

        case 'jpeg':
            $pipeline = ' | ( jpegtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
            break;

        case 'png':
            $pipeline = ' | ( pngtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
            break;

        default:
            $this->BRfatal('Unknown image type: ' . $imageType);
            break;
    }

    return $pipeline;
}
620 // If the command has its initial output on stdout the headers will be emitted followed
621 // by the stdout output. If initial output is on stderr an error message will be
625 // true - if command emits stdout and has zero exit code
626 // false - command has initial output on stderr or non-zero exit code
627 // &$errorMessage - error string if there was an error
629 // $$$ Tested with our command-line image processing. May be deadlocks for
631 function passthruIfSuccessful($headers, $cmd, &$errorMessage)
636 $descriptorspec = array(
637 0 => array("pipe", "r"), // stdin is a pipe that the child will read from
638 1 => array("pipe", "w"), // stdout is a pipe that the child will write to
639 2 => array("pipe", "w"), // stderr is a pipe to write to
645 $process = proc_open($cmd, $descriptorspec, $pipes, $cwd, $env);
647 if (is_resource($process)) {
648 // $pipes now looks like this:
649 // 0 => writeable handle connected to child stdin
650 // 1 => readable handle connected to child stdout
651 // 2 => readable handle connected to child stderr
657 // check whether we get input first on stdout or stderr
658 $read = array($stdout, $stderr);
662 $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout
663 if (false === $numChanged) {
665 $errorMessage = 'Select failed';
667 error_log('BookReader select failed!');
669 if (in_array($stderr, $read)) {
670 // Either content in stderr, or stderr is closed (could read 0 bytes)
671 $error = stream_get_contents($stderr);
674 $errorMessage = $error;
681 // It is important that you close any pipes before calling
682 // proc_close in order to avoid a deadlock
683 proc_close($process);
689 $output = fopen('php://output', 'w');
690 foreach($headers as $header) {
693 stream_copy_to_stream($pipes[1], $output);
694 fclose($output); // okay since tied to special php://output
703 // It is important that you close any pipes before calling
704 // proc_close in order to avoid a deadlock
705 $cmdRet = proc_close($process);
708 $errorMessage .= "Command failed with result code " . $cmdRet;
714 function BRfatal($string) {
716 throw new Exception("Image error: $string");
/**
 * Returns true if using a power node.
 *
 * A host counts as a power node when lspci lists at least one Realtek
 * device, or otherwise when /proc/cpuinfo mentions AMD.
 *
 * XXX change to "on red box" - not working for new Xeon
 *
 * @return bool
 */
function onPowerNode() {
    // Each exec() gets its own array because exec() appends to an existing one
    $pciLines = array();
    exec("lspci | fgrep -c Realtek", $pciLines, $pciStatus);

    // fgrep -c prints the match count; with no output at all (e.g. lspci
    // missing) there is nothing to conclude, so fall through to the CPU check
    if (isset($pciLines[0]) && "0" != trim($pciLines[0])) {
        return true;
    }

    $cpuLines = array();
    exec("egrep -q AMD /proc/cpuinfo", $cpuLines, $cpuStatus);

    // egrep -q exits with 0 only when a match was found
    return $cpuStatus == 0;
}
/**
 * Build the pnmscale pipeline stage that shrinks an image by an integer
 * factor.
 *
 * The factor is coerced to an integer before it is placed on the command
 * line, so no other text can reach the shell. A factor of 1 or less means
 * no reduction and yields an empty string. On hosts that are not power
 * nodes pnmscale is run with -nomix.
 *
 * @param int|string $scale integer reduction factor
 * @return string command fragment starting with " | ", or '' for no reduction
 */
function reduceCommand($scale) {
    $factor = intval($scale);
    if ($factor <= 1) {
        return '';
    }

    if ($this->onPowerNode()) {
        return ' | pnmscale -reduce ' . $factor . ' 2>/dev/null ';
    }
    return ' | pnmscale -nomix -reduce ' . $factor . ' 2>/dev/null ';
}
/**
 * Make sure the given file is readable by this process.
 *
 * When it is not, a 403 response is sent and the script ends.
 *
 * $$$ we assume here that requests for the title, cover or preview
 *     come in via BookReaderPreview.php which will be re-run with
 *     privileges after we return the 403
 *
 * @param string $filename path to check
 */
function checkPrivs($filename) {
    if (is_readable($filename)) {
        return;
    }

    header('HTTP/1.1 403 Forbidden');
    exit(0);
}
/**
 * Given file path (inside archive) and output file extension, return a
 * filename suitable for the Content-disposition header.
 *
 * The result is the file's base name without directory or original
 * extension, followed by "." and $ext; the extension 'jpeg' is written
 * as 'jpg'.
 *
 * @param string $filePath path of the image inside the archive
 * @param string $ext      output extension
 * @return string
 */
function filenameForClient($filePath, $ext) {
    $suffix = ('jpeg' == $ext) ? 'jpg' : $ext;
    $stem = pathinfo($filePath, PATHINFO_FILENAME);

    return $stem . '.' . $suffix;
}
/**
 * Returns the nearest power of 2 reduction factor that results in a larger
 * image.
 *
 * The source/desired ratio is handed to nearestPow2ForScale(). A desired
 * dimension that is zero, negative or non-numeric cannot be satisfied by any
 * reduction, so 0 (no reduction) is returned instead of dividing by zero.
 *
 * @param int|string $desiredDimension requested width or height
 * @param int|string $sourceDimension  width or height of the source image
 * @return int exponent n such that the image is reduced by 2^n
 */
function nearestPow2Reduce($desiredDimension, $sourceDimension) {
    $desired = floatval($desiredDimension);
    if ($desired <= 0) {
        return 0;
    }

    $ratio = floatval($sourceDimension) / $desired;
    return $this->nearestPow2ForScale($ratio);
}
/**
 * Returns nearest power of 2 reduction factor that results in a larger image.
 *
 * The scale is truncated to an integer and the result is the exponent of the
 * largest power of 2 that does not exceed it (e.g. 5 -> 2, since 4 <= 5 < 8).
 * Scales of 1 or less, including zero and negative values, mean no
 * reduction and return 0.
 *
 * @param int|float|string $scale requested downscaling factor
 * @return int exponent n such that 2^n <= $scale, or 0
 */
function nearestPow2ForScale($scale) {
    $scale = intval($scale);
    if ($scale <= 1) {
        // Also keeps negative values away from decbin(), which would render
        // them as a full-width two's complement string
        return 0;
    }

    $binStr = decbin($scale); // convert to binary string. e.g. 5 -> '101'
    return strlen($binStr) - 1;
}
783 * Parses a page request like "page5_r2.jpg" or "cover_t.jpg" to corresponding
784 * page type, size, reduce, and format
786 function parsePageRequest($pageRequest, $bookPrefix) {
788 // Will hold parsed results
792 $pageRequest = strtolower($pageRequest);
794 // Pull off extension
795 if (preg_match('#(.*)\.([^.]+)$#', $pageRequest, $matches) === 1) {
796 $pageRequest = $matches[1];
797 $extension = $matches[2];
798 if ($extension == 'jpeg') {
804 $pageInfo['extension'] = $extension;
807 $parts = explode('_', $pageRequest);
809 // Remove book prefix if it was included (historical)
810 if ($parts[0] == $bookPrefix) {
814 if (count($parts) === 0) {
815 $this->BRfatal('No page type specified');
817 $page = array_shift($parts);
823 'preview' => 'single',
828 // Look for known page types
829 foreach ( $pageTypes as $pageName => $kind ) {
830 if ( preg_match('#^(' . $pageName . ')(.*)#', $page, $matches) === 1 ) {
831 $pageInfo['type'] = $matches[1];
834 $pageInfo['value'] = $matches[2];
837 $pageInfo['value'] = intval($matches[2]);
845 if ( !array_key_exists('type', $pageInfo) ) {
846 $this->BRfatal('Unrecognized page type');
849 // Look for other known parts
850 foreach ($parts as $part) {
851 if ( array_key_exists($part, self::$imageSizes) ) {
852 $pageInfo['size'] = $part;
856 // Key must be alpha, value must start with digit and contain digits, alpha, ',' or '.'
857 // Should prevent injection of strange values into the redirect to datanode
858 if ( preg_match('#^([a-z]+)(\d[a-z0-9,.]*)#', $part, $matches) === 0) {
864 $value = $matches[2];
866 if ( array_key_exists($key, self::$imageUrlKeys) ) {
867 $pageInfo[self::$imageUrlKeys[$key]] = $value;
871 // If we hit here, was unrecognized (no action)
877 // Clean up temporary files and resources
879 foreach($this->tempFiles as $tempFile) {
882 $this->tempFiles = array();