4 Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
6 This file is part of BookReader. The full source code can be found at GitHub:
7 http://github.com/openlibrary/bookreader
9 The canonical short name of an image type is the same as in the MIME type.
10 For example both .jpeg and .jpg are considered to have type "jpeg" since
11 the MIME type is "image/jpeg".
13 BookReader is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 BookReader is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
23 You should have received a copy of the GNU Affero General Public License
24 along with BookReader. If not, see <http://www.gnu.org/licenses/>.
27 require_once("BookReaderMeta.inc.php");
// Image-serving class for BookReader: holds the lookup tables and
// command-line tool paths used by the request handlers defined below.
29 class BookReaderImages
// File extension => MIME type; serveRequest() uses it to build the
// Content-type header.
31 public static $MIMES = array('gif' => 'image/gif',
33 'jpg' => 'image/jpeg',
34 'jpeg' => 'image/jpeg',
36 'tif' => 'image/tiff',
37 'tiff' => 'image/tiff');
// File extension => canonical image type name; consulted by
// imageExtensionToType().
39 public static $EXTENSIONS = array('gif' => 'gif',
47 // Width when generating thumbnails
// Keyed by named size; serveRequest() uses the value as the desired
// dimension (width or height) when a named size is requested.
48 public static $imageSizes = array(
55 // Keys in the image permalink urls, e.g. http://www.archive.org/download/itemid/page/cover_{keyval}_{keyval}.jpg
// parsePageRequest() maps each URL key found here to the pageInfo key
// stored as its value.
56 public static $imageUrlKeys = array(
66 // Paths to command-line tools
67 var $exiftool = '/petabox/sw/books/exiftool/exiftool';
68 var $kduExpand = '/petabox/sw/bin/kdu_expand';
70 // Name of temporary files, to be cleaned at exit
71 var $tempFiles = array();
74 * Serve an image request that requires looking up the book metadata
78 * - Parse the requested page (e.g. cover_t.jpg, n5_r4.jpg) to determine which page type,
79 * size and format (etc) is being requested
80 * - Determine the leaf number corresponding to the page
81 * - Determine scaling values
82 * - Serve image request now that all information has been gathered
// Serves an image request that first needs the book metadata looked up.
// Reads id, itemPath, subPrefix, server and page from $_REQUEST, resolves the
// requested page to an image index and then to a leaf, and hands the
// completed request to serveRequest().
85 function serveLookupRequest($requestEnv) {
86 $brm = new BookReaderMeta();
88 $metadata = $brm->buildMetadata($_REQUEST['id'], $_REQUEST['itemPath'], $_REQUEST['subPrefix'], $_REQUEST['server']);
89 } catch (Exception $e) {
// getMessage is a method on Exception; it has to be called for the actual
// message text to reach BRfatal().
90 $this->BRfatal($e->getMessage());
93 $page = $_REQUEST['page'];
95 // Index of image to return
98 // deal with subPrefix
99 if ($_REQUEST['subPrefix']) {
// explode() rather than split(): the delimiter is a literal '/', and split()
// is not available from PHP 7 onwards.
100 $parts = explode('/', $_REQUEST['subPrefix']);
// The book id is the last path component of the sub-prefix.
101 $bookId = $parts[count($parts) - 1 ];
103 $bookId = $_REQUEST['id'];
106 $pageInfo = $this->parsePageRequest($page, $bookId);
// The parsed page type selects how the image index is determined.
108 $basePage = $pageInfo['type'];
112 if (! array_key_exists('titleIndex', $metadata)) {
113 $this->BRfatal("No title page asserted in book");
115 $imageIndex = $metadata['titleIndex'];
119 if (! array_key_exists('coverIndices', $metadata)) {
120 $this->BRfatal("No cover asserted in book");
122 $imageIndex = $metadata['coverIndices'][0]; // $$$ TODO add support for other covers
127 // Cover page if book was published >= 1950
132 if ( array_key_exists('date', $metadata) && array_key_exists('coverIndices', $metadata) ) {
133 if ($brm->parseYear($metadata['date']) >= 1950) {
134 $imageIndex = $metadata['coverIndices'][0];
138 if (array_key_exists('titleIndex', $metadata)) {
139 $imageIndex = $metadata['titleIndex'];
142 if (array_key_exists('coverIndices', $metadata)) {
143 $imageIndex = $metadata['coverIndices'][0];
152 // Accessible index page
153 $imageIndex = intval($pageInfo['value']);
// Look the requested page value up in the book's asserted page numbers.
158 $index = array_search($pageInfo['value'], $metadata['pageNums']);
159 if ($index === FALSE) {
161 $this->BRfatal("Page not found");
165 $imageIndex = $index;
169 // Shouldn't be possible
170 $this->BRfatal("Unrecognized page type requested");
// Translate the image index into the leaf number used for the file path.
175 $leaf = $brm->leafForIndex($imageIndex, $metadata['leafNums']);
178 'zip' => $metadata['zip'],
179 'file' => $brm->imageFilePath($leaf, $metadata['subPrefix'], $metadata['imageFormat']),
183 // remove non-passthrough keys from pageInfo
184 unset($pageInfo['type']);
185 unset($pageInfo['value']);
187 // add pageinfo to request
// With array_merge, keys already present in $requestEnv override the same
// keys coming from $pageInfo.
188 $requestEnv = array_merge($pageInfo, $requestEnv);
190 // Return image data - will check privs
191 $this->serveRequest($requestEnv);
196 * Returns a page image when all parameters such as the image stack location are
201 * Get info about requested image (input)
202 * Get info about requested output format
203 * Determine processing parameters
206 * Clean up temporary files
// Serves a page image once the image stack and the file inside it are known.
// Keys read from $requestEnv: zip, file, ext, callback, rotate, height,
// width, scale and size.
// Builds an unarchive | decompress | compress shell pipeline and streams its
// output to the client; ends in BRfatal() when processing fails.
208 function serveRequest($requestEnv) {
209 // Process some of the request parameters
210 $zipPath = $requestEnv['zip'];
211 $file = $requestEnv['file'];
213 $ext = $requestEnv['ext'];
218 if (isset($requestEnv['callback'])) {
219 // validate callback is valid JS identifier (only)
220 $callback = $requestEnv['callback'];
221 $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/';
222 if (! preg_match($identifierPatt, $callback)) {
223 $this->BRfatal('Invalid callback');
229 if ( !file_exists($zipPath) ) {
230 $this->BRfatal('Image stack does not exist at ' . $zipPath);
232 // Make sure the image stack is readable - return 403 if not
233 $this->checkPrivs($zipPath);
236 // Get the image size and depth
237 $imageInfo = $this->getImageInfo($zipPath, $file);
239 // Output json if requested
240 if ('json' == $ext) {
241 // $$$ we should determine the output size first based on requested scale
242 $this->outputJSON($imageInfo, $callback); // $$$ move to BookReaderRequest
246 // Unfortunately kakadu requires us to know a priori if the
247 // output file should be .ppm or .pgm. By decompressing to
248 // .bmp kakadu will write a file we can consistently turn into
249 // .pnm. Really kakadu should support .pnm as the file output
250 // extension and automatically write ppm or pgm format as
252 $this->decompressToBmp = true; // $$$ shouldn't be necessary if we use file info to determine output format
253 if ($this->decompressToBmp) {
254 $stdoutLink = '/tmp/stdout.bmp';
256 $stdoutLink = '/tmp/stdout.ppm';
// Lower-cased extension of the stored file, compared with the requested
// extension further down to decide on a straight pass-through.
259 $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
261 // Rotate is currently only supported for jp2 since it does not add server load
262 $allowedRotations = array("0", "90", "180", "270");
263 $rotate = $requestEnv['rotate'];
264 if ( !in_array($rotate, $allowedRotations) ) {
268 // Image conversion options
270 $jpegOptions = '-quality 75';
272 // The pbmreduce reduction factor produces an image with dimension 1/n
273 // The kakadu reduction factor produces an image with dimension 1/(2^n)
275 // Set scale from height or width if set
276 if (isset($requestEnv['height'])) {
277 $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']);
278 $scale = pow(2, $powReduce);
279 } else if (isset($requestEnv['width'])) {
280 $powReduce = $this->nearestPow2Reduce($requestEnv['width'], $imageInfo['width']);
281 $scale = pow(2, $powReduce);
284 // $$$ could be cleaner
285 // Provide next smaller power of two reduction
287 // Set scale from 'scale' if set
288 $scale = $requestEnv['scale'];
293 // Set scale from named size (e.g. 'large') if set
294 $size = $requestEnv['size'];
295 if ( $size && array_key_exists($size, self::$imageSizes)) {
296 $srcRatio = floatval($imageInfo['width']) / floatval($imageInfo['height']);
299 $dimension = 'width';
301 $dimension = 'height';
303 $powReduce = $this->nearestPow2Reduce(self::$imageSizes[$size], $imageInfo[$dimension]);
305 // No named size - update powReduce from scale
// Pass the requested $scale; an undefined $sale was passed here before, so
// an explicit scale request had no effect on the reduce level.
306 $powReduce = $this->nearestPow2ForScale($scale);
309 // Make sure scale matches powReduce
310 $scale = pow(2, $powReduce);
313 // Override depending on source image format
314 // $$$ consider doing a 302 here instead, to make better use of the browser cache
315 // Limit scaling for 1-bit images. See https://bugs.edge.launchpad.net/bookreader/+bug/486011
316 if (1 == $imageInfo['bits']) {
321 // Hard limit so there are some black pixels to use!
// The symlink gives /dev/stdout a name with the file extension chosen above,
// which is handed to the decompress command as its output path.
329 if (!file_exists($stdoutLink))
331 system('ln -s /dev/stdout ' . $stdoutLink);
334 putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');
336 $unzipCmd = $this->getUnarchiveCommand($zipPath, $file);
338 $decompressCmd = $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink);
340 // Non-integer scaling is currently disabled on the cluster
341 // if (isset($_REQUEST['height'])) {
342 // $cmd .= " | pnmscale -height {$_REQUEST['height']} ";
347 $compressCmd = ' | pnmtopng ' . $pngOptions;
353 $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
354 $ext = 'jpeg'; // for matching below
359 if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
360 // Just pass through original data if same format and size
363 $cmd = $unzipCmd . $decompressCmd . $compressCmd;
368 $filenameForClient = $this->filenameForClient($file, $ext);
370 $headers = array('Content-type: '. self::$MIMES[$ext],
371 'Cache-Control: max-age=15552000',
372 'Content-disposition: inline; filename=' . $filenameForClient);
376 if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
377 // $$$ automated reporting
378 trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);
380 // Try some content-specific recovery
382 if ($imageInfo['type'] == 'jp2') {
// Cap the reduce level at the number of levels the JP2 file reports.
383 $records = $this->getJp2Records($zipPath, $file);
384 if ($powReduce > intval($records['Clevels'])) {
385 $powReduce = $records['Clevels'];
// NOTE(review): $reduce is assigned here but the retry below passes $scale -
// confirm which variable was intended.
386 $reduce = pow(2, $powReduce);
392 $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
393 if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
397 trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
402 $this->BRfatal('Problem processing image - command failed');
// Builds the shell command fragment that extracts $file from the archive at
// $archivePath; callers pipe its output into the next tool.
// The archive type is taken from the suffix of the lower-cased path. 'zip'
// and 'tar' are handled; every other outcome ends in BRfatal().
// Both the archive path and the file name go through escapeshellarg().
409 function getUnarchiveCommand($archivePath, $file)
411 $lowerPath = strtolower($archivePath);
// Capture everything after the last '.' as the suffix.
412 if (preg_match('/\.([^\.]+)$/', $lowerPath, $matches)) {
413 $suffix = $matches[1];
415 if ($suffix == 'zip') {
417 . escapeshellarg($archivePath)
418 . ' ' . escapeshellarg($file);
419 } else if ($suffix == 'tar') {
420 return ' ( 7z e -so '
421 . escapeshellarg($archivePath)
422 . ' ' . escapeshellarg($file) . ' 2>/dev/null ) ';
424 $this->BRfatal('Incompatible archive format');
428 $this->BRfatal('Bad image stack path');
431 $this->BRfatal('Bad image stack path or archive format');
436 * Returns the image type associated with the file extension.
// Looks $extension up in self::$EXTENSIONS and returns the mapped image type.
// Calls BRfatal('Unknown image extension') when the extension is not a key
// of that table.
438 function imageExtensionToType($extension)
441 if (array_key_exists($extension, self::$EXTENSIONS)) {
442 return self::$EXTENSIONS[$extension];
444 $this->BRfatal('Unknown image extension');
449 * Get the image information. The returned associative array fields will
450 * vary depending on the image type. The basic keys are width, height, type
// Returns the image information for $file inside the archive at $zipPath by
// delegating to getImageInfoFromExif().
453 function getImageInfo($zipPath, $file)
455 return $this->getImageInfoFromExif($zipPath, $file); // this is fast
// NOTE(review): the statements below come after the return above. They call
// imageExtensionToType(), getImageInfoFromJp2() and getImageInfoFromExif()
// without $this->, which would fail if executed - confirm this is dead or
// commented-out code before relying on it.
458 $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
459 $type = imageExtensionToType($fileExt);
463 return getImageInfoFromJp2($zipPath, $file);
466 return getImageInfoFromExif($zipPath, $file);
471 // Get the records of a JP2 as returned by kdu_expand
// The extracted file is piped into kdu_expand with "-record /dev/stdout";
// each output line of the form key=value becomes $records[key] = value.
472 function getJp2Records($zipPath, $file)
475 $cmd = $this->getUnarchiveCommand($zipPath, $file)
476 . ' | ' . $this->kduExpand
477 . ' -no_seek -quiet -i /dev/stdin -record /dev/stdout';
481 foreach ($output as $line) {
// Split on the first '=' only, so the value may itself contain '='.
482 $elems = explode("=", $line, 2);
483 if (1 == count($elems)) {
484 // delimiter not found
487 $records[$elems[0]] = $elems[1];
494 * Get the image width, height and depth using the EXIF information.
// Gets the image width, height, bit depth and type by piping the extracted
// file into exiftool and parsing its "Tag: value" output lines.
// Builds an array with the keys width, height, bits and type; calls
// BRfatal() for an unsupported file type.
496 function getImageInfoFromExif($zipPath, $file)
499 // We look for all the possible tags of interest then act on the
500 // ones presumed present based on the file type
501 $tagsToGet = ' -ImageWidth -ImageHeight -FileType' // all formats
502 . ' -BitsPerComponent -ColorSpace' // jp2
503 . ' -BitDepth' // png
504 . ' -BitsPerSample'; // tiff
506 $cmd = $this->getUnarchiveCommand($zipPath, $file)
507 . ' | '. $this->exiftool . ' -S -fast' . $tagsToGet . ' -';
511 foreach ($output as $line) {
// Split on the first ": " only so a value containing ": " stays intact, and
// skip lines without the delimiter instead of reading a missing element
// (same approach as the key=value parsing in getJp2Records()).
512 $keyValue = explode(": ", $line, 2);
if (count($keyValue) < 2) { continue; }
513 $tags[$keyValue[0]] = $keyValue[1];
516 $width = intval($tags["ImageWidth"]);
517 $height = intval($tags["ImageHeight"]);
518 $type = strtolower($tags["FileType"]);
// The tag that carries the bit depth depends on the file type.
522 $bits = intval($tags["BitsPerComponent"]);
525 $bits = intval($tags["BitsPerSample"]);
531 $bits = intval($tags["BitDepth"]);
534 $this->BRfatal("Unsupported image type $type for file $file in $zipPath");
539 $retval = Array('width' => $width, 'height' => $height,
540 'bits' => $bits, 'type' => $type);
546 * Output JSON given the imageInfo associative array
// Sends a text/plain Content-type header and JSON-encodes $imageInfo.
// The encoded value can be wrapped as "$callback(<json>);" for JSONP-style
// use - presumably only when a callback was supplied; TODO confirm.
548 function outputJSON($imageInfo, $callback)
550 header('Content-type: text/plain');
551 $jsonOutput = json_encode($imageInfo);
553 $jsonOutput = $callback . '(' . $jsonOutput . ');';
// Returns the shell pipeline fragment that converts the extracted image to
// PNM, chosen by $imageType.
// The kdu_expand variant applies "-reduce $powReduce" and "-rotate $rotate"
// and writes to $stdoutLink; the tifftopnm, jpegtopnm and pngtopnm variants
// are followed by reduceCommand($scale) instead.
// An unrecognised $imageType ends in BRfatal().
558 function getDecompressCmd($imageType, $powReduce, $rotate, $scale, $stdoutLink) {
560 switch ($imageType) {
563 " | " . $this->kduExpand . " -no_seek -quiet -reduce $powReduce -rotate $rotate -i /dev/stdin -o " . $stdoutLink;
// decompressToBmp is set by serveRequest(); when on, the BMP output is
// converted to PNM.
564 if ($this->decompressToBmp) {
565 // We suppress output since bmptopnm always outputs on stderr
566 $decompressCmd .= ' | (bmptopnm 2>/dev/null)';
571 // We need to create a temporary file for tifftopnm since it cannot
572 // work on a pipe (the file must be seekable).
573 // We use the BookReaderTiff prefix to give a hint in case things don't
575 $tempFile = tempnam("/tmp", "BookReaderTiff");
// Record the temp file in $this->tempFiles, the list of files to be cleaned up.
576 array_push($this->tempFiles, $tempFile);
578 // $$$ look at bit depth when reducing
580 ' > ' . $tempFile . ' ; tifftopnm ' . $tempFile . ' 2>/dev/null' . $this->reduceCommand($scale);
584 $decompressCmd = ' | ( jpegtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
588 $decompressCmd = ' | ( pngtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
592 $this->BRfatal('Unknown image type: ' . $imageType);
595 return $decompressCmd;
598 // If the command has its initial output on stdout the headers will be emitted followed
599 // by the stdout output. If initial output is on stderr an error message will be
603 // true - if command emits stdout and has zero exit code
604 // false - command has initial output on stderr or non-zero exit code
605 // &$errorMessage - error string if there was an error
607 // $$$ Tested with our command-line image processing. May be deadlocks for
// Runs $cmd through proc_open with pipes on stdin, stdout and stderr, then
// waits to see which of stdout/stderr becomes readable first.
// If only stdout is ready, the child's stdout is copied to php://output.
// Otherwise the stderr contents are stored in $errorMessage (passed by
// reference), and the command's exit code can be appended to it.
609 function passthruIfSuccessful($headers, $cmd, &$errorMessage)
614 $descriptorspec = array(
615 0 => array("pipe", "r"), // stdin is a pipe that the child will read from
616 1 => array("pipe", "w"), // stdout is a pipe that the child will write to
617 2 => array("pipe", "w"), // stderr is a pipe to write to
623 $process = proc_open($cmd, $descriptorspec, $pipes, $cwd, $env);
625 if (is_resource($process)) {
626 // $pipes now looks like this:
627 // 0 => writeable handle connected to child stdin
628 // 1 => readable handle connected to child stdout
629 // 2 => readable handle connected to child stderr
635 // check whether we get input first on stdout or stderr
636 $read = array($stdout, $stderr);
// A NULL timeout makes stream_select block until a stream is ready;
// on return $read holds only the streams that are ready.
639 $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout
640 if (false === $numChanged) {
642 $errorMessage = 'Select failed';
// Success path: exactly one stream is ready and it is stdout.
645 if ($read[0] == $stdout && (1 == $numChanged)) {
646 // Got output first on stdout (only)
647 // $$$ make sure we get all stdout
648 $output = fopen('php://output', 'w');
649 foreach($headers as $header) {
652 stream_copy_to_stream($pipes[1], $output);
653 fclose($output); // okay since tied to special php://output
656 // Got output on stderr
657 // $$$ make sure we get all stderr
658 $errorMessage = stream_get_contents($stderr);
667 // It is important that you close any pipes before calling
668 // proc_close in order to avoid a deadlock
669 $cmdRet = proc_close($process);
672 $errorMessage .= "Command failed with result code " . $cmdRet;
// Aborts processing by throwing an Exception whose message is
// "Image error: " followed by $string. Because it throws, statements after
// a BRfatal() call in a caller do not run.
678 function BRfatal($string) {
680 throw new Exception("Image error: $string");
683 // Returns true if using a power node
// Probes the host hardware with two shell commands: a count of lspci lines
// mentioning Realtek, and a check of /proc/cpuinfo for AMD.
684 function onPowerNode() {
// fgrep -c prints the number of matching lines, so "0" means no match.
685 exec("lspci | fgrep -c Realtek", $output, $return);
686 if ("0" != $output[0]) {
// egrep -q prints nothing; its result is the exit status stored in $return.
689 exec("egrep -q AMD /proc/cpuinfo", $output, $return);
// Returns the pnmscale pipeline stage that reduces the image by $scale,
// with pnmscale's stderr discarded.
// On a power node the plain "-reduce" form is used; the other form adds
// "-nomix".
697 function reduceCommand($scale) {
699 if ($this->onPowerNode()) {
700 return ' | pnmscale -reduce ' . $scale . ' 2>/dev/null ';
702 return ' | pnmscale -nomix -reduce ' . $scale . ' 2>/dev/null ';
// Checks that $filename is readable by the current process and sends an
// HTTP 403 Forbidden status header when it is not.
709 function checkPrivs($filename) {
710 if (!is_readable($filename)) {
711 header('HTTP/1.1 403 Forbidden');
716 // Given file path (inside archive) and output file extension, return a filename
717 // suitable for Content-disposition header
// Builds the client-facing file name from the base name of $filePath
// (directory and original extension dropped), a '.', and the output
// extension. The 'jpeg' extension is special-cased before the name is built.
718 function filenameForClient($filePath, $ext) {
719 $pathParts = pathinfo($filePath);
720 if ('jpeg' == $ext) {
// pathinfo()'s 'filename' element is the base name without its extension.
723 return $pathParts['filename'] . '.' . $ext;
726 // Returns the nearest power of 2 reduction factor that results in a larger image
// Converts a desired output dimension into a power-of-2 reduce level:
// computes source/desired as a float ratio and passes it to
// nearestPow2ForScale().
// NOTE(review): a $desiredDimension of 0 divides by zero here - confirm
// callers never pass 0.
727 function nearestPow2Reduce($desiredDimension, $sourceDimension) {
728 $ratio = floatval($sourceDimension) / floatval($desiredDimension);
729 return $this->nearestPow2ForScale($ratio);
732 // Returns nearest power of 2 reduction factor that results in a larger image
// Converts a scale factor into a power-of-2 reduce level. $scale is
// truncated to an integer first; the result is the length of its binary
// representation minus one, e.g. 5 -> '101' -> 2.
733 function nearestPow2ForScale($scale) {
734 $scale = intval($scale);
738 $binStr = decbin($scale); // convert to binary string. e.g. 5 -> '101'
739 return strlen($binStr) - 1;
743 * Parses a page request like "page5_r2.jpg" or "cover_t.jpg" to corresponding
744 * page type, size, reduce, and format
// Parses a lower-cased page request into a $pageInfo array.
// The file extension is split off first, then the remainder is split on '_'.
// The first part (after an optional leading book prefix) names the page type
// and value; each remaining part is either a named size from
// self::$imageSizes or a key+value pair mapped through self::$imageUrlKeys.
// Calls BRfatal() when no page type is given or it is not recognised.
746 function parsePageRequest($pageRequest, $bookPrefix) {
748 // Will hold parsed results
752 $pageRequest = strtolower($pageRequest);
754 // Pull off extension
755 if (preg_match('#(.*)\.([^.]+)$#', $pageRequest, $matches) === 1) {
756 $pageRequest = $matches[1];
757 $extension = $matches[2];
758 if ($extension == 'jpeg') {
764 $pageInfo['extension'] = $extension;
767 $parts = explode('_', $pageRequest);
769 // Remove book prefix if it was included (historical)
770 if ($parts[0] == $bookPrefix) {
774 if (count($parts) === 0) {
775 $this->BRfatal('No page type specified');
// The first remaining part carries the page type and, for some types, a value.
777 $page = array_shift($parts);
783 'preview' => 'single',
787 // Look for known page types
788 foreach ( $pageTypes as $pageName => $kind ) {
// Match the type name at the start; whatever follows it is the value.
789 if ( preg_match('#^(' . $pageName . ')(.*)#', $page, $matches) === 1 ) {
790 $pageInfo['type'] = $matches[1];
793 $pageInfo['value'] = $matches[2];
796 $pageInfo['value'] = intval($matches[2]);
804 if ( !array_key_exists('type', $pageInfo) ) {
805 $this->BRfatal('Unrecognized page type');
808 // Look for other known parts
809 foreach ($parts as $part) {
810 if ( array_key_exists($part, self::$imageSizes) ) {
811 $pageInfo['size'] = $part;
815 // Key must be alpha, value must start with digit and contain digits, alpha, ',' or '.'
816 // Should prevent injection of strange values into the redirect to datanode
817 if ( preg_match('#^([a-z]+)(\d[a-z0-9,.]*)#', $part, $matches) === 0) {
823 $value = $matches[2];
// Store the value under the pageInfo key that the URL key maps to.
825 if ( array_key_exists($key, self::$imageUrlKeys) ) {
826 $pageInfo[self::$imageUrlKeys[$key]] = $value;
830 // If we hit here, was unrecognized (no action)
836 // Clean up temporary files and resources
838 foreach($this->tempFiles as $tempFile) {
841 $this->tempFiles = array();