4 Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
6 This file is part of BookReader. The full source code can be found at GitHub:
7 http://github.com/openlibrary/bookreader
9 The canonical short name of an image type is the same as in the MIME type.
10 For example both .jpeg and .jpg are considered to have type "jpeg" since
11 the MIME type is "image/jpeg".
13 BookReader is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 BookReader is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
23 You should have received a copy of the GNU Affero General Public License
24 along with BookReader. If not, see <http://www.gnu.org/licenses/>.
27 require_once("BookReaderMeta.inc.php");
29 class BookReaderImages
31 public static $MIMES = array('gif' => 'image/gif',
33 'jpg' => 'image/jpeg',
34 'jpeg' => 'image/jpeg',
36 'tif' => 'image/tiff',
37 'tiff' => 'image/tiff');
39 public static $EXTENSIONS = array('gif' => 'gif',
47 // Width when generating thumbnails
48 public static $imageSizes = array(
55 // Keys in the image permalink urls, e.g. http://www.archive.org/download/itemid/page/cover_{keyval}_{keyval}.jpg
56 public static $imageUrlKeys = array(
57 //'r' => 'reduce', // pow of 2 reduction
58 's' => 'scale', // $$$ scale is downscaling factor in BookReaderImages but most people call this "reduce"
66 // Paths to command-line tools
67 var $exiftool = '/petabox/sw/books/exiftool/exiftool';
68 var $kduExpand = '/petabox/sw/bin/kdu_expand';
70 // Name of temporary files, to be cleaned at exit
71 var $tempFiles = array();
/**
 * Serve an image request that requires looking up the book metadata.
 *
 * Approach:
 *  - Build the book metadata from the id, itemPath, subPrefix and server request parameters
 *  - Parse the requested page (e.g. cover_t.jpg, n5_r4.jpg) to determine which page type,
 *    size and format (etc) is being requested
 *  - Determine the leaf number corresponding to the page
 *  - Serve image request now that all information has been gathered
 *
 * NOTE(review): the listing this block was restored from omitted its scaffolding
 * (try/else/switch lines, case labels, break statements). Lines tagged
 * "reconstructed" below are inferred and should be confirmed against upstream.
 *
 * @param array $requestEnv Replaced by the environment assembled here before it is
 *                          handed to serveRequest().
 * @throws Exception via BRfatal() when metadata cannot be built or the page cannot
 *                   be resolved.
 */
function serveLookupRequest($requestEnv) {
    $brm = new BookReaderMeta();
    try {
        $metadata = $brm->buildMetadata($_REQUEST['id'], $_REQUEST['itemPath'], $_REQUEST['subPrefix'], $_REQUEST['server']);
    } catch (Exception $e) {
        // getMessage() has to be called: read as a property it is always null,
        // which discarded the actual failure reason.
        $this->BRfatal($e->getMessage());
    }

    $page = $_REQUEST['page'];

    // Index of image to return
    $imageIndex = null; // reconstructed

    // deal with subPrefix: the book id is the last path component of the sub-prefix
    if ($_REQUEST['subPrefix']) {
        // explode() replaces split(), which was removed in PHP 7. The delimiter
        // is a literal '/', so the result is the same.
        $parts = explode('/', $_REQUEST['subPrefix']);
        $bookId = $parts[count($parts) - 1];
    } else {
        $bookId = $_REQUEST['id'];
    }

    $pageInfo = $this->parsePageRequest($page, $bookId);

    $basePage = $pageInfo['type'];

    $leaf = null; // reconstructed
    switch ($basePage) {

        case 'title': // reconstructed label
            if (! array_key_exists('titleIndex', $metadata)) {
                $this->BRfatal("No title page asserted in book");
            }
            $imageIndex = $metadata['titleIndex'];
            break;

        /* Old 'cover' behaviour where it would show cover 0 if it exists or return 404.
           Could be re-added as cover0, cover1, etc

           if (! array_key_exists('coverIndices', $metadata)) {
               $this->BRfatal("No cover asserted in book");
           }
           $imageIndex = $metadata['coverIndices'][0]; // $$$ TODO add support for other covers
        */

        case 'preview': // reconstructed label - NOTE(review): confirm preview shares this branch
        case 'cover': // Show our best guess if cover is requested
            // Preference is:
            //   Cover page if book was published >= 1950
            //   Title page
            //   Cover page
            //   First page
            if ( array_key_exists('date', $metadata) && array_key_exists('coverIndices', $metadata) ) {
                if ($brm->parseYear($metadata['date']) >= 1950) {
                    $imageIndex = $metadata['coverIndices'][0];
                    break;
                }
            }
            if (array_key_exists('titleIndex', $metadata)) {
                $imageIndex = $metadata['titleIndex'];
                break;
            }
            if (array_key_exists('coverIndices', $metadata)) {
                $imageIndex = $metadata['coverIndices'][0];
                break;
            }

            // First page - reconstructed fallback, NOTE(review): confirm
            $imageIndex = 0;
            break;

        case 'n': // reconstructed label
            // Accessible index page
            $imageIndex = intval($pageInfo['value']);
            break;

        case 'page': // reconstructed label
            // Named page
            $index = array_search($pageInfo['value'], $metadata['pageNums']);
            if ($index === FALSE) {
                $this->BRfatal("Page not found");
            }
            $imageIndex = $index;
            break;

        case 'leaf': // reconstructed label
            // Leaf explicitly specified
            $leaf = $pageInfo['value'];
            break;

        default:
            // Shouldn't be possible
            $this->BRfatal("Unrecognized page type requested");
            break;
    }

    if (is_null($leaf)) {
        // Leaf was not explicitly set -- look it up
        $leaf = $brm->leafForIndex($imageIndex, $metadata['leafNums']);
    }

    $requestEnv = array(
        'zip' => $metadata['zip'],
        'file' => $brm->imageFilePath($leaf, $metadata['subPrefix'], $metadata['imageFormat']),
        // reconstructed entry - NOTE(review): serveRequest() reads 'ext'; confirm the value
        'ext' => 'jpg',
    );

    // remove non-passthrough keys from pageInfo
    unset($pageInfo['type']);
    unset($pageInfo['value']);

    // add pageinfo to request (keys set above win over parsed ones)
    $requestEnv = array_merge($pageInfo, $requestEnv);

    // Return image data - will check privs
    $this->serveRequest($requestEnv);
}
209 * Returns a page image when all parameters such as the image stack location are
214 * Get info about requested image (input)
215 * Get info about requested output format
216 * Determine processing parameters
219 * Clean up temporary files
// Serves one page image (or, when the requested extension is 'json', a JSON
// description of it) for an already-resolved request.
// Reads 'zip', 'file' and 'ext' from $requestEnv and, where the code below checks
// for them, 'callback', 'rotate', 'height', 'width', 'size' and 'scale'.
// NOTE(review): this listing skips numbered lines (e.g. 225, 227-230, 332-335), so
// several statements are not visible; the comments below describe only what is shown.
221 function serveRequest($requestEnv) {
222 // Process some of the request parameters
223 $zipPath = $requestEnv['zip'];
224 $file = $requestEnv['file'];
226 $ext = $requestEnv['ext'];
231 if (isset($requestEnv['callback'])) {
232 // validate callback is valid JS identifier (only)
233 $callback = $requestEnv['callback'];
234 $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/';
235 if (! preg_match($identifierPatt, $callback)) {
236 $this->BRfatal('Invalid callback');
242 if ( !file_exists($zipPath) ) {
243 $this->BRfatal('Image stack does not exist at ' . $zipPath);
245 // Make sure the image stack is readable - return 403 if not
246 $this->checkPrivs($zipPath);
249 // Get the image size and depth
250 $imageInfo = $this->getImageInfo($zipPath, $file);
252 // Output json if requested
253 if ('json' == $ext) {
254 // $$$ we should determine the output size first based on requested scale
255 $this->outputJSON($imageInfo, $callback); // $$$ move to BookReaderRequest
259 // Unfortunately kakadu requires us to know a priori if the
260 // output file should be .ppm or .pgm. By decompressing to
261 // .bmp kakadu will write a file we can consistently turn into
262 // .pnm. Really kakadu should support .pnm as the file output
263 // extension and automatically write ppm or pgm format as
// The flag is stored on the instance because getDecompressCmd() reads it too.
265 $this->decompressToBmp = true; // $$$ shouldn't be necessary if we use file info to determine output format
266 if ($this->decompressToBmp) {
267 $stdoutLink = '/tmp/stdout.bmp';
269 $stdoutLink = '/tmp/stdout.ppm';
272 $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
274 // Rotate is currently only supported for jp2 since it does not add server load
275 $allowedRotations = array("0", "90", "180", "270");
276 $rotate = $requestEnv['rotate'];
// NOTE(review): in_array() is called without its strict flag, so the value is
// compared loosely against the allowed strings.
277 if ( !in_array($rotate, $allowedRotations) ) {
281 // Image conversion options
283 $jpegOptions = '-quality 75';
285 // The pbmreduce reduction factor produces an image with dimension 1/n
286 // The kakadu reduction factor produces an image with dimension 1/(2^n)
288 // We interpret the requested size and scale, look at image format, and determine the
289 // actual scaling to be returned to the client. We generally return the largest
290 // power of 2 reduction that is larger than the requested size in order to reduce
291 // image processing load on our cluster. The client should then scale to their final
294 // Set scale from height or width if set
// $powReduce is the power-of-two exponent; $scale is 2 raised to that exponent.
295 if (isset($requestEnv['height'])) {
296 $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']);
297 $scale = pow(2, $powReduce);
298 } else if (isset($requestEnv['width'])) {
299 $powReduce = $this->nearestPow2Reduce($requestEnv['width'], $imageInfo['width']);
300 $scale = pow(2, $powReduce);
303 // Set scale from named size (e.g. 'large') if set
304 $size = $requestEnv['size'];
305 if ( $size && array_key_exists($size, self::$imageSizes)) {
// Width-to-height ratio of the source image.
// NOTE(review): no zero check on the reported height is visible before this division.
306 $srcRatio = floatval($imageInfo['width']) / floatval($imageInfo['height']);
309 $dimension = 'width';
311 $dimension = 'height';
313 $powReduce = $this->nearestPow2Reduce(self::$imageSizes[$size], $imageInfo[$dimension]);
314 $scale = pow(2, $powReduce);
317 // No named size - use explicit scale, if given
318 $scale = $requestEnv['scale'];
322 $powReduce = $this->nearestPow2ForScale($scale);
323 // ensure integer scale
324 $scale = pow(2, $powReduce);
328 // Override depending on source image format
329 // $$$ consider doing a 302 here instead, to make better use of the browser cache
330 // Limit scaling for 1-bit images. See https://bugs.edge.launchpad.net/bookreader/+bug/486011
331 if (1 == $imageInfo['bits']) {
336 // Hard limit so there are some black pixels to use!
// Create a symlink to /dev/stdout at the $stdoutLink path if it does not exist
// yet; that path is then passed to getDecompressCmd() below.
344 if (!file_exists($stdoutLink))
346 system('ln -s /dev/stdout ' . $stdoutLink);
349 putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');
351 $unzipCmd = $this->getUnarchiveCommand($zipPath, $file);
353 $decompressCmd = $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink);
355 // Non-integer scaling is currently disabled on the cluster
356 // if (isset($_REQUEST['height'])) {
357 // $cmd .= " | pnmscale -height {$_REQUEST['height']} ";
362 $compressCmd = ' | pnmtopng ' . $pngOptions;
368 $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
369 $ext = 'jpeg'; // for matching below
374 if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
375 // Just pass through original data if same format and size
378 $cmd = $unzipCmd . $decompressCmd . $compressCmd;
383 $filenameForClient = $this->filenameForClient($file, $ext);
385 $headers = array('Content-type: '. self::$MIMES[$ext],
386 'Cache-Control: max-age=15552000',
387 'Content-disposition: inline; filename=' . $filenameForClient);
// First attempt. On failure a warning is logged and, for jp2 sources, the
// command is rebuilt using the Clevels record reported for the file.
391 if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
392 // $$$ automated reporting
393 trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);
395 // Try some content-specific recovery
397 if ($imageInfo['type'] == 'jp2') {
398 $records = $this->getJp2Records($zipPath, $file);
399 if ($powReduce > intval($records['Clevels'])) {
400 $powReduce = $records['Clevels'];
// NOTE(review): $reduce is not read by any visible line; the retry below still
// passes $scale - confirm which was intended.
401 $reduce = pow(2, $powReduce);
407 $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
408 if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
412 trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
417 $this->BRfatal('Problem processing image - command failed');
/**
 * Builds the shell command that writes one member of an image stack archive
 * to stdout, chosen by the archive's (case-insensitive) file suffix.
 *
 * Both the archive path and the member name are passed through escapeshellarg().
 *
 * @param string $archivePath Path to the .zip or .tar image stack.
 * @param string $file        Path of the member inside the archive.
 * @return string Command fragment to start a pipeline with.
 * @throws Exception via BRfatal() when the path has no suffix or the suffix is
 *                   not zip/tar.
 */
function getUnarchiveCommand($archivePath, $file)
{
    $lowerPath = strtolower($archivePath);

    // No suffix at all: nothing to dispatch on.
    if (!preg_match('/\.([^\.]+)$/', $lowerPath, $matches)) {
        $this->BRfatal('Bad image stack path');
    }

    switch ($matches[1]) {
        case 'zip':
            // NOTE(review): the command prefix line was absent from the listing;
            // 'unzip -p ' (extract member to stdout) is reconstructed - confirm.
            return 'unzip -p '
                . escapeshellarg($archivePath)
                . ' ' . escapeshellarg($file);

        case 'tar':
            return ' ( 7z e -so '
                . escapeshellarg($archivePath)
                . ' ' . escapeshellarg($file) . ' 2>/dev/null ) ';

        default:
            $this->BRfatal('Incompatible archive format');
    }

    // Only reachable if BRfatal() ever stops throwing.
    $this->BRfatal('Bad image stack path or archive format');
}
/**
 * Returns the image type associated with the file extension.
 *
 * The lookup is a direct key match against self::$EXTENSIONS.
 *
 * @param string $extension File extension without the dot.
 * @return string The mapped image type.
 * @throws Exception via BRfatal() when the extension is not in the table.
 */
function imageExtensionToType($extension)
{
    if (!array_key_exists($extension, self::$EXTENSIONS)) {
        $this->BRfatal('Unknown image extension');
    }

    return self::$EXTENSIONS[$extension];
}
/**
 * Get the image information. The returned associative array fields will
 * vary depending on the image type. The basic keys are width, height, type
 * (plus bits, as produced by getImageInfoFromExif()).
 *
 * All formats are handled by the EXIF-based reader. The per-type dispatch that
 * used to follow the return statement could never run, and it called
 * imageExtensionToType(), getImageInfoFromJp2() and getImageInfoFromExif() as
 * global functions rather than methods, so it has been removed.
 *
 * @param string $zipPath Path to the image stack archive.
 * @param string $file    Path of the image inside the archive.
 * @return array Image information array.
 */
function getImageInfo($zipPath, $file)
{
    return $this->getImageInfoFromExif($zipPath, $file); // this is fast
}
/**
 * Get the records of a JP2 as returned by kdu_expand.
 *
 * The image is piped from the archive into kdu_expand with "-record /dev/stdout";
 * every output line of the form "name=value" becomes one entry of the result.
 * Lines without "=" are ignored.
 *
 * @param string $zipPath Path to the image stack archive.
 * @param string $file    Path of the JP2 inside the archive.
 * @return array Map of record name => value (both strings).
 */
function getJp2Records($zipPath, $file)
{
    $cmd = sprintf(
        '%s | %s -no_seek -quiet -i /dev/stdin -record /dev/stdout',
        $this->getUnarchiveCommand($zipPath, $file),
        $this->kduExpand
    );
    exec($cmd, $output);

    $records = array();
    foreach ($output as $line) {
        // Split on the first "=" only, so values may themselves contain "=".
        $elems = explode("=", $line, 2);
        if (2 == count($elems)) {
            $records[$elems[0]] = $elems[1];
        }
    }

    return $records;
}
/**
 * Get the image width, height and depth using the EXIF information.
 *
 * The image is piped from the archive into exiftool ("-S" short output, one
 * "Tag: value" pair per line) and the tags relevant to its file type are used.
 *
 * Changes from the previous version:
 *  - each line is split on the first ": " only, so values containing ": " are
 *    no longer truncated
 *  - lines without a delimiter are skipped instead of reading a missing index
 *  - tags exiftool did not report read as null instead of raising notices
 *
 * @param string $zipPath Path to the image stack archive.
 * @param string $file    Path of the image inside the archive.
 * @return array Keys: width (int), height (int), bits (int), type (string).
 * @throws Exception via BRfatal() for an unsupported or undetected file type.
 */
function getImageInfoFromExif($zipPath, $file)
{
    // We look for all the possible tags of interest then act on the
    // ones presumed present based on the file type
    $tagsToGet = ' -ImageWidth -ImageHeight -FileType'  // all formats
        . ' -BitsPerComponent -ColorSpace'              // jp2
        . ' -BitDepth'                                  // png
        . ' -BitsPerSample';                            // tiff

    $cmd = $this->getUnarchiveCommand($zipPath, $file)
        . ' | '. $this->exiftool . ' -S -fast' . $tagsToGet . ' -';

    $output = array();
    exec($cmd, $output);

    // Pre-seed every tag read below so an absent tag is null, not an undefined index.
    $tags = array(
        'ImageWidth' => null,
        'ImageHeight' => null,
        'FileType' => null,
        'BitsPerComponent' => null,
        'BitsPerSample' => null,
        'BitDepth' => null,
    );
    foreach ($output as $line) {
        $keyValue = explode(": ", $line, 2);
        if (count($keyValue) < 2) {
            continue;
        }
        $tags[$keyValue[0]] = $keyValue[1];
    }

    $width = intval($tags["ImageWidth"]);
    $height = intval($tags["ImageHeight"]);
    $type = strtolower((string) $tags["FileType"]);

    // NOTE(review): the case labels were absent from the listing; they are
    // reconstructed from the per-format tag comments above - confirm.
    switch ($type) {
        case "jp2":
            $bits = intval($tags["BitsPerComponent"]);
            break;
        case "tiff":
            $bits = intval($tags["BitsPerSample"]);
            break;
        case "jpeg":
            // NOTE(review): reconstructed - no depth tag is requested for jpeg,
            // 8 bits per sample assumed; confirm.
            $bits = 8;
            break;
        case "png":
            $bits = intval($tags["BitDepth"]);
            break;
        default:
            $this->BRfatal("Unsupported image type $type for file $file in $zipPath");
            break;
    }

    $retval = Array('width' => $width, 'height' => $height,
        'bits' => $bits, 'type' => $type);
    return $retval;
}
/**
 * Output JSON given the imageInfo associative array.
 *
 * Sends a text/plain content type, then prints the JSON encoding of $imageInfo.
 * When a callback name is supplied the JSON is wrapped as "callback(...);" (JSONP).
 *
 * @param array       $imageInfo Data to encode.
 * @param string|null $callback  JavaScript function name, or a falsy value for plain JSON.
 */
function outputJSON($imageInfo, $callback)
{
    header('Content-type: text/plain');

    $payload = json_encode($imageInfo);
    echo $callback ? $callback . '(' . $payload . ');' : $payload;
}
/**
 * Builds the pipeline stage that turns the archived image into PNM on stdout.
 *
 *  - jp2:  kdu_expand with -reduce/-rotate, writing to $stdoutLink, followed by
 *          bmptopnm when the instance is decompressing to BMP
 *  - tiff: the stream is first written to a temporary file (registered in
 *          $this->tempFiles) because tifftopnm needs a seekable input
 *  - jpeg/png: the matching netpbm converter followed by reduceCommand($scale)
 *
 * Changes from the previous version: $powReduce and $rotate are cast to int
 * before being placed in the command line (the jp2 recovery path passes a raw
 * string taken from kdu_expand output), the temporary file name is shell-quoted,
 * and a failed tempnam() is reported instead of producing a broken redirect.
 *
 * @param string     $imageType  'jp2', 'tiff', 'jpeg' or 'png'.
 * @param int|string $powReduce  Power-of-two reduction exponent (jp2 only).
 * @param int|string $rotate     Rotation in degrees (jp2 only).
 * @param int|float  $scale      Integer reduction factor (non-jp2 formats).
 * @param string     $stdoutLink Output path handed to kdu_expand.
 * @return string Command fragment beginning with a pipe (or redirect for tiff).
 * @throws Exception via BRfatal() for an unknown type or temp file failure.
 */
function getDecompressCmd($imageType, $powReduce, $rotate, $scale, $stdoutLink) {
    // NOTE(review): the case labels were absent from the listing; they are
    // reconstructed from the converter each branch invokes - confirm.
    switch ($imageType) {
        case 'jp2':
            $decompressCmd =
                " | " . $this->kduExpand . " -no_seek -quiet -reduce " . intval($powReduce)
                . " -rotate " . intval($rotate) . " -i /dev/stdin -o " . $stdoutLink;
            if (!empty($this->decompressToBmp)) {
                // We suppress output since bmptopnm always outputs on stderr
                $decompressCmd .= ' | (bmptopnm 2>/dev/null)';
            }
            break;

        case 'tiff':
            // We need to create a temporary file for tifftopnm since it cannot
            // work on a pipe (the file must be seekable).
            // The BookReaderTiff prefix identifies leftovers if cleanup is missed.
            $tempFile = tempnam("/tmp", "BookReaderTiff");
            if (false === $tempFile) {
                $this->BRfatal('Unable to create temporary file');
            }
            array_push($this->tempFiles, $tempFile);

            // $$$ look at bit depth when reducing
            $quotedTemp = escapeshellarg($tempFile);
            $decompressCmd =
                ' > ' . $quotedTemp . ' ; tifftopnm ' . $quotedTemp . ' 2>/dev/null' . $this->reduceCommand($scale);
            break;

        case 'jpeg':
            $decompressCmd = ' | ( jpegtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
            break;

        case 'png':
            $decompressCmd = ' | ( pngtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
            break;

        default:
            $this->BRfatal('Unknown image type: ' . $imageType);
            break;
    }

    return $decompressCmd;
}
/**
 * Runs a shell command and streams its stdout to the client if it looks successful.
 *
 * If the command has its initial output on stdout the headers will be emitted
 * followed by the stdout output. If initial output is on stderr an error message
 * will be returned instead.
 *
 * Changes from the previous version:
 *  - a failed stream_select() keeps its 'Select failed' message instead of
 *    falling through and being overwritten by a stderr read
 *  - when stdout is ready and stderr is only at end-of-file (no bytes), the run
 *    is treated as stdout output rather than as an error; this happened when
 *    the child finished before select was entered
 *  - a failed proc_open() now sets an error message
 *
 * $$$ Tested with our command-line image processing. May deadlock for other
 *     cases, e.g. a child that fills stderr while we are copying stdout.
 *
 * @param array  $headers       Header lines to send before the output.
 * @param string $cmd           Shell command to run.
 * @param string &$errorMessage Receives the error text on failure.
 * @return bool true  - command emitted stdout first and exited with code zero
 *              false - command had output on stderr first, could not be run,
 *                      or exited non-zero
 */
function passthruIfSuccessful($headers, $cmd, &$errorMessage)
{
    $retVal = false;
    $errorMessage = '';

    $descriptorspec = array(
        0 => array("pipe", "r"),  // stdin is a pipe that the child will read from
        1 => array("pipe", "w"),  // stdout is a pipe that the child will write to
        2 => array("pipe", "w"),  // stderr is a pipe to write to
    );

    $cwd = NULL;
    $env = NULL;

    $process = proc_open($cmd, $descriptorspec, $pipes, $cwd, $env);

    if (!is_resource($process)) {
        $errorMessage = 'Unable to start command';
        return false;
    }

    // $pipes now looks like this:
    // 0 => writeable handle connected to child stdin
    // 1 => readable handle connected to child stdout
    // 2 => readable handle connected to child stderr
    $stdin = $pipes[0];
    $stdout = $pipes[1];
    $stderr = $pipes[2];

    // check whether we get input first on stdout or stderr
    $read = array($stdout, $stderr);
    $write = NULL;
    $except = NULL;
    $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout

    if (false === $numChanged) {
        $errorMessage = 'Select failed';
    } else {
        $stdoutReady = in_array($stdout, $read, true);
        $stderrReady = in_array($stderr, $read, true);

        // select also reports a stream at EOF as readable, so look at what is
        // actually there: an empty read means stderr was closed without output.
        $stderrData = '';
        if ($stderrReady) {
            $stderrData = (string) fread($stderr, 8192);
        }

        if ($stdoutReady && '' === $stderrData) {
            // Got output first on stdout (only)
            // $$$ make sure we get all stdout
            $output = fopen('php://output', 'w');
            foreach ($headers as $header) {
                header($header);
            }
            stream_copy_to_stream($stdout, $output);
            fclose($output); // okay since tied to special php://output
            $retVal = true;
        } else {
            // Got output on stderr
            // $$$ make sure we get all stderr
            $errorMessage = $stderrData . stream_get_contents($stderr);
        }
    }

    fclose($stderr);
    fclose($stdout);
    fclose($stdin);

    // It is important that you close any pipes before calling
    // proc_close in order to avoid a deadlock
    $cmdRet = proc_close($process);
    if (0 != $cmdRet) {
        $retVal = false;
        $errorMessage .= "Command failed with result code " . $cmdRet;
    }

    return $retVal;
}
// Aborts processing by throwing an Exception whose message is the given text
// prefixed with "Image error: ".
// NOTE(review): the listing skips one numbered line between the signature and the
// throw, so any statement that runs before the exception is raised is not shown.
693 function BRfatal($string) {
695 throw new Exception("Image error: $string");
698 // Returns true if using a power node
// Probes the host hardware by running two shell commands.
// NOTE(review): the return statements are not part of this listing, so how each
// probe maps to the result cannot be confirmed from here.
699 function onPowerNode() {
// Count the lspci output lines that mention "Realtek"; fgrep -c prints the count.
700 exec("lspci | fgrep -c Realtek", $output, $return);
701 if ("0" != $output[0]) {
// egrep -q prints nothing; the outcome is carried by the exit status in $return.
// exec() appends to $output, which still holds the lines from the first probe.
704 exec("egrep -q AMD /proc/cpuinfo", $output, $return);
/**
 * Returns the pnmscale pipeline stage that shrinks an image by an integer factor.
 *
 * On a power node plain "pnmscale -reduce" is used; elsewhere "-nomix" is added.
 * The scale is concatenated into a shell command, so a non-numeric value yields
 * no stage at all (empty string) instead of being passed to the shell.
 *
 * @param int|float|string $scale Reduction factor.
 * @return string Command fragment beginning with a pipe, or '' for a
 *                non-numeric scale.
 */
function reduceCommand($scale) {
    if (!is_numeric($scale)) {
        return '';
    }

    $mixOption = $this->onPowerNode() ? '' : '-nomix ';
    return ' | pnmscale ' . $mixOption . '-reduce ' . $scale . ' 2>/dev/null ';
}
// Verifies that the given file is readable by the current process and answers
// with an HTTP 403 status line when it is not.
// NOTE(review): the statement that follows the header() call is not part of this
// listing, so how the request is ended after the 403 cannot be confirmed here.
724 function checkPrivs($filename) {
725 if (!is_readable($filename)) {
726 header('HTTP/1.1 403 Forbidden');
/**
 * Given file path (inside archive) and output file extension, return a filename
 * suitable for the Content-disposition header.
 *
 * The directory and original extension are dropped; the 'jpeg' type is written
 * with the conventional 'jpg' suffix.
 *
 * @param string $filePath Path of the image inside the archive.
 * @param string $ext      Output image type/extension.
 * @return string Base name plus '.' plus the output extension.
 */
function filenameForClient($filePath, $ext) {
    $baseName = pathinfo($filePath, PATHINFO_FILENAME);
    $suffix = ('jpeg' == $ext) ? 'jpg' : $ext;

    return $baseName . '.' . $suffix;
}
/**
 * Returns the nearest power of 2 reduction factor that results in a larger image.
 *
 * The result is the exponent n such that reducing the source by 2^n gives an
 * image at least as large as the desired dimension.
 *
 * A desired dimension of zero (or below) previously caused a division by zero,
 * which is a fatal DivisionByZeroError on PHP 8; it now means "no reduction".
 *
 * @param int|float|string $desiredDimension Requested width or height in pixels.
 * @param int|float|string $sourceDimension  Corresponding source dimension.
 * @return int Power-of-two exponent, 0 for no reduction.
 */
function nearestPow2Reduce($desiredDimension, $sourceDimension) {
    $desired = floatval($desiredDimension);
    if ($desired <= 0) {
        return 0;
    }

    $ratio = floatval($sourceDimension) / $desired;
    return $this->nearestPow2ForScale($ratio);
}
/**
 * Returns nearest power of 2 reduction factor that results in a larger image.
 *
 * The scale is truncated to an integer and the result is floor(log2(scale)),
 * e.g. 5 -> 2 (reduce by 4, which leaves a larger image than reducing by 5).
 *
 * Scales of 1 or less mean no reduction. Without that guard a negative value
 * reached decbin(), whose two's-complement output produced a reduction of 63.
 *
 * @param int|float|string $scale Requested downscaling factor.
 * @return int Power-of-two exponent, never negative.
 */
function nearestPow2ForScale($scale) {
    $scale = intval($scale);
    if ($scale <= 1) {
        return 0;
    }

    // Count how many times the value can be halved: the position of the
    // highest set bit, i.e. strlen(decbin($scale)) - 1.
    $power = 0;
    while ($scale > 1) {
        $scale >>= 1;
        $power++;
    }
    return $power;
}
758 * Parses a page request like "page5_r2.jpg" or "cover_t.jpg" to corresponding
759 * page type, size, reduce, and format
// Parses a page request string into the $pageInfo array. Visible assignments set
// 'extension', 'type', 'value', 'size' and any key mapped through self::$imageUrlKeys.
// NOTE(review): this listing skips numbered lines (e.g. 774-778, 794-797, 834-838),
// including most of the page-type table and the return statement; comments below
// describe only what is shown.
761 function parsePageRequest($pageRequest, $bookPrefix) {
763 // Will hold parsed results
// The request is lowercased before any matching is done.
767 $pageRequest = strtolower($pageRequest);
769 // Pull off extension
// Group 1 is everything before the last dot, group 2 the text after it.
770 if (preg_match('#(.*)\.([^.]+)$#', $pageRequest, $matches) === 1) {
771 $pageRequest = $matches[1];
772 $extension = $matches[2];
773 if ($extension == 'jpeg') {
779 $pageInfo['extension'] = $extension;
// Remaining request is a '_'-separated list: page token first, then modifiers.
782 $parts = explode('_', $pageRequest);
784 // Remove book prefix if it was included (historical)
// NOTE(review): $bookPrefix is compared as passed in, against a lowercased part.
785 if ($parts[0] == $bookPrefix) {
789 if (count($parts) === 0) {
790 $this->BRfatal('No page type specified');
792 $page = array_shift($parts);
798 'preview' => 'single',
803 // Look for known page types
804 foreach ( $pageTypes as $pageName => $kind ) {
// Prefix match: the type name must start the token; whatever follows it is
// captured as the raw value.
805 if ( preg_match('#^(' . $pageName . ')(.*)#', $page, $matches) === 1 ) {
806 $pageInfo['type'] = $matches[1];
// The value is stored either as the raw string or converted with intval().
// NOTE(review): the lines selecting between the two are not shown.
809 $pageInfo['value'] = $matches[2];
812 $pageInfo['value'] = intval($matches[2]);
820 if ( !array_key_exists('type', $pageInfo) ) {
821 $this->BRfatal('Unrecognized page type');
824 // Look for other known parts
825 foreach ($parts as $part) {
// A part that is exactly a named size key is recorded as the size.
826 if ( array_key_exists($part, self::$imageSizes) ) {
827 $pageInfo['size'] = $part;
831 // Key must be alpha, value must start with digit and contain digits, alpha, ',' or '.'
832 // Should prevent injection of strange values into the redirect to datanode
// NOTE(review): "=== 0" covers only "no match"; preg_match() returns false on error.
// The pattern has no end anchor, so trailing characters after the value are ignored.
833 if ( preg_match('#^([a-z]+)(\d[a-z0-9,.]*)#', $part, $matches) === 0) {
839 $value = $matches[2];
// $key is assigned on a line not shown here; recognised keys are stored under
// their long name from self::$imageUrlKeys.
841 if ( array_key_exists($key, self::$imageUrlKeys) ) {
842 $pageInfo[self::$imageUrlKeys[$key]] = $value;
846 // If we hit here, was unrecognized (no action)
852 // Clean up temporary files and resources
854 foreach($this->tempFiles as $tempFile) {
857 $this->tempFiles = array();