4 Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
6 This file is part of BookReader. The full source code can be found at GitHub:
7 http://github.com/openlibrary/bookreader
9 The canonical short name of an image type is the same as in the MIME type.
10 For example both .jpeg and .jpg are considered to have type "jpeg" since
11 the MIME type is "image/jpeg".
13 BookReader is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 BookReader is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
23 You should have received a copy of the GNU Affero General Public License
24 along with BookReader. If not, see <http://www.gnu.org/licenses/>.
27 require_once("BookReaderMeta.inc.php");
29 class BookReaderImages
31 public static $MIMES = array('gif' => 'image/gif',
33 'jpg' => 'image/jpeg',
34 'jpeg' => 'image/jpeg',
36 'tif' => 'image/tiff',
37 'tiff' => 'image/tiff');
39 public static $EXTENSIONS = array('gif' => 'gif',
47 // Width when generating thumbnails
48 public static $imageSizes = array(
55 // Keys in the image permalink urls, e.g. http://www.archive.org/download/itemid/page/cover_{keyval}_{keyval}.jpg
56 public static $imageUrlKeys = array(
66 // Paths to command-line tools
67 var $exiftool = '/petabox/sw/books/exiftool/exiftool';
68 var $kduExpand = '/petabox/sw/bin/kdu_expand';
71 * Serve an image request that requires looking up the book metadata
75 * - Parse the requested page (e.g. cover_t.jpg, n5_r4.jpg) to determine which page type,
76 * size and format (etc) is being requested
77 * - Determine the leaf number corresponding to the page
78 * - Determine scaling values
79 * - Serve image request now that all information has been gathered
/**
 * Serve an image request that requires looking up the book metadata.
 *
 * Reads 'id', 'itemPath', 'subPrefix', 'server' and 'page' from $_REQUEST,
 * builds the book metadata, works out which image index the requested page
 * (title, cover, preview, n<index> or page<number>) refers to, and then hands
 * the resolved archive path and file name to serveRequest().
 *
 * @param array $requestEnv Kept for interface compatibility. The incoming value is
 *                          not read; the environment passed on to serveRequest()
 *                          is rebuilt from the metadata and the parsed page request.
 * @throws Exception via BRfatal() when the metadata cannot be built or the page
 *                   cannot be resolved
 */
function serveLookupRequest($requestEnv) {
    $brm = new BookReaderMeta();

    // subPrefix is optional in the request; avoid an undefined-index notice
    $subPrefix = isset($_REQUEST['subPrefix']) ? $_REQUEST['subPrefix'] : null;

    try {
        $metadata = $brm->buildMetadata($_REQUEST['id'], $_REQUEST['itemPath'], $subPrefix, $_REQUEST['server']);
    } catch (Exception $e) {
        // getMessage is a method: without the call parentheses the message was lost
        $this->BRfatal($e->getMessage());
    }

    $page = $_REQUEST['page'];

    // Index of image to return
    $imageIndex = null;

    // deal with subPrefix: the book id is the last path component of the prefix
    if ($subPrefix) {
        // explode() replaces split(), which was removed in PHP 7
        $parts = explode('/', $subPrefix);
        $bookId = $parts[count($parts) - 1];
    } else {
        $bookId = $_REQUEST['id'];
    }

    $pageInfo = $this->parsePageRequest($page, $bookId);

    $basePage = $pageInfo['type'];

    switch ($basePage) {
        case 'title':
            if (! array_key_exists('titleIndex', $metadata)) {
                $this->BRfatal("No title page asserted in book");
            }
            $imageIndex = $metadata['titleIndex'];
            break;

        case 'cover':
            if (! array_key_exists('coverIndices', $metadata)) {
                $this->BRfatal("No cover asserted in book");
            }
            $imageIndex = $metadata['coverIndices'][0]; // $$$ TODO add support for other covers
            break;

        case 'preview':
            // Preference is:
            //   Cover page if book was published >= 1950
            //   Title page
            //   Cover page
            //   Page 0
            if ( array_key_exists('date', $metadata) && array_key_exists('coverIndices', $metadata) ) {
                if ($brm->parseYear($metadata['date']) >= 1950) {
                    $imageIndex = $metadata['coverIndices'][0];
                    break;
                }
            }
            if (array_key_exists('titleIndex', $metadata)) {
                $imageIndex = $metadata['titleIndex'];
                break;
            }
            if (array_key_exists('coverIndices', $metadata)) {
                $imageIndex = $metadata['coverIndices'][0];
                break;
            }

            // First page
            $imageIndex = 0;
            break;

        case 'n':
            // Accessible index page
            $imageIndex = intval($pageInfo['value']);
            break;

        case 'page':
            // Named page
            $index = array_search($pageInfo['value'], $metadata['pageNums']);
            if ($index === FALSE) {
                // Not found
                $this->BRfatal("Page not found");
            }
            $imageIndex = $index;
            break;

        default:
            // Shouldn't be possible
            $this->BRfatal("Unrecognized page type requested");
            break;
    }

    $leaf = $brm->leafForIndex($imageIndex, $metadata['leafNums']);

    $requestEnv = array(
        'zip' => $metadata['zip'],
        'file' => $brm->imageFilePath($leaf, $metadata['subPrefix'], $metadata['imageFormat']),
        'ext' => 'jpg',
    );

    // remove non-passthrough keys from pageInfo
    unset($pageInfo['type']);
    unset($pageInfo['value']);

    // add pageinfo to request (keys set above win over parsed page keys)
    $requestEnv = array_merge($pageInfo, $requestEnv);

    // Return image data - will check privs
    $this->serveRequest($requestEnv);
}
193 * Returns a page image when all parameters such as the image stack location are
198 * Get info about requested image (input)
199 * Get info about requested output format
200 * Determine processing parameters
203 * Clean up temporary files
/**
 * Returns a page image when all parameters such as the image stack location
 * are already known.
 *
 * Steps:
 *   - read and validate the request parameters
 *   - get info about the requested image (size, bit depth, type)
 *   - output JSON image info instead of an image when 'ext' is 'json'
 *   - determine the power-of-two reduction and rotation
 *   - build the unarchive | decompress | compress shell pipeline and stream it
 *   - for jp2 sources, retry once with a reduction limited by the file's Clevels
 *
 * @param array $requestEnv Keys read here: 'zip', 'file' (required); 'ext',
 *                          'callback', 'rotate', 'height', 'width', 'scale',
 *                          'size' (optional)
 * @throws Exception via BRfatal() on invalid input or when processing fails
 */
function serveRequest($requestEnv) {
    // Process some of the request parameters
    $zipPath = $requestEnv['zip'];
    $file = $requestEnv['file'];

    // Requested output format; anything other than png/json ends up as jpeg below
    $ext = isset($requestEnv['ext']) ? $requestEnv['ext'] : null;

    if (isset($requestEnv['callback'])) {
        // validate callback is valid JS identifier (only)
        $callback = $requestEnv['callback'];
        $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/';
        if (! preg_match($identifierPatt, $callback)) {
            $this->BRfatal('Invalid callback');
        }
    } else {
        $callback = null;
    }

    if ( !file_exists($zipPath) ) {
        $this->BRfatal('Image stack does not exist at ' . $zipPath);
    }
    // Make sure the image stack is readable - return 403 if not
    $this->checkPrivs($zipPath);

    // Get the image size and depth
    $imageInfo = $this->getImageInfo($zipPath, $file);

    // Output json if requested
    if ('json' == $ext) {
        // $$$ we should determine the output size first based on requested scale
        $this->outputJSON($imageInfo, $callback); // $$$ move to BookReaderRequest
        exit;
    }

    // Unfortunately kakadu requires us to know a priori if the
    // output file should be .ppm or .pgm.  By decompressing to
    // .bmp kakadu will write a file we can consistently turn into
    // .pnm.  Really kakadu should support .pnm as the file output
    // extension and automatically write ppm or pgm format as
    // appropriate.
    $this->decompressToBmp = true; // $$$ shouldn't be necessary if we use file info to determine output format
    if ($this->decompressToBmp) {
        $stdoutLink = '/tmp/stdout.bmp';
    } else {
        $stdoutLink = '/tmp/stdout.ppm';
    }

    $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));

    // Rotate is currently only supported for jp2 since it does not add server load.
    // The comparison is strict so that only the four literal values can reach the
    // shell command (a loose comparison also accepts e.g. "9e1" or "90.0").
    $allowedRotations = array("0", "90", "180", "270");
    $rotate = isset($requestEnv['rotate']) ? (string) $requestEnv['rotate'] : "0";
    if ( !in_array($rotate, $allowedRotations, true) ) {
        $rotate = "0";
    }

    // Image conversion options
    $pngOptions = '';
    $jpegOptions = '-quality 75';

    // The pbmreduce reduction factor produces an image with dimension 1/n
    // The kakadu reduction factor produces an image with dimension 1/(2^n)

    // Set scale from height or width if set
    if (isset($requestEnv['height'])) {
        $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']);
        $scale = pow(2, $powReduce);
    } else if (isset($requestEnv['width'])) {
        $powReduce = $this->nearestPow2Reduce($requestEnv['width'], $imageInfo['width']);
        $scale = pow(2, $powReduce);
    } else {
        // $$$ could be cleaner
        // Provide next smaller power of two reduction

        // Set scale from 'scale' if set
        $scale = isset($requestEnv['scale']) ? $requestEnv['scale'] : 1;
        if (!$scale) {
            $scale = 1;
        }

        // Set scale from named size (e.g. 'large') if set
        $size = isset($requestEnv['size']) ? $requestEnv['size'] : null;
        if ( $size && array_key_exists($size, self::$imageSizes)) {
            // Fit the longer side of the source to the named size
            if (floatval($imageInfo['width']) > floatval($imageInfo['height'])) {
                // wide
                $dimension = 'width';
            } else {
                $dimension = 'height';
            }
            $powReduce = $this->nearestPow2Reduce(self::$imageSizes[$size], $imageInfo[$dimension]);
        } else {
            // No named size - update powReduce from scale
            // (this previously read the undefined variable $sale, so the
            // requested scale was always ignored)
            $powReduce = $this->nearestPow2ForScale($scale);
        }

        // Make sure scale matches powReduce
        $scale = pow(2, $powReduce);
    }

    // Override depending on source image format
    // $$$ consider doing a 302 here instead, to make better use of the browser cache
    // Limit scaling for 1-bit images.  See https://bugs.edge.launchpad.net/bookreader/+bug/486011
    if (1 == $imageInfo['bits']) {
        if ($scale > 1) {
            $scale /= 2;
            $powReduce -= 1;

            // Hard limit so there are some black pixels to use!
            if ($scale > 4) {
                $scale = 4;
                $powReduce = 2;
            }
        }
    }

    // The decompressors are pointed at this link so their "file" output lands on stdout
    if (!file_exists($stdoutLink)) {
        system('ln -s /dev/stdout ' . $stdoutLink);
    }

    putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');

    $unzipCmd = $this->getUnarchiveCommand($zipPath, $file);

    $decompressCmd = $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink);

    // Non-integer scaling is currently disabled on the cluster
    // if (isset($_REQUEST['height'])) {
    //     $cmd .= " | pnmscale -height {$_REQUEST['height']} ";
    // }

    switch ($ext) {
        case 'png':
            $compressCmd = ' | pnmtopng ' . $pngOptions;
            break;

        case 'jpeg':
        case 'jpg':
        default:
            $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
            $ext = 'jpeg'; // for matching below
            break;
    }

    if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
        // Just pass through original data if same format and size
        $cmd = $unzipCmd;
    } else {
        $cmd = $unzipCmd . $decompressCmd . $compressCmd;
    }

    $filenameForClient = $this->filenameForClient($file, $ext);

    $headers = array('Content-type: '. self::$MIMES[$ext],
                     'Cache-Control: max-age=15552000',
                     'Content-disposition: inline; filename=' . $filenameForClient);

    $errorMessage = '';
    if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
        // $$$ automated reporting
        trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);

        // Try some content-specific recovery
        $recovered = false;
        if ($imageInfo['type'] == 'jp2') {
            // The requested reduction may exceed the number of resolution levels
            // stored in the file; clamp it to Clevels, otherwise retry unreduced.
            $records = $this->getJp2Records($zipPath, $file);
            $clevels = isset($records['Clevels']) ? intval($records['Clevels']) : 0;
            if ($powReduce > $clevels) {
                $powReduce = $clevels;
            } else {
                $powReduce = 0;
            }
            // keep scale in step with powReduce (was assigned to an unused $reduce)
            $scale = pow(2, $powReduce);

            $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
            if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
                $recovered = true;
            } else {
                trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
            }
        }

        if (! $recovered) {
            $this->BRfatal('Problem processing image - command failed');
        }
    }

    // NOTE: no temporary file is ever created in this scope. The one used for TIFF
    // input is created inside getDecompressCmd() and its name is not returned, so
    // the former "if (isset($tempFile)) unlink" check here could never fire.
}
/**
 * Build the shell command that writes one archive member to stdout.
 *
 * The archive type is taken from the (case-insensitive) suffix of the
 * archive path: 'zip' uses unzip, 'tar' uses 7z.
 *
 * @param string $archivePath Path of the image stack archive
 * @param string $file        Path of the member inside the archive
 * @return string Shell command fragment with both paths shell-escaped
 * @throws Exception via BRfatal() when the path has no suffix or the suffix
 *                   is not a supported archive type
 */
function getUnarchiveCommand($archivePath, $file)
{
    $matches = array();
    if (!preg_match('/\.([^\.]+)$/', strtolower($archivePath), $matches)) {
        $this->BRfatal('Bad image stack path');
    }

    $quotedArchive = escapeshellarg($archivePath);
    $quotedMember = escapeshellarg($file);

    switch ($matches[1]) {
        case 'zip':
            return 'unzip -p ' . $quotedArchive . ' ' . $quotedMember;

        case 'tar':
            return ' ( 7z e -so ' . $quotedArchive . ' ' . $quotedMember . ' 2>/dev/null ) ';

        default:
            $this->BRfatal('Incompatible archive format');
    }

    // Only reached if BRfatal() above did not throw
    $this->BRfatal('Bad image stack path or archive format');
}
434 * Returns the image type associated with the file extension.
/**
 * Returns the image type associated with the file extension.
 *
 * @param string $extension File extension, looked up as a key of self::$EXTENSIONS
 * @return string The canonical image type stored for that extension
 * @throws Exception via BRfatal() when the extension is not a known key
 */
function imageExtensionToType($extension)
{
    if (!array_key_exists($extension, self::$EXTENSIONS)) {
        $this->BRfatal('Unknown image extension');
    }

    return self::$EXTENSIONS[$extension];
}
447 * Get the image information. The returned associative array fields will
448 * vary depending on the image type. The basic keys are width, height, type
/**
 * Get the image information for one file inside an image stack.
 *
 * This delegates unconditionally to getImageInfoFromExif(), which the original
 * author noted is fast. An older per-type dispatch that followed the return
 * statement could never run and is not carried over.
 *
 * @param string $zipPath Path of the image stack archive
 * @param string $file    Path of the image inside the archive
 * @return array Whatever getImageInfoFromExif() returns for the file
 */
function getImageInfo($zipPath, $file)
{
    $info = $this->getImageInfoFromExif($zipPath, $file); // this is fast

    return $info;
}
// Get the records of a JP2 as returned by kdu_expand
/**
 * Run kdu_expand in -record mode on an archived JP2 and collect its output.
 *
 * Each output line of the form "key=value" becomes one entry of the result;
 * lines without an '=' are skipped.
 *
 * @param string $zipPath Path of the image stack archive
 * @param string $file    Path of the JP2 inside the archive
 * @return array Map of record name to record value (both strings)
 */
function getJp2Records($zipPath, $file)
{
    $cmd = $this->getUnarchiveCommand($zipPath, $file)
         . ' | ' . $this->kduExpand
         . ' -no_seek -quiet -i /dev/stdin -record /dev/stdout';
    exec($cmd, $output);

    $records = array();
    foreach ($output as $line) {
        // Split on the first '=' only so values may themselves contain '='
        $pair = explode("=", $line, 2);
        if (count($pair) > 1) {
            $records[$pair[0]] = $pair[1];
        }
    }

    return $records;
}
492 * Get the image width, height and depth using the EXIF information.
/**
 * Get the image width, height and depth using the EXIF information.
 *
 * The archive member is piped through exiftool and its "Tag: value" output
 * lines are parsed. Which bit-depth tag is used depends on the reported file
 * type (jp2, tiff, jpeg or png).
 *
 * @param string $zipPath Path of the image stack archive
 * @param string $file    Path of the image inside the archive
 * @return array Keys 'width' (int), 'height' (int), 'bits' (int), 'type' (string)
 * @throws Exception via BRfatal() when the reported file type is not supported
 */
function getImageInfoFromExif($zipPath, $file)
{
    // We look for all the possible tags of interest then act on the
    // ones presumed present based on the file type
    $tagsToGet = ' -ImageWidth -ImageHeight -FileType'        // all formats
                 . ' -BitsPerComponent -ColorSpace'          // jp2
                 . ' -BitDepth'                              // png
                 . ' -BitsPerSample';                        // tiff

    $cmd = $this->getUnarchiveCommand($zipPath, $file)
         . ' | '. $this->exiftool . ' -S -fast' . $tagsToGet . ' -';
    $output = array();
    exec($cmd, $output);

    $tags = array();
    foreach ($output as $line) {
        // Split on the first ": " only, and ignore lines that are not "Tag: value"
        // (an unlimited split dropped the tail of values containing ": " and
        // raised an undefined-offset notice on lines without a separator)
        $keyValue = explode(": ", $line, 2);
        if (2 != count($keyValue)) {
            continue;
        }
        $tags[$keyValue[0]] = $keyValue[1];
    }

    // Missing tags fall back to 0 / '' instead of raising undefined-index notices
    $width = isset($tags["ImageWidth"]) ? intval($tags["ImageWidth"]) : 0;
    $height = isset($tags["ImageHeight"]) ? intval($tags["ImageHeight"]) : 0;
    $type = isset($tags["FileType"]) ? strtolower($tags["FileType"]) : '';

    switch ($type) {
        case "jp2":
            $bits = isset($tags["BitsPerComponent"]) ? intval($tags["BitsPerComponent"]) : 0;
            break;
        case "tiff":
            $bits = isset($tags["BitsPerSample"]) ? intval($tags["BitsPerSample"]) : 0;
            break;
        case "jpeg":
            $bits = 8;
            break;
        case "png":
            $bits = isset($tags["BitDepth"]) ? intval($tags["BitDepth"]) : 0;
            break;
        default:
            $this->BRfatal("Unsupported image type $type for file $file in $zipPath");
            break;
    }

    $retval = Array('width' => $width, 'height' => $height,
        'bits' => $bits, 'type' => $type);

    return $retval;
}
544 * Output JSON given the imageInfo associative array
/**
 * Output JSON given the imageInfo associative array.
 *
 * Sends a text/plain content type header and echoes the JSON encoding of
 * $imageInfo. When $callback is truthy the JSON is wrapped as "callback(...);".
 *
 * @param array       $imageInfo Data to encode
 * @param string|null $callback  Optional wrapper function name; it is not validated here
 */
function outputJSON($imageInfo, $callback)
{
    header('Content-type: text/plain');

    $payload = json_encode($imageInfo);
    echo $callback ? ($callback . '(' . $payload . ');') : $payload;
}
/**
 * Build the shell pipeline stage that turns the raw image bytes into PNM.
 *
 * The returned string is appended to an unarchive command, so every variant
 * starts by consuming that command's stdout.
 *
 * @param string $imageType  'jp2', 'tiff', 'jpeg' or 'png'
 * @param int    $powReduce  Power-of-two reduction passed to kdu_expand (jp2 only)
 * @param string $rotate     Rotation in degrees passed to kdu_expand (jp2 only)
 * @param int    $scale      Reduction factor passed to reduceCommand() (non-jp2 types)
 * @param string $stdoutLink Path kdu_expand writes to (expected to link to /dev/stdout)
 * @return string Shell command fragment
 * @throws Exception via BRfatal() for an unknown image type or if no temp file
 *                   could be created
 */
function getDecompressCmd($imageType, $powReduce, $rotate, $scale, $stdoutLink) {

    switch ($imageType) {
        case 'jp2':
            // Both numeric options are forced to integers before they are placed
            // into the shell command line
            $decompressCmd =
                " | " . $this->kduExpand . " -no_seek -quiet -reduce " . intval($powReduce)
                . " -rotate " . intval($rotate) . " -i /dev/stdin -o " . $stdoutLink;
            if (!empty($this->decompressToBmp)) {
                // We suppress output since bmptopnm always outputs on stderr
                $decompressCmd .= ' | (bmptopnm 2>/dev/null)';
            }
            break;

        case 'tiff':
            // We need to create a temporary file for tifftopnm since it cannot
            // work on a pipe (the file must be seekable).
            // We use the BookReaderTiff prefix to give a hint in case things don't
            // work.
            $tempFile = tempnam("/tmp", "BookReaderTiff");
            if (false === $tempFile) {
                $this->BRfatal('Could not create temporary file');
            }
            $quotedTemp = escapeshellarg($tempFile);

            // $$$ look at bit depth when reducing
            // The temp file name is not visible to the caller, so the command removes
            // the file itself once tifftopnm has finished reading it. The subshell
            // keeps tifftopnm's stdout as the input of the following stage.
            $decompressCmd =
                ' > ' . $quotedTemp
                . ' ; ( tifftopnm ' . $quotedTemp . ' 2>/dev/null ; rm -f ' . $quotedTemp . ' ) '
                . $this->reduceCommand($scale);
            break;

        case 'jpeg':
            $decompressCmd = ' | ( jpegtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
            break;

        case 'png':
            $decompressCmd = ' | ( pngtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
            break;

        default:
            $this->BRfatal('Unknown image type: ' . $imageType);
            break;
    }
    return $decompressCmd;
}
595 // If the command has its initial output on stdout the headers will be emitted followed
596 // by the stdout output. If initial output is on stderr an error message will be
600 // true - if command emits stdout and has zero exit code
601 // false - command has initial output on stderr or non-zero exit code
602 // &$errorMessage - error string if there was an error
604 // $$$ Tested with our command-line image processing. May be deadlocks for
/**
 * Run a shell command and stream its stdout to the client if it looks successful.
 *
 * The command is started with pipes on stdin/stdout/stderr. If the first
 * activity is on stdout (and only stdout), the given headers are sent and all
 * of stdout is copied to the response. Otherwise stderr is collected into
 * $errorMessage and nothing is sent.
 *
 * NOTE(review): a command that writes stdout and exits before the select call
 * returns can show both pipes as ready and is then treated as a failure.
 *
 * @param array  $headers      Header lines to send before the command output
 * @param string $cmd          Shell command to run
 * @param string $errorMessage Receives the error text (reset to '' on entry)
 * @return bool true if stdout was streamed and the exit code was zero, false otherwise
 */
function passthruIfSuccessful($headers, $cmd, &$errorMessage)
{
    $retVal = false;
    $errorMessage = '';

    $descriptorspec = array(
       0 => array("pipe", "r"),  // stdin is a pipe that the child will read from
       1 => array("pipe", "w"),  // stdout is a pipe that the child will write to
       2 => array("pipe", "w"),  // stderr is a pipe to write to
    );

    $cwd = NULL;
    $env = NULL;

    $process = proc_open($cmd, $descriptorspec, $pipes, $cwd, $env);

    if (is_resource($process)) {
        // $pipes now looks like this:
        // 0 => writeable handle connected to child stdin
        // 1 => readable handle connected to child stdout
        // 2 => readable handle connected to child stderr

        $stdin = $pipes[0];
        $stdout = $pipes[1];
        $stderr = $pipes[2];

        // check whether we get input first on stdout or stderr
        $read = array($stdout, $stderr);
        $write = NULL;
        $except = NULL;
        $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout
        if (false === $numChanged) {
            // select failed - report that and do not attempt a blocking read,
            // which would also have overwritten this message
            $errorMessage = 'Select failed';
        } else if ((1 == $numChanged) && in_array($stdout, $read, true)) {
            // Got output first on stdout (only)
            // $$$ make sure we get all stdout
            $output = fopen('php://output', 'w');
            foreach ($headers as $header) {
                header($header);
            }
            stream_copy_to_stream($stdout, $output);
            fclose($output); // okay since tied to special php://output
            $retVal = true;
        } else {
            // Got output on stderr
            // $$$ make sure we get all stderr
            $errorMessage = stream_get_contents($stderr);
        }

        fclose($stderr);
        fclose($stdout);
        fclose($stdin);

        // It is important that you close any pipes before calling
        // proc_close in order to avoid a deadlock
        $cmdRet = proc_close($process);
        if (0 != $cmdRet) {
            $retVal = false;
            $errorMessage .= "Command failed with result code " . $cmdRet;
        }
    }
    return $retVal;
}
/**
 * Abort image processing by throwing an exception.
 *
 * @param string $string Description of the problem; it is prefixed with "Image error: "
 * @throws Exception always
 */
function BRfatal($string) {
    $message = 'Image error: ' . $string;
    throw new Exception($message);
}
679 // Returns true if using a power node
/**
 * Returns true if using a power node.
 *
 * A node counts as a power node when lspci lists at least one Realtek device,
 * or failing that when /proc/cpuinfo mentions AMD.
 * NOTE(review): the AMD branch treats a zero egrep exit status as a match;
 * confirm against the deployed hardware assumptions.
 *
 * @return bool
 */
function onPowerNode() {
    // Separate arrays per call: exec() appends to an existing output array
    $lspciOutput = array();
    exec("lspci | fgrep -c Realtek", $lspciOutput, $return);
    // Guard the first line: it is absent if the pipeline produced no output
    if (isset($lspciOutput[0]) && ("0" != $lspciOutput[0])) {
        return true;
    }

    $cpuOutput = array();
    exec("egrep -q AMD /proc/cpuinfo", $cpuOutput, $return);
    if (0 == $return) {
        return true;
    }

    return false;
}
/**
 * Build the pipeline stage that shrinks a PNM stream by an integer factor.
 *
 * @param int $scale Reduction factor; 1 means no reduction stage is needed
 * @return string '' for a scale of 1, otherwise a " | pnmscale ... " fragment.
 *                The -nomix option is added when onPowerNode() is false.
 */
function reduceCommand($scale) {
    if (1 == $scale) {
        return '';
    }

    $mixOption = $this->onPowerNode() ? '' : '-nomix ';

    return ' | pnmscale ' . $mixOption . '-reduce ' . $scale . ' 2>/dev/null ';
}
/**
 * Stop the request with a 403 response if the given file is not readable.
 *
 * @param string $filename Path to check
 * @return void Returns normally only when the file is readable
 */
function checkPrivs($filename) {
    if (is_readable($filename)) {
        return;
    }

    header('HTTP/1.1 403 Forbidden');
    exit(0);
}
712 // Given file path (inside archive) and output file extension, return a filename
713 // suitable for Content-disposition header
/**
 * Given file path (inside archive) and output file extension, return a filename
 * suitable for Content-disposition header.
 *
 * @param string $filePath Path of the image inside the archive
 * @param string $ext      Output image type; 'jpeg' is written as 'jpg'
 * @return string Base name of $filePath without its extension, plus '.' and the extension
 */
function filenameForClient($filePath, $ext) {
    $suffix = ('jpeg' == $ext) ? 'jpg' : $ext;

    return pathinfo($filePath, PATHINFO_FILENAME) . '.' . $suffix;
}
722 // Returns the nearest power of 2 reduction factor that results in a larger image
/**
 * Returns the nearest power of 2 reduction factor that results in a larger image.
 *
 * @param int|float|string $desiredDimension Requested size in pixels
 * @param int|float|string $sourceDimension  Size of the source image in pixels
 * @return int Exponent n such that reducing by 2^n keeps the image at least as
 *             large as requested; 0 (no reduction) when the requested size is
 *             not a positive number
 */
function nearestPow2Reduce($desiredDimension, $sourceDimension) {
    $desired = floatval($desiredDimension);
    if ($desired <= 0) {
        // A request such as "h0" would otherwise divide by zero
        return 0;
    }

    $ratio = floatval($sourceDimension) / $desired;
    return $this->nearestPow2ForScale($ratio);
}
728 // Returns nearest power of 2 reduction factor that results in a larger image
/**
 * Returns nearest power of 2 reduction factor that results in a larger image.
 *
 * The scale is truncated to an integer and the result is the exponent of the
 * largest power of two not exceeding it, e.g. 5 -> 2 (because 4 <= 5 < 8).
 *
 * @param int|float|string $scale Downscaling factor
 * @return int Exponent >= 0; scales of 1 or less (including negatives) give 0
 */
function nearestPow2ForScale($scale) {
    $scale = intval($scale);

    // Nothing to reduce for 1 or below. This also keeps negative values away
    // from the bit counting, where they would yield a huge exponent.
    if ($scale <= 1) {
        return 0;
    }

    // Count how many times the value can be halved before reaching 1
    $exponent = 0;
    while ($scale > 1) {
        $scale >>= 1;
        $exponent++;
    }
    return $exponent;
}
739 * Parses a page request like "page5_r2.jpg" or "cover_t.jpg" to corresponding
740 * page type, size, reduce, and format
742 function parsePageRequest($pageRequest, $bookPrefix) {
744 // Will hold parsed results
748 $pageRequest = strtolower($pageRequest);
750 // Pull off extension
751 if (preg_match('#(.*)\.([^.]+)$#', $pageRequest, $matches) === 1) {
752 $pageRequest = $matches[1];
753 $extension = $matches[2];
754 if ($extension == 'jpeg') {
760 $pageInfo['extension'] = $extension;
763 $parts = explode('_', $pageRequest);
765 // Remove book prefix if it was included (historical)
766 if ($parts[0] == $bookPrefix) {
770 if (count($parts) === 0) {
771 $this->BRfatal('No page type specified');
773 $page = array_shift($parts);
779 'preview' => 'single',
783 // Look for known page types
784 foreach ( $pageTypes as $pageName => $kind ) {
785 if ( preg_match('#^(' . $pageName . ')(.*)#', $page, $matches) === 1 ) {
786 $pageInfo['type'] = $matches[1];
789 $pageInfo['value'] = $matches[2];
792 $pageInfo['value'] = intval($matches[2]);
800 if ( !array_key_exists('type', $pageInfo) ) {
801 $this->BRfatal('Unrecognized page type');
804 // Look for other known parts
805 foreach ($parts as $part) {
806 if ( array_key_exists($part, self::$imageSizes) ) {
807 $pageInfo['size'] = $part;
811 // Key must be alpha, value must start with digit and contain digits, alpha, ',' or '.'
812 // Should prevent injection of strange values into the redirect to datanode
813 if ( preg_match('#^([a-z]+)(\d[a-z0-9,.]*)#', $part, $matches) === 0) {
819 $value = $matches[2];
821 if ( array_key_exists($key, self::$imageUrlKeys) ) {
822 $pageInfo[self::$imageUrlKeys[$key]] = $value;
826 // If we hit here, was unrecognized (no action)