4 Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
6 This file is part of BookReader. The full source code can be found at GitHub:
7 http://github.com/openlibrary/bookreader
9 The canonical short name of an image type is the same as in the MIME type.
10 For example both .jpeg and .jpg are considered to have type "jpeg" since
11 the MIME type is "image/jpeg".
13 BookReader is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 BookReader is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
23 You should have received a copy of the GNU Affero General Public License
24 along with BookReader. If not, see <http://www.gnu.org/licenses/>.
27 require_once("BookReaderMeta.inc.php");
29 class BookReaderImages
31 public static $MIMES = array('gif' => 'image/gif',
33 'jpg' => 'image/jpeg',
34 'jpeg' => 'image/jpeg',
36 'tif' => 'image/tiff',
37 'tiff' => 'image/tiff');
39 public static $EXTENSIONS = array('gif' => 'gif',
47 // Width when generating thumbnails
48 public static $imageSizes = array(
55 // Keys in the image permalink urls, e.g. http://www.archive.org/download/itemid/page/cover_{keyval}_{keyval}.jpg
56 public static $imageUrlKeys = array(
66 // Paths to command-line tools
67 var $exiftool = '/petabox/sw/books/exiftool/exiftool';
68 var $kduExpand = '/petabox/sw/bin/kdu_expand';
71 * Serve an image request that requires looking up the book metadata
75 * - Parse the requested page (e.g. cover_t.jpg, n5_r4.jpg) to determine which page type,
76 * size and format (etc) is being requested
77 * - Determine the leaf number corresponding to the page
78 * - Determine scaling values
79 * - Serve image request now that all information has been gathered
82 function serveLookupRequest($requestEnv) {
83 $brm = new BookReaderMeta();
85 $metadata = $brm->buildMetadata($_REQUEST['id'], $_REQUEST['itemPath'], $_REQUEST['subPrefix'], $_REQUEST['server']);
86 } catch (Exception $e) {
87 $this->BRfatal($e->getMessage);
90 $page = $_REQUEST['page'];
92 // Index of image to return
95 // deal with subPrefix
96 if ($_REQUEST['subPrefix']) {
97 $parts = split('/', $_REQUEST['subPrefix']);
98 $bookId = $parts[count($parts) - 1 ];
100 $bookId = $_REQUEST['id'];
103 $pageInfo = $this->parsePageRequest($page, $bookId);
105 $basePage = $pageInfo['type'];
109 if (! array_key_exists('titleIndex', $metadata)) {
110 $this->BRfatal("No title page asserted in book");
112 $imageIndex = $metadata['titleIndex'];
116 if (! array_key_exists('coverIndices', $metadata)) {
117 $this->BRfatal("No cover asserted in book");
119 $imageIndex = $metadata['coverIndices'][0]; // $$$ TODO add support for other covers
124 // Cover page if book was published >= 1950
129 if ( array_key_exists('date', $metadata) && array_key_exists('coverIndices', $metadata) ) {
130 if ($brm->parseYear($metadata['date']) >= 1950) {
131 $imageIndex = $metadata['coverIndices'][0];
135 if (array_key_exists('titleIndex', $metadata)) {
136 $imageIndex = $metadata['titleIndex'];
139 if (array_key_exists('coverIndices', $metadata)) {
140 $imageIndex = $metadata['coverIndices'][0];
149 // Accessible index page
150 $imageIndex = intval($pageInfo['value']);
155 $index = array_search($pageInfo['value'], $metadata['pageNums']);
156 if ($index === FALSE) {
158 $this->BRfatal("Page not found");
162 $imageIndex = $index;
166 // Shouldn't be possible
167 $this->BRfatal("Unrecognized page type requested");
172 $leaf = $brm->leafForIndex($imageIndex, $metadata['leafNums']);
175 'zip' => $metadata['zip'],
176 'file' => $brm->imageFilePath($leaf, $metadata['subPrefix'], $metadata['imageFormat']),
180 // remove non-passthrough keys from pageInfo
181 unset($pageInfo['type']);
182 unset($pageInfo['value']);
184 // add pageinfo to request
185 $requestEnv = array_merge($pageInfo, $requestEnv);
187 // Return image data - will check privs
188 $this->serveRequest($requestEnv);
193 * Returns a page image when all parameters such as the image stack location are
198 * Get info about requested image (input)
199 * Get info about requested output format
200 * Determine processing parameters
203 * Clean up temporary files
205 function serveRequest($requestEnv) {
206 // Process some of the request parameters
207 $zipPath = $requestEnv['zip'];
208 $file = $requestEnv['file'];
210 $ext = $requestEnv['ext'];
215 if (isset($requestEnv['callback'])) {
216 // validate callback is valid JS identifier (only)
217 $callback = $requestEnv['callback'];
218 $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/';
219 if (! preg_match($identifierPatt, $callback)) {
220 $this->BRfatal('Invalid callback');
226 if ( !file_exists($zipPath) ) {
227 $this->BRfatal('Image stack does not exist at ' . $zipPath);
229 // Make sure the image stack is readable - return 403 if not
230 $this->checkPrivs($zipPath);
233 // Get the image size and depth
234 $imageInfo = $this->getImageInfo($zipPath, $file);
236 // Output json if requested
237 if ('json' == $ext) {
238 // $$$ we should determine the output size first based on requested scale
239 $this->outputJSON($imageInfo, $callback); // $$$ move to BookReaderRequest
243 // Unfortunately kakadu requires us to know a priori if the
244 // output file should be .ppm or .pgm. By decompressing to
245 // .bmp kakadu will write a file we can consistently turn into
246 // .pnm. Really kakadu should support .pnm as the file output
247 // extension and automatically write ppm or pgm format as
249 $this->decompressToBmp = true; // $$$ shouldn't be necessary if we use file info to determine output format
250 if ($this->decompressToBmp) {
251 $stdoutLink = '/tmp/stdout.bmp';
253 $stdoutLink = '/tmp/stdout.ppm';
256 $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
258 // Rotate is currently only supported for jp2 since it does not add server load
259 $allowedRotations = array("0", "90", "180", "270");
260 $rotate = $requestEnv['rotate'];
261 if ( !in_array($rotate, $allowedRotations) ) {
265 // Image conversion options
267 $jpegOptions = '-quality 75';
269 // The pbmreduce reduction factor produces an image with dimension 1/n
270 // The kakadu reduction factor produces an image with dimension 1/(2^n)
271 if (isset($requestEnv['height'])) {
272 $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']);
273 $scale = pow(2, $powReduce);
274 } else if (isset($requestEnv['width'])) {
275 $powReduce = $this->nearestPow2Reduce($requestEnv['width'], $imageInfo['width']);
276 $scale = pow(2, $powReduce);
279 // $$$ could be cleaner
280 // Provide next smaller power of two reduction
281 $scale = $requestEnv['scale'];
285 if (array_key_exists($scale, self::$imageSizes)) {
286 $srcRatio = floatval($imageInfo['width']) / floatval($imageInfo['height']);
289 $dimension = 'width';
291 $dimension = 'height';
293 $powReduce = $this->nearestPow2Reduce($this->imageSizes[$scale], $imageInfo[$dimension]);
295 $powReduce = $this->nearestPow2ForScale($scale);
297 $scale = pow(2, $powReduce);
300 // Override depending on source image format
301 // $$$ consider doing a 302 here instead, to make better use of the browser cache
302 // Limit scaling for 1-bit images. See https://bugs.edge.launchpad.net/bookreader/+bug/486011
303 if (1 == $imageInfo['bits']) {
308 // Hard limit so there are some black pixels to use!
316 if (!file_exists($stdoutLink))
318 system('ln -s /dev/stdout ' . $stdoutLink);
321 putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');
323 $unzipCmd = $this->getUnarchiveCommand($zipPath, $file);
325 $decompressCmd = $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink);
327 // Non-integer scaling is currently disabled on the cluster
328 // if (isset($_REQUEST['height'])) {
329 // $cmd .= " | pnmscale -height {$_REQUEST['height']} ";
334 $compressCmd = ' | pnmtopng ' . $pngOptions;
340 $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
341 $ext = 'jpeg'; // for matching below
346 if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
347 // Just pass through original data if same format and size
350 $cmd = $unzipCmd . $decompressCmd . $compressCmd;
355 $filenameForClient = $this->filenameForClient($file, $ext);
357 $headers = array('Content-type: '. self::$MIMES[$ext],
358 'Cache-Control: max-age=15552000',
359 'Content-disposition: inline; filename=' . $filenameForClient);
363 if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
364 // $$$ automated reporting
365 trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);
367 // Try some content-specific recovery
369 if ($imageInfo['type'] == 'jp2') {
370 $records = $this->getJp2Records($zipPath, $file);
371 if ($powReduce > intval($records['Clevels'])) {
372 $powReduce = $records['Clevels'];
373 $reduce = pow(2, $powReduce);
379 $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
380 if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
383 trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
388 $this->BRfatal('Problem processing image - command failed');
392 if (isset($tempFile)) {
397 function getUnarchiveCommand($archivePath, $file)
399 $lowerPath = strtolower($archivePath);
400 if (preg_match('/\.([^\.]+)$/', $lowerPath, $matches)) {
401 $suffix = $matches[1];
403 if ($suffix == 'zip') {
405 . escapeshellarg($archivePath)
406 . ' ' . escapeshellarg($file);
407 } else if ($suffix == 'tar') {
408 return ' ( 7z e -so '
409 . escapeshellarg($archivePath)
410 . ' ' . escapeshellarg($file) . ' 2>/dev/null ) ';
412 $this->BRfatal('Incompatible archive format');
416 $this->BRfatal('Bad image stack path');
419 $this->BRfatal('Bad image stack path or archive format');
424 * Returns the image type associated with the file extension.
function imageExtensionToType($extension)
{
    // $extension is looked up as a key of the class-level $EXTENSIONS map;
    // the value stored under that key is the image type that gets returned.
    $isKnown = array_key_exists($extension, self::$EXTENSIONS);

    if ($isKnown) {
        return self::$EXTENSIONS[$extension];
    }

    // No entry for this extension: report it through BRfatal(), which throws.
    $this->BRfatal('Unknown image extension');
}
437 * Get the image information. The returned associative array fields will
438 * vary depending on the image type. The basic keys are width, height, type
function getImageInfo($zipPath, $file)
{
    // $zipPath: path of the archive (image stack) holding the image.
    // $file:    path of the image inside that archive.
    //
    // Returns whatever getImageInfoFromExif() produces for the pair. Every
    // image type is handled by the EXIF reader (noted as fast by the
    // original author); no per-type dispatch is performed here.
    return $this->getImageInfoFromExif($zipPath, $file);
}
459 // Get the records of a JP2 as returned by kdu_expand
460 function getJp2Records($zipPath, $file)
463 $cmd = $this->getUnarchiveCommand($zipPath, $file)
464 . ' | ' . $this->kduExpand
465 . ' -no_seek -quiet -i /dev/stdin -record /dev/stdout';
469 foreach ($output as $line) {
470 $elems = explode("=", $line, 2);
471 if (1 == count($elems)) {
472 // delimiter not found
475 $records[$elems[0]] = $elems[1];
482 * Get the image width, height and depth using the EXIF information.
484 function getImageInfoFromExif($zipPath, $file)
487 // We look for all the possible tags of interest then act on the
488 // ones presumed present based on the file type
489 $tagsToGet = ' -ImageWidth -ImageHeight -FileType' // all formats
490 . ' -BitsPerComponent -ColorSpace' // jp2
491 . ' -BitDepth' // png
492 . ' -BitsPerSample'; // tiff
494 $cmd = $this->getUnarchiveCommand($zipPath, $file)
495 . ' | '. $this->exiftool . ' -S -fast' . $tagsToGet . ' -';
499 foreach ($output as $line) {
500 $keyValue = explode(": ", $line);
501 $tags[$keyValue[0]] = $keyValue[1];
504 $width = intval($tags["ImageWidth"]);
505 $height = intval($tags["ImageHeight"]);
506 $type = strtolower($tags["FileType"]);
510 $bits = intval($tags["BitsPerComponent"]);
513 $bits = intval($tags["BitsPerSample"]);
519 $bits = intval($tags["BitDepth"]);
522 $this->BRfatal("Unsupported image type $type for file $file in $zipPath");
527 $retval = Array('width' => $width, 'height' => $height,
528 'bits' => $bits, 'type' => $type);
534 * Output JSON given the imageInfo associative array
536 function outputJSON($imageInfo, $callback)
538 header('Content-type: text/plain');
539 $jsonOutput = json_encode($imageInfo);
541 $jsonOutput = $callback . '(' . $jsonOutput . ');';
546 function getDecompressCmd($imageType, $powReduce, $rotate, $scale, $stdoutLink) {
548 switch ($imageType) {
551 " | " . $this->kduExpand . " -no_seek -quiet -reduce $powReduce -rotate $rotate -i /dev/stdin -o " . $stdoutLink;
552 if ($this->decompressToBmp) {
553 // We suppress output since bmptopnm always outputs on stderr
554 $decompressCmd .= ' | (bmptopnm 2>/dev/null)';
559 // We need to create a temporary file for tifftopnm since it cannot
560 // work on a pipe (the file must be seekable).
561 // We use the BookReaderTiff prefix to give a hint in case things don't
563 $tempFile = tempnam("/tmp", "BookReaderTiff");
565 // $$$ look at bit depth when reducing
567 ' > ' . $tempFile . ' ; tifftopnm ' . $tempFile . ' 2>/dev/null' . $this->reduceCommand($scale);
571 $decompressCmd = ' | ( jpegtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
575 $decompressCmd = ' | ( pngtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
579 $this->BRfatal('Unknown image type: ' . $imageType);
582 return $decompressCmd;
585 // If the command has its initial output on stdout the headers will be emitted followed
586 // by the stdout output. If initial output is on stderr an error message will be
590 // true - if command emits stdout and has zero exit code
591 // false - command has initial output on stderr or non-zero exit code
592 // &$errorMessage - error string if there was an error
594 // $$$ Tested with our command-line image processing. May be deadlocks for
596 function passthruIfSuccessful($headers, $cmd, &$errorMessage)
601 $descriptorspec = array(
602 0 => array("pipe", "r"), // stdin is a pipe that the child will read from
603 1 => array("pipe", "w"), // stdout is a pipe that the child will write to
604 2 => array("pipe", "w"), // stderr is a pipe to write to
610 $process = proc_open($cmd, $descriptorspec, $pipes, $cwd, $env);
612 if (is_resource($process)) {
613 // $pipes now looks like this:
614 // 0 => writeable handle connected to child stdin
615 // 1 => readable handle connected to child stdout
616 // 2 => readable handle connected to child stderr
622 // check whether we get input first on stdout or stderr
623 $read = array($stdout, $stderr);
626 $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout
627 if (false === $numChanged) {
629 $errorMessage = 'Select failed';
632 if ($read[0] == $stdout && (1 == $numChanged)) {
633 // Got output first on stdout (only)
634 // $$$ make sure we get all stdout
635 $output = fopen('php://output', 'w');
636 foreach($headers as $header) {
639 stream_copy_to_stream($pipes[1], $output);
640 fclose($output); // okay since tied to special php://output
643 // Got output on stderr
644 // $$$ make sure we get all stderr
645 $errorMessage = stream_get_contents($stderr);
654 // It is important that you close any pipes before calling
655 // proc_close in order to avoid a deadlock
656 $cmdRet = proc_close($process);
659 $errorMessage .= "Command failed with result code " . $cmdRet;
/**
 * Abort the current operation by raising an exception.
 *
 * This method never returns normally.
 *
 * @param string $string Description of the failure; it is appended to the
 *                       "Image error: " prefix to form the exception message.
 * @throws Exception Always.
 */
function BRfatal($string) {
    $message = 'Image error: ' . $string;
    throw new Exception($message);
}
669 // Returns true if using a power node
670 function onPowerNode() {
671 exec("lspci | fgrep -c Realtek", $output, $return);
672 if ("0" != $output[0]) {
675 exec("egrep -q AMD /proc/cpuinfo", $output, $return);
683 function reduceCommand($scale) {
685 if ($this->onPowerNode()) {
686 return ' | pnmscale -reduce ' . $scale . ' 2>/dev/null ';
688 return ' | pnmscale -nomix -reduce ' . $scale . ' 2>/dev/null ';
695 function checkPrivs($filename) {
696 if (!is_readable($filename)) {
697 header('HTTP/1.1 403 Forbidden');
702 // Given file path (inside archive) and output file extension, return a filename
703 // suitable for Content-disposition header
704 function filenameForClient($filePath, $ext) {
705 $pathParts = pathinfo($filePath);
706 if ('jpeg' == $ext) {
709 return $pathParts['filename'] . '.' . $ext;
712 // Returns the nearest power of 2 reduction factor that results in a larger image
/**
 * Work out the power-of-two reduction exponent for bringing a source
 * dimension down towards a desired dimension.
 *
 * The source/desired ratio is passed to nearestPow2ForScale(), whose result
 * is returned unchanged.
 *
 * @param mixed $desiredDimension Requested width or height; converted with floatval().
 * @param mixed $sourceDimension  Width or height of the source image; converted with floatval().
 * @return int Reduction exponent from nearestPow2ForScale(), or 0 (no
 *             reduction) when the desired dimension is not a positive number.
 */
function nearestPow2Reduce($desiredDimension, $sourceDimension) {
    $desired = floatval($desiredDimension);

    // A zero, negative or non-numeric request (e.g. "h0" in the URL) has no
    // meaningful ratio. Dividing by it would raise DivisionByZeroError on
    // PHP 8, so serve the image unreduced instead.
    if ($desired <= 0) {
        return 0;
    }

    $ratio = floatval($sourceDimension) / $desired;
    return $this->nearestPow2ForScale($ratio);
}
718 // Returns nearest power of 2 reduction factor that results in a larger image
719 function nearestPow2ForScale($scale) {
720 $scale = intval($scale);
724 $binStr = decbin($scale); // convert to binary string. e.g. 5 -> '101'
725 return strlen($binStr) - 1;
729 * Parses a page request like "page5_r2.jpg" or "cover_t.jpg" to corresponding
730 * page type, size, reduce, and format
732 function parsePageRequest($pageRequest, $bookPrefix) {
734 // Will hold parsed results
738 $pageRequest = strtolower($pageRequest);
740 // Pull off extension
741 if (preg_match('#(.*)\.([^.]+)$#', $pageRequest, $matches) === 1) {
742 $pageRequest = $matches[1];
743 $extension = $matches[2];
744 if ($extension == 'jpeg') {
750 $pageInfo['extension'] = $extension;
753 $parts = explode('_', $pageRequest);
755 // Remove book prefix if it was included (historical)
756 if ($parts[0] == $bookPrefix) {
760 if (count($parts) === 0) {
761 $this->BRfatal('No page type specified');
763 $page = array_shift($parts);
769 'preview' => 'single',
773 // Look for known page types
774 foreach ( $pageTypes as $pageName => $kind ) {
775 if ( preg_match('#^(' . $pageName . ')(.*)#', $page, $matches) === 1 ) {
776 $pageInfo['type'] = $matches[1];
779 $pageInfo['value'] = $matches[2];
782 $pageInfo['value'] = intval($matches[2]);
790 if ( !array_key_exists('type', $pageInfo) ) {
791 $this->BRfatal('Unrecognized page type');
794 // Look for other known parts
795 foreach ($parts as $part) {
796 if ( in_array($part, self::$imageSizes) ) {
797 $pageInfo['size'] = $part;
801 // Key must be alpha, value must start with digit and contain digits, alpha, ',' or '.'
802 // Should prevent injection of strange values into the redirect to datanode
803 if ( preg_match('#^([a-z]+)(\d[a-z0-9,.]*)#', $part, $matches) === 0) {
809 $value = $matches[2];
811 if ( array_key_exists($key, self::$imageUrlKeys) ) {
812 $pageInfo[self::$imageUrlKeys[$key]] = $value;
816 // If we hit here, was unrecognized (no action)