4 Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
6 This file is part of BookReader. The full source code can be found at GitHub:
7 http://github.com/openlibrary/bookreader
9 The canonical short name of an image type is the same as in the MIME type.
10 For example both .jpeg and .jpg are considered to have type "jpeg" since
11 the MIME type is "image/jpeg".
13 BookReader is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 BookReader is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
23 You should have received a copy of the GNU Affero General Public License
24 along with BookReader. If not, see <http://www.gnu.org/licenses/>.
27 require_once("BookReaderMeta.inc.php");
29 class BookReaderImages
// Maps a file extension to the MIME type sent for it.
31 public $MIMES = array('gif' => 'image/gif',
33 'jpg' => 'image/jpeg',
34 'jpeg' => 'image/jpeg',
36 'tif' => 'image/tiff',
37 'tiff' => 'image/tiff');
// Maps a file extension to its canonical image type name; read by
// imageExtensionToType().
39 public $EXTENSIONS = array('gif' => 'gif',
// serveRequest() looks a named scale up in this table and uses the value as
// the target size when choosing a reduction factor.
47 // Width when generating thumbnails
48 public $imageSizes = array(
// exiftool is used by getImageInfoFromExif(); kdu_expand is used by
// getJp2Records() and getDecompressCmd().
// NOTE(review): these are absolute, site-specific paths - confirm they match
// the deployment before reuse.
55 // Paths to command-line tools
56 var $exiftool = '/petabox/sw/books/exiftool/exiftool';
57 var $kduExpand = '/petabox/sw/bin/kdu_expand';
60 * Serve an image request that requires looking up the book metadata
64 * - Parse the requested page (e.g. cover_t.jpg, n5_r4.jpg) to determine which page type,
65 * size and format (etc) is being requested
66 * - Determine the leaf number corresponding to the page
67 * - Determine scaling values
68 * - Serve image request now that all information has been gathered
// Entry point for requests that name a page symbolically. Reads id, itemPath,
// subPrefix, server and page from $_REQUEST, builds the book metadata through
// BookReaderMeta, maps the requested page to an image index and then a leaf,
// and finally delegates to serveRequest(). Problems are reported through
// BRfatal(), which throws an Exception.
// NOTE(review): the incoming $requestEnv looks like it is replaced by the
// zip/file array assembled near the end - confirm against the caller.
71 function serveLookupRequest($requestEnv) {
72 $brm = new BookReaderMeta();
74 $metadata = $brm->buildMetadata($_REQUEST['id'], $_REQUEST['itemPath'], $_REQUEST['subPrefix'], $_REQUEST['server']);
75 } catch (Exception $e) {
// NOTE(review): getMessage is read as a property here, not called, so the
// original error text is lost; this looks like it should be $e->getMessage().
76 $this->BRfatal($e->getMessage);
79 $page = $_REQUEST['page'];
81 // Index of image to return
84 // deal with subPrefix
// The book id is the last path component of subPrefix when one is supplied,
// otherwise the item id.
85 if ($_REQUEST['subPrefix']) {
// NOTE(review): split() was removed in PHP 7.0; explode('/', ...) is the
// equivalent for a literal delimiter.
86 $parts = split('/', $_REQUEST['subPrefix']);
87 $bookId = $parts[count($parts) - 1 ];
89 $bookId = $_REQUEST['id'];
92 $pageInfo = $this->parsePageRequest($page, $bookId);
// The page type selects how the image index is chosen below.
94 $basePage = $pageInfo['type'];
// Title page: the metadata must carry a titleIndex.
98 if (! array_key_exists('titleIndex', $metadata)) {
99 $this->BRfatal("No title page asserted in book");
101 $imageIndex = $metadata['titleIndex'];
// Cover: the metadata must carry coverIndices; the first entry is used.
105 if (! array_key_exists('coverIndices', $metadata)) {
106 $this->BRfatal("No cover asserted in book");
108 $imageIndex = $metadata['coverIndices'][0]; // $$$ TODO add support for other covers
// Preview: candidates are tried in order - the cover when the parsed
// publication year is 1950 or later, then the title page, then the cover.
113 // Cover page if book was published >= 1950
118 if ( array_key_exists('date', $metadata) && array_key_exists('coverIndices', $metadata) ) {
119 if ($brm->parseYear($metadata['date']) >= 1950) {
120 $imageIndex = $metadata['coverIndices'][0];
124 if (array_key_exists('titleIndex', $metadata)) {
125 $imageIndex = $metadata['titleIndex'];
128 if (array_key_exists('coverIndices', $metadata)) {
129 $imageIndex = $metadata['coverIndices'][0];
138 // Accessible index page
139 $imageIndex = intval($pageInfo['value']);
// Named page: the index is the position of the requested value in pageNums.
// NOTE(review): array_search() compares loosely here; pass true as the third
// argument if page labels must match exactly.
144 $index = array_search($pageInfo['value'], $metadata['pageNums']);
145 if ($index === FALSE) {
147 $this->BRfatal("Page not found");
151 $imageIndex = $index;
155 // Shouldn't be possible
156 $this->BRfatal("Unrecognized page type requested");
// Translate the image index into the leaf number used to name the file.
161 $leaf = $brm->leafForIndex($imageIndex, $metadata['leafNums']);
164 'zip' => $metadata['zip'],
165 'file' => $brm->imageFilePath($leaf, $metadata['subPrefix'], $metadata['imageFormat']),
169 // Return image data - will check privs
170 $this->serveRequest($requestEnv);
175 * Returns a page image when all parameters such as the image stack location are
180 * Get info about requested image (input)
181 * Get info about requested output format
182 * Determine processing parameters
185 * Clean up temporary files
// Serves one image, or its JSON description, from an archive.
// $requestEnv is an associative array. The keys read here are zip, file and
// ext, plus the optional callback, rotate, height, width and scale.
// The image is produced by a shell pipeline (unarchive, decompress, compress)
// whose stdout is streamed to the client by passthruIfSuccessful(). Failures
// are reported through BRfatal(), which throws an Exception.
187 function serveRequest($requestEnv) {
188 // Process some of the request parameters
189 $zipPath = $requestEnv['zip'];
190 $file = $requestEnv['file'];
192 $ext = $requestEnv['ext'];
197 if (isset($requestEnv['callback'])) {
198 // validate callback is valid JS identifier (only)
199 $callback = $requestEnv['callback'];
// Letters, digits, '$' and '_' only, not starting with a digit, so the
// callback name cannot carry arbitrary script into the response.
200 $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/';
201 if (! preg_match($identifierPatt, $callback)) {
202 $this->BRfatal('Invalid callback');
208 if ( !file_exists($zipPath) ) {
209 $this->BRfatal('Image stack does not exist at ' . $zipPath);
211 // Make sure the image stack is readable - return 403 if not
212 $this->checkPrivs($zipPath);
215 // Get the image size and depth
216 $imageInfo = $this->getImageInfo($zipPath, $file);
218 // Output json if requested
219 if ('json' == $ext) {
220 // $$$ we should determine the output size first based on requested scale
// NOTE(review): $callback is assigned above only when the request supplies
// one - confirm it is defined on every path that reaches this call.
221 $this->outputJSON($imageInfo, $callback); // $$$ move to BookReaderRequest
225 // Unfortunately kakadu requires us to know a priori if the
226 // output file should be .ppm or .pgm. By decompressing to
227 // .bmp kakadu will write a file we can consistently turn into
228 // .pnm. Really kakadu should support .pnm as the file output
229 // extension and automatically write ppm or pgm format as
// NOTE(review): decompressToBmp is not among the properties visible on this
// class, so this assignment creates it dynamically (deprecated since PHP 8.2).
// getDecompressCmd() reads the same flag.
231 $this->decompressToBmp = true; // $$$ shouldn't be necessary if we use file info to determine output format
232 if ($this->decompressToBmp) {
233 $stdoutLink = '/tmp/stdout.bmp';
235 $stdoutLink = '/tmp/stdout.ppm';
// Extension of the stored file, compared later with the requested one.
238 $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
240 // Rotate is currently only supported for jp2 since it does not add server load
241 $allowedRotations = array("0", "90", "180", "270");
// NOTE(review): 'rotate' is read without isset(), so a request that omits it
// raises an undefined-index notice before the whitelist check below.
242 $rotate = $requestEnv['rotate'];
243 if ( !in_array($rotate, $allowedRotations) ) {
247 // Image conversion options
249 $jpegOptions = '-quality 75';
251 // The pbmreduce reduction factor produces an image with dimension 1/n
252 // The kakadu reduction factor produces an image with dimension 1/(2^n)
// An explicit height takes precedence over width, and both over 'scale'.
// $powReduce is the exponent and $scale the matching factor 2^$powReduce.
253 if (isset($requestEnv['height'])) {
254 $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']);
255 $scale = pow(2, $powReduce);
256 } else if (isset($requestEnv['width'])) {
257 $powReduce = $this->nearestPow2Reduce($requestEnv['width'], $imageInfo['width']);
258 $scale = pow(2, $powReduce);
261 // $$$ could be cleaner
262 // Provide next smaller power of two reduction
263 $scale = $requestEnv['scale'];
// A scale that names an entry in imageSizes is treated as a target size in
// one dimension; any other value is treated as a numeric reduction factor.
267 if (array_key_exists($scale, $this->imageSizes)) {
268 $srcRatio = floatval($imageInfo['width']) / floatval($imageInfo['height']);
271 $dimension = 'width';
273 $dimension = 'height';
275 $powReduce = $this->nearestPow2Reduce($this->imageSizes[$scale], $imageInfo[$dimension]);
277 $powReduce = $this->nearestPow2ForScale($scale);
279 $scale = pow(2, $powReduce);
282 // Override depending on source image format
283 // $$$ consider doing a 302 here instead, to make better use of the browser cache
284 // Limit scaling for 1-bit images. See https://bugs.edge.launchpad.net/bookreader/+bug/486011
285 if (1 == $imageInfo['bits']) {
290 // Hard limit so there are some black pixels to use!
// The symlink gives /dev/stdout a path ending in the wanted extension; that
// path is later passed to kdu_expand as its -o argument.
298 if (!file_exists($stdoutLink))
300 system('ln -s /dev/stdout ' . $stdoutLink);
303 putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');
305 $unzipCmd = $this->getUnarchiveCommand($zipPath, $file);
307 $decompressCmd = $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink);
309 // Non-integer scaling is currently disabled on the cluster
310 // if (isset($_REQUEST['height'])) {
311 // $cmd .= " | pnmscale -height {$_REQUEST['height']} ";
316 $compressCmd = ' | pnmtopng ' . $pngOptions;
322 $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
323 $ext = 'jpeg'; // for matching below
328 if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
329 // Just pass through original data if same format and size
332 $cmd = $unzipCmd . $decompressCmd . $compressCmd;
337 $filenameForClient = $this->filenameForClient($file, $ext);
// NOTE(review): $MIMES is not a local variable in this function; the lookup
// table is the class property, so this should most likely read
// $this->MIMES[$ext]. As written the Content-type value is empty.
339 $headers = array('Content-type: '. $MIMES[$ext], // XXX is nginx swallowing this?
340 'Cache-Control: max-age=15552000',
341 'Content-disposition: inline; filename=' . $filenameForClient);
345 if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
346 // $$$ automated reporting
347 trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);
349 // Try some content-specific recovery
// For JP2 the requested reduction may exceed the number of resolution
// levels stored in the file (the Clevels record), so it is capped.
351 if ($imageInfo['type'] == 'jp2') {
352 $records = $this->getJp2Records($zipPath, $file);
353 if ($powReduce > intval($records['Clevels'])) {
354 $powReduce = $records['Clevels'];
// NOTE(review): $reduce is not read anywhere in the visible code, while the
// retry below still passes the old $scale - presumably $scale was meant here.
355 $reduce = pow(2, $powReduce);
361 $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
362 if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
365 trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
370 $this->BRfatal('Problem processing image - command failed');
// NOTE(review): no visible statement in this function assigns $tempFile (the
// TIFF temp file is a local of getDecompressCmd()), so this cleanup guard
// looks like it can never be true and the temp file is left behind - confirm.
374 if (isset($tempFile)) {
// Builds the shell command fragment that extracts one member of an archive.
// The archive kind is taken from the lower-cased suffix of $archivePath:
// 'zip' and 'tar' are handled, anything else is rejected through BRfatal().
// Both the archive path and the member name go through escapeshellarg()
// before being placed in the command.
379 function getUnarchiveCommand($archivePath, $file)
381 $lowerPath = strtolower($archivePath);
// Capture everything after the last dot as the suffix.
382 if (preg_match('/\.([^\.]+)$/', $lowerPath, $matches)) {
383 $suffix = $matches[1];
385 if ($suffix == 'zip') {
387 . escapeshellarg($archivePath)
388 . ' ' . escapeshellarg($file);
389 } else if ($suffix == 'tar') {
// 7z "e -so" extracts the member to stdout; its stderr is discarded.
390 return ' ( 7z e -so '
391 . escapeshellarg($archivePath)
392 . ' ' . escapeshellarg($file) . ' 2>/dev/null ) ';
394 $this->BRfatal('Incompatible archive format');
398 $this->BRfatal('Bad image stack path');
401 $this->BRfatal('Bad image stack path or archive format');
406 * Returns the image type associated with the file extension.
// Looks $extension up in the EXTENSIONS table and returns the mapped type.
// An extension that is not a key of the table is reported through BRfatal(),
// which throws an Exception.
// NOTE(review): the lookup is by exact key - confirm callers pass the
// extension already lower-cased.
408 function imageExtensionToType($extension)
411 if (array_key_exists($extension, $this->EXTENSIONS)) {
412 return $this->EXTENSIONS[$extension];
414 $this->BRfatal('Unknown image extension');
// Get the image information for a file stored inside an archive.
//
// $zipPath - path of the archive (image stack) that holds the image
// $file    - path of the image inside the archive
//
// Returns whatever getImageInfoFromExif() returns; that method assembles an
// associative array with the keys width, height, bits and type.
//
// The EXIF route is fast and is used for every image type. The statements that
// used to follow the return could never execute, and they invoked
// imageExtensionToType(), getImageInfoFromJp2() and getImageInfoFromExif() as
// global functions rather than as methods, so they have been removed.
function getImageInfo($zipPath, $file)
{
    return $this->getImageInfoFromExif($zipPath, $file);
}
441 // Get the records of a JP2 as returned by kdu_expand
// The archive member is piped into kdu_expand with "-record /dev/stdout", and
// each "name=value" line of the result is stored in $records under its name.
// serveRequest() reads the 'Clevels' entry from the result.
442 function getJp2Records($zipPath, $file)
445 $cmd = $this->getUnarchiveCommand($zipPath, $file)
446 . ' | ' . $this->kduExpand
447 . ' -no_seek -quiet -i /dev/stdin -record /dev/stdout';
451 foreach ($output as $line) {
// Split on the first '=' only, so values may themselves contain '='.
452 $elems = explode("=", $line, 2);
453 if (1 == count($elems)) {
454 // delimiter not found
457 $records[$elems[0]] = $elems[1];
464 * Get the image width, height and depth using the EXIF information.
// Pipes the archive member into exiftool and parses its "Tag: value" output.
// The result array carries width, height, bits and type, where type is the
// lower-cased FileType tag. The tag that supplies the bit depth differs by
// image type; an unsupported type is reported through BRfatal().
466 function getImageInfoFromExif($zipPath, $file)
469 // We look for all the possible tags of interest then act on the
470 // ones presumed present based on the file type
471 $tagsToGet = ' -ImageWidth -ImageHeight -FileType' // all formats
472 . ' -BitsPerComponent -ColorSpace' // jp2
473 . ' -BitDepth' // png
474 . ' -BitsPerSample'; // tiff
// exiftool reads the image from stdin (the trailing '-').
476 $cmd = $this->getUnarchiveCommand($zipPath, $file)
477 . ' | '. $this->exiftool . ' -S -fast' . $tagsToGet . ' -';
481 foreach ($output as $line) {
// NOTE(review): explode() is called without a limit and the result is not
// checked - a line without ": " raises an undefined-offset notice, and a value
// that itself contains ": " is cut short. Consider explode(": ", $line, 2)
// with a count check, as getJp2Records() does for '='.
482 $keyValue = explode(": ", $line);
483 $tags[$keyValue[0]] = $keyValue[1];
// NOTE(review): these tags are read without checking that exiftool returned
// them; if the command fails they are undefined.
486 $width = intval($tags["ImageWidth"]);
487 $height = intval($tags["ImageHeight"]);
488 $type = strtolower($tags["FileType"]);
492 $bits = intval($tags["BitsPerComponent"]);
495 $bits = intval($tags["BitsPerSample"]);
501 $bits = intval($tags["BitDepth"]);
504 $this->BRfatal("Unsupported image type $type for file $file in $zipPath");
509 $retval = Array('width' => $width, 'height' => $height,
510 'bits' => $bits, 'type' => $type);
516 * Output JSON given the imageInfo associative array
// Encodes $imageInfo with json_encode(). The wrapping statement below produces
// the JSONP form "callback(json);" from $callback.
// serveRequest() restricts $callback to a bare JavaScript identifier before
// calling this method.
518 function outputJSON($imageInfo, $callback)
// NOTE(review): the body is declared as text/plain; application/json (or
// application/javascript for the JSONP form) is the conventional type -
// confirm whether clients depend on the current value before changing it.
520 header('Content-type: text/plain');
521 $jsonOutput = json_encode($imageInfo);
523 $jsonOutput = $callback . '(' . $jsonOutput . ');';
// Builds the decompression stage of the shell pipeline for the given image
// type and returns it as a string beginning with a pipe or redirection, ready
// to be appended to the unarchive command.
// $powReduce and $rotate are handed to kdu_expand; $scale is handed to
// reduceCommand() for the netpbm-based branches; $stdoutLink is the output
// path given to kdu_expand. An unknown type is reported through BRfatal().
// NOTE(review): $powReduce and $rotate are interpolated into the command
// without escaping - confirm every caller passes validated numeric values.
528 function getDecompressCmd($imageType, $powReduce, $rotate, $scale, $stdoutLink) {
530 switch ($imageType) {
533 " | " . $this->kduExpand . " -no_seek -quiet -reduce $powReduce -rotate $rotate -i /dev/stdin -o " . $stdoutLink;
534 if ($this->decompressToBmp) {
535 // We suppress output since bmptopnm always outputs on stderr
536 $decompressCmd .= ' | (bmptopnm 2>/dev/null)';
541 // We need to create a temporary file for tifftopnm since it cannot
542 // work on a pipe (the file must be seekable).
543 // We use the BookReaderTiff prefix to give a hint in case things don't
// NOTE(review): $tempFile is local to this method and only its path is
// embedded in the returned command, so the caller has no handle for deleting
// the file afterwards - confirm how it is cleaned up.
545 $tempFile = tempnam("/tmp", "BookReaderTiff");
547 // $$$ look at bit depth when reducing
549 ' > ' . $tempFile . ' ; tifftopnm ' . $tempFile . ' 2>/dev/null' . $this->reduceCommand($scale);
553 $decompressCmd = ' | ( jpegtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
557 $decompressCmd = ' | ( pngtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
561 $this->BRfatal('Unknown image type: ' . $imageType);
564 return $decompressCmd;
// Runs $cmd through proc_open() and decides, from whichever of stdout or
// stderr becomes readable first, whether to stream the result to the client.
// $headers is a list of header strings for the success case, $cmd is the full
// shell pipeline, and $errorMessage is filled in by reference on failure.
567 // If the command has its initial output on stdout the headers will be emitted followed
568 // by the stdout output. If initial output is on stderr an error message will be
572 // true - if command emits stdout and has zero exit code
573 // false - command has initial output on stderr or non-zero exit code
574 // &$errorMessage - error string if there was an error
576 // $$$ Tested with our command-line image processing. May be deadlocks for
578 function passthruIfSuccessful($headers, $cmd, &$errorMessage)
583 $descriptorspec = array(
584 0 => array("pipe", "r"), // stdin is a pipe that the child will read from
585 1 => array("pipe", "w"), // stdout is a pipe that the child will write to
586 2 => array("pipe", "w"), // stderr is a pipe to write to
592 $process = proc_open($cmd, $descriptorspec, $pipes, $cwd, $env);
594 if (is_resource($process)) {
595 // $pipes now looks like this:
596 // 0 => writeable handle connected to child stdin
597 // 1 => readable handle connected to child stdout
598 // 2 => readable handle connected to child stderr
604 // check whether we get input first on stdout or stderr
605 $read = array($stdout, $stderr);
// Blocks until at least one stream is readable. stream_select() rewrites
// $read to hold only the ready streams, which the test below relies on.
608 $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout
609 if (false === $numChanged) {
611 $errorMessage = 'Select failed';
// Success requires stdout to be the only stream that is ready.
614 if ($read[0] == $stdout && (1 == $numChanged)) {
615 // Got output first on stdout (only)
616 // $$$ make sure we get all stdout
617 $output = fopen('php://output', 'w');
618 foreach($headers as $header) {
621 stream_copy_to_stream($pipes[1], $output);
622 fclose($output); // okay since tied to special php://output
625 // Got output on stderr
626 // $$$ make sure we get all stderr
627 $errorMessage = stream_get_contents($stderr);
636 // It is important that you close any pipes before calling
637 // proc_close in order to avoid a deadlock
638 $cmdRet = proc_close($process);
// The exit code is appended to whatever was already captured from stderr.
641 $errorMessage .= "Command failed with result code " . $cmdRet;
// Abort processing by raising an Exception.
//
// $string - description of the problem; the exception message is this text
//           prefixed with "Image error: "
//
// Never returns normally.
function BRfatal($string) {
    $message = 'Image error: ' . $string;
    throw new Exception($message);
}
651 // Returns true if using a power node
// Two shell probes of the host hardware are visible here: a count of lspci
// lines that mention Realtek, and a quiet search of /proc/cpuinfo for AMD.
// NOTE(review): only the Realtek condition is visible; how the AMD probe's
// exit status maps to the result should be confirmed. reduceCommand() runs
// this on every call, so each scaled image spawns these processes - consider
// caching the answer.
652 function onPowerNode() {
// fgrep -c prints the number of matching lines, so "0" means no match.
653 exec("lspci | fgrep -c Realtek", $output, $return);
654 if ("0" != $output[0]) {
// egrep -q prints nothing; the match result is carried by the exit status
// stored in $return.
657 exec("egrep -q AMD /proc/cpuinfo", $output, $return);
// Returns the pipeline stage that shrinks a PNM stream by the integer factor
// $scale using "pnmscale -reduce". Hosts for which onPowerNode() is false get
// the additional -nomix flag. stderr of pnmscale is discarded.
// NOTE(review): $scale is concatenated into the shell command unescaped -
// confirm callers only pass numeric values.
665 function reduceCommand($scale) {
667 if ($this->onPowerNode()) {
668 return ' | pnmscale -reduce ' . $scale . ' 2>/dev/null ';
670 return ' | pnmscale -nomix -reduce ' . $scale . ' 2>/dev/null ';
// Guards access to the image stack: when $filename is not readable by the
// PHP process a 403 Forbidden status line is sent.
// NOTE(review): what happens after the header is sent (exit or exception) is
// not established here - confirm that processing stops for unreadable files.
677 function checkPrivs($filename) {
678 if (!is_readable($filename)) {
679 header('HTTP/1.1 403 Forbidden');
684 // Given file path (inside archive) and output file extension, return a filename
685 // suitable for Content-disposition header
// The result is the base name of $filePath without its directory or original
// extension, followed by '.' and $ext. The 'jpeg' extension gets special
// handling before the name is built.
// NOTE(review): presumably 'jpeg' is shortened to 'jpg' for the client -
// confirm.
686 function filenameForClient($filePath, $ext) {
687 $pathParts = pathinfo($filePath);
688 if ('jpeg' == $ext) {
691 return $pathParts['filename'] . '.' . $ext;
// Returns the nearest power of 2 reduction factor that results in a larger image.
//
// $desiredDimension - requested output size along one axis (width or height)
// $sourceDimension  - size of the source image along the same axis
//
// Returns the exponent n for a reduction by 2^n, as computed by
// nearestPow2ForScale() from the source/desired ratio.
//
// The desired dimension comes from the request, so it may be zero, negative or
// not numeric at all. Those cases return 0 (no reduction) up front instead of
// dividing by zero, which throws DivisionByZeroError on PHP 8.
function nearestPow2Reduce($desiredDimension, $sourceDimension) {
    $desired = floatval($desiredDimension);
    if ($desired <= 0) {
        return 0;
    }

    $ratio = floatval($sourceDimension) / $desired;
    return $this->nearestPow2ForScale($ratio);
}
// Returns nearest power of 2 reduction factor that results in a larger image.
//
// $scale - desired linear reduction factor, e.g. 5 for one fifth of the
//          original size; fractional values are truncated toward zero
//
// Returns the largest n with 2^n <= $scale, or 0 when $scale is 1 or less.
//
// The guard is needed for negative input: decbin() renders negative numbers in
// two's complement, so without it a negative scale would yield a very large
// exponent instead of "no reduction".
function nearestPow2ForScale($scale) {
    $scale = intval($scale);
    if ($scale <= 1) {
        return 0;
    }

    // A positive integer with k binary digits lies in [2^(k-1), 2^k),
    // e.g. 5 -> '101' -> 3 digits -> exponent 2.
    return strlen(decbin($scale)) - 1;
}
711 * Parses a page request like "page5_r2.jpg" or "cover_t.jpg" to corresponding
712 * page type, size, reduce, and format
714 function parsePageRequest($pageRequest, $bookPrefix) {
718 // Pull off extension
719 if (preg_match('#(.*)\.([^.]+)$#', $pageRequest, $matches) === 1) {
720 $pageRequest = $matches[1];
721 $extension = $matches[2];
722 if ($extension == 'jpeg') {
728 $pageInfo['extension'] = $extension;
731 $parts = explode('_', $pageRequest);
733 // Remove book prefix if it was included (historical)
734 if ($parts[0] == $bookPrefix) {
738 if (count($parts) === 0) {
739 $this->BRfatal('No page type specified');
741 $page = array_shift($parts);
747 'preview' => 'single',
751 // Look for known page types
752 foreach ( $pageTypes as $pageName => $kind ) {
753 if ( preg_match('#^(' . $pageName . ')(.*)#', $page, $matches) === 1 ) {
754 $pageInfo['type'] = $matches[1];
757 $pageInfo['value'] = $matches[2];
760 $pageInfo['value'] = intval($matches[2]);
768 if ( !array_key_exists('type', $pageInfo) ) {
769 $this->BRfatal('Unrecognized page type');
772 // Look for other known parts
773 foreach ($parts as $part) {
774 $start = substr($part, 0, 1);
778 $pageInfo['size'] = $start;
781 $pageInfo['reduce'] = substr($part, 0);
784 // Unrecognized... just let it pass