4 Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
6 This file is part of BookReader. The full source code can be found at GitHub:
7 http://github.com/openlibrary/bookreader
9 The canonical short name of an image type is the same as in the MIME type.
10 For example both .jpeg and .jpg are considered to have type "jpeg" since
11 the MIME type is "image/jpeg".
13 BookReader is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 BookReader is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
23 You should have received a copy of the GNU Affero General Public License
24 along with BookReader. If not, see <http://www.gnu.org/licenses/>.
27 require_once("BookReaderMeta.inc.php");
29 class BookReaderImages
31 public $MIMES = array('gif' => 'image/gif',
33 'jpg' => 'image/jpeg',
34 'jpeg' => 'image/jpeg',
36 'tif' => 'image/tiff',
37 'tiff' => 'image/tiff');
39 public $EXTENSIONS = array('gif' => 'gif',
47 // Width when generating thumbnails
48 public $imageSizes = array(
55 // Paths to command-line tools
56 var $exiftool = '/petabox/sw/books/exiftool/exiftool';
57 var $kduExpand = '/petabox/sw/bin/kdu_expand';
/*
 * Serve an image request that requires looking up the book metadata
 *
 * Code path:
 *   - Build the book metadata from the request parameters
 *   - Parse the requested page (e.g. cover_t.jpg, n5_r4.jpg) to determine which page type,
 *     size and format (etc) is being requested
 *   - Determine the leaf number corresponding to the page
 *   - Determine scaling values
 *   - Serve image request now that all information has been gathered
 *
 * Reads 'id', 'itemPath', 'subPrefix', 'server' and 'page' from $_REQUEST.
 * The incoming $requestEnv is replaced by a freshly built environment before
 * serveRequest() is called.
 *
 * Throws Exception (via BRfatal) when the metadata cannot be built, the page
 * type is not recognized, or the requested page is not present in the book.
 */
function serveLookupRequest($requestEnv) {
    // Absent request parameters become null rather than raising notices
    $params = array();
    foreach (array('id', 'itemPath', 'subPrefix', 'server', 'page') as $name) {
        $params[$name] = isset($_REQUEST[$name]) ? $_REQUEST[$name] : null;
    }

    $brm = new BookReaderMeta();
    $metadata = null;
    try {
        $metadata = $brm->buildMetadata($params['id'], $params['itemPath'], $params['subPrefix'], $params['server']);
    } catch (Exception $e) {
        // getMessage is a method; reading it as a property passed null on
        $this->BRfatal($e->getMessage());
    }

    $page = $params['page'];

    // Index of image to return
    $imageIndex = null;

    // deal with subPrefix: the book id is its last path component
    if ($params['subPrefix']) {
        // explode() replaces split(), which was removed in PHP 7
        $parts = explode('/', $params['subPrefix']);
        $bookId = $parts[count($parts) - 1];
    } else {
        $bookId = $params['id'];
    }

    $pageInfo = $this->parsePageRequest($page, $bookId);

    $basePage = $pageInfo['type'];

    // NOTE(review): the case labels were missing from the damaged listing and
    // are inferred from the error messages and the page-name examples
    // (cover_t.jpg, n5_r4.jpg, page5_r2.jpg) - confirm against upstream.
    switch ($basePage) {
        case 'title':
            if (! array_key_exists('titleIndex', $metadata)) {
                $this->BRfatal("No title page asserted in book");
            }
            $imageIndex = $metadata['titleIndex'];
            break;

        case 'cover':
            if (! array_key_exists('coverIndices', $metadata)) {
                $this->BRfatal("No cover asserted in book");
            }
            $imageIndex = $metadata['coverIndices'][0]; // $$$ TODO add support for other covers
            break;

        case 'preview':
            // Preference is:
            //   Cover page if book was published >= 1950
            //   Title page
            //   Cover page
            //   First page
            $hasCover = array_key_exists('coverIndices', $metadata);
            if ($hasCover && array_key_exists('date', $metadata)
                    && $brm->parseYear($metadata['date']) >= 1950) {
                $imageIndex = $metadata['coverIndices'][0];
            } else if (array_key_exists('titleIndex', $metadata)) {
                $imageIndex = $metadata['titleIndex'];
            } else if ($hasCover) {
                $imageIndex = $metadata['coverIndices'][0];
            } else {
                // NOTE(review): fallback to index 0 restored from context - confirm
                $imageIndex = 0;
            }
            break;

        case 'n':
            // Accessible index page
            $imageIndex = intval($pageInfo['value']);
            break;

        case 'page':
            // Named page: look the asserted page number up in the book
            $index = array_search($pageInfo['value'], $metadata['pageNums']);
            if ($index === FALSE) {
                $this->BRfatal("Page not found");
            }
            $imageIndex = $index;
            break;

        default:
            // Shouldn't be possible
            $this->BRfatal("Unrecognized page type requested");
    }

    $leaf = $brm->leafForIndex($imageIndex, $metadata['leafNums']);

    $requestEnv = array(
        'zip' => $metadata['zip'],
        'file' => $brm->imageFilePath($leaf, $metadata['subPrefix'], $metadata['imageFormat']),
        // NOTE(review): this entry was missing from the damaged listing;
        // 'jpg' is assumed as the output format - confirm against upstream.
        'ext' => 'jpg',
    );

    if (! empty($pageInfo['reduce'])) {
        $requestEnv['reduce'] = $pageInfo['reduce'];
    }
    // $$$ handle scale, other sizes, rotation, etc

    // Return image data - will check privs
    $this->serveRequest($requestEnv);
}
/*
 * Returns a page image when all parameters such as the image stack location are
 * passed in.
 *
 * Approach:
 *   - Get info about requested image (input)
 *   - Get info about requested output format
 *   - Determine processing parameters
 *   - Run the unarchive / decompress / compress pipeline and stream the result
 *
 * $requestEnv keys read here: 'zip', 'file', 'ext', 'callback', 'rotate',
 * 'height', 'width' and 'scale'.
 *
 * When 'ext' is 'json' the image information is emitted as JSON and the
 * script exits. Throws Exception (via BRfatal) when the callback is invalid,
 * the image stack does not exist, or image processing fails.
 */
function serveRequest($requestEnv) {
    // Process some of the request parameters
    $zipPath = $requestEnv['zip'];
    $file = $requestEnv['file'];

    // Default to jpeg when no output format was requested
    $ext = empty($requestEnv['ext']) ? 'jpeg' : $requestEnv['ext'];

    $callback = null;
    if (isset($requestEnv['callback'])) {
        // validate callback is valid JS identifier (only)
        $callback = $requestEnv['callback'];
        $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/';
        if (! preg_match($identifierPatt, $callback)) {
            $this->BRfatal('Invalid callback');
        }
    }

    if ( !file_exists($zipPath) ) {
        $this->BRfatal('Image stack does not exist at ' . $zipPath);
    }
    // Make sure the image stack is readable - return 403 if not
    $this->checkPrivs($zipPath);

    // Get the image size and depth
    $imageInfo = $this->getImageInfo($zipPath, $file);

    // Output json if requested
    if ('json' == $ext) {
        // $$$ we should determine the output size first based on requested scale
        $this->outputJSON($imageInfo, $callback); // $$$ move to BookReaderRequest
        exit;
    }

    // Unfortunately kakadu requires us to know a priori if the
    // output file should be .ppm or .pgm. By decompressing to
    // .bmp kakadu will write a file we can consistently turn into
    // .pnm. Really kakadu should support .pnm as the file output
    // extension and automatically write ppm or pgm format as
    // appropriate.
    $this->decompressToBmp = true; // $$$ shouldn't be necessary if we use file info to determine output format
    if ($this->decompressToBmp) {
        $stdoutLink = '/tmp/stdout.bmp';
    } else {
        $stdoutLink = '/tmp/stdout.ppm';
    }

    $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));

    // Rotate is currently only supported for jp2 since it does not add server load.
    // Strict matching keeps look-alikes such as " 90" or "9e1" out of the command.
    $allowedRotations = array("0", "90", "180", "270");
    $rotate = (isset($requestEnv['rotate']) && is_scalar($requestEnv['rotate']))
        ? (string) $requestEnv['rotate']
        : "0";
    if ( !in_array($rotate, $allowedRotations, true) ) {
        $rotate = "0";
    }

    // Image conversion options
    $pngOptions = '';
    $jpegOptions = '-quality 75';

    // The pbmreduce reduction factor produces an image with dimension 1/n
    // The kakadu reduction factor produces an image with dimension 1/(2^n)
    if (isset($requestEnv['height'])) {
        $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']);
    } else if (isset($requestEnv['width'])) {
        $powReduce = $this->nearestPow2Reduce($requestEnv['width'], $imageInfo['width']);
    } else {
        // $$$ could be cleaner
        // Provide next smaller power of two reduction
        $scale = empty($requestEnv['scale']) ? 1 : $requestEnv['scale'];

        if ((is_string($scale) || is_int($scale)) && array_key_exists($scale, $this->imageSizes)) {
            // Named size: fit the longer side of the source to the target size.
            // Comparing the sides directly avoids dividing by a zero height.
            $dimension = ($imageInfo['width'] > $imageInfo['height']) ? 'width' : 'height';
            $powReduce = $this->nearestPow2Reduce($this->imageSizes[$scale], $imageInfo[$dimension]);
        } else {
            $powReduce = $this->nearestPow2ForScale($scale);
        }
    }
    $scale = pow(2, $powReduce);

    // Override depending on source image format
    // $$$ consider doing a 302 here instead, to make better use of the browser cache
    // Limit scaling for 1-bit images. See https://bugs.edge.launchpad.net/bookreader/+bug/486011
    // NOTE(review): the halving step and the cap of 4 were missing from the
    // damaged listing (only the comments survived) - confirm against upstream.
    if ((1 == $imageInfo['bits']) && ($scale > 1)) {
        $scale /= 2;
        $powReduce -= 1;

        // Hard limit so there are some black pixels to use!
        if ($scale > 4) {
            $scale = 4;
            $powReduce = 2;
        }
    }

    if (!file_exists($stdoutLink))
    {
        system('ln -s /dev/stdout ' . $stdoutLink);
    }

    putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');

    $unzipCmd = $this->getUnarchiveCommand($zipPath, $file);

    $decompressCmd = $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink);

    // Non-integer scaling is currently disabled on the cluster

    switch ($ext) {
        case 'png':
            $compressCmd = ' | pnmtopng ' . $pngOptions;
            break;

        case 'jpeg':
        case 'jpg':
        default:
            $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
            $ext = 'jpeg'; // for matching below
            break;
    }

    if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
        // Just pass through original data if same format and size
        $cmd = $unzipCmd;
    } else {
        $cmd = $unzipCmd . $decompressCmd . $compressCmd;
    }

    $filenameForClient = $this->filenameForClient($file, $ext);

    // The MIME table is a property; the bare local $MIMES was always undefined
    // and left the Content-type header without a value.
    $mimeType = isset($this->MIMES[$ext]) ? $this->MIMES[$ext] : 'image/' . $ext;
    $headers = array('Content-type: '. $mimeType, // XXX is nginx swallowing this?
                     'Cache-Control: max-age=15552000',
                     'Content-disposition: inline; filename=' . $filenameForClient);

    $errorMessage = '';
    if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
        // $$$ automated reporting
        trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);

        // Try some content-specific recovery
        $recovered = false;
        if ($imageInfo['type'] == 'jp2') {
            // Retry with a reduction the file can actually provide: clamp to
            // the number of levels kdu_expand reports, otherwise full size.
            $records = $this->getJp2Records($zipPath, $file);
            $maxLevels = isset($records['Clevels']) ? intval($records['Clevels']) : 0;
            $powReduce = ($powReduce > $maxLevels) ? $maxLevels : 0;
            $scale = pow(2, $powReduce);

            $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
            if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
                $recovered = true;
            } else {
                trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
            }
        }

        if (! $recovered) {
            $this->BRfatal('Problem processing image - command failed');
        }
    }

    // No temporary file is created in this scope, so there is nothing to
    // unlink here; the old isset($tempFile) check could never be true.
}
/*
 * Build the shell command that writes one member of an image stack to stdout.
 *
 * The archive format is taken from the (case-insensitive) suffix of
 * $archivePath: 'zip' is read with unzip, 'tar' with 7z. Both the archive
 * path and the member name are shell-escaped.
 *
 * Throws Exception (via BRfatal) when the path has no suffix or the suffix
 * is not a supported archive format.
 */
function getUnarchiveCommand($archivePath, $file)
{
    $lowerPath = strtolower($archivePath);
    if (! preg_match('/\.([^\.]+)$/', $lowerPath, $matches)) {
        $this->BRfatal('Bad image stack path');
    }

    $suffix = $matches[1];
    if ($suffix == 'zip') {
        return 'unzip -p '
            . escapeshellarg($archivePath)
            . ' ' . escapeshellarg($file);
    }
    if ($suffix == 'tar') {
        // 7z reports progress on stderr; discard it so only data is piped on
        return ' ( 7z e -so '
            . escapeshellarg($archivePath)
            . ' ' . escapeshellarg($file) . ' 2>/dev/null ) ';
    }

    $this->BRfatal('Incompatible archive format');
}
/*
 * Returns the image type associated with the file extension.
 *
 * The extension is matched case-insensitively against the keys of
 * $this->EXTENSIONS. Throws Exception (via BRfatal) for an unknown extension.
 */
function imageExtensionToType($extension)
{
    $key = strtolower((string) $extension);
    if (! array_key_exists($key, $this->EXTENSIONS)) {
        $this->BRfatal('Unknown image extension');
    }
    return $this->EXTENSIONS[$key];
}
/*
 * Get the image information. The returned associative array fields will
 * vary depending on the image type. The basic keys are width, height, type
 * and bits.
 *
 * This delegates to getImageInfoFromExif() for every file. The per-type
 * dispatch that used to follow the return statement could never run and
 * called functions that do not exist outside the class, so it was removed.
 */
function getImageInfo($zipPath, $file)
{
    return $this->getImageInfoFromExif($zipPath, $file); // this is fast
}
// Get the records of a JP2 as returned by kdu_expand.
//
// The archive member is piped into kdu_expand with "-record /dev/stdout" and
// every "name=value" line of the output becomes one entry of the returned
// associative array. Lines without '=' are ignored. A failing command simply
// produces an empty array.
function getJp2Records($zipPath, $file)
{
    $cmd = $this->getUnarchiveCommand($zipPath, $file)
        . ' | ' . $this->kduExpand
        . ' -no_seek -quiet -i /dev/stdin -record /dev/stdout';

    $output = array();
    exec($cmd, $output);

    $records = array();
    foreach ($output as $line) {
        $elems = explode("=", $line, 2);
        if (2 != count($elems)) {
            // delimiter not found
            continue;
        }
        // Trim so that callers looking up a bare key still find it
        $records[trim($elems[0])] = trim($elems[1]);
    }

    return $records;
}
/*
 * Get the image width, height and depth using the EXIF information.
 *
 * The archive member is piped into exiftool and its "Tag: value" output is
 * parsed. Returns an array with the keys 'width', 'height', 'bits' and
 * 'type' (the lower-cased FileType tag).
 *
 * Throws Exception (via BRfatal) when the reported file type is not handled
 * below, which includes the case where exiftool produced no FileType at all.
 */
function getImageInfoFromExif($zipPath, $file)
{
    // We look for all the possible tags of interest then act on the
    // ones presumed present based on the file type
    $tagsToGet = ' -ImageWidth -ImageHeight -FileType' // all formats
               . ' -BitsPerComponent -ColorSpace'      // jp2
               . ' -BitDepth'                          // png
               . ' -BitsPerSample';                    // tiff

    $cmd = $this->getUnarchiveCommand($zipPath, $file)
        . ' | '. $this->exiftool . ' -S -fast' . $tagsToGet . ' -';

    $output = array();
    exec($cmd, $output);

    $tags = array();
    foreach ($output as $line) {
        // Split on the first separator only so values containing ": " survive
        $keyValue = explode(": ", $line, 2);
        if (2 != count($keyValue)) {
            continue;
        }
        $tags[trim($keyValue[0])] = trim($keyValue[1]);
    }

    // Tags that exiftool did not report read as 0 / empty instead of raising notices
    $width = isset($tags["ImageWidth"]) ? intval($tags["ImageWidth"]) : 0;
    $height = isset($tags["ImageHeight"]) ? intval($tags["ImageHeight"]) : 0;
    $type = isset($tags["FileType"]) ? strtolower($tags["FileType"]) : '';

    // The tag carrying the bit depth depends on the format
    $bitTags = array(
        "jp2" => "BitsPerComponent",
        "tiff" => "BitsPerSample",
        "png" => "BitDepth",
    );

    if ("jpeg" == $type) {
        // NOTE(review): the jpeg branch was missing from the damaged listing;
        // a fixed depth of 8 is assumed - confirm against upstream.
        $bits = 8;
    } else if (isset($bitTags[$type])) {
        $tagName = $bitTags[$type];
        $bits = isset($tags[$tagName]) ? intval($tags[$tagName]) : 0;
    } else {
        $this->BRfatal("Unsupported image type $type for file $file in $zipPath");
    }

    return array('width' => $width, 'height' => $height,
                 'bits' => $bits, 'type' => $type);
}
/*
 * Output JSON given the imageInfo associative array
 *
 * When $callback is non-empty the JSON is wrapped as "callback(...);".
 * This function does not validate the callback name. The response is sent
 * with the content type text/plain.
 *
 * Throws Exception when $imageInfo cannot be encoded as JSON.
 */
function outputJSON($imageInfo, $callback)
{
    $jsonOutput = json_encode($imageInfo);
    if (false === $jsonOutput) {
        // Previously an encoding failure produced an empty response body
        throw new Exception("Image error: Could not encode image information as JSON");
    }

    if ($callback) {
        $jsonOutput = $callback . '(' . $jsonOutput . ');';
    }

    // header() only warns once output has started, so skip it in that case
    if (! headers_sent()) {
        header('Content-type: text/plain');
    }
    echo $jsonOutput;
}
/*
 * Build the pipeline fragment that turns the unarchived image into pnm data.
 *
 * The fragment is meant to be appended to the command returned by
 * getUnarchiveCommand(): it starts with ' | ' (or ' > ' for tiff) and ends
 * with pnm data on stdout.
 *
 *   $imageType  - 'jp2', 'tiff', 'jpeg' or 'png'
 *   $powReduce  - kakadu reduction level (image dimension is 1/(2^n)); jp2 only
 *   $rotate     - rotation in degrees; jp2 only
 *   $scale      - integer reduction factor handed to reduceCommand(); all
 *                 formats except jp2
 *   $stdoutLink - path of the symlink to /dev/stdout that kdu_expand writes to
 *
 * Throws Exception (via BRfatal) for an unknown image type or when the
 * temporary file for tiff input cannot be created.
 */
function getDecompressCmd($imageType, $powReduce, $rotate, $scale, $stdoutLink) {

    switch ($imageType) {
        case 'jp2':
            // Both numbers end up in a shell command, so force them to integers
            $reduceLevels = max(0, intval($powReduce));
            $rotation = intval($rotate);

            $decompressCmd =
                ' | ' . $this->kduExpand . ' -no_seek -quiet -reduce ' . $reduceLevels
                . ' -rotate ' . $rotation . ' -i /dev/stdin -o ' . escapeshellarg($stdoutLink);
            if (! empty($this->decompressToBmp)) {
                // We suppress output since bmptopnm always outputs on stderr
                $decompressCmd .= ' | (bmptopnm 2>/dev/null)';
            }
            break;

        case 'tiff':
            // We need to create a temporary file for tifftopnm since it cannot
            // work on a pipe (the file must be seekable).
            // We use the BookReaderTiff prefix to give a hint in case things don't
            // get cleaned up.
            $tempFile = tempnam("/tmp", "BookReaderTiff");
            if (false === $tempFile) {
                $this->BRfatal('Could not create temporary file');
            }
            $quotedTemp = escapeshellarg($tempFile);

            // $$$ look at bit depth when reducing
            // The subshell deletes the temporary file as soon as tifftopnm has
            // read it; no caller holds the name, so nothing else would.
            $decompressCmd =
                ' > ' . $quotedTemp
                . ' ; ( tifftopnm ' . $quotedTemp . ' 2>/dev/null ; rm -f ' . $quotedTemp . ' )'
                . $this->reduceCommand($scale);
            break;

        case 'jpeg':
            $decompressCmd = ' | ( jpegtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
            break;

        case 'png':
            $decompressCmd = ' | ( pngtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
            break;

        default:
            $this->BRfatal('Unknown image type: ' . $imageType);
    }
    return $decompressCmd;
}
// Runs $cmd and streams its output to the client if it looks successful.
//
// If the command has its initial output on stdout the headers will be emitted followed
// by the stdout output. If initial output is on stderr an error message will be
// returned.
//
// Returns:
//   true - if command emits stdout and has zero exit code
//   false - command could not be started, has initial output on stderr or
//           has a non-zero exit code
//   &$errorMessage - error string if there was an error (reset to '' on entry)
//
// $$$ Tested with our command-line image processing. May be deadlocks for
//     other cases.
function passthruIfSuccessful($headers, $cmd, &$errorMessage)
{
    $retVal = false;
    $errorMessage = '';

    $descriptorspec = array(
        0 => array("pipe", "r"), // stdin is a pipe that the child will read from
        1 => array("pipe", "w"), // stdout is a pipe that the child will write to
        2 => array("pipe", "w"), // stderr is a pipe to write to
    );

    $pipes = array();
    $process = proc_open($cmd, $descriptorspec, $pipes, NULL, NULL);
    if (! is_resource($process)) {
        $errorMessage = 'Could not start command';
        return false;
    }

    // $pipes now looks like this:
    // 0 => writeable handle connected to child stdin
    // 1 => readable handle connected to child stdout
    // 2 => readable handle connected to child stderr
    $stdout = $pipes[1];
    $stderr = $pipes[2];

    // Nothing is ever written to the child, so give it EOF on stdin right away
    fclose($pipes[0]);

    // check whether we get input first on stdout or stderr
    $read = array($stdout, $stderr);
    $write = NULL;
    $except = NULL;
    $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout

    if (false === $numChanged) {
        // Keep this message: falling through to the stderr branch used to
        // overwrite it.
        $errorMessage = 'Select failed';
    } else if ((1 == $numChanged) && in_array($stdout, $read, true)) {
        // Got output first on stdout (only)
        // $$$ make sure we get all stdout
        $output = fopen('php://output', 'w');
        foreach ($headers as $header) {
            header($header);
        }
        stream_copy_to_stream($stdout, $output);
        fclose($output); // okay since tied to special php://output
        $retVal = true;
    } else {
        // Got output on stderr
        // $$$ make sure we get all stderr
        $errorMessage = stream_get_contents($stderr);
    }

    fclose($stderr);
    fclose($stdout);

    // It is important that you close any pipes before calling
    // proc_close in order to avoid a deadlock
    $cmdRet = proc_close($process);
    if (0 != $cmdRet) {
        $retVal = false;
        $errorMessage .= "Command failed with result code " . $cmdRet;
    }

    return $retVal;
}
// Abort the current request by throwing an Exception whose message is
// $string prefixed with "Image error: ".
function BRfatal($string) {
    throw new Exception("Image error: $string");
}
// Returns true if using a power node.
//
// A node counts as a power node when lspci lists a Realtek device or, failing
// that, when /proc/cpuinfo mentions AMD. The probes spawn external commands,
// so the answer is computed once per process and then reused.
function onPowerNode() {
    static $isPowerNode = null;
    if (null !== $isPowerNode) {
        return $isPowerNode;
    }

    $pciOutput = array();
    $status = 0;
    exec("lspci 2>/dev/null | grep -F -c Realtek", $pciOutput, $status);

    // An empty result (e.g. exec unavailable) is not treated as a match
    if (isset($pciOutput[0]) && ("0" != trim($pciOutput[0]))) {
        $isPowerNode = true;
        return $isPowerNode;
    }

    $cpuOutput = array();
    exec("grep -q AMD /proc/cpuinfo 2>/dev/null", $cpuOutput, $status);
    $isPowerNode = ($status == 0);

    return $isPowerNode;
}
// Returns the pipeline fragment that shrinks pnm data by the integer factor
// $scale, or an empty string when no reduction is needed (factor of 1 or less).
// Nodes that are not power nodes get the cheaper -nomix variant of pnmscale.
function reduceCommand($scale) {
    // The factor is placed in a shell command, so force it to an integer
    $factor = intval($scale);
    if ($factor <= 1) {
        return '';
    }

    $options = $this->onPowerNode() ? '-reduce ' : '-nomix -reduce ';
    return ' | pnmscale ' . $options . $factor . ' 2>/dev/null ';
}
// Make sure $filename is readable by the current process. If it is not, a
// 403 response is sent and the script exits; otherwise nothing happens.
function checkPrivs($filename) {
    if (is_readable($filename)) {
        return;
    }

    header('HTTP/1.1 403 Forbidden');
    exit(0);
}
// Given file path (inside archive) and output file extension, return a filename
// suitable for Content-disposition header. The directory part and the original
// extension are dropped, and the 'jpeg' type is written with the 'jpg' suffix.
function filenameForClient($filePath, $ext) {
    $pathParts = pathinfo($filePath);
    $suffix = ('jpeg' == $ext) ? 'jpg' : $ext;
    return $pathParts['filename'] . '.' . $suffix;
}
// Returns the nearest power of 2 reduction factor that results in a larger image.
// The result is the exponent n, i.e. the source is to be reduced by 2^n.
// A desired dimension that is zero, negative or not numeric means no
// reduction (0) instead of a division by zero.
function nearestPow2Reduce($desiredDimension, $sourceDimension) {
    $desired = floatval($desiredDimension);
    if ($desired <= 0) {
        return 0;
    }

    $ratio = floatval($sourceDimension) / $desired;
    return $this->nearestPow2ForScale($ratio);
}
// Returns nearest power of 2 reduction factor that results in a larger image.
// The result is the exponent of the largest power of two that does not exceed
// the integer part of $scale; scales of 1 or less give 0 (no reduction).
function nearestPow2ForScale($scale) {
    $scale = intval($scale);
    if ($scale <= 1) {
        return 0;
    }

    // The length of the binary form gives the position of the highest set bit
    $binStr = decbin($scale); // convert to binary string. e.g. 5 -> '101'
    return strlen($binStr) - 1;
}
716 * Parses a page request like "page5_r2.jpg" or "cover_t.jpg" to corresponding
717 * page type, size, reduce, and format
719 function parsePageRequest($pageRequest, $bookPrefix) {
721 // Will hold parsed results
725 $pageRequest = strtolower($pageRequest);
727 // Pull off extension
728 if (preg_match('#(.*)\.([^.]+)$#', $pageRequest, $matches) === 1) {
729 $pageRequest = $matches[1];
730 $extension = $matches[2];
731 if ($extension == 'jpeg') {
737 $pageInfo['extension'] = $extension;
740 $parts = explode('_', $pageRequest);
742 // Remove book prefix if it was included (historical)
743 if ($parts[0] == $bookPrefix) {
747 if (count($parts) === 0) {
748 $this->BRfatal('No page type specified');
750 $page = array_shift($parts);
756 'preview' => 'single',
761 'large', 'thumb', 'medium', 'small', 'orig'
767 'region' => 'region',
773 // Look for known page types
774 foreach ( $pageTypes as $pageName => $kind ) {
775 if ( preg_match('#^(' . $pageName . ')(.*)#', $page, $matches) === 1 ) {
776 $pageInfo['type'] = $matches[1];
779 $pageInfo['value'] = $matches[2];
782 $pageInfo['value'] = intval($matches[2]);
790 if ( !array_key_exists('type', $pageInfo) ) {
791 $this->BRfatal('Unrecognized page type');
794 // Look for other known parts
795 foreach ($parts as $part) {
796 if ( in_array($part, $sizes) ) {
797 $pageInfo['size'] = $part;
801 // Key must be alpha, value must start with digit and contain digits, alpha, ',' or '.'
802 // Should prevent injection of strange values into the redirect to datanode
803 if ( preg_match('#^([a-z]+)(\d[a-z0-9,.]*)#', $part, $matches) === 0) {
809 $value = $matches[2];
811 if ( array_key_exists($key, $keys) ) {
812 $pageInfo[$keys[$key]] = $value;
816 // If we hit here, was unrecognized (no action)