4 Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
6 This file is part of BookReader. The full source code can be found at GitHub:
7 http://github.com/openlibrary/bookreader
9 The canonical short name of an image type is the same as in the MIME type.
10 For example both .jpeg and .jpg are considered to have type "jpeg" since
11 the MIME type is "image/jpeg".
13 BookReader is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 BookReader is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
23 You should have received a copy of the GNU Affero General Public License
24 along with BookReader. If not, see <http://www.gnu.org/licenses/>.
// Image-serving helper: holds the lookup tables and command-line tool paths
// that the methods of this class use to turn an archived page image into
// browser-ready output.
27 class BookReaderImages
// File extension => MIME type; serveRequest() builds its Content-type header
// from a lookup keyed the same way.
29 public $MIMES = array('gif' => 'image/gif',
31 'jpg' => 'image/jpeg',
32 'jpeg' => 'image/jpeg',
34 'tif' => 'image/tiff',
35 'tiff' => 'image/tiff');
// File extension => canonical image type, consulted by imageExtensionToType().
// NOTE(review): only the first entry of this literal is visible here.
37 public $EXTENSIONS = array('gif' => 'gif',
45 // Width when generating thumbnails
// Looked up by the requested 'scale' value in serveRequest(); the entries
// themselves are not visible in this view.
46 public $imageSizes = array(
53 // Paths to command-line tools
// exiftool is used to read image metadata, kdu_expand to decode JPEG 2000.
54 var $exiftool = '/petabox/sw/books/exiftool/exiftool';
55 var $kduExpand = '/petabox/sw/bin/kdu_expand';
58 * Returns a page image when all parameters such as the image stack location are
63 * Get info about requested image (input)
64 * Get info about requested output format
65 * Determine processing parameters
68 * Clean up temporary files
// Entry point for one image request.
//
// Reads these keys from $requestEnv: 'zip' (archive path), 'file' (member
// inside the archive), 'ext' (requested output format), and optionally
// 'callback', 'rotate', 'height', 'width' and 'scale'.
//
// When 'ext' is 'json' the image info is emitted through outputJSON().
// Otherwise a shell pipeline of the form
//     <unarchive> <decompress> <compress>
// is assembled and streamed to the client with passthruIfSuccessful(). If
// that fails and the source is a jp2, the reduction level is capped at the
// file's Clevels record and the pipeline is retried once.
//
// Failures are reported through BRfatal(), which throws an Exception.
70 function serveRequest($requestEnv) {
71 // Process some of the request parameters
72 $zipPath = $requestEnv['zip'];
73 $file = $requestEnv['file'];
75 $ext = $requestEnv['ext'];
80 if (isset($requestEnv['callback'])) {
81 // validate callback is valid JS identifier (only)
82 $callback = $requestEnv['callback'];
83 $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/';
84 if (! preg_match($identifierPatt, $callback)) {
85 $this->BRfatal('Invalid callback');
91 if ( !file_exists($zipPath) ) {
92 $this->BRfatal('Image stack does not exist at ' . $zipPath);
94 // Make sure the image stack is readable - return 403 if not
95 $this->checkPrivs($zipPath);
98 // Get the image size and depth
99 $imageInfo = $this->getImageInfo($zipPath, $file);
101 // Output json if requested
102 if ('json' == $ext) {
103 // $$$ we should determine the output size first based on requested scale
// NOTE(review): in the lines visible here $callback is only assigned when
// the request carries a 'callback' key - confirm it has a default otherwise.
104 $this->outputJSON($imageInfo, $callback); // $$$ move to BookReaderRequest
108 // Unfortunately kakadu requires us to know a priori if the
109 // output file should be .ppm or .pgm. By decompressing to
110 // .bmp kakadu will write a file we can consistently turn into
111 // .pnm. Really kakadu should support .pnm as the file output
112 // extension and automatically write ppm or pgm format as
114 $this->decompressToBmp = true; // $$$ shouldn't be necessary if we use file info to determine output format
// $stdoutLink is a path under /tmp that is symlinked to /dev/stdout further
// down; its extension differs depending on decompressToBmp.
115 if ($this->decompressToBmp) {
116 $stdoutLink = '/tmp/stdout.bmp';
118 $stdoutLink = '/tmp/stdout.ppm';
121 $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
123 // Rotate is currently only supported for jp2 since it does not add server load
124 $allowedRotations = array("0", "90", "180", "270");
125 $rotate = $requestEnv['rotate'];
126 if ( !in_array($rotate, $allowedRotations) ) {
130 // Image conversion options
132 $jpegOptions = '-quality 75';
134 // The pbmreduce reduction factor produces an image with dimension 1/n
135 // The kakadu reduction factor produces an image with dimension 1/(2^n)
// $powReduce is the exponent n and $scale the matching factor 2^n. An
// explicit 'height' is checked first, then 'width'.
136 if (isset($requestEnv['height'])) {
137 $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']);
138 $scale = pow(2, $powReduce);
139 } else if (isset($requestEnv['width'])) {
140 $powReduce = $this->nearestPow2Reduce($requestEnv['width'], $imageInfo['width']);
141 $scale = pow(2, $powReduce);
144 // $$$ could be cleaner
145 // Provide next smaller power of two reduction
146 $scale = $requestEnv['scale'];
// A 'scale' that is a key of $this->imageSizes is treated as a named size;
// any other value is handed to nearestPow2ForScale().
150 if (array_key_exists($scale, $this->imageSizes)) {
151 $srcRatio = floatval($imageInfo['width']) / floatval($imageInfo['height']);
154 $dimension = 'width';
156 $dimension = 'height';
158 $powReduce = $this->nearestPow2Reduce($this->imageSizes[$scale], $imageInfo[$dimension]);
160 $powReduce = $this->nearestPow2ForScale($scale);
162 $scale = pow(2, $powReduce);
165 // Override depending on source image format
166 // $$$ consider doing a 302 here instead, to make better use of the browser cache
167 // Limit scaling for 1-bit images. See https://bugs.edge.launchpad.net/bookreader/+bug/486011
168 if (1 == $imageInfo['bits']) {
173 // Hard limit so there are some black pixels to use!
// Create the /dev/stdout symlink on first use.
181 if (!file_exists($stdoutLink))
183 system('ln -s /dev/stdout ' . $stdoutLink);
186 putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');
188 $unzipCmd = $this->getUnarchiveCommand($zipPath, $file);
190 $decompressCmd = $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink);
192 // Non-integer scaling is currently disabled on the cluster
193 // if (isset($_REQUEST['height'])) {
194 // $cmd .= " | pnmscale -height {$_REQUEST['height']} ";
199 $compressCmd = ' | pnmtopng ' . $pngOptions;
205 $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
206 $ext = 'jpeg'; // for matching below
211 if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
212 // Just pass through original data if same format and size
215 $cmd = $unzipCmd . $decompressCmd . $compressCmd;
220 $filenameForClient = $this->filenameForClient($file, $ext);
// NOTE(review): $MIMES is a local variable that is never assigned in the
// lines visible here; the lookup table is the property $this->MIMES.
// Confirm whether this should read $this->MIMES[$ext].
222 $headers = array('Content-type: '. $MIMES[$ext], // XXX is nginx swallowing this?
223 'Cache-Control: max-age=15552000',
224 'Content-disposition: inline; filename=' . $filenameForClient);
228 if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
229 // $$$ automated reporting
230 trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);
232 // Try some content-specific recovery
234 if ($imageInfo['type'] == 'jp2') {
235 $records = $this->getJp2Records($zipPath, $file);
236 if ($powReduce > intval($records['Clevels'])) {
237 $powReduce = $records['Clevels'];
// NOTE(review): the retry below passes $scale, not $reduce, and $reduce is
// not read again in the visible lines - confirm which one was intended.
238 $reduce = pow(2, $powReduce);
244 $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
245 if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
248 trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
253 $this->BRfatal('Problem processing image - command failed');
// NOTE(review): no visible line in this method assigns $tempFile; the
// temporary file created in getDecompressCmd() is a local of that method.
257 if (isset($tempFile)) {
// Builds the shell command fragment that extracts $file from the archive at
// $archivePath. The archive type is chosen from the lower-cased suffix of
// $archivePath: 'zip' and 'tar' are handled, anything else ends in BRfatal().
// Both the archive path and the member name are passed through
// escapeshellarg() before being placed on the command line.
// NOTE(review): the line carrying the zip branch's command prefix is not
// visible in this view.
262 function getUnarchiveCommand($archivePath, $file)
264 $lowerPath = strtolower($archivePath);
// Capture everything after the last dot as the suffix.
265 if (preg_match('/\.([^\.]+)$/', $lowerPath, $matches)) {
266 $suffix = $matches[1];
268 if ($suffix == 'zip') {
270 . escapeshellarg($archivePath)
271 . ' ' . escapeshellarg($file);
272 } else if ($suffix == 'tar') {
// 7z "e -so" extracts the member to stdout; its stderr is discarded.
273 return ' ( 7z e -so '
274 . escapeshellarg($archivePath)
275 . ' ' . escapeshellarg($file) . ' 2>/dev/null ) ';
277 $this->BRfatal('Incompatible archive format');
281 $this->BRfatal('Bad image stack path');
284 $this->BRfatal('Bad image stack path or archive format');
289 * Returns the image type associated with the file extension.
// Looks $extension up in $this->EXTENSIONS and returns the mapped type;
// an extension that is not a key of that table ends in BRfatal().
291 function imageExtensionToType($extension)
294 if (array_key_exists($extension, $this->EXTENSIONS)) {
295 return $this->EXTENSIONS[$extension];
297 $this->BRfatal('Unknown image extension');
302 * Get the image information. The returned associative array fields will
303 * vary depending on the image type. The basic keys are width, height, type
// Delegates to getImageInfoFromExif() for every file.
// NOTE(review): the statements after the first return look unreachable, and
// they call imageExtensionToType(), getImageInfoFromJp2() and
// getImageInfoFromExif() as plain functions rather than through $this.
// getImageInfoFromJp2 is not defined in the part of the file visible here.
// Confirm this is dead code before relying on it.
306 function getImageInfo($zipPath, $file)
308 return $this->getImageInfoFromExif($zipPath, $file); // this is fast
311 $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
312 $type = imageExtensionToType($fileExt);
316 return getImageInfoFromJp2($zipPath, $file);
319 return getImageInfoFromExif($zipPath, $file);
324 // Get the records of JP2 as returned by kdu_expand
// Pipes the unarchived file into kdu_expand with "-record /dev/stdout" and
// parses each "key=value" output line into the $records array.
// serveRequest() reads the 'Clevels' entry of the result.
// NOTE(review): the lines that run $cmd and fill $output, and the final
// return, are not visible in this view.
325 function getJp2Records($zipPath, $file)
328 $cmd = $this->getUnarchiveCommand($zipPath, $file)
329 . ' | ' . $this->kduExpand
330 . ' -no_seek -quiet -i /dev/stdin -record /dev/stdout';
334 foreach ($output as $line) {
// Split on the first "=" only, so a value may itself contain "=".
335 $elems = explode("=", $line, 2);
336 if (1 == count($elems)) {
337 // delimiter not found
340 $records[$elems[0]] = $elems[1];
347 * Get the image width, height and depth using the EXIF information.
// Pipes the unarchived file into exiftool, parses its "Tag: value" output
// into $tags, and assembles an array with the keys 'width', 'height',
// 'bits' and 'type'. 'type' is the lower-cased FileType tag. 'bits' is read
// from BitsPerComponent, BitsPerSample or BitDepth; the tag-list comments
// below associate those with jp2, tiff and png respectively. A type that
// matches none of the handled cases ends in BRfatal().
// NOTE(review): the lines that run $cmd, the labels of the per-type
// branches, and the return of $retval are not visible in this view.
349 function getImageInfoFromExif($zipPath, $file)
352 // We look for all the possible tags of interest then act on the
353 // ones presumed present based on the file type
354 $tagsToGet = ' -ImageWidth -ImageHeight -FileType' // all formats
355 . ' -BitsPerComponent -ColorSpace' // jp2
356 . ' -BitDepth' // png
357 . ' -BitsPerSample'; // tiff
// The trailing "-" makes exiftool read the image from stdin.
359 $cmd = $this->getUnarchiveCommand($zipPath, $file)
360 . ' | '. $this->exiftool . ' -S -fast' . $tagsToGet . ' -';
364 foreach ($output as $line) {
// NOTE(review): explode() has no limit here, so a value that itself
// contains ": " would be cut at that point.
365 $keyValue = explode(": ", $line);
366 $tags[$keyValue[0]] = $keyValue[1];
369 $width = intval($tags["ImageWidth"]);
370 $height = intval($tags["ImageHeight"]);
371 $type = strtolower($tags["FileType"]);
375 $bits = intval($tags["BitsPerComponent"]);
378 $bits = intval($tags["BitsPerSample"]);
384 $bits = intval($tags["BitDepth"]);
387 $this->BRfatal("Unsupported image type $type for file $file in $zipPath");
392 $retval = Array('width' => $width, 'height' => $height,
393 'bits' => $bits, 'type' => $type);
399 * Output JSON given the imageInfo associative array
// Sends a text/plain Content-type header and JSON-encodes $imageInfo. The
// JSONP form wraps the JSON as "<callback>(<json>);". serveRequest()
// validates the callback as a JS identifier before calling this.
// NOTE(review): the condition guarding the wrap and the statement that
// writes $jsonOutput are not visible in this view.
401 function outputJSON($imageInfo, $callback)
403 header('Content-type: text/plain');
404 $jsonOutput = json_encode($imageInfo);
406 $jsonOutput = $callback . '(' . $jsonOutput . ');';
// Returns the shell pipeline fragment that converts the unarchived image on
// stdin into PNM, chosen by $imageType. An unrecognised type ends in
// BRfatal().
//
// $powReduce, $rotate and $stdoutLink are only used by the kdu_expand
// branch; the tifftopnm, jpegtopnm and pngtopnm branches scale through
// reduceCommand($scale) instead.
// NOTE(review): the case labels of the switch are not visible in this view.
411 function getDecompressCmd($imageType, $powReduce, $rotate, $scale, $stdoutLink) {
413 switch ($imageType) {
// kdu_expand reads stdin and writes to $stdoutLink, the path serveRequest()
// symlinks to /dev/stdout.
416 " | " . $this->kduExpand . " -no_seek -quiet -reduce $powReduce -rotate $rotate -i /dev/stdin -o " . $stdoutLink;
417 if ($this->decompressToBmp) {
418 // We suppress output since bmptopnm always outputs on stderr
419 $decompressCmd .= ' | (bmptopnm 2>/dev/null)';
424 // We need to create a temporary file for tifftopnm since it cannot
425 // work on a pipe (the file must be seekable).
426 // We use the BookReaderTiff prefix to give a hint in case things don't
// NOTE(review): $tempFile is local to this method and is not returned, so
// the caller cannot see it for cleanup - confirm how the file is removed.
428 $tempFile = tempnam("/tmp", "BookReaderTiff");
430 // $$$ look at bit depth when reducing
432 ' > ' . $tempFile . ' ; tifftopnm ' . $tempFile . ' 2>/dev/null' . $this->reduceCommand($scale);
436 $decompressCmd = ' | ( jpegtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
440 $decompressCmd = ' | ( pngtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
444 $this->BRfatal('Unknown image type: ' . $imageType);
447 return $decompressCmd;
450 // If the command has its initial output on stdout the headers will be emitted followed
451 // by the stdout output. If initial output is on stderr an error message will be
455 // true - if command emits stdout and has zero exit code
456 // false - command has initial output on stderr or non-zero exit code
457 // &$errorMessage - error string if there was an error
459 // $$$ Tested with our command-line image processing. May be deadlocks for
// Parameters:
//   $headers       - array of header strings, iterated before the command's
//                    stdout is copied to php://output
//   $cmd           - shell command line handed to proc_open()
//   &$errorMessage - filled in on failure (see above)
// NOTE(review): the assignments of $stdout/$stderr, the body of the header
// loop, the pipe closes and the return statements are not visible in this
// view.
461 function passthruIfSuccessful($headers, $cmd, &$errorMessage)
466 $descriptorspec = array(
467 0 => array("pipe", "r"), // stdin is a pipe that the child will read from
468 1 => array("pipe", "w"), // stdout is a pipe that the child will write to
469 2 => array("pipe", "w"), // stderr is a pipe to write to
475 $process = proc_open($cmd, $descriptorspec, $pipes, $cwd, $env);
477 if (is_resource($process)) {
478 // $pipes now looks like this:
479 // 0 => writeable handle connected to child stdin
480 // 1 => readable handle connected to child stdout
481 // 2 => readable handle connected to child stderr
487 // check whether we get input first on stdout or stderr
488 $read = array($stdout, $stderr);
// A NULL timeout makes stream_select() wait indefinitely.
491 $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout
492 if (false === $numChanged) {
494 $errorMessage = 'Select failed';
// Success path only when stdout is the single stream reported by
// stream_select().
497 if ($read[0] == $stdout && (1 == $numChanged)) {
498 // Got output first on stdout (only)
499 // $$$ make sure we get all stdout
500 $output = fopen('php://output', 'w');
501 foreach($headers as $header) {
504 stream_copy_to_stream($pipes[1], $output);
505 fclose($output); // okay since tied to special php://output
508 // Got output on stderr
509 // $$$ make sure we get all stderr
510 $errorMessage = stream_get_contents($stderr);
519 // It is important that you close any pipes before calling
520 // proc_close in order to avoid a deadlock
521 $cmdRet = proc_close($process);
524 $errorMessage .= "Command failed with result code " . $cmdRet;
/**
 * Aborts processing by raising an Exception.
 *
 * @param string $string description of the failure; it is appended to the
 *                       fixed "Image error: " prefix to form the message
 * @throws Exception always
 */
function BRfatal($string) {
    $message = 'Image error: ' . $string;
    throw new Exception($message);
}
534 // Returns true if using a power node
// Probes the host with two shell commands: a count of lspci lines that
// mention "Realtek", and a quiet egrep for "AMD" in /proc/cpuinfo.
// NOTE(review): the return statements, and how the second command's result
// is used, are not visible in this view.
535 function onPowerNode() {
536 exec("lspci | fgrep -c Realtek", $output, $return);
// fgrep -c prints the match count, so "0" means no Realtek device was listed.
537 if ("0" != $output[0]) {
540 exec("egrep -q AMD /proc/cpuinfo", $output, $return);
// Returns the pnmscale pipeline fragment that reduces the image by $scale.
// On a power node (see onPowerNode()) plain "-reduce" is used; the other
// visible return adds "-nomix". stderr of pnmscale is discarded either way.
// NOTE(review): some lines of this method are not visible in this view, so
// there may be further branches (for example for a $scale of 1) - confirm.
548 function reduceCommand($scale) {
550 if ($this->onPowerNode()) {
551 return ' | pnmscale -reduce ' . $scale . ' 2>/dev/null ';
553 return ' | pnmscale -nomix -reduce ' . $scale . ' 2>/dev/null ';
// Sends an HTTP 403 status line when $filename is not readable by the PHP
// process.
// NOTE(review): whatever follows the header call (for example stopping the
// request) is not visible in this view.
560 function checkPrivs($filename) {
561 if (!is_readable($filename)) {
562 header('HTTP/1.1 403 Forbidden');
567 // Given file path (inside archive) and output file extension, return a filename
568 // suitable for Content-disposition header
// The result is the base name of $filePath without its extension, followed
// by "." and $ext.
// NOTE(review): the body of the 'jpeg' special case is not visible in this
// view.
569 function filenameForClient($filePath, $ext) {
570 $pathParts = pathinfo($filePath);
571 if ('jpeg' == $ext) {
574 return $pathParts['filename'] . '.' . $ext;
/**
 * Returns the nearest power of 2 reduction factor that results in a larger image.
 *
 * The ratio source/desired is handed to nearestPow2ForScale(), and that
 * method's result is returned unchanged.
 *
 * @param int|float|string $desiredDimension requested width or height
 * @param int|float|string $sourceDimension  matching dimension of the source image
 * @return int whatever nearestPow2ForScale() computes for the ratio
 * @throws Exception via BRfatal() when the requested dimension is not positive
 */
function nearestPow2Reduce($desiredDimension, $sourceDimension) {
    $desired = floatval($desiredDimension);
    if ($desired <= 0) {
        // The requested size comes straight from the request parameters.
        // Dividing by zero raises DivisionByZeroError on PHP 8, which is an
        // Error rather than the Exception this class reports failures with,
        // and a negative size has no meaning. Reject both explicitly.
        $this->BRfatal('Invalid requested dimension');
    }
    $ratio = floatval($sourceDimension) / $desired;
    return $this->nearestPow2ForScale($ratio);
}
583 // Returns nearest power of 2 reduction factor that results in a larger image
// The value returned is an exponent: the scale is truncated to an integer
// and the position of its highest set bit is returned, e.g. 5 -> '101' -> 2.
// serveRequest() turns the result back into a factor with pow(2, ...).
// NOTE(review): the lines between the intval() and decbin() calls are not
// visible in this view; small scales may be handled there.
584 function nearestPow2ForScale($scale) {
585 $scale = intval($scale);
589 $binStr = decbin($scale); // convert to binary string. e.g. 5 -> '101'
590 return strlen($binStr) - 1;