4 Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
6 This file is part of BookReader. The full source code can be found at GitHub:
7 http://github.com/openlibrary/bookreader
9 The canonical short name of an image type is the same as in the MIME type.
10 For example both .jpeg and .jpg are considered to have type "jpeg" since
11 the MIME type is "image/jpeg".
13 BookReader is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 BookReader is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
23 You should have received a copy of the GNU Affero General Public License
24 along with BookReader. If not, see <http://www.gnu.org/licenses/>.
27 class BookReaderImages
29 public $MIMES = array('gif' => 'image/gif',
31 'jpg' => 'image/jpeg',
32 'jpeg' => 'image/jpeg',
34 'tif' => 'image/tiff',
35 'tiff' => 'image/tiff');
37 public $EXTENSIONS = array('gif' => 'gif',
45 // Width when generating thumbnails
46 public $imageSizes = array(
53 // Paths to command-line tools
54 var $exiftool = '/petabox/sw/books/exiftool/exiftool';
55 var $kduExpand = '/petabox/sw/bin/kdu_expand';
60 * Get info about requested image (input)
61 * Get info about requested output format
62 * Determine processing parameters
65 * Clean up temporary files
68 function serveRequest($requestEnv) {
69 // Process some of the request parameters
70 $zipPath = $requestEnv['zip'];
71 $file = $requestEnv['file'];
73 $ext = $requestEnv['ext'];
78 if (isset($requestEnv['callback'])) {
79 // validate callback is valid JS identifier (only)
80 $callback = $requestEnv['callback'];
81 $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/';
82 if (! preg_match($identifierPatt, $callback)) {
83 $this->BRfatal('Invalid callback');
89 if ( !file_exists($zipPath) ) {
90 $this->BRfatal('Image stack does not exist');
92 // Make sure the image stack is readable - return 403 if not
93 $this->checkPrivs($zipPath);
96 // Get the image size and depth
97 $imageInfo = $this->getImageInfo($zipPath, $file);
99 // Output json if requested
100 if ('json' == $ext) {
101 // $$$ we should determine the output size first based on requested scale
102 $this->outputJSON($imageInfo, $callback); // $$$ move to BookReaderRequest
106 // Unfortunately kakadu requires us to know a priori if the
107 // output file should be .ppm or .pgm. By decompressing to
108 // .bmp kakadu will write a file we can consistently turn into
109 // .pnm. Really kakadu should support .pnm as the file output
110 // extension and automatically write ppm or pgm format as
112 $this->decompressToBmp = true; // $$$ shouldn't be necessary if we use file info to determine output format
113 if ($this->decompressToBmp) {
114 $stdoutLink = '/tmp/stdout.bmp';
116 $stdoutLink = '/tmp/stdout.ppm';
119 $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
121 // Rotate is currently only supported for jp2 since it does not add server load
122 $allowedRotations = array("0", "90", "180", "270");
123 $rotate = $requestEnv['rotate'];
124 if ( !in_array($rotate, $allowedRotations) ) {
128 // Image conversion options
130 $jpegOptions = '-quality 75';
132 // The pbmreduce reduction factor produces an image with dimension 1/n
133 // The kakadu reduction factor produces an image with dimension 1/(2^n)
134 if (isset($requestEnv['height'])) {
135 $powReduce = $this->nearestPow2Reduce($requestEnv['height'], $imageInfo['height']);
136 $scale = pow(2, $powReduce);
137 } else if (isset($requestEnv['width'])) {
138 $powReduce = $this->nearestPow2Reduce($requestEnv['width'], $imageInfo['width']);
139 $scale = pow(2, $powReduce);
142 // $$$ could be cleaner
143 // Provide next smaller power of two reduction
144 $scale = $requestEnv['scale'];
148 if (array_key_exists($scale, $this->imageSizes)) {
149 $srcRatio = floatval($imageInfo['width']) / floatval($imageInfo['height']);
152 $dimension = 'width';
154 $dimension = 'height';
156 $powReduce = $this->nearestPow2Reduce($this->imageSizes[$scale], $imageInfo[$dimension]);
158 $powReduce = $this->nearestPow2ForScale($scale);
160 $scale = pow(2, $powReduce);
163 // Override depending on source image format
164 // $$$ consider doing a 302 here instead, to make better use of the browser cache
165 // Limit scaling for 1-bit images. See https://bugs.edge.launchpad.net/bookreader/+bug/486011
166 if (1 == $imageInfo['bits']) {
171 // Hard limit so there are some black pixels to use!
179 if (!file_exists($stdoutLink))
181 system('ln -s /dev/stdout ' . $stdoutLink);
184 putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');
186 $unzipCmd = $this->getUnarchiveCommand($zipPath, $file);
188 $decompressCmd = $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink);
190 // Non-integer scaling is currently disabled on the cluster
191 // if (isset($_REQUEST['height'])) {
192 // $cmd .= " | pnmscale -height {$_REQUEST['height']} ";
197 $compressCmd = ' | pnmtopng ' . $pngOptions;
203 $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
204 $ext = 'jpeg'; // for matching below
209 if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
210 // Just pass through original data if same format and size
213 $cmd = $unzipCmd . $decompressCmd . $compressCmd;
218 $filenameForClient = $this->filenameForClient($file, $ext);
220 $headers = array('Content-type: '. $MIMES[$ext], // XXX is nginx swallowing this?
221 'Cache-Control: max-age=15552000',
222 'Content-disposition: inline; filename=' . $filenameForClient);
226 if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
227 // $$$ automated reporting
228 trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);
230 // Try some content-specific recovery
232 if ($imageInfo['type'] == 'jp2') {
233 $records = $this->getJp2Records($zipPath, $file);
234 if ($powReduce > intval($records['Clevels'])) {
235 $powReduce = $records['Clevels'];
236 $reduce = pow(2, $powReduce);
242 $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
243 if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
246 trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
251 $this->BRfatal('Problem processing image - command failed');
255 if (isset($tempFile)) {
260 function getUnarchiveCommand($archivePath, $file)
262 $lowerPath = strtolower($archivePath);
263 if (preg_match('/\.([^\.]+)$/', $lowerPath, $matches)) {
264 $suffix = $matches[1];
266 if ($suffix == 'zip') {
268 . escapeshellarg($archivePath)
269 . ' ' . escapeshellarg($file);
270 } else if ($suffix == 'tar') {
271 return ' ( 7z e -so '
272 . escapeshellarg($archivePath)
273 . ' ' . escapeshellarg($file) . ' 2>/dev/null ) ';
275 $this->BRfatal('Incompatible archive format');
279 $this->BRfatal('Bad image stack path');
282 $this->BRfatal('Bad image stack path or archive format');
287 * Returns the image type associated with the file extension.
289 function imageExtensionToType($extension)
292 if (array_key_exists($extension, $this->EXTENSIONS)) {
293 return $this->EXTENSIONS[$extension];
295 $this->BRfatal('Unknown image extension');
300 * Get the image information. The returned associative array fields will
301 * vary depending on the image type. The basic keys are width, height, type
304 function getImageInfo($zipPath, $file)
306 return $this->getImageInfoFromExif($zipPath, $file); // this is fast
309 $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
310 $type = imageExtensionToType($fileExt);
314 return getImageInfoFromJp2($zipPath, $file);
317 return getImageInfoFromExif($zipPath, $file);
322 // Get the records of the JP2 as returned by kdu_expand
323 function getJp2Records($zipPath, $file)
326 $cmd = $this->getUnarchiveCommand($zipPath, $file)
327 . ' | ' . $this->kduExpand
328 . ' -no_seek -quiet -i /dev/stdin -record /dev/stdout';
332 foreach ($output as $line) {
333 $elems = explode("=", $line, 2);
334 if (1 == count($elems)) {
335 // delimiter not found
338 $records[$elems[0]] = $elems[1];
345 * Get the image width, height and depth using the EXIF information.
347 function getImageInfoFromExif($zipPath, $file)
350 // We look for all the possible tags of interest then act on the
351 // ones presumed present based on the file type
352 $tagsToGet = ' -ImageWidth -ImageHeight -FileType' // all formats
353 . ' -BitsPerComponent -ColorSpace' // jp2
354 . ' -BitDepth' // png
355 . ' -BitsPerSample'; // tiff
357 $cmd = $this->getUnarchiveCommand($zipPath, $file)
358 . ' | '. $this->exiftool . ' -S -fast' . $tagsToGet . ' -';
362 foreach ($output as $line) {
363 $keyValue = explode(": ", $line);
364 $tags[$keyValue[0]] = $keyValue[1];
367 $width = intval($tags["ImageWidth"]);
368 $height = intval($tags["ImageHeight"]);
369 $type = strtolower($tags["FileType"]);
373 $bits = intval($tags["BitsPerComponent"]);
376 $bits = intval($tags["BitsPerSample"]);
382 $bits = intval($tags["BitDepth"]);
385 $this->BRfatal("Unsupported image type");
390 $retval = Array('width' => $width, 'height' => $height,
391 'bits' => $bits, 'type' => $type);
397 * Output JSON given the imageInfo associative array
399 function outputJSON($imageInfo, $callback)
401 header('Content-type: text/plain');
402 $jsonOutput = json_encode($imageInfo);
404 $jsonOutput = $callback . '(' . $jsonOutput . ');';
409 function getDecompressCmd($imageType, $powReduce, $rotate, $scale, $stdoutLink) {
411 switch ($imageType) {
414 " | " . $this->kduExpand . " -no_seek -quiet -reduce $powReduce -rotate $rotate -i /dev/stdin -o " . $stdoutLink;
415 if ($this->decompressToBmp) {
416 // We suppress output since bmptopnm always outputs on stderr
417 $decompressCmd .= ' | (bmptopnm 2>/dev/null)';
422 // We need to create a temporary file for tifftopnm since it cannot
423 // work on a pipe (the file must be seekable).
424 // We use the BookReaderTiff prefix to give a hint in case things don't
426 $tempFile = tempnam("/tmp", "BookReaderTiff");
428 // $$$ look at bit depth when reducing
430 ' > ' . $tempFile . ' ; tifftopnm ' . $tempFile . ' 2>/dev/null' . $this->reduceCommand($scale);
434 $decompressCmd = ' | ( jpegtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
438 $decompressCmd = ' | ( pngtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
442 $this->BRfatal('Unknown image type: ' . $imageType);
445 return $decompressCmd;
448 // If the command has its initial output on stdout the headers will be emitted followed
449 // by the stdout output. If initial output is on stderr an error message will be
453 // true - if command emits stdout and has zero exit code
454 // false - command has initial output on stderr or non-zero exit code
455 // &$errorMessage - error string if there was an error
457 // $$$ Tested with our command-line image processing. May be deadlocks for
459 function passthruIfSuccessful($headers, $cmd, &$errorMessage)
464 $descriptorspec = array(
465 0 => array("pipe", "r"), // stdin is a pipe that the child will read from
466 1 => array("pipe", "w"), // stdout is a pipe that the child will write to
467 2 => array("pipe", "w"), // stderr is a pipe to write to
473 $process = proc_open($cmd, $descriptorspec, $pipes, $cwd, $env);
475 if (is_resource($process)) {
476 // $pipes now looks like this:
477 // 0 => writeable handle connected to child stdin
478 // 1 => readable handle connected to child stdout
479 // 2 => readable handle connected to child stderr
485 // check whether we get input first on stdout or stderr
486 $read = array($stdout, $stderr);
489 $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout
490 if (false === $numChanged) {
492 $errorMessage = 'Select failed';
495 if ($read[0] == $stdout && (1 == $numChanged)) {
496 // Got output first on stdout (only)
497 // $$$ make sure we get all stdout
498 $output = fopen('php://output', 'w');
499 foreach($headers as $header) {
502 stream_copy_to_stream($pipes[1], $output);
503 fclose($output); // okay since tied to special php://output
506 // Got output on stderr
507 // $$$ make sure we get all stderr
508 $errorMessage = stream_get_contents($stderr);
517 // It is important that you close any pipes before calling
518 // proc_close in order to avoid a deadlock
519 $cmdRet = proc_close($process);
522 $errorMessage .= "Command failed with result code " . $cmdRet;
528 function BRfatal($string) {
529 throw new Exception("Image error: $string");
530 //echo "alert('$string');\n";
534 // Returns true if using a power node
535 function onPowerNode() {
536 exec("lspci | fgrep -c Realtek", $output, $return);
537 if ("0" != $output[0]) {
540 exec("egrep -q AMD /proc/cpuinfo", $output, $return);
548 function reduceCommand($scale) {
550 if ($this->onPowerNode()) {
551 return ' | pnmscale -reduce ' . $scale . ' 2>/dev/null ';
553 return ' | pnmscale -nomix -reduce ' . $scale . ' 2>/dev/null ';
560 function checkPrivs($filename) {
561 if (!is_readable($filename)) {
562 header('HTTP/1.1 403 Forbidden');
567 // Given file path (inside archive) and output file extension, return a filename
568 // suitable for Content-disposition header
569 function filenameForClient($filePath, $ext) {
570 $pathParts = pathinfo($filePath);
571 if ('jpeg' == $ext) {
574 return $pathParts['filename'] . '.' . $ext;
577 // Returns the nearest power of 2 reduction factor that results in a larger image
578 function nearestPow2Reduce($desiredDimension, $sourceDimension) {
579 $ratio = floatval($sourceDimension) / floatval($desiredDimension);
580 return $this->nearestPow2ForScale($ratio);
583 // Returns nearest power of 2 reduction factor that results in a larger image
584 function nearestPow2ForScale($scale) {
585 $scale = intval($scale);
589 $binStr = decbin($scale); // convert to binary string. e.g. 5 -> '101'
590 return strlen($binStr) - 1;