4 Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
6 This file is part of BookReader. The full source code can be found at GitHub:
7 http://github.com/openlibrary/bookreader
9 The canonical short name of an image type is the same as in the MIME type.
10 For example both .jpeg and .jpg are considered to have type "jpeg" since
11 the MIME type is "image/jpeg".
13 BookReader is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 BookReader is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
23 You should have received a copy of the GNU Affero General Public License
24 along with BookReader. If not, see <http://www.gnu.org/licenses/>.
27 class BookReaderImages
// Maps a file extension to the MIME type sent in the Content-type header.
// NOTE(review): the 'jp2' and 'png' rows were missing from the truncated
// source; they are restored because the rest of the class handles both
// formats. Confirm against the upstream file.
public $MIMES = array(
    'gif'  => 'image/gif',
    'jp2'  => 'image/jp2',
    'jpg'  => 'image/jpeg',
    'jpeg' => 'image/jpeg',
    'png'  => 'image/png',
    'tif'  => 'image/tiff',
    'tiff' => 'image/tiff',
);

// Maps a file extension to the canonical short name of its image type
// (the subtype of the MIME type, so both 'jpg' and 'jpeg' give 'jpeg').
// NOTE(review): only the first row was visible in the truncated source; the
// remaining rows mirror the keys of $MIMES. Confirm against the upstream file.
public $EXTENSIONS = array(
    'gif'  => 'gif',
    'jp2'  => 'jp2',
    'jpeg' => 'jpeg',
    'jpg'  => 'jpeg',
    'png'  => 'png',
    'tif'  => 'tiff',
    'tiff' => 'tiff',
);

// Paths to command-line tools
var $exiftool = '/petabox/sw/books/exiftool/exiftool';
var $kduExpand = '/petabox/sw/bin/kdu_expand';
// Processing steps for an image request:
//   - Get info about requested image (input)
//   - Get info about requested output format
//   - Determine processing parameters
//   - Clean up temporary files
/**
 * Serve one image (or its JSON description) out of an archived image stack.
 *
 * The request is validated, the source image is inspected, a power-of-two
 * reduction is chosen, and a shell pipeline of the form
 * "unarchive | decompress | compress" is run with its output streamed to the
 * client. If the pipeline fails for a jp2 source, one retry is made with the
 * reduction limited by the Clevels record of the file.
 *
 * NOTE(review): the source of this method was truncated (dropped lines and
 * stray numeric prefixes). Statements tagged "reconstructed" were filled in
 * from the surrounding conditions and comments; confirm them against the
 * upstream file.
 *
 * @param array $requestEnv Request parameters. Keys read here: 'zip', 'file',
 *                          'ext', 'callback', 'rotate', 'height',
 *                          'origHeight' and 'scale'.
 * @return void Output is written directly; fatal problems go through BRfatal().
 */
function serveRequest($requestEnv)
{
    // Process some of the request parameters
    $zipPath = $requestEnv['zip'];
    $file = $requestEnv['file'];
    // reconstructed: fall back to jpeg when no output format is requested
    $ext = isset($requestEnv['ext']) ? $requestEnv['ext'] : 'jpeg';

    $callback = null;
    if (isset($requestEnv['callback'])) {
        // validate callback is valid JS identifier (only).
        // The D modifier stops "$" from matching before a trailing newline.
        $callback = $requestEnv['callback'];
        $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/D';
        if (! preg_match($identifierPatt, $callback)) {
            $this->BRfatal('Invalid callback');
        }
    }

    if (!file_exists($zipPath)) {
        $this->BRfatal('Image stack does not exist');
    }
    // Make sure the image stack is readable - return 403 if not
    $this->checkPrivs($zipPath);

    // Get the image size and depth
    $imageInfo = $this->getImageInfo($zipPath, $file);

    // Output json if requested
    if ('json' == $ext) { // reconstructed condition
        // $$$ we should determine the output size first based on requested scale
        $this->outputJSON($imageInfo, $callback); // $$$ move to BookReaderRequest
        exit; // reconstructed: nothing else may follow the JSON body
    }

    // Unfortunately kakadu requires us to know a priori if the
    // output file should be .ppm or .pgm. By decompressing to
    // .bmp kakadu will write a file we can consistently turn into
    // .pnm. Really kakadu should support .pnm as the file output
    // extension and automatically write ppm or pgm format as
    // appropriate.
    $this->decompressToBmp = true; // $$$ shouldn't be necessary if we use file info to determine output format
    if ($this->decompressToBmp) {
        $stdoutLink = '/tmp/stdout.bmp';
    } else {
        $stdoutLink = '/tmp/stdout.ppm';
    }

    $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));

    // Rotate is currently only supported for jp2 since it does not add server load.
    // The value is normalised to a string and checked strictly because it is
    // compared with === "0" below and ends up on a shell command line.
    $allowedRotations = array("0", "90", "180", "270");
    $rotate = isset($requestEnv['rotate']) ? (string) $requestEnv['rotate'] : "0";
    if (!in_array($rotate, $allowedRotations, true)) {
        $rotate = "0"; // reconstructed default
    }

    // Image conversion options
    $pngOptions = ''; // reconstructed: no extra pnmtopng options
    $jpegOptions = '-quality 75';

    // The pbmreduce reduction factor produces an image with dimension 1/n
    // The kakadu reduction factor produces an image with dimension 1/(2^n)
    // $$$ handle continuous values for scale
    if (isset($requestEnv['height'])) {
        $origHeight = isset($requestEnv['origHeight']) ? floatval($requestEnv['origHeight']) : 0.0;
        $wantedHeight = floatval($requestEnv['height']);
        // A non-positive height would divide by zero; treat it as "very large ratio".
        $ratio = ($wantedHeight > 0) ? ($origHeight / $wantedHeight) : INF;

        // NOTE(review): the assignments in this chain are reconstructed.
        if ($ratio <= 2) {
            $scale = 2;
            $powReduce = 1;
        } else if ($ratio <= 4) {
            $scale = 4;
            $powReduce = 2;
        } else {
            //$powReduce = 3; //too blurry!
            $scale = 2;
            $powReduce = 1;
        }
    } else {
        // $$$ could be cleaner
        // Provide next smaller power of two reduction
        // NOTE(review): the $powReduce values in this chain are reconstructed.
        $scale = isset($requestEnv['scale']) ? intval($requestEnv['scale']) : 1;
        if (1 >= $scale) {
            $powReduce = 0;
        } else if (2 > $scale) {
            $powReduce = 0;
        } else if (4 > $scale) {
            $powReduce = 1;
        } else if (8 > $scale) {
            $powReduce = 2;
        } else if (16 > $scale) {
            $powReduce = 3;
        } else if (32 > $scale) {
            $powReduce = 4;
        } else if (64 > $scale) {
            $powReduce = 5;
        } else {
            // $$$ Leaving this in as default though I'm not sure why it is...
            $powReduce = 3;
        }
        $scale = pow(2, $powReduce);
    }

    // Override depending on source image format
    // $$$ consider doing a 302 here instead, to make better use of the browser cache
    // Limit scaling for 1-bit images. See https://bugs.edge.launchpad.net/bookreader/+bug/486011
    if (1 == $imageInfo['bits']) {
        // NOTE(review): body reconstructed - use one reduction step less.
        if ($scale > 1) {
            $scale /= 2;
            $powReduce -= 1;

            // Hard limit so there are some black pixels to use!
            if ($scale > 4) {
                $scale = 4;
                $powReduce = 2;
            }
        }
    }

    // The decompressor writes to this path; the symlink routes it to stdout.
    if (!file_exists($stdoutLink)) {
        system('ln -s /dev/stdout ' . $stdoutLink);
    }

    putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');

    $unzipCmd = $this->getUnarchiveCommand($zipPath, $file);

    $decompressCmd = $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink);

    // Non-integer scaling is currently disabled on the cluster

    // NOTE(review): switch reconstructed - png on request, jpeg otherwise.
    switch ($ext) {
        case 'png':
            $compressCmd = ' | pnmtopng ' . $pngOptions;
            break;

        case 'jpeg':
        case 'jpg':
        default:
            $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
            $ext = 'jpeg'; // for matching below
            break;
    }

    if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
        // Just pass through original data if same format and size
        $cmd = $unzipCmd;
    } else {
        $cmd = $unzipCmd . $decompressCmd . $compressCmd;
    }

    $filenameForClient = $this->filenameForClient($file, $ext);

    // The table lives on the instance; a bare $MIMES is an undefined local and
    // produced an empty Content-type value. $ext is 'png' or 'jpeg' here, so
    // 'image/' . $ext is a correct fallback if the table lacks the key.
    $mimeType = isset($this->MIMES[$ext]) ? $this->MIMES[$ext] : ('image/' . $ext);
    $headers = array('Content-type: ' . $mimeType,
                     'Cache-Control: max-age=15552000',
                     'Content-disposition: inline; filename=' . $filenameForClient);

    $errorMessage = '';
    if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
        // $$$ automated reporting
        trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);

        // Try some content-specific recovery
        $recovered = false;
        if ($imageInfo['type'] == 'jp2') {
            // Retry with a reduction the file can actually provide.
            $records = $this->getJp2Records($zipPath, $file);
            $maxLevels = isset($records['Clevels']) ? intval($records['Clevels']) : 0;
            if ($powReduce > $maxLevels) {
                $powReduce = $maxLevels;
            } else {
                $powReduce = 0; // reconstructed: fall back to full resolution
            }
            $scale = pow(2, $powReduce);

            $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
            if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
                $recovered = true;
            } else {
                trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
            }
        }

        if (! $recovered) {
            $this->BRfatal('Problem processing image - command failed');
        }
    }

    // The original ended with an isset($tempFile) check, but no variable of
    // that name is ever assigned in this scope, so the check could never fire.
}
/**
 * Build the shell command that writes one archive member to stdout.
 *
 * The archive type is taken from the (case-insensitive) suffix of
 * $archivePath. Both arguments are passed through escapeshellarg().
 *
 * NOTE(review): the line holding the unzip invocation was dropped from the
 * truncated source; 'unzip -p ' is reconstructed - confirm upstream.
 *
 * @param string $archivePath Path of the .zip or .tar image stack.
 * @param string $file        Name of the member inside the archive.
 * @return string Shell command fragment. Unsupported or suffix-less paths end
 *                in BRfatal().
 */
function getUnarchiveCommand($archivePath, $file)
{
    $lowerPath = strtolower($archivePath);
    if (!preg_match('/\.([^\.]+)$/', $lowerPath, $matches)) {
        $this->BRfatal('Bad image stack path');
    }
    $suffix = isset($matches[1]) ? $matches[1] : '';

    if ($suffix == 'zip') {
        return 'unzip -p '
            . escapeshellarg($archivePath)
            . ' ' . escapeshellarg($file);
    }
    if ($suffix == 'tar') {
        // 7z reports progress on stderr; it is discarded
        return ' ( 7z e -so '
            . escapeshellarg($archivePath)
            . ' ' . escapeshellarg($file) . ' 2>/dev/null ) ';
    }
    if ($suffix != '') {
        $this->BRfatal('Incompatible archive format');
    }

    // Only reachable if BRfatal() returned
    $this->BRfatal('Bad image stack path or archive format');
}
// Returns the image type associated with the file extension.
/**
 * Look up the canonical image type for a file extension.
 *
 * The extension is lower-cased before the lookup in $this->EXTENSIONS, so
 * 'JPG' and 'jpg' resolve to the same type.
 *
 * @param string $extension File extension without the leading dot.
 * @return string Canonical type name; unknown extensions end in BRfatal().
 */
function imageExtensionToType($extension)
{
    $key = strtolower((string) $extension);
    if (!array_key_exists($key, $this->EXTENSIONS)) {
        $this->BRfatal('Unknown image extension');
    }
    return $this->EXTENSIONS[$key];
}
// Get the image information. The returned associative array fields will
// vary depending on the image type. The basic keys are width, height, type
// and bits.
/**
 * Get the description of an image stored inside an archive.
 *
 * Every format is delegated to getImageInfoFromExif(). The statements that
 * used to follow the return were unreachable and called undefined bare
 * functions (no $this->), so they have been removed.
 *
 * @param string $zipPath Path of the image stack archive.
 * @param string $file    Name of the image inside the archive.
 * @return array Whatever getImageInfoFromExif() returns.
 */
function getImageInfo($zipPath, $file)
{
    return $this->getImageInfoFromExif($zipPath, $file); // this is fast
}
// Get the records of a JP2 as returned by kdu_expand
/**
 * Collect the "name=value" records that kdu_expand writes for a JP2 image.
 *
 * The image is streamed out of the archive into kdu_expand, whose -record
 * output is read line by line. Lines without "=" are ignored; a value may
 * itself contain "=" because the split is limited to two parts.
 *
 * @param string $zipPath Path of the image stack archive.
 * @param string $file    Name of the JP2 inside the archive.
 * @return array Record name => record value (both strings).
 */
function getJp2Records($zipPath, $file)
{
    $cmd = $this->getUnarchiveCommand($zipPath, $file)
        . ' | ' . $this->kduExpand
        . ' -no_seek -quiet -i /dev/stdin -record /dev/stdout';

    // exec() appends to its second argument, so start from an empty list
    $output = array();
    exec($cmd, $output);

    $records = array();
    foreach ($output as $line) {
        $elems = explode("=", $line, 2);
        if (2 == count($elems)) {
            $records[$elems[0]] = $elems[1];
        }
        // otherwise: delimiter not found, skip the line
    }

    return $records;
}
// Get the image width, height and depth using the EXIF information.
/**
 * Read width, height, bit depth and type of an archived image via exiftool.
 *
 * The image is streamed out of the archive into exiftool, which prints one
 * "Tag: value" line per requested tag. The tag that carries the bit depth
 * depends on the file type.
 *
 * NOTE(review): the case labels of the switch were dropped from the truncated
 * source. They are reconstructed from the per-format comments on the tag
 * list; the fixed depth of 8 for jpeg is an assumption - confirm upstream.
 *
 * @param string $zipPath Path of the image stack archive.
 * @param string $file    Name of the image inside the archive.
 * @return array Keys 'width', 'height', 'bits' (ints) and 'type' (lower-case
 *               string). Unrecognised types end in BRfatal().
 */
function getImageInfoFromExif($zipPath, $file)
{
    // We look for all the possible tags of interest then act on the
    // ones presumed present based on the file type
    $tagsToGet = ' -ImageWidth -ImageHeight -FileType' // all formats
        . ' -BitsPerComponent -ColorSpace'             // jp2
        . ' -BitDepth'                                 // png
        . ' -BitsPerSample';                           // tiff

    $cmd = $this->getUnarchiveCommand($zipPath, $file)
        . ' | '. $this->exiftool . ' -S -fast' . $tagsToGet . ' -';

    $output = array();
    exec($cmd, $output);

    $tags = array();
    foreach ($output as $line) {
        // Limit the split so a value containing ": " stays whole, and skip
        // lines that carry no delimiter at all.
        $keyValue = explode(": ", $line, 2);
        if (2 == count($keyValue)) {
            $tags[$keyValue[0]] = $keyValue[1];
        }
    }

    // Missing tags read as 0 / '' instead of raising undefined-index notices
    $width = isset($tags["ImageWidth"]) ? intval($tags["ImageWidth"]) : 0;
    $height = isset($tags["ImageHeight"]) ? intval($tags["ImageHeight"]) : 0;
    $type = isset($tags["FileType"]) ? strtolower($tags["FileType"]) : '';

    $bits = 0;
    switch ($type) {
        case "jp2":
            $bits = isset($tags["BitsPerComponent"]) ? intval($tags["BitsPerComponent"]) : 0;
            break;
        case "tiff":
            $bits = isset($tags["BitsPerSample"]) ? intval($tags["BitsPerSample"]) : 0;
            break;
        case "jpeg":
            $bits = 8;
            break;
        case "png":
            $bits = isset($tags["BitDepth"]) ? intval($tags["BitDepth"]) : 0;
            break;
        default:
            $this->BRfatal("Unsupported image type");
            break;
    }

    $retval = array('width' => $width, 'height' => $height,
        'bits' => $bits, 'type' => $type);

    return $retval;
}
// Output JSON given the imageInfo associative array
/**
 * Emit the image description as JSON, or as a JSONP call when a callback
 * name is given.
 *
 * NOTE(review): the callback guard and the final echo were dropped from the
 * truncated source and are reconstructed - confirm upstream.
 *
 * @param array       $imageInfo Data to encode.
 * @param string|null $callback  Name of the JavaScript function to wrap the
 *                               JSON in; empty/null for plain JSON. It is
 *                               emitted as given, so it must be validated by
 *                               the caller.
 * @return void
 */
function outputJSON($imageInfo, $callback)
{
    header('Content-type: text/plain');
    $jsonOutput = json_encode($imageInfo);
    if ($callback) {
        $jsonOutput = $callback . '(' . $jsonOutput . ');';
    }
    echo $jsonOutput;
}
/**
 * Build the shell fragment that turns the unarchived image into PNM data.
 *
 * The fragment is appended to an unarchive command, so each variant starts
 * with a pipe or redirect that consumes that command's stdout.
 *
 * NOTE(review): the case labels were dropped from the truncated source and
 * are reconstructed from the tool used in each branch - confirm upstream.
 *
 * @param string     $imageType  'jp2', 'tiff', 'jpeg' or 'png'.
 * @param int        $powReduce  kdu_expand -reduce value (jp2 only).
 * @param int|string $rotate     kdu_expand -rotate value (jp2 only).
 * @param int        $scale      Reduction factor handed to reduceCommand()
 *                               for the non-jp2 types.
 * @param string     $stdoutLink Path kdu_expand writes its output to.
 * @return string Shell command fragment; unknown types end in BRfatal().
 */
function getDecompressCmd($imageType, $powReduce, $rotate, $scale, $stdoutLink)
{
    $decompressCmd = '';

    switch ($imageType) {
        case 'jp2':
            // Numeric options are cast so nothing but digits reaches the shell
            $decompressCmd =
                " | " . $this->kduExpand . " -no_seek -quiet -reduce " . intval($powReduce)
                . " -rotate " . intval($rotate) . " -i /dev/stdin -o " . $stdoutLink;
            if (!empty($this->decompressToBmp)) {
                // We suppress output since bmptopnm always outputs on stderr
                $decompressCmd .= ' | (bmptopnm 2>/dev/null)';
            }
            break;

        case 'tiff':
            // We need to create a temporary file for tifftopnm since it cannot
            // work on a pipe (the file must be seekable).
            // We use the BookReaderTiff prefix to give a hint in case things don't
            // get cleaned up.
            $tempFile = escapeshellarg(tempnam("/tmp", "BookReaderTiff"));

            // $$$ look at bit depth when reducing
            // The file name is known only here, so the command removes the
            // file itself once tifftopnm has read it.
            $decompressCmd =
                ' > ' . $tempFile
                . ' ; ( tifftopnm ' . $tempFile . ' 2>/dev/null ; rm -f ' . $tempFile . ' )'
                . $this->reduceCommand($scale);
            break;

        case 'jpeg':
            $decompressCmd = ' | ( jpegtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
            break;

        case 'png':
            $decompressCmd = ' | ( pngtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
            break;

        default:
            $this->BRfatal('Unknown image type: ' . $imageType);
            break;
    }

    return $decompressCmd;
}
// If the command has its initial output on stdout the headers will be emitted followed
// by the stdout output. If initial output is on stderr an error message will be
// returned through $errorMessage.
//
// Returns:
//   true - if command emits stdout and has zero exit code
//   false - command has initial output on stderr or non-zero exit code
//   &$errorMessage - error string if there was an error
//
// $$$ Tested with our command-line image processing. There may be deadlocks
//     for other kinds of commands.
/**
 * Run a shell command and stream its stdout to the client, but only if the
 * command starts by writing to stdout rather than stderr.
 *
 * On the success path the given headers are sent and stdout is copied to
 * php://output. Otherwise the stderr text is collected into $errorMessage.
 * A non-zero exit code always turns the result into a failure.
 *
 * Changes from the original:
 *  - a failed stream_select() no longer falls through to the $read[0] check;
 *  - stderr being "readable" only because it reached end-of-file no longer
 *    counts as error output;
 *  - a failed proc_open() now sets an error message.
 *
 * @param array  $headers      Header lines to send before the output.
 * @param string $cmd          Shell command to run.
 * @param string $errorMessage Set to '' on success, else to the error text.
 * @return bool true if stdout was streamed and the exit code was zero.
 */
function passthruIfSuccessful($headers, $cmd, &$errorMessage)
{
    $retVal = false;
    $errorMessage = '';

    $descriptorspec = array(
        0 => array("pipe", "r"), // stdin is a pipe that the child will read from
        1 => array("pipe", "w"), // stdout is a pipe that the child will write to
        2 => array("pipe", "w"), // stderr is a pipe to write to
    );

    $cwd = NULL;
    $env = NULL;

    $process = proc_open($cmd, $descriptorspec, $pipes, $cwd, $env);
    if (!is_resource($process)) {
        $errorMessage = 'Could not start command';
        return false;
    }

    // $pipes now looks like this:
    // 0 => writeable handle connected to child stdin
    // 1 => readable handle connected to child stdout
    // 2 => readable handle connected to child stderr
    $stdin = $pipes[0];
    $stdout = $pipes[1];
    $stderr = $pipes[2];

    // check whether we get input first on stdout or stderr
    $read = array($stdout, $stderr);
    $write = NULL;
    $except = NULL;
    $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout

    if (false === $numChanged) {
        $errorMessage = 'Select failed';
    } else {
        $stdoutReady = in_array($stdout, $read, true);

        // select() reported stderr readable, so this read returns at once:
        // either some error text or '' when the stream is merely at EOF.
        $stderrHead = '';
        if (in_array($stderr, $read, true)) {
            $chunk = fread($stderr, 8192);
            if (false !== $chunk) {
                $stderrHead = $chunk;
            }
        }

        if ($stdoutReady && ('' === $stderrHead)) {
            // Got output first on stdout (only)
            // $$$ make sure we get all stdout
            $output = fopen('php://output', 'w');
            foreach ($headers as $header) {
                header($header);
            }
            stream_copy_to_stream($stdout, $output);
            fclose($output); // okay since tied to special php://output
            $retVal = true;
        } else {
            // Got output on stderr
            // $$$ make sure we get all stderr
            $errorMessage = $stderrHead . stream_get_contents($stderr);
        }
    }

    fclose($stderr);
    fclose($stdout);
    fclose($stdin);

    // It is important that you close any pipes before calling
    // proc_close in order to avoid a deadlock
    $cmdRet = proc_close($process);
    if (0 != $cmdRet) {
        $retVal = false;
        $errorMessage .= "Command failed with result code " . $cmdRet;
    }

    return $retVal;
}
/**
 * Report a fatal error to the client as a JavaScript alert() call and stop.
 *
 * The message is encoded with json_encode() so quotes, backslashes or
 * newlines inside it cannot break out of the string literal. Some callers
 * append values derived from the processed file to the message. The output
 * therefore uses a double-quoted literal: alert("...");
 *
 * NOTE(review): the terminating statement was dropped from the truncated
 * source; die(-1) is reconstructed - confirm upstream.
 *
 * @param string $string Human-readable error message.
 * @return void Does not return.
 */
function BRfatal($string)
{
    echo 'alert(' . json_encode((string) $string) . ");\n";
    die(-1);
}
// Returns true if running on a power node
/**
 * Probe the host hardware to decide whether this machine is a "power node".
 *
 * Two probes are run: the number of lspci lines mentioning Realtek, and
 * whether /proc/cpuinfo mentions AMD.
 *
 * NOTE(review): the return statements were dropped from the truncated source.
 * The reconstruction returns true when either probe matches - confirm
 * upstream.
 *
 * @return bool
 */
function onPowerNode()
{
    $lspciOutput = array();
    exec("lspci | fgrep -c Realtek", $lspciOutput, $return);
    // fgrep -c prints the match count; guard against no output at all
    if (isset($lspciOutput[0]) && ("0" != $lspciOutput[0])) {
        return true;
    }

    // Separate array: exec() appends to the array it is given
    $cpuOutput = array();
    exec("egrep -q AMD /proc/cpuinfo", $cpuOutput, $return);
    return (0 == $return);
}
/**
 * Build the pnmscale pipeline stage for an integer reduction factor.
 *
 * A factor of 1 needs no scaling and yields an empty string. Otherwise
 * pnmscale is used, with -nomix added on machines that are not power nodes.
 *
 * NOTE(review): the scale-of-1 guard was dropped from the truncated source
 * and is reconstructed - confirm upstream.
 *
 * @param int $scale Reduction factor (image dimensions become 1/$scale).
 * @return string Shell fragment starting with " | ", or '' for no scaling.
 */
function reduceCommand($scale)
{
    if (1 == $scale) {
        return '';
    }

    // Cast so that only digits can reach the shell command line
    $factor = intval($scale);
    if ($this->onPowerNode()) {
        return ' | pnmscale -reduce ' . $factor . ' 2>/dev/null ';
    }
    return ' | pnmscale -nomix -reduce ' . $factor . ' 2>/dev/null ';
}
/**
 * Stop the request with "403 Forbidden" when a file is not readable.
 *
 * NOTE(review): the statement after the header was dropped from the truncated
 * source; exit(0) is reconstructed, since without it processing would carry
 * on after the 403 header - confirm upstream.
 *
 * @param string $filename Path to check.
 * @return void Returns normally only when the file is readable.
 */
function checkPrivs($filename)
{
    if (is_readable($filename)) {
        return;
    }
    header('HTTP/1.1 403 Forbidden');
    exit(0);
}
// Given a file path (inside the archive) and an output file extension, return
// a filename suitable for the Content-disposition header
/**
 * Derive the download filename offered to the client.
 *
 * The directory part and original extension of $filePath are dropped and the
 * output extension is appended.
 *
 * NOTE(review): the body of the 'jpeg' branch was dropped from the truncated
 * source; mapping it to the 'jpg' suffix is reconstructed - confirm upstream.
 *
 * @param string $filePath Path of the image inside the archive.
 * @param string $ext      Output image type, e.g. 'jpeg' or 'png'.
 * @return string Base name plus '.' plus the output suffix.
 */
function filenameForClient($filePath, $ext)
{
    $pathParts = pathinfo($filePath);
    $suffix = ('jpeg' == $ext) ? 'jpg' : $ext;
    return $pathParts['filename'] . '.' . $suffix;
}