4 Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
6 This file is part of BookReader. The full source code can be found at GitHub:
7 http://github.com/openlibrary/bookreader
9 The canonical short name of an image type is the same as in the MIME type.
10 For example both .jpeg and .jpg are considered to have type "jpeg" since
11 the MIME type is "image/jpeg".
13 BookReader is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 BookReader is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
23 You should have received a copy of the GNU Affero General Public License
24 along with BookReader. If not, see <http://www.gnu.org/licenses/>.
// Maps an image file extension to its MIME type; the script later uses
// $MIMES[$ext] to build the Content-type response header.
27 $MIMES = array('gif' => 'image/gif',
29 'jpg' => 'image/jpeg',
30 'jpeg' => 'image/jpeg',
32 'tif' => 'image/tiff',
33 'tiff' => 'image/tiff');
// Maps a file extension to the canonical image type name; consulted by
// imageExtensionToType().
35 $EXTENSIONS = array('gif' => 'gif',
43 // Paths to command-line tools
44 $exiftool = '/petabox/sw/books/exiftool/exiftool';
45 $kduExpand = '/petabox/sw/bin/kdu_expand';
47 // Process some of the request parameters
// NOTE(review): 'zip' and 'file' are read without an isset() check, unlike
// 'ext' and 'callback' below — confirm a missing parameter is handled.
48 $zipPath = $_REQUEST['zip'];
49 $file = $_REQUEST['file'];
50 if (isset($_REQUEST['ext'])) {
51 $ext = $_REQUEST['ext'];
56 if (isset($_REQUEST['callback'])) {
57 // validate callback is valid JS identifier (only)
58 $callback = $_REQUEST['callback'];
// First character must be a letter, '$' or '_'; every following character
// must be a letter, digit, '$' or '_'. Anchored at both ends.
59 $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/';
60 if (! preg_match($identifierPatt, $callback)) {
61 BRfatal('Invalid callback');
70 * Get info about requested image (input)
71 * Get info about requested output format
72 * Determine processing parameters
75 * Clean up temporary files
// Builds the shell command that extracts $file from the archive at
// $archivePath. The archive type is taken from the lowercased extension of
// $archivePath ('zip' or 'tar'); both the archive path and the member name
// are passed through escapeshellarg() before being appended to the command.
// Calls BRfatal() when the archive format or path is rejected.
78 function getUnarchiveCommand($archivePath, $file)
80 $lowerPath = strtolower($archivePath);
// Capture everything after the last '.' in the path as the suffix.
81 if (preg_match('/\.([^\.]+)$/', $lowerPath, $matches)) {
82 $suffix = $matches[1];
84 if ($suffix == 'zip') {
86 . escapeshellarg($archivePath)
87 . ' ' . escapeshellarg($file);
88 } else if ($suffix == 'tar') {
90 . escapeshellarg($archivePath)
91 . ' ' . escapeshellarg($file);
93 BRfatal('Incompatible archive format');
97 BRfatal('Bad image stack path');
100 BRfatal('Bad image stack path or archive format');
105 * Returns the image type associated with the file extension.
// Looks $extension up in the $EXTENSIONS table and returns the mapped image
// type. An extension that is not a key of the table ends in
// BRfatal('Unknown image extension').
// NOTE(review): the lookup is an exact key match; callers visible in this
// file lowercase the extension first — confirm all callers do.
107 function imageExtensionToType($extension)
111 if (array_key_exists($extension, $EXTENSIONS)) {
112 return $EXTENSIONS[$extension];
114 BRfatal('Unknown image extension');
119 * Get the image information. The returned associative array fields will
120 * vary depending on the image type. The basic keys are width, height, type
// Returns the image information array for $file inside the archive $zipPath
// by delegating to one of the getImageInfoFrom*() helpers.
123 function getImageInfo($zipPath, $file)
// NOTE(review): if this return is unconditional, the extension-based
// dispatch below it never runs — confirm whether that code is still needed.
125 return getImageInfoFromExif($zipPath, $file); // this is fast
// Derive the image type from the lowercased file extension.
128 $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
129 $type = imageExtensionToType($fileExt);
// NOTE(review): getImageInfoFromJp2() is called here; confirm it is defined.
133 return getImageInfoFromJp2($zipPath, $file);
136 return getImageInfoFromExif($zipPath, $file);
141 // Get the records of JP2 as returned by kdu_expand
// Collects "key=value" lines produced for the JP2 image $file inside
// $zipPath into an associative array ($records).
142 function getJp2Records($zipPath, $file)
// The unarchive command is extended with options that read the image from
// /dev/stdin and write the record to /dev/stdout.
146 $cmd = getUnarchiveCommand($zipPath, $file)
148 . ' -no_seek -quiet -i /dev/stdin -record /dev/stdout';
152 foreach ($output as $line) {
// Split on the first '=' only (limit 2), so a value may itself contain '='.
153 $elems = explode("=", $line, 2);
154 if (1 == count($elems)) {
155 // delimiter not found
158 $records[$elems[0]] = $elems[1];
165 * Get the image width, height and depth using the EXIF information.
// Pipes the unarchived file into exiftool, parses its "Tag: value" output
// lines into $tags, and builds an array with the keys 'width', 'height',
// 'bits' and 'type'. Calls BRfatal("Unsupported image type") for a file type
// it does not handle.
167 function getImageInfoFromExif($zipPath, $file)
171 // We look for all the possible tags of interest then act on the
172 // ones presumed present based on the file type
173 $tagsToGet = ' -ImageWidth -ImageHeight -FileType' // all formats
174 . ' -BitsPerComponent -ColorSpace' // jp2
175 . ' -BitDepth' // png
176 . ' -BitsPerSample'; // tiff
// The extraction command's output is piped ('|') into exiftool.
178 $cmd = getUnarchiveCommand($zipPath, $file)
179 . ' | '. $exiftool . ' -S -fast' . $tagsToGet . ' -';
183 foreach ($output as $line) {
// NOTE(review): explode() has no limit and $keyValue[1] is read without
// checking that ": " was found. A line without the delimiter triggers an
// undefined-offset notice/warning, and a value that itself contains ": " is
// cut at the second delimiter.
184 $keyValue = explode(": ", $line);
185 $tags[$keyValue[0]] = $keyValue[1];
188 $width = intval($tags["ImageWidth"]);
189 $height = intval($tags["ImageHeight"]);
// The file type reported by exiftool is lowercased before being used as 'type'.
190 $type = strtolower($tags["FileType"]);
// The bit depth is read from a different tag depending on the image type
// (see the per-format tag list in $tagsToGet above).
194 $bits = intval($tags["BitsPerComponent"]);
197 $bits = intval($tags["BitsPerSample"]);
203 $bits = intval($tags["BitDepth"]);
206 BRfatal("Unsupported image type");
211 $retval = Array('width' => $width, 'height' => $height,
212 'bits' => $bits, 'type' => $type);
218 * Output JSON given the imageInfo associative array
// Serialises $imageInfo with json_encode() and can wrap the result in a
// call to $callback, producing "callback(<json>);" (JSONP style).
// NOTE(review): the response is declared as text/plain rather than a JSON or
// JavaScript MIME type — confirm clients rely on this before changing it.
220 function outputJSON($imageInfo, $callback)
222 header('Content-type: text/plain');
223 $jsonOutput = json_encode($imageInfo);
// NOTE(review): presumably only applied when a callback was supplied — confirm.
225 $jsonOutput = $callback . '(' . $jsonOutput . ');';
230 // Get the image size and depth
231 $imageInfo = getImageInfo($zipPath, $file);
233 // Output json if requested
// NOTE(review): $ext and $callback are assigned inside isset() guards where
// the request parameters are read — confirm both have defaults when the
// parameters are absent.
234 if ('json' == $ext) {
235 // $$$ we should determine the output size first based on requested scale
236 outputJSON($imageInfo, $callback);
240 // Unfortunately kakadu requires us to know a priori if the
241 // output file should be .ppm or .pgm. By decompressing to
242 // .bmp kakadu will write a file we can consistently turn into
243 // .pnm. Really kakadu should support .pnm as the file output
244 // extension and automatically write ppm or pgm format as
246 $decompressToBmp = true;
// $stdoutLink is a path under /tmp that is later symlinked to /dev/stdout and
// handed to kdu_expand as its "-o" target, so the file extension of the link
// selects the output format while the data still goes to the pipe.
247 if ($decompressToBmp) {
248 $stdoutLink = '/tmp/stdout.bmp';
250 $stdoutLink = '/tmp/stdout.ppm';
253 $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
255 // Rotate is currently only supported for jp2 since it does not add server load
256 $allowedRotations = array("0", "90", "180", "270");
// NOTE(review): 'rotate' is read without isset(), and in_array() is called
// without the strict flag, so numeric strings that compare equal (e.g. "090")
// also pass the check. $rotate is later interpolated into a shell command.
257 $rotate = $_REQUEST['rotate'];
258 if ( !in_array($rotate, $allowedRotations) ) {
262 // Image conversion options
264 $jpegOptions = '-quality 75';
266 // The pbmreduce reduction factor produces an image with dimension 1/n
267 // The kakadu reduction factor produces an image with dimension 1/(2^n)
268 // $$$ handle continuous values for scale
269 if (isset($_REQUEST['height'])) {
// Ratio of original height to requested height.
// NOTE(review): 'origHeight' is read without isset(), and a 'height' of 0 or
// a non-numeric value makes floatval() return 0, i.e. a division by zero.
270 $ratio = floatval($_REQUEST['origHeight']) / floatval($_REQUEST['height']);
274 } else if ($ratio <= 4) {
278 //$powReduce = 3; //too blurry!
284 // $$$ could be cleaner
// The integer scale request is compared against successive powers of two
// (2, 4, 8, 16, 32, 64) in the chain below.
285 $scale = intval($_REQUEST['scale']);
289 } else if (2 > $scale) {
292 } else if (4 > $scale) {
295 } else if (8 > $scale) {
298 } else if (16 > $scale) {
301 } else if (32 > $scale) {
304 } else if (64 > $scale) {
308 // $$$ Leaving this in as default though I'm not sure why it is...
314 // Override depending on source image format
315 // $$$ consider doing a 302 here instead, to make better use of the browser cache
316 // Limit scaling for 1-bit images. See https://bugs.edge.launchpad.net/bookreader/+bug/486011
317 if (1 == $imageInfo['bits']) {
322 // Hard limit so there are some black pixels to use!
// The symlink to /dev/stdout is only created when the path does not exist yet.
// NOTE(review): the check-then-create sequence is not atomic and the path in
// /tmp is fixed — confirm this is acceptable on shared hosts.
330 if (!file_exists($stdoutLink))
332 system('ln -s /dev/stdout ' . $stdoutLink);
// Sets LD_LIBRARY_PATH in this process's environment to the kakadu library
// directory (presumably so that kdu_expand can load its shared libraries).
336 putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');
338 $unzipCmd = getUnarchiveCommand($zipPath, $file);
// Returns the shell pipeline fragment that converts the extracted image of
// type $imageType into PNM data: kdu_expand (optionally followed by
// bmptopnm), tifftopnm, jpegtopnm or pngtopnm. Any other type ends in
// BRfatal(). Reads $powReduce, $rotate, $scale and $decompressToBmp from
// global scope.
// NOTE(review): $powReduce and $rotate are interpolated into the command
// string without escaping; they must be validated before this is called.
340 function getDecompressCmd($imageType) {
342 global $powReduce, $rotate, $scale; // $$$ clean up
343 global $decompressToBmp; // $$$ TODO remove now that we have bit depth info
346 switch ($imageType) {
// kdu_expand reads the image from /dev/stdin and writes to $stdoutLink;
// reduction and rotation are done by kdu_expand itself.
349 " | " . $kduExpand . " -no_seek -quiet -reduce $powReduce -rotate $rotate -i /dev/stdin -o " . $stdoutLink;
350 if ($decompressToBmp) {
351 // We suppress output since bmptopnm always outputs on stderr
352 $decompressCmd .= ' | (bmptopnm 2>/dev/null)';
357 // We need to create a temporary file for tifftopnm since it cannot
358 // work on a pipe (the file must be seekable).
359 // We use the BookReaderTiff prefix to give a hint in case things don't
// NOTE(review): the file-level cleanup tests isset($tempFile); confirm
// $tempFile is declared global in this function so that cleanup can see it.
361 $tempFile = tempnam("/tmp", "BookReaderTiff");
363 // $$$ look at bit depth when reducing
// The extracted TIFF is redirected into $tempFile, then tifftopnm reads that
// file and the result is scaled by reduceCommand().
365 ' > ' . $tempFile . ' ; tifftopnm ' . $tempFile . ' 2>/dev/null' . reduceCommand($scale);
369 $decompressCmd = ' | jpegtopnm ' . reduceCommand($scale);
373 $decompressCmd = ' | pngtopnm ' . reduceCommand($scale);
377 BRfatal('Unknown image type: ' . $imageType);
380 return $decompressCmd;
// Decode stage of the pipeline, chosen from the detected image type.
383 $decompressCmd = getDecompressCmd($imageInfo['type']);
385 // Non-integer scaling is currently disabled on the cluster
386 // if (isset($_REQUEST['height'])) {
387 // $cmd .= " | pnmscale -height {$_REQUEST['height']} ";
// Encode stage: PNM to PNG or PNM to JPEG.
392 $compressCmd = ' | pnmtopng ' . $pngOptions;
398 $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
399 $ext = 'jpeg'; // for matching below
// NOTE(review): $fileExt is the raw lowercased extension, so a source file
// named *.jpg does not equal 'jpeg' here and presumably is re-encoded instead
// of being passed through — confirm whether that is intended.
404 if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
405 // Just pass through original data if same format and size
// Full pipeline: extract from archive, decode to PNM, encode to output format.
408 $cmd = $unzipCmd . $decompressCmd . $compressCmd;
413 // If the command has its initial output on stdout the headers will be emitted followed
414 // by the stdout output. If initial output is on stderr an error message will be
418 // true - if command emits stdout and has zero exit code
419 // false - command has initial output on stderr or non-zero exit code
420 // &$errorMessage - error string if there was an error
422 // $$$ Tested with our command-line image processing. May be deadlocks for
// Runs $cmd through proc_open() with pipes on stdin, stdout and stderr, then
// blocks in stream_select() (no timeout) until one of the output pipes is
// readable. When stdout alone is ready, the $headers are processed and the
// child's stdout is copied to php://output; otherwise the child's stderr is
// read into $errorMessage. The exit code from proc_close() is appended to
// $errorMessage on the failure path.
424 function passthruIfSuccessful($headers, $cmd, &$errorMessage)
429 $descriptorspec = array(
430 0 => array("pipe", "r"), // stdin is a pipe that the child will read from
431 1 => array("pipe", "w"), // stdout is a pipe that the child will write to
432 2 => array("pipe", "w"), // stderr is a pipe to write to
438 $process = proc_open($cmd, $descriptorspec, $pipes, $cwd, $env);
440 if (is_resource($process)) {
441 // $pipes now looks like this:
442 // 0 => writeable handle connected to child stdin
443 // 1 => readable handle connected to child stdout
444 // 2 => readable handle connected to child stderr
450 // check whether we get input first on stdout or stderr
451 $read = array($stdout, $stderr);
// stream_select() modifies $read in place so that it holds only the streams
// that are ready; that is why $read[0] is compared with $stdout below.
454 $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout
455 if (false === $numChanged) {
457 $errorMessage = 'Select failed';
460 if ($read[0] == $stdout && (1 == $numChanged)) {
461 // Got output first on stdout (only)
462 // $$$ make sure we get all stdout
463 $output = fopen('php://output', 'w');
464 foreach($headers as $header) {
// Stream the child's stdout straight to the response body.
467 stream_copy_to_stream($pipes[1], $output);
468 fclose($output); // okay since tied to special php://output
471 // Got output on stderr
472 // $$$ make sure we get all stderr
473 $errorMessage = stream_get_contents($stderr);
482 // It is important that you close any pipes before calling
483 // proc_close in order to avoid a deadlock
484 $cmdRet = proc_close($process);
487 $errorMessage .= "Command failed with result code " . $cmdRet;
// Response headers: MIME type for the output extension and a cache lifetime
// of 15552000 seconds (180 days).
493 $headers = array('Content-type: '. $MIMES[$ext],
494 'Cache-Control: max-age=15552000');
497 if (! passthruIfSuccessful($headers, $cmd, $errorMessage)) {
498 // $$$ automated reporting
499 trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);
501 // Try some content-specific recovery
// JP2 recovery: if the requested reduction exceeds the 'Clevels' value from
// the JP2 records, clamp $powReduce to that value and rebuild the command.
503 if ($imageInfo['type'] == 'jp2') {
504 $records = getJp2Records($zipPath, $file);
505 if ($powReduce > intval($records['Clevels'])) {
// NOTE(review): the comparison above uses intval(), but the raw string from
// $records is assigned here and later interpolated into the shell command by
// getDecompressCmd() — consider assigning the intval() result instead.
506 $powReduce = $records['Clevels'];
507 $reduce = pow(2, $powReduce);
513 $cmd = $unzipCmd . getDecompressCmd($imageInfo['type']) . $compressCmd;
514 if (passthruIfSuccessful($headers, $cmd, $errorMessage)) {
517 trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
522 BRfatal('Problem processing image - command failed');
526 // passthru ($cmd); # cmd returns image data
// Only set when a temporary file was created for TIFF input.
528 if (isset($tempFile)) {
// Reports a fatal error to the client by echoing $string wrapped in a
// JavaScript alert() call. Call sites in this file spell the name BRfatal();
// PHP function names are case-insensitive, so both resolve here.
// NOTE(review): $string is interpolated into a single-quoted JavaScript
// literal without escaping, and some callers concatenate runtime values into
// the message — a quote in the message breaks out of the literal.
532 function BRFatal($string) {
533 echo "alert('$string');\n";
537 // Returns true if using a power node
// Probes the host hardware with two shell commands: the count of lspci lines
// containing "Realtek" (fgrep -c prints the count, read from $output[0]) and
// whether /proc/cpuinfo mentions "AMD" (egrep -q prints nothing and reports
// the match through the exit status in $return).
538 function onPowerNode() {
539 exec("lspci | fgrep -c Realtek", $output, $return);
540 if ("0" != $output[0]) {
543 exec("egrep -q AMD /proc/cpuinfo", $output, $return);
// Returns a " | pnmscale ..." pipeline fragment that reduces the image by
// the integer factor $scale. Two variants exist: plain "-reduce" and
// "-nomix -reduce".
// NOTE(review): $scale is appended to the shell command unescaped; the
// file-level code derives it with intval() — confirm every caller does.
551 function reduceCommand($scale) {
554 return ' | pnmscale -reduce ' . $scale;
556 return ' | pnmscale -nomix -reduce ' . $scale;