4 Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
6 This file is part of BookReader. The full source code can be found at GitHub:
7 http://github.com/openlibrary/bookreader
9 The canonical short name of an image type is the same as in the MIME type.
10 For example both .jpeg and .jpg are considered to have type "jpeg" since
11 the MIME type is "image/jpeg".
13 BookReader is free software: you can redistribute it and/or modify
14 it under the terms of the GNU Affero General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 BookReader is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU Affero General Public License for more details.
23 You should have received a copy of the GNU Affero General Public License
24 along with BookReader. If not, see <http://www.gnu.org/licenses/>.
// Lookup table from file extension to the MIME type that is later sent in the
// Content-type response header (read as $MIMES[$ext] when headers are built).
27 $MIMES = array('gif' => 'image/gif',
29 'jpg' => 'image/jpeg',
30 'jpeg' => 'image/jpeg',
32 'tif' => 'image/tiff',
33 'tiff' => 'image/tiff');
// Lookup table from file extension to canonical image type name; consulted by
// imageExtensionToType().
35 $EXTENSIONS = array('gif' => 'gif',
43 // Paths to command-line tools
// Absolute paths, so the tools are found regardless of the web server's PATH.
44 $exiftool = '/petabox/sw/books/exiftool/exiftool';
45 $kduExpand = '/petabox/sw/bin/kdu_expand';
47 // Process some of the request parameters
// 'zip' is the archive holding the image stack, 'file' the member to serve.
// NOTE(review): both are read without an isset() check, so a request that
// omits them raises an undefined-index notice/warning — confirm this is
// acceptable.
48 $zipPath = $_REQUEST['zip'];
49 $file = $_REQUEST['file'];
// 'ext' selects the requested output format when present.
50 if (isset($_REQUEST['ext'])) {
51 $ext = $_REQUEST['ext'];
56 if (isset($_REQUEST['callback'])) {
57 // validate callback is valid JS identifier (only)
58 $callback = $_REQUEST['callback'];
// First character: letter, '$' or '_'; remaining characters may also be
// digits. Anything else is rejected because the callback name is written
// verbatim into the JSON response by outputJSON().
59 $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/';
60 if (! preg_match($identifierPatt, $callback)) {
61 BRfatal('Invalid callback');
67 // Make sure the image stack is readable - return 403 if not
73 * Get info about requested image (input)
74 * Get info about requested output format
75 * Determine processing parameters
78 * Clean up temporary files
// Builds the shell command fragment that extracts one member from an archive.
//
// $archivePath - path of the archive; its suffix (text after the last '.')
//                selects the extraction tool. 'zip' and 'tar' are recognised.
// $file        - name of the member inside the archive.
//
// Any other suffix, or a path with no suffix at all, is reported through
// BRfatal().
81 function getUnarchiveCommand($archivePath, $file)
// Lower-case copy is used for suffix matching only; the original path is what
// gets passed to the shell.
83 $lowerPath = strtolower($archivePath);
// Capture everything after the final dot as the suffix.
84 if (preg_match('/\.([^\.]+)$/', $lowerPath, $matches)) {
85 $suffix = $matches[1];
87 if ($suffix == 'zip') {
// Both values are shell-escaped: the main script passes request parameters
// straight into this function.
89 . escapeshellarg($archivePath)
90 . ' ' . escapeshellarg($file);
91 } else if ($suffix == 'tar') {
93 . escapeshellarg($archivePath)
// stderr of the tar extraction is discarded so that only stdout reaches the
// next stage of the pipeline.
94 . ' ' . escapeshellarg($file) . ' 2>/dev/null ) ';
96 BRfatal('Incompatible archive format');
100 BRfatal('Bad image stack path');
103 BRfatal('Bad image stack path or archive format');
108 * Returns the image type associated with the file extension.
// Translates a file extension into the canonical image type name by looking
// it up in the $EXTENSIONS table. An extension that is not a key of the table
// is reported through BRfatal().
// The lookup is by exact key, so $extension must already use the same case as
// the table keys.
110 function imageExtensionToType($extension)
114 if (array_key_exists($extension, $EXTENSIONS)) {
115 return $EXTENSIONS[$extension];
117 BRfatal('Unknown image extension');
122 * Get the image information. The returned associative array fields will
123 * vary depending on the image type. The basic keys are width, height, type
/**
 * Get the image information for a file stored inside an archive.
 *
 * This is a thin wrapper around getImageInfoFromExif(), which is used for
 * every image type because it is fast. The former per-type dispatch that
 * followed the return statement could never execute (and referred to a
 * getImageInfoFromJp2() helper), so it has been removed.
 *
 * @param string $zipPath path of the archive containing the image
 * @param string $file    name of the image inside the archive
 * @return array whatever getImageInfoFromExif() returns for the image
 */
function getImageInfo($zipPath, $file)
{
    return getImageInfoFromExif($zipPath, $file); // this is fast
}
144 // Get the records of a JP2 as returned by kdu_expand
// The member is extracted with getUnarchiveCommand() and piped into the
// decoder; each output line of the form key=value becomes one entry of the
// resulting $records array.
145 function getJp2Records($zipPath, $file)
149 $cmd = getUnarchiveCommand($zipPath, $file)
// Input is read from stdin and the record is written to stdout, so no
// intermediate file is named on the command line.
151 . ' -no_seek -quiet -i /dev/stdin -record /dev/stdout';
155 foreach ($output as $line) {
// Limit of 2 keeps any further '=' characters inside the value.
156 $elems = explode("=", $line, 2);
157 if (1 == count($elems)) {
158 // delimiter not found
161 $records[$elems[0]] = $elems[1];
168 * Get the image width, height and depth using the EXIF information.
// Reads image metadata by piping the archive member into exiftool and parsing
// its output.
//
// $zipPath - archive containing the image
// $file    - member name inside the archive
//
// Builds an array with the keys 'width', 'height', 'bits' and 'type'. An
// image type that is not handled is reported through BRfatal().
170 function getImageInfoFromExif($zipPath, $file)
174 // We look for all the possible tags of interest then act on the
175 // ones presumed present based on the file type
176 $tagsToGet = ' -ImageWidth -ImageHeight -FileType' // all formats
177 . ' -BitsPerComponent -ColorSpace' // jp2
178 . ' -BitDepth' // png
179 . ' -BitsPerSample'; // tiff
181 $cmd = getUnarchiveCommand($zipPath, $file)
// The trailing '-' is the input file argument; the image arrives on the pipe.
182 . ' | '. $exiftool . ' -S -fast' . $tagsToGet . ' -';
// Each output line is treated as a "Tag: value" pair.
// NOTE(review): explode() is called without a limit, so a value that itself
// contains ": " is cut at the first occurrence, and a line without ": " makes
// $keyValue[1] undefined — consider explode(": ", $line, 2) plus a count check.
186 foreach ($output as $line) {
187 $keyValue = explode(": ", $line);
188 $tags[$keyValue[0]] = $keyValue[1];
// NOTE(review): these lookups assume the tags were present in the output;
// a missing tag yields an undefined-index notice and intval(null) === 0.
191 $width = intval($tags["ImageWidth"]);
192 $height = intval($tags["ImageHeight"]);
// Lower-cased so it can be compared against the lower-case type names used
// elsewhere in the script (e.g. 'jp2').
193 $type = strtolower($tags["FileType"]);
// Bit depth lives in a different tag depending on the format (see the
// per-format annotations on $tagsToGet above).
197 $bits = intval($tags["BitsPerComponent"]);
200 $bits = intval($tags["BitsPerSample"]);
206 $bits = intval($tags["BitDepth"]);
209 BRfatal("Unsupported image type");
214 $retval = Array('width' => $width, 'height' => $height,
215 'bits' => $bits, 'type' => $type);
221 * Output JSON given the imageInfo associative array
// Sends $imageInfo to the client as JSON text, optionally wrapped in a call to
// $callback (JSONP style: callback(<json>);).
//
// $imageInfo - associative array to serialise
// $callback  - JavaScript function name; it is concatenated verbatim, so it
//              must already have been validated as a plain identifier (the
//              request-parsing code does this for the 'callback' parameter)
223 function outputJSON($imageInfo, $callback)
// The response is labelled as plain text rather than a JSON/JavaScript type.
225 header('Content-type: text/plain');
226 $jsonOutput = json_encode($imageInfo);
228 $jsonOutput = $callback . '(' . $jsonOutput . ');';
// Main request flow, part 1: inspect the source image, answer JSON metadata
// requests, and set up the fixed conversion parameters.
233 // Get the image size and depth
234 $imageInfo = getImageInfo($zipPath, $file);
236 // Output json if requested
237 if ('json' == $ext) {
238 // $$$ we should determine the output size first based on requested scale
239 outputJSON($imageInfo, $callback);
243 // Unfortunately kakadu requires us to know a priori if the
244 // output file should be .ppm or .pgm. By decompressing to
245 // .bmp kakadu will write a file we can consistently turn into
246 // .pnm. Really kakadu should support .pnm as the file output
247 // extension and automatically write ppm or pgm format as
// The flag is hard-wired to true, so the .bmp link name is the one in use.
249 $decompressToBmp = true;
250 if ($decompressToBmp) {
251 $stdoutLink = '/tmp/stdout.bmp';
253 $stdoutLink = '/tmp/stdout.ppm';
// Extension of the stored file, lower-cased; compared with the requested
// output extension later to detect the pass-through case.
256 $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
258 // Rotate is currently only supported for jp2 since it does not add server load
259 $allowedRotations = array("0", "90", "180", "270");
// NOTE(review): 'rotate' is read without isset(), and in_array() is used
// without the strict flag, so numeric-looking strings that only compare equal
// (for example "90.0") pass the check and are later interpolated into the
// kdu_expand command line — consider in_array($rotate, $allowedRotations, true).
260 $rotate = $_REQUEST['rotate'];
261 if ( !in_array($rotate, $allowedRotations) ) {
265 // Image conversion options
267 $jpegOptions = '-quality 75';
// Main request flow, part 2: derive the reduction factor ($powReduce as an
// exponent, $scale as the matching power of two) and prepare the environment
// for the external tools.
269 // The pbmreduce reduction factor produces an image with dimension 1/n
270 // The kakadu reduction factor produces an image with dimension 1/(2^n)
271 // $$$ handle continuous values for scale
// When a target height is given, the reduction is chosen from the ratio of
// original height to requested height.
// NOTE(review): 'origHeight' is not checked with isset(), and a 'height' of 0
// or a non-numeric value makes the divisor 0.0 — confirm callers never send
// that, or guard the division.
272 if (isset($_REQUEST['height'])) {
273 $ratio = floatval($_REQUEST['origHeight']) / floatval($_REQUEST['height']);
277 } else if ($ratio <= 4) {
281 //$powReduce = 3; //too blurry!
287 // $$$ could be cleaner
288 // Provide next smaller power of two reduction
// intval() turns a missing or non-numeric 'scale' into 0.
289 $scale = intval($_REQUEST['scale']);
// The chain buckets the requested scale by successive powers of two.
292 } else if (2 > $scale) {
294 } else if (4 > $scale) {
296 } else if (8 > $scale) {
298 } else if (16 > $scale) {
300 } else if (32 > $scale) {
302 } else if (64 > $scale) {
305 // $$$ Leaving this in as default though I'm not sure why it is...
// Normalise $scale to the exact power of two implied by $powReduce.
308 $scale = pow(2, $powReduce);
311 // Override depending on source image format
312 // $$$ consider doing a 302 here instead, to make better use of the browser cache
313 // Limit scaling for 1-bit images. See https://bugs.edge.launchpad.net/bookreader/+bug/486011
314 if (1 == $imageInfo['bits']) {
319 // Hard limit so there are some black pixels to use!
// The link gives /dev/stdout a file name with the extension the decoder needs,
// so its "-o" output can still be piped onward. It is created only once.
327 if (!file_exists($stdoutLink))
329 system('ln -s /dev/stdout ' . $stdoutLink);
// Lets the kakadu binaries locate their shared libraries.
333 putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');
// First stage of the pipeline: extract the requested member from the archive.
335 $unzipCmd = getUnarchiveCommand($zipPath, $file);
// Builds the pipeline stage that decodes the extracted image into PNM data,
// applying the requested reduction (and, for the kdu_expand branch, rotation).
//
// $imageType - image type name; selects which decoder command is produced.
//              An unrecognised type is reported through BRfatal().
//
// Reads its processing parameters from globals set by the main script.
337 function getDecompressCmd($imageType) {
339 global $powReduce, $rotate, $scale; // $$$ clean up
340 global $decompressToBmp; // $$$ TODO remove now that we have bit depth info
343 switch ($imageType) {
// kdu_expand reduces and rotates while decoding; it reads the image from
// stdin and writes to the $stdoutLink path.
346 " | " . $kduExpand . " -no_seek -quiet -reduce $powReduce -rotate $rotate -i /dev/stdin -o " . $stdoutLink;
347 if ($decompressToBmp) {
348 // We suppress output since bmptopnm always outputs on stderr
349 $decompressCmd .= ' | (bmptopnm 2>/dev/null)';
354 // We need to create a temporary file for tifftopnm since it cannot
355 // work on a pipe (the file must be seekable).
356 // We use the BookReaderTiff prefix to give a hint in case things don't
358 $tempFile = tempnam("/tmp", "BookReaderTiff");
360 // $$$ look at bit depth when reducing
// The extracted data is redirected into the temp file, then decoded from it.
362 ' > ' . $tempFile . ' ; tifftopnm ' . $tempFile . ' 2>/dev/null' . reduceCommand($scale);
// The remaining decoders work on the pipe directly; scaling is delegated to
// reduceCommand().
366 $decompressCmd = ' | ( jpegtopnm 2>/dev/null ) ' . reduceCommand($scale);
370 $decompressCmd = ' | ( pngtopnm 2>/dev/null ) ' . reduceCommand($scale);
374 BRfatal('Unknown image type: ' . $imageType);
377 return $decompressCmd;
// Main request flow, part 3: assemble the full shell pipeline
// (extract | decode | encode) for the requested output format.
380 $decompressCmd = getDecompressCmd($imageInfo['type']);
382 // Non-integer scaling is currently disabled on the cluster
383 // if (isset($_REQUEST['height'])) {
384 // $cmd .= " | pnmscale -height {$_REQUEST['height']} ";
// Final stage re-encodes the PNM stream as PNG or JPEG.
389 $compressCmd = ' | pnmtopng ' . $pngOptions;
395 $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
396 $ext = 'jpeg'; // for matching below
// $rotate is compared strictly against the string "0" because it comes from
// the request as a string.
401 if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
402 // Just pass through original data if same format and size
405 $cmd = $unzipCmd . $decompressCmd . $compressCmd;
410 // If the command has its initial output on stdout the headers will be emitted followed
411 // by the stdout output. If initial output is on stderr an error message will be
415 // true - if command emits stdout and has zero exit code
416 // false - command has initial output on stderr or non-zero exit code
417 // &$errorMessage - error string if there was an error
419 // $$$ Tested with our command-line image processing. May be deadlocks for
// Runs $cmd and streams its stdout to the client, but only if the command's
// first output arrives on stdout rather than stderr.
//
// $headers       - header lines to emit before the command output
// $cmd           - shell command to execute
// &$errorMessage - receives a description when the command fails
421 function passthruIfSuccessful($headers, $cmd, &$errorMessage)
426 $descriptorspec = array(
427 0 => array("pipe", "r"), // stdin is a pipe that the child will read from
428 1 => array("pipe", "w"), // stdout is a pipe that the child will write to
429 2 => array("pipe", "w"), // stderr is a pipe to write to
435 $process = proc_open($cmd, $descriptorspec, $pipes, $cwd, $env);
// proc_open() returns a resource on success.
437 if (is_resource($process)) {
438 // $pipes now looks like this:
439 // 0 => writeable handle connected to child stdin
440 // 1 => readable handle connected to child stdout
441 // 2 => readable handle connected to child stderr
447 // check whether we get input first on stdout or stderr
// presumably $stdout / $stderr alias $pipes[1] / $pipes[2] — TODO confirm
448 $read = array($stdout, $stderr);
// A NULL timeout makes stream_select() block until one of the streams has
// data; on return $read holds only the streams that are ready.
451 $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout
452 if (false === $numChanged) {
454 $errorMessage = 'Select failed';
// Success path requires stdout to be the one and only ready stream.
457 if ($read[0] == $stdout && (1 == $numChanged)) {
458 // Got output first on stdout (only)
459 // $$$ make sure we get all stdout
460 $output = fopen('php://output', 'w');
461 foreach($headers as $header) {
// Copies the child's stdout to the response body.
464 stream_copy_to_stream($pipes[1], $output);
465 fclose($output); // okay since tied to special php://output
468 // Got output on stderr
469 // $$$ make sure we get all stderr
470 $errorMessage = stream_get_contents($stderr);
479 // It is important that you close any pipes before calling
480 // proc_close in order to avoid a deadlock
// proc_close() yields the command's exit code.
481 $cmdRet = proc_close($process);
// Appended (not assigned) so that any stderr text captured above is kept.
484 $errorMessage .= "Command failed with result code " . $cmdRet;
// Main request flow, part 4: run the pipeline, retry once with adjusted
// parameters for jp2 sources, and report a fatal error if that also fails.
// Cache-Control max-age of 15552000 seconds is 180 days.
490 $headers = array('Content-type: '. $MIMES[$ext],
491 'Cache-Control: max-age=15552000');
494 if (! passthruIfSuccessful($headers, $cmd, $errorMessage)) {
495 // $$$ automated reporting
496 trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);
498 // Try some content-specific recovery
500 if ($imageInfo['type'] == 'jp2') {
// 'Clevels' is read from the kdu_expand record of the image; the requested
// reduction exponent is capped at that value.
501 $records = getJp2Records($zipPath, $file);
502 if ($powReduce > intval($records['Clevels'])) {
503 $powReduce = $records['Clevels'];
// NOTE(review): this assigns $reduce, whereas the rest of the script keeps the
// power-of-two factor in $scale — looks like $scale was intended; confirm.
504 $reduce = pow(2, $powReduce);
// Rebuild the decode stage so that it picks up the adjusted $powReduce.
510 $cmd = $unzipCmd . getDecompressCmd($imageInfo['type']) . $compressCmd;
511 if (passthruIfSuccessful($headers, $cmd, $errorMessage)) {
514 trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
519 BRfatal('Problem processing image - command failed');
523 // passthru ($cmd); # cmd returns image data
// $tempFile is only set when the TIFF decode path created a temporary file.
525 if (isset($tempFile)) {
/**
 * Report a fatal error to the client as a JavaScript alert() and stop.
 *
 * The message is encoded with json_encode() instead of being interpolated
 * into a quoted literal: several callers build the message from data that is
 * not under our control (for example the detected image type), and a quote,
 * backslash or newline in it would otherwise break out of the string.
 * The JSON_HEX_* flags additionally keep the output inert if the response is
 * ever embedded in HTML.
 *
 * PHP function names are case-insensitive, so the BRfatal(...) spelling used
 * at the call sites resolves to this function.
 *
 * @param string $string human-readable error message
 * @return void this function does not return; the script is terminated
 */
function BRFatal($string) {
    $literal = json_encode(
        (string) $string,
        JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_QUOT | JSON_HEX_AMP
    );
    if (false === $literal) {
        // json_encode() fails on malformed UTF-8; still emit a valid alert.
        $literal = '"Unknown error"';
    }
    echo 'alert(' . $literal . ");\n";
    // Callers treat this as fatal and do not handle a return.
    die(-1);
}
534 // Returns true if using a power node
// Detects the host type by probing the hardware with shell commands: first by
// counting lspci lines that mention "Realtek", then by searching
// /proc/cpuinfo for "AMD".
535 function onPowerNode() {
// fgrep -c prints the number of matching lines, so $output[0] is a count.
// NOTE(review): if the command produces no output, $output[0] is undefined.
536 exec("lspci | fgrep -c Realtek", $output, $return);
537 if ("0" != $output[0]) {
// egrep -q prints nothing; the result is carried in the exit status ($return).
// NOTE(review): exec() appends to an existing $output array rather than
// replacing it, so $output still holds the lspci result after this call.
540 exec("egrep -q AMD /proc/cpuinfo", $output, $return);
// Builds the pipeline stage that shrinks a PNM stream by the integer factor
// $scale using pnmscale. Two variants exist, differing only in the -nomix
// option; stderr of the tool is discarded in both.
// $scale is concatenated into the command unescaped, so it must be a number
// computed by the script, never raw request text.
548 function reduceCommand($scale) {
551 return ' | pnmscale -reduce ' . $scale . ' 2>/dev/null ';
553 return ' | pnmscale -nomix -reduce ' . $scale . ' 2>/dev/null ';
560 function checkPrivs($filename) {
561 if (!is_readable($filename)) {
562 header('HTTP/1.1 403 Forbidden');