Refactor BookReaderImages into class (.inc.php) and web request (.php) files. Add...

author Michael Ang <mang@archive.org>

Tue, 27 Apr 2010 18:54:50 +0000 (18:54 +0000)

committer Michael Ang <mang@archive.org>

Tue, 27 Apr 2010 18:54:50 +0000 (18:54 +0000)
author Michael Ang <mang@archive.org>
Tue, 27 Apr 2010 18:54:50 +0000 (18:54 +0000)
committer Michael Ang <mang@archive.org>
Tue, 27 Apr 2010 18:54:50 +0000 (18:54 +0000)
diff --git a/BookReaderIA/datanode/BookReaderImages.inc.php b/BookReaderIA/datanode/BookReaderImages.inc.php

new file mode 100644 (file)

index 0000000..b324481
--- /dev/null
+++ b/BookReaderIA/datanode/BookReaderImages.inc.php
@@ -0,0 +1,578 @@
+<?php
+
+/*
+Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
+
+This file is part of BookReader.  The full source code can be found at GitHub:
+http://github.com/openlibrary/bookreader
+
+The canonical short name of an image type is the same as in the MIME type.
+For example both .jpeg and .jpg are considered to have type "jpeg" since
+the MIME type is "image/jpeg".
+
+    BookReader is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    BookReader is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+class BookReaderImages
+{
+    public $MIMES = array('gif' => 'image/gif',
+                   'jp2' => 'image/jp2',
+                   'jpg' => 'image/jpeg',
+                   'jpeg' => 'image/jpeg',
+                   'png' => 'image/png',
+                   'tif' => 'image/tiff',
+                   'tiff' => 'image/tiff');
+                   
+    public $EXTENSIONS = array('gif' => 'gif',
+                        'jp2' => 'jp2',
+                        'jpeg' => 'jpeg',
+                        'jpg' => 'jpeg',
+                        'png' => 'png',
+                        'tif' => 'tiff',
+                        'tiff' => 'tiff');
+                   
+    // Paths to command-line tools
+    var $exiftool = '/petabox/sw/books/exiftool/exiftool';
+    var $kduExpand = '/petabox/sw/bin/kdu_expand';
+    
+    /**
+     * Serve one image (or its JSON description) out of an image stack archive.
+     *
+     * Approach:
+     *   - Get info about the requested image (input)
+     *   - Get info about the requested output format
+     *   - Determine processing parameters
+     *   - Process the image
+     *   - Return the image data
+     *
+     * Keys read from $requestEnv:
+     *   zip                - path to the zip/tar image stack
+     *   file               - path of the image inside the archive
+     *   ext                - 'png', 'json', or anything else (treated as jpeg)
+     *   callback           - optional JSONP callback; must be a plain JS identifier
+     *   rotate             - "0", "90", "180" or "270"; any other value becomes "0"
+     *   height, origHeight - optional; the reduction is picked from their ratio
+     *   scale              - reduction factor, used when height is absent
+     *
+     * The response is written directly to the client.  Unrecoverable problems
+     * end the request through BRfatal(); an unreadable archive ends it through
+     * checkPrivs().
+     */
+    function serveRequest($requestEnv) {
+        // Process some of the request parameters
+        $zipPath  = $requestEnv['zip'];
+        $file     = $requestEnv['file'];
+
+        // Default to jpeg when the request does not name an output type
+        if (isset($requestEnv['ext']) && $requestEnv['ext']) {
+            $ext = $requestEnv['ext'];
+        } else {
+            $ext = 'jpeg';
+        }
+
+        if (isset($requestEnv['callback'])) {
+            // validate callback is valid JS identifier (only)
+            $callback = $requestEnv['callback'];
+            $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/';
+            if (! preg_match($identifierPatt, $callback)) {
+                $this->BRfatal('Invalid callback');
+            }
+        } else {
+            $callback = null;
+        }
+
+        // Make sure the image stack is readable - return 403 if not
+        $this->checkPrivs($zipPath);
+
+        // Get the image size and depth
+        $imageInfo = $this->getImageInfo($zipPath, $file);
+
+        // Output json if requested
+        if ('json' == $ext) {
+            // $$$ we should determine the output size first based on requested scale
+            $this->outputJSON($imageInfo, $callback); // $$$ move to BookReaderRequest
+            exit;
+        }
+
+        // Unfortunately kakadu requires us to know a priori if the
+        // output file should be .ppm or .pgm.  By decompressing to
+        // .bmp kakadu will write a file we can consistently turn into
+        // .pnm.  Really kakadu should support .pnm as the file output
+        // extension and automatically write ppm or pgm format as
+        // appropriate.
+        $this->decompressToBmp = true; // $$$ shouldn't be necessary if we use file info to determine output format
+        if ($this->decompressToBmp) {
+          $stdoutLink = '/tmp/stdout.bmp';
+        } else {
+          $stdoutLink = '/tmp/stdout.ppm';
+        }
+
+        $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
+
+        // Rotate is currently only supported for jp2 since it does not add server load.
+        // The comparison is strict so that only the four exact strings below can
+        // reach the shell command and the pass-through test further down.
+        $allowedRotations = array("0", "90", "180", "270");
+        $rotate = "0";
+        if (isset($requestEnv['rotate']) && is_scalar($requestEnv['rotate'])
+                && in_array((string) $requestEnv['rotate'], $allowedRotations, true)) {
+            $rotate = (string) $requestEnv['rotate'];
+        }
+
+        // Image conversion options
+        $pngOptions = '';
+        $jpegOptions = '-quality 75';
+
+        // The pbmreduce reduction factor produces an image with dimension 1/n
+        // The kakadu reduction factor produces an image with dimension 1/(2^n)
+        // $$$ handle continuous values for scale
+        if (isset($requestEnv['height'])) {
+            $origHeight = isset($requestEnv['origHeight']) ? floatval($requestEnv['origHeight']) : 0.0;
+            $reqHeight  = floatval($requestEnv['height']);
+            // Guard the division: a zero, negative or non-numeric height lands
+            // in the first (smallest reduction) bucket instead of dividing by zero.
+            $ratio = ($reqHeight > 0) ? ($origHeight / $reqHeight) : 0;
+            if ($ratio <= 2) {
+                $scale = 2;
+                $powReduce = 1;
+            } else if ($ratio <= 4) {
+                $scale = 4;
+                $powReduce = 2;
+            } else {
+                //$powReduce = 3; //too blurry!
+                $scale = 2;
+                $powReduce = 1;
+            }
+
+        } else {
+            // $$$ could be cleaner
+            // Provide next smaller power of two reduction
+            $scale = isset($requestEnv['scale']) ? intval($requestEnv['scale']) : 0;
+            if (2 > $scale) {
+                $powReduce = 0;
+            } else if (4 > $scale) {
+                $powReduce = 1;
+            } else if (8 > $scale) {
+                $powReduce = 2;
+            } else if (16 > $scale) {
+                $powReduce = 3;
+            } else if (32 > $scale) {
+                $powReduce = 4;
+            } else if (64 > $scale) {
+                $powReduce = 5;
+            } else {
+                // $$$ Leaving this in as default though I'm not sure why it is...
+                $powReduce = 3;
+            }
+            $scale = pow(2, $powReduce);
+        }
+
+        // Override depending on source image format
+        // $$$ consider doing a 302 here instead, to make better use of the browser cache
+        // Limit scaling for 1-bit images.  See https://bugs.edge.launchpad.net/bookreader/+bug/486011
+        if (1 == $imageInfo['bits']) {
+            if ($scale > 1) {
+                $scale /= 2;
+                $powReduce -= 1;
+
+                // Hard limit so there are some black pixels to use!
+                if ($scale > 4) {
+                    $scale = 4;
+                    $powReduce = 2;
+                }
+            }
+        }
+
+        // kdu_expand is told to write to this path, so it has to resolve to stdout
+        if (!file_exists($stdoutLink))
+        {
+          system('ln -s /dev/stdout ' . $stdoutLink);
+        }
+
+        putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');
+
+        $unzipCmd  = $this->getUnarchiveCommand($zipPath, $file);
+
+        $decompressCmd = $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink);
+
+        // Non-integer scaling is currently disabled on the cluster
+        // if (isset($_REQUEST['height'])) {
+        //     $cmd .= " | pnmscale -height {$_REQUEST['height']} ";
+        // }
+
+        switch ($ext) {
+            case 'png':
+                $compressCmd = ' | pnmtopng ' . $pngOptions;
+                break;
+
+            case 'jpeg':
+            case 'jpg':
+            default:
+                $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
+                $ext = 'jpeg'; // for matching below
+                break;
+
+        }
+
+        if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
+            // Just pass through original data if same format and size
+            $cmd = $unzipCmd;
+        } else {
+            $cmd = $unzipCmd . $decompressCmd . $compressCmd;
+        }
+
+        // print $cmd;
+
+        $filenameForClient = $this->filenameForClient($file, $ext);
+
+        // $ext is 'png' or 'jpeg' at this point, both of which are keys of $this->MIMES
+        $headers = array('Content-type: '. $this->MIMES[$ext],
+                         'Cache-Control: max-age=15552000',
+                         'Content-disposition: inline; filename=' . $filenameForClient);
+
+        $errorMessage = '';
+        if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
+            // $$$ automated reporting
+            trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);
+
+            // Try some content-specific recovery
+            $recovered = false;
+            if ($imageInfo['type'] == 'jp2') {
+                // Retry with the reduction capped at the Clevels value reported by
+                // kdu_expand (presumably the number of available resolution levels),
+                // or with no reduction at all.  intval() keeps the value numeric
+                // before it is placed on the command line.
+                $records = $this->getJp2Records($zipPath, $file);
+                $clevels = isset($records['Clevels']) ? intval($records['Clevels']) : 0;
+                if ($powReduce > $clevels) {
+                    $powReduce = $clevels;
+                } else {
+                    $powReduce = 0;
+                }
+
+                $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
+                if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
+                    $recovered = true;
+                } else {
+                    trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
+                }
+            }
+
+            if (! $recovered) {
+                $this->BRfatal('Problem processing image - command failed');
+            }
+        }
+
+        // NOTE: $tempFile is never assigned in this scope (the TIFF temporary
+        // file is created inside getDecompressCmd()), so this is a no-op here.
+        if (isset($tempFile)) {
+            unlink($tempFile);
+        }
+    }
+    
+    /*
+     * Build the shell command that writes one member of an archive to stdout.
+     * The archive kind is taken from the (case-insensitive) suffix after the
+     * last dot of $archivePath: "zip" uses unzip, "tar" uses 7z.  Anything else
+     * is reported through BRfatal().
+     */
+    function getUnarchiveCommand($archivePath, $file)
+    {
+        $quotedArchive = escapeshellarg($archivePath);
+        $quotedMember  = escapeshellarg($file);
+
+        $matches = array();
+        if (!preg_match('/\.([^\.]+)$/', strtolower($archivePath), $matches)) {
+            $this->BRfatal('Bad image stack path');
+        } else {
+            switch ($matches[1]) {
+                case 'zip':
+                    return 'unzip -p ' . $quotedArchive . ' ' . $quotedMember;
+
+                case 'tar':
+                    return ' ( 7z e -so ' . $quotedArchive . ' ' . $quotedMember . ' 2>/dev/null ) ';
+
+                default:
+                    $this->BRfatal('Incompatible archive format');
+            }
+        }
+
+        // Only reached if BRfatal() above returned
+        $this->BRfatal('Bad image stack path or archive format');
+
+    }
+    
+    /*
+     * Map a file extension (a key of $this->EXTENSIONS) to its canonical image
+     * type.  Unknown extensions are reported through BRfatal().
+     */
+    function imageExtensionToType($extension)
+    {
+        $known = array_key_exists($extension, $this->EXTENSIONS);
+        if (!$known) {
+            $this->BRfatal('Unknown image extension');
+        } else {
+            return $this->EXTENSIONS[$extension];
+        }
+    }
+    
+    /*
+     * Look up the image information for one archive member.  The fields of the
+     * returned associative array may vary with the image type; the basic keys
+     * are width, height, type and bits.
+     */
+    function getImageInfo($zipPath, $file)
+    {
+        // Every type currently goes through exiftool - this is fast
+        $info = $this->getImageInfoFromExif($zipPath, $file);
+        return $info;
+
+        /*
+        $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
+        $type = imageExtensionToType($fileExt);
+
+        switch ($type) {
+            case "jp2":
+                return getImageInfoFromJp2($zipPath, $file);
+
+            default:
+                return getImageInfoFromExif($zipPath, $file);
+        }
+        */
+    }
+    
+    // Get the records of a JP2 as returned by kdu_expand.
+    // Each "key=value" output line becomes one entry of the returned array;
+    // lines without "=" are ignored.
+    function getJp2Records($zipPath, $file)
+    {
+        $unarchive = $this->getUnarchiveCommand($zipPath, $file);
+        $cmd = $unarchive . ' | ' . $this->kduExpand
+                 . ' -no_seek -quiet -i /dev/stdin -record /dev/stdout';
+
+        $lines = array();
+        exec($cmd, $lines);
+
+        $records = array();
+        foreach ($lines as $recordLine) {
+            // Split on the first "=" only so the value may itself contain "="
+            $pair = explode("=", $recordLine, 2);
+            if (count($pair) != 1) {
+                $records[$pair[0]] = $pair[1];
+            }
+        }
+
+        return $records;
+    }
+    
+    /*
+     * Get the image width, height, bit depth and type by piping the archive
+     * member through exiftool.
+     *
+     * Returns an array with the keys 'width', 'height', 'bits' and 'type'.
+     * Types other than jp2, tiff, jpeg and png are reported through BRfatal().
+     */
+    function getImageInfoFromExif($zipPath, $file)
+    {
+
+        // We look for all the possible tags of interest then act on the
+        // ones presumed present based on the file type
+        $tagsToGet = ' -ImageWidth -ImageHeight -FileType'        // all formats
+                     . ' -BitsPerComponent -ColorSpace'          // jp2
+                     . ' -BitDepth'                              // png
+                     . ' -BitsPerSample';                        // tiff
+
+        $cmd = $this->getUnarchiveCommand($zipPath, $file)
+            . ' | '. $this->exiftool . ' -S -fast' . $tagsToGet . ' -';
+        $output = array();
+        exec($cmd, $output);
+
+        $tags = Array();
+        foreach ($output as $line) {
+            // Split on the first ": " only, so a value containing ": " stays whole
+            $keyValue = explode(": ", $line, 2);
+            if (2 != count($keyValue)) {
+                // not a "Tag: value" line
+                continue;
+            }
+            $tags[$keyValue[0]] = $keyValue[1];
+        }
+
+        // Tags that exiftool did not report fall back to 0 / ''
+        $width  = isset($tags["ImageWidth"])  ? intval($tags["ImageWidth"])  : 0;
+        $height = isset($tags["ImageHeight"]) ? intval($tags["ImageHeight"]) : 0;
+        $type   = isset($tags["FileType"])    ? strtolower($tags["FileType"]) : '';
+
+        $bits = 0;
+        switch ($type) {
+            case "jp2":
+                $bits = isset($tags["BitsPerComponent"]) ? intval($tags["BitsPerComponent"]) : 0;
+                break;
+            case "tiff":
+                $bits = isset($tags["BitsPerSample"]) ? intval($tags["BitsPerSample"]) : 0;
+                break;
+            case "jpeg":
+                $bits = 8;
+                break;
+            case "png":
+                $bits = isset($tags["BitDepth"]) ? intval($tags["BitDepth"]) : 0;
+                break;
+            default:
+                $this->BRfatal("Unsupported image type");
+                break;
+        }
+
+
+        $retval = Array('width' => $width, 'height' => $height,
+            'bits' => $bits, 'type' => $type);
+
+        return $retval;
+    }
+    
+    /*
+     * Send $imageInfo to the client as JSON.  When $callback is truthy the JSON
+     * is wrapped as "callback(...);".  The content type sent is text/plain.
+     */
+    function outputJSON($imageInfo, $callback)
+    {
+        header('Content-type: text/plain');
+
+        $encoded = json_encode($imageInfo);
+        echo $callback ? ($callback . '(' . $encoded . ');') : $encoded;
+    }
+    
+    /*
+     * Build the shell fragment, appended after the unarchive command, that
+     * decodes the source image to PNM on stdout.
+     *
+     *   $imageType  - 'jp2', 'tiff', 'jpeg' or 'png'; anything else goes to BRfatal()
+     *   $powReduce  - value for kdu_expand -reduce (jp2 only)
+     *   $rotate     - value for kdu_expand -rotate (jp2 only)
+     *   $scale      - reduction passed to reduceCommand() (tiff, jpeg, png)
+     *   $stdoutLink - path given to kdu_expand as its output file (jp2 only)
+     */
+    function getDecompressCmd($imageType, $powReduce, $rotate, $scale, $stdoutLink) {
+
+        switch ($imageType) {
+            case 'jp2':
+                // The numeric options are forced to integers before they are
+                // placed on the command line.
+                $decompressCmd =
+                    " | " . $this->kduExpand . " -no_seek -quiet -reduce " . intval($powReduce)
+                    . " -rotate " . intval($rotate) . " -i /dev/stdin -o " . $stdoutLink;
+                if (!empty($this->decompressToBmp)) {
+                    // We suppress output since bmptopnm always outputs on stderr
+                    $decompressCmd .= ' | (bmptopnm 2>/dev/null)';
+                }
+                break;
+
+            case 'tiff':
+                // We need to create a temporary file for tifftopnm since it cannot
+                // work on a pipe (the file must be seekable).
+                // We use the BookReaderTiff prefix to give a hint in case things don't
+                // get cleaned up.
+                $tempFile = tempnam("/tmp", "BookReaderTiff");
+                if (false === $tempFile) {
+                    $this->BRfatal('Unable to create temporary file');
+                }
+                $tempArg = escapeshellarg($tempFile);
+
+                // $$$ look at bit depth when reducing
+                // The subshell removes the temporary file once tifftopnm has
+                // finished with it; its stdout still feeds the rest of the pipeline.
+                $decompressCmd =
+                    ' > ' . $tempArg . ' ; ( tifftopnm ' . $tempArg . ' 2>/dev/null ; rm -f ' . $tempArg . ' ) '
+                    . $this->reduceCommand($scale);
+                break;
+
+            case 'jpeg':
+                $decompressCmd = ' | ( jpegtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
+                break;
+
+            case 'png':
+                $decompressCmd = ' | ( pngtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
+                break;
+
+            default:
+                $this->BRfatal('Unknown image type: ' . $imageType);
+                break;
+        }
+        return $decompressCmd;
+    }
+    
+    // Run $cmd and stream its stdout to the client.
+    //
+    // If the command has its initial output on stdout the headers will be emitted followed
+    // by the stdout output.  If initial output is on stderr an error message will be
+    // returned.
+    //
+    // Returns:
+    //   true - if command emits stdout and has zero exit code
+    //   false - command could not be started, select failed, command has
+    //           initial output on stderr, or non-zero exit code
+    //   &$errorMessage - error string if there was an error
+    //
+    // $$$ Tested with our command-line image processing.  May be deadlocks for
+    //     other cases.
+    function passthruIfSuccessful($headers, $cmd, &$errorMessage)
+    {
+        $retVal = false;
+        $errorMessage = '';
+
+        $descriptorspec = array(
+           0 => array("pipe", "r"),  // stdin is a pipe that the child will read from
+           1 => array("pipe", "w"),  // stdout is a pipe that the child will write to
+           2 => array("pipe", "w"),   // stderr is a pipe to write to
+        );
+
+        $cwd = NULL;
+        $env = NULL;
+
+        $process = proc_open($cmd, $descriptorspec, $pipes, $cwd, $env);
+
+        if (!is_resource($process)) {
+            // Give the caller something to log instead of an empty message
+            $errorMessage = 'Unable to start command';
+            return false;
+        }
+
+        // $pipes now looks like this:
+        // 0 => writeable handle connected to child stdin
+        // 1 => readable handle connected to child stdout
+        // 2 => readable handle connected to child stderr
+        $stdin = $pipes[0];
+        $stdout = $pipes[1];
+        $stderr = $pipes[2];
+
+        // check whether we get input first on stdout or stderr
+        $read = array($stdout, $stderr);
+        $write = NULL;
+        $except = NULL;
+        $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout
+
+        if (false === $numChanged) {
+            // select failed - do not go on to read from either pipe
+            $errorMessage = 'Select failed';
+        } else if ((1 == $numChanged) && in_array($stdout, $read, true)) {
+            // Got output first on stdout (only)
+            // $$$ make sure we get all stdout
+            $output = fopen('php://output', 'w');
+            foreach($headers as $header) {
+                header($header);
+            }
+            stream_copy_to_stream($stdout, $output);
+            fclose($output); // okay since tied to special php://output
+            $retVal = true;
+        } else {
+            // Got output on stderr
+            // $$$ make sure we get all stderr
+            $errorMessage = stream_get_contents($stderr);
+        }
+
+        fclose($stderr);
+        fclose($stdout);
+        fclose($stdin);
+
+        // It is important that you close any pipes before calling
+        // proc_close in order to avoid a deadlock
+        $cmdRet = proc_close($process);
+        if (0 != $cmdRet) {
+            $retVal = false;
+            $errorMessage .= "Command failed with result code " . $cmdRet;
+        }
+
+        return $retVal;
+    }
+    
+    // Report a fatal problem as a JavaScript alert() statement and end the
+    // request.  $string is emitted as-is (no escaping), so callers should pass
+    // only text that is safe inside a single-quoted JS string.
+    function BRfatal($string) {
+        echo 'alert(\'' . $string . '\');' . "\n";
+        exit(-1);
+    }
+    
+    // Returns true if using a power node.
+    // A node counts as a power node when lspci lists a Realtek device or,
+    // failing that, when /proc/cpuinfo mentions AMD.
+    function onPowerNode() {
+        $lines = array();
+        $status = 0;
+
+        // fgrep -c prints the number of matching lines
+        exec("lspci | fgrep -c Realtek", $lines, $status);
+        if ($lines[0] != "0") {
+            return true;
+        }
+
+        // egrep -q prints nothing; only its exit status is used
+        exec("egrep -q AMD /proc/cpuinfo", $lines, $status);
+        return (0 == $status);
+    }
+    
+    // Returns the pnmscale pipeline fragment that reduces the image by $scale,
+    // or an empty string when $scale is 1.  The -nomix option is added when
+    // not running on a power node.
+    function reduceCommand($scale) {
+        if (1 == $scale) {
+            return '';
+        }
+
+        $mixOption = $this->onPowerNode() ? '' : '-nomix ';
+        return ' | pnmscale ' . $mixOption . '-reduce ' . $scale . ' 2>/dev/null ';
+    }
+    
+    // Ends the request with a 403 response when $filename is not readable;
+    // otherwise returns without doing anything.
+    function checkPrivs($filename) {
+        if (is_readable($filename)) {
+            return;
+        }
+
+        header('HTTP/1.1 403 Forbidden');
+        exit(0);
+    }
+    
+    // Given file path (inside archive) and output file extension, return a filename
+    // suitable for Content-disposition header: the base name of the file without
+    // its directory or extension, followed by the output extension ('jpeg' is
+    // written as 'jpg').
+    function filenameForClient($filePath, $ext) {
+        $baseName = pathinfo($filePath, PATHINFO_FILENAME);
+        $suffix = ('jpeg' == $ext) ? 'jpg' : $ext;
+        return $baseName . '.' . $suffix;
+    }
+}
+
+?>
+\ No newline at end of file
diff --git a/BookReaderIA/datanode/BookReaderImages.php b/BookReaderIA/datanode/BookReaderImages.php

deleted file mode 100644 (file)

index 5d3c99d..0000000
--- a/BookReaderIA/datanode/BookReaderImages.php
+++ /dev/null
@@ -1,568 +0,0 @@
-<?php
-
-/*
-Copyright(c) 2008-2010 Internet Archive. Software license AGPL version 3.
-
-This file is part of BookReader.  The full source code can be found at GitHub:
-http://github.com/openlibrary/bookreader
-
-The canonical short name of an image type is the same as in the MIME type.
-For example both .jpeg and .jpg are considered to have type "jpeg" since
-the MIME type is "image/jpeg".
-
-    BookReader is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Affero General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    BookReader is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Affero General Public License for more details.
-
-    You should have received a copy of the GNU Affero General Public License
-    along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-class BookReaderImages
-{
-    public $MIMES = array('gif' => 'image/gif',
-                   'jp2' => 'image/jp2',
-                   'jpg' => 'image/jpeg',
-                   'jpeg' => 'image/jpeg',
-                   'png' => 'image/png',
-                   'tif' => 'image/tiff',
-                   'tiff' => 'image/tiff');
-                   
-    public $EXTENSIONS = array('gif' => 'gif',
-                        'jp2' => 'jp2',
-                        'jpeg' => 'jpeg',
-                        'jpg' => 'jpeg',
-                        'png' => 'png',
-                        'tif' => 'tiff',
-                        'tiff' => 'tiff');
-                   
-    // Paths to command-line tools
-    var $exiftool = '/petabox/sw/books/exiftool/exiftool';
-    var $kduExpand = '/petabox/sw/bin/kdu_expand';
-    
-    /*
-     * Approach:
-     * 
-     * Get info about requested image (input)
-     * Get info about requested output format
-     * Determine processing parameters
-     * Process image
-     * Return image data
-     * Clean up temporary files
-     */
-     
-     function serveRequest($requestEnv) {
-        // Process some of the request parameters
-        $zipPath  = $requestEnv['zip'];
-        $file     = $requestEnv['file'];
-        if (! $ext) {
-            $ext = $requestEnv['ext'];
-        } else {
-            // Default to jpg
-            $ext = 'jpeg';
-        }
-        if (isset($requestEnv['callback'])) {
-            // validate callback is valid JS identifier (only)
-            $callback = $requestEnv['callback'];
-            $identifierPatt = '/^[[:alpha:]$_]([[:alnum:]$_])*$/';
-            if (! preg_match($identifierPatt, $callback)) {
-                $this->BRfatal('Invalid callback');
-            }
-        } else {
-            $callback = null;
-        }
-        
-        // Make sure the image stack is readable - return 403 if not
-        $this->checkPrivs($zipPath);
-        
-        
-        // Get the image size and depth
-        $imageInfo = $this->getImageInfo($zipPath, $file);
-        
-        // Output json if requested
-        if ('json' == $ext) {
-            // $$$ we should determine the output size first based on requested scale
-            $this->outputJSON($imageInfo, $callback); // $$$ move to BookReaderRequest
-            exit;
-        }
-        
-        // Unfortunately kakadu requires us to know a priori if the
-        // output file should be .ppm or .pgm.  By decompressing to
-        // .bmp kakadu will write a file we can consistently turn into
-        // .pnm.  Really kakadu should support .pnm as the file output
-        // extension and automatically write ppm or pgm format as
-        // appropriate.
-        $this->decompressToBmp = true; // $$$ shouldn't be necessary if we use file info to determine output format
-        if ($this->decompressToBmp) {
-          $stdoutLink = '/tmp/stdout.bmp';
-        } else {
-          $stdoutLink = '/tmp/stdout.ppm';
-        }
-        
-        $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
-        
-        // Rotate is currently only supported for jp2 since it does not add server load
-        $allowedRotations = array("0", "90", "180", "270");
-        $rotate = $requestEnv['rotate'];
-        if ( !in_array($rotate, $allowedRotations) ) {
-            $rotate = "0";
-        }
-        
-        // Image conversion options
-        $pngOptions = '';
-        $jpegOptions = '-quality 75';
-        
-        // The pbmreduce reduction factor produces an image with dimension 1/n
-        // The kakadu reduction factor produceds an image with dimension 1/(2^n)
-        // $$$ handle continuous values for scale
-        if (isset($requestEnv['height'])) {
-            $ratio = floatval($requestEnv['origHeight']) / floatval($requestEnv['height']);
-            if ($ratio <= 2) {
-                $scale = 2;
-                $powReduce = 1;    
-            } else if ($ratio <= 4) {
-                $scale = 4;
-                $powReduce = 2;
-            } else {
-                //$powReduce = 3; //too blurry!
-                $scale = 2;
-                $powReduce = 1;
-            }
-        
-        } else {
-            // $$$ could be cleaner
-            // Provide next smaller power of two reduction
-            $scale = intval($requestEnv['scale']);
-            if (1 >= $scale) {
-                $powReduce = 0;
-            } else if (2 > $scale) {
-                $powReduce = 0;
-            } else if (4 > $scale) {
-                $powReduce = 1;
-            } else if (8 > $scale) {
-                $powReduce = 2;
-            } else if (16 > $scale) {
-                $powReduce = 3;
-            } else if (32 > $scale) {
-                $powReduce = 4;
-            } else if (64 > $scale) {
-                $powReduce = 5;
-            } else {
-                // $$$ Leaving this in as default though I'm not sure why it is...
-                $powReduce = 3;
-            }
-            $scale = pow(2, $powReduce);
-        }
-        
-        // Override depending on source image format
-        // $$$ consider doing a 302 here instead, to make better use of the browser cache
-        // Limit scaling for 1-bit images.  See https://bugs.edge.launchpad.net/bookreader/+bug/486011
-        if (1 == $imageInfo['bits']) {
-            if ($scale > 1) {
-                $scale /= 2;
-                $powReduce -= 1;
-                
-                // Hard limit so there are some black pixels to use!
-                if ($scale > 4) {
-                    $scale = 4;
-                    $powReduce = 2;
-                }
-            }
-        }
-        
-        if (!file_exists($stdoutLink)) 
-        {  
-          system('ln -s /dev/stdout ' . $stdoutLink);  
-        }
-        
-        
-        putenv('LD_LIBRARY_PATH=/petabox/sw/lib/kakadu');
-        
-        $unzipCmd  = $this->getUnarchiveCommand($zipPath, $file);
-        
-        $decompressCmd = $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink);
-               
-        // Non-integer scaling is currently disabled on the cluster
-        // if (isset($_REQUEST['height'])) {
-        //     $cmd .= " | pnmscale -height {$_REQUEST['height']} ";
-        // }
-        
-        switch ($ext) {
-            case 'png':
-                $compressCmd = ' | pnmtopng ' . $pngOptions;
-                break;
-                
-            case 'jpeg':
-            case 'jpg':
-            default:
-                $compressCmd = ' | pnmtojpeg ' . $jpegOptions;
-                $ext = 'jpeg'; // for matching below
-                break;
-        
-        }
-        
-        if (($ext == $fileExt) && ($scale == 1) && ($rotate === "0")) {
-            // Just pass through original data if same format and size
-            $cmd = $unzipCmd;
-        } else {
-            $cmd = $unzipCmd . $decompressCmd . $compressCmd;
-        }
-        
-        // print $cmd;
-        
-        $headers = array('Content-type: '. $MIMES[$ext],
-                          'Cache-Control: max-age=15552000');
-        
-        $errorMessage = '';
-        if (! $this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
-            // $$$ automated reporting
-            trigger_error('BookReader Processing Error: ' . $cmd . ' -- ' . $errorMessage, E_USER_WARNING);
-            
-            // Try some content-specific recovery
-            $recovered = false;    
-            if ($imageInfo['type'] == 'jp2') {
-                $records = $this->getJp2Records($zipPath, $file);
-                if ($powReduce > intval($records['Clevels'])) {
-                    $powReduce = $records['Clevels'];
-                    $reduce = pow(2, $powReduce);
-                } else {
-                    $reduce = 1;
-                    $powReduce = 0;
-                }
-                 
-                $cmd = $unzipCmd . $this->getDecompressCmd($imageInfo['type'], $powReduce, $rotate, $scale, $stdoutLink) . $compressCmd;
-                if ($this->passthruIfSuccessful($headers, $cmd, $errorMessage)) { // $$$ move to BookReaderRequest
-                    $recovered = true;
-                } else {
-                    trigger_error('BookReader fallback image processing also failed: ' . $errorMessage, E_USER_WARNING);
-                }
-            }
-            
-            if (! $recovered) {
-                $this->BRfatal('Problem processing image - command failed');
-            }
-        }
-        
-        if (isset($tempFile)) {
-            unlink($tempFile);
-        }
-    }    
-    
-    function getUnarchiveCommand($archivePath, $file)
-    {
-        $lowerPath = strtolower($archivePath);
-        if (preg_match('/\.([^\.]+)$/', $lowerPath, $matches)) {
-            $suffix = $matches[1];
-            
-            if ($suffix == 'zip') {
-                return 'unzip -p '
-                    . escapeshellarg($archivePath)
-                    . ' ' . escapeshellarg($file);
-            } else if ($suffix == 'tar') {
-                return ' ( 7z e -so '
-                    . escapeshellarg($archivePath)
-                    . ' ' . escapeshellarg($file) . ' 2>/dev/null ) ';
-            } else {
-                $this->BRfatal('Incompatible archive format');
-            }
-    
-        } else {
-            $this->BRfatal('Bad image stack path');
-        }
-        
-        $this->BRfatal('Bad image stack path or archive format');
-        
-    }
-    
-    /*
-     * Returns the image type associated with the file extension.
-     */
-    function imageExtensionToType($extension)
-    {
-        
-        if (array_key_exists($extension, $this->EXTENSIONS)) {
-            return $this->EXTENSIONS[$extension];
-        } else {
-            $this->BRfatal('Unknown image extension');
-        }            
-    }
-    
-    /*
-     * Get the image information.  The returned associative array fields will
-     * vary depending on the image type.  The basic keys are width, height, type
-     * and bits.
-     */
-    function getImageInfo($zipPath, $file)
-    {
-        return $this->getImageInfoFromExif($zipPath, $file); // this is fast
-        
-        /*
-        $fileExt = strtolower(pathinfo($file, PATHINFO_EXTENSION));
-        $type = imageExtensionToType($fileExt);
-        
-        switch ($type) {
-            case "jp2":
-                return getImageInfoFromJp2($zipPath, $file);
-                
-            default:
-                return getImageInfoFromExif($zipPath, $file);
-        }
-        */
-    }
-    
-    // Get the records of of JP2 as returned by kdu_expand
-    function getJp2Records($zipPath, $file)
-    {
-        
-        $cmd = $this->getUnarchiveCommand($zipPath, $file)
-                 . ' | ' . $this->kduExpand
-                 . ' -no_seek -quiet -i /dev/stdin -record /dev/stdout';
-        exec($cmd, $output);
-        
-        $records = Array();
-        foreach ($output as $line) {
-            $elems = explode("=", $line, 2);
-            if (1 == count($elems)) {
-                // delimiter not found
-                continue;
-            }
-            $records[$elems[0]] = $elems[1];
-        }
-        
-        return $records;
-    }
-    
-    /*
-     * Get the image width, height and depth using the EXIF information.
-     */
-    function getImageInfoFromExif($zipPath, $file)
-    {
-        
-        // We look for all the possible tags of interest then act on the
-        // ones presumed present based on the file type
-        $tagsToGet = ' -ImageWidth -ImageHeight -FileType'        // all formats
-                     . ' -BitsPerComponent -ColorSpace'          // jp2
-                     . ' -BitDepth'                              // png
-                     . ' -BitsPerSample';                        // tiff
-                            
-        $cmd = $this->getUnarchiveCommand($zipPath, $file)
-            . ' | '. $this->exiftool . ' -S -fast' . $tagsToGet . ' -';
-        exec($cmd, $output);
-        
-        $tags = Array();
-        foreach ($output as $line) {
-            $keyValue = explode(": ", $line);
-            $tags[$keyValue[0]] = $keyValue[1];
-        }
-        
-        $width = intval($tags["ImageWidth"]);
-        $height = intval($tags["ImageHeight"]);
-        $type = strtolower($tags["FileType"]);
-        
-        switch ($type) {
-            case "jp2":
-                $bits = intval($tags["BitsPerComponent"]);
-                break;
-            case "tiff":
-                $bits = intval($tags["BitsPerSample"]);
-                break;
-            case "jpeg":
-                $bits = 8;
-                break;
-            case "png":
-                $bits = intval($tags["BitDepth"]);
-                break;
-            default:
-                $this->BRfatal("Unsupported image type");
-                break;
-        }
-       
-       
-        $retval = Array('width' => $width, 'height' => $height,
-            'bits' => $bits, 'type' => $type);
-        
-        return $retval;
-    }
-    
-    /*
-     * Output JSON given the imageInfo associative array
-     */
-    function outputJSON($imageInfo, $callback)
-    {
-        header('Content-type: text/plain');
-        $jsonOutput = json_encode($imageInfo);
-        if ($callback) {
-            $jsonOutput = $callback . '(' . $jsonOutput . ');';
-        }
-        echo $jsonOutput;
-    }
-    
-    function getDecompressCmd($imageType, $powReduce, $rotate, $scale, $stdoutLink) {
-        
-        switch ($imageType) {
-            case 'jp2':
-                $decompressCmd = 
-                    " | " . $this->kduExpand . " -no_seek -quiet -reduce $powReduce -rotate $rotate -i /dev/stdin -o " . $stdoutLink;
-                if ($this->decompressToBmp) {
-                    // We suppress output since bmptopnm always outputs on stderr
-                    $decompressCmd .= ' | (bmptopnm 2>/dev/null)';
-                }
-                break;
-        
-            case 'tiff':
-                // We need to create a temporary file for tifftopnm since it cannot
-                // work on a pipe (the file must be seekable).
-                // We use the BookReaderTiff prefix to give a hint in case things don't
-                // get cleaned up.
-                $tempFile = tempnam("/tmp", "BookReaderTiff");
-            
-                // $$$ look at bit depth when reducing
-                $decompressCmd = 
-                    ' > ' . $tempFile . ' ; tifftopnm ' . $tempFile . ' 2>/dev/null' . $this->reduceCommand($scale);
-                break;
-         
-            case 'jpeg':
-                $decompressCmd = ' | ( jpegtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
-                break;
-        
-            case 'png':
-                $decompressCmd = ' | ( pngtopnm 2>/dev/null ) ' . $this->reduceCommand($scale);
-                break;
-                
-            default:
-                $this->BRfatal('Unknown image type: ' . $imageType);
-                break;
-        }
-        return $decompressCmd;
-    }
-    
-    // If the command has its initial output on stdout the headers will be emitted followed
-    // by the stdout output.  If initial output is on stderr an error message will be
-    // returned.
-    // 
-    // Returns:
-    //   true - if command emits stdout and has zero exit code
-    //   false - command has initial output on stderr or non-zero exit code
-    //   &$errorMessage - error string if there was an error
-    //
-    // $$$ Tested with our command-line image processing.  May be deadlocks for
-    //     other cases.
-    function passthruIfSuccessful($headers, $cmd, &$errorMessage)
-    {
-        $retVal = false;
-        $errorMessage = '';
-        
-        $descriptorspec = array(
-           0 => array("pipe", "r"),  // stdin is a pipe that the child will read from
-           1 => array("pipe", "w"),  // stdout is a pipe that the child will write to
-           2 => array("pipe", "w"),   // stderr is a pipe to write to
-        );
-        
-        $cwd = NULL;
-        $env = NULL;
-        
-        $process = proc_open($cmd, $descriptorspec, $pipes, $cwd, $env);
-        
-        if (is_resource($process)) {
-            // $pipes now looks like this:
-            // 0 => writeable handle connected to child stdin
-            // 1 => readable handle connected to child stdout
-            // 2 => readable handle connected to child stderr
-        
-            $stdin = $pipes[0];        
-            $stdout = $pipes[1];
-            $stderr = $pipes[2];
-            
-            // check whether we get input first on stdout or stderr
-            $read = array($stdout, $stderr);
-            $write = NULL;
-            $except = NULL;
-            $numChanged = stream_select($read, $write, $except, NULL); // $$$ no timeout
-            if (false === $numChanged) {
-                // select failed
-                $errorMessage = 'Select failed';
-                $retVal = false;
-            }
-            if ($read[0] == $stdout && (1 == $numChanged)) {
-                // Got output first on stdout (only)
-                // $$$ make sure we get all stdout
-                $output = fopen('php://output', 'w');
-                foreach($headers as $header) {
-                    header($header);
-                }
-                stream_copy_to_stream($pipes[1], $output);
-                fclose($output); // okay since tied to special php://output
-                $retVal = true;
-            } else {
-                // Got output on stderr
-                // $$$ make sure we get all stderr
-                $errorMessage = stream_get_contents($stderr);
-                $retVal = false;
-            }
-    
-            fclose($stderr);
-            fclose($stdout);
-            fclose($stdin);
-    
-            
-            // It is important that you close any pipes before calling
-            // proc_close in order to avoid a deadlock
-            $cmdRet = proc_close($process);
-            if (0 != $cmdRet) {
-                $retVal = false;
-                $errorMessage .= "Command failed with result code " . $cmdRet;
-            }
-        }
-        return $retVal;
-    }
-    
-    function BRfatal($string) {
-        echo "alert('$string');\n";
-        die(-1);
-    }
-    
-    // Returns true if using a power node
-    function onPowerNode() {
-        exec("lspci | fgrep -c Realtek", $output, $return);
-        if ("0" != $output[0]) {
-            return true;
-        } else {
-            exec("egrep -q AMD /proc/cpuinfo", $output, $return);
-            if ($return == 0) {
-                return true;
-            }
-        }
-        return false;
-    }
-    
-    function reduceCommand($scale) {
-        if (1 != $scale) {
-            if ($this->onPowerNode()) {
-                return ' | pnmscale -reduce ' . $scale . ' 2>/dev/null ';
-            } else {
-                return ' | pnmscale -nomix -reduce ' . $scale . ' 2>/dev/null ';
-            }
-        } else {
-            return '';
-        }
-    }
-    
-    function checkPrivs($filename) {
-        if (!is_readable($filename)) {
-            header('HTTP/1.1 403 Forbidden');
-            exit(0);
-        }
-    }
-}
-
-$bri = new BookReaderImages();
-$bri->serveRequest($_REQUEST);
-
-?>
-
diff --git a/BookReaderIA/datanode/BookReaderMeta.inc.php b/BookReaderIA/datanode/BookReaderMeta.inc.php

new file mode 100644 (file)

index 0000000..85096cc
--- /dev/null
+++ b/BookReaderIA/datanode/BookReaderMeta.inc.php
@@ -0,0 +1,336 @@
+<?
+/*
+
+Builds metadata about a book on the Internet Archive in json(p) format so that the book
+can be accessed by other software including the Internet Archive BookReader.
+
+Michael Ang <http://github.com/mangtronix>
+
+Copyright (c) 2008-2010 Internet Archive. Software license AGPL version 3.
+
+This file is part of BookReader.
+
+    BookReader is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    BookReader is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+class BookReaderMeta {
+
+    /*
+     * Builds the metadata array (later encoded as JSON) for one book.
+     * $id: item identifier; $itemPath: item directory (/<digits>/items/<id>);
+     * $bookId: sub-item prefix (falls back to $id when empty); $server: host
+     * used in image URLs.  Failures go through BRFatal(), which ends the request.
+     */
+    function buildMetadata($id, $itemPath, $bookId, $server) {
+        $response = array();
+
+        // $bookId used to be ignored: an undefined $subPrefix was tested here,
+        // so every request silently fell back to the item id.
+        $subPrefix = $bookId ? $bookId : $id;
+        $subItemPath = $itemPath . '/' . $subPrefix;
+
+        if ("" == $id) {
+            $this->BRFatal("No identifier specified!");
+        }
+        if ("" == $itemPath) {
+            $this->BRFatal("No itemPath specified!");
+        }
+        if ("" == $server) {
+            $this->BRFatal("No server specified!");
+        }
+
+        // Quote the id so regex metacharacters in it are matched literally
+        if (!preg_match('|^/\d+/items/' . preg_quote($id, '|') . '$|', $itemPath)) {
+            $this->BRFatal("Bad id!");
+        }
+
+        // XXX check here that subitem is okay
+
+        $filesDataFile = "$itemPath/{$id}_files.xml";
+        if (file_exists($filesDataFile)) {
+            $filesData = simplexml_load_file($filesDataFile);
+        } else {
+            $this->BRfatal("File metadata not found!");
+        }
+
+        // findImageStack() returns 'unknown' in every field when nothing matches
+        $imageStackInfo = $this->findImageStack($subPrefix, $filesData);
+        if ($imageStackInfo['imageFormat'] == 'unknown') {
+            $this->BRfatal('Couldn\'t find image stack');
+        }
+
+        $imageFormat = $imageStackInfo['imageFormat'];
+        $archiveFormat = $imageStackInfo['archiveFormat'];
+        $imageStackFile = $itemPath . "/" . $imageStackInfo['imageStackFile'];
+
+        if ("unknown" == $archiveFormat) {
+          $this->BRfatal("Unknown archive format");
+        }
+
+        $scanDataFile = "{$subItemPath}_scandata.xml";
+        $scanDataZip  = "$itemPath/scandata.zip";
+        if (file_exists($scanDataFile)) {
+            $scanData = simplexml_load_file($scanDataFile);
+        } else if (file_exists($scanDataZip)) {
+            $cmd  = 'unzip -p ' . escapeshellarg($scanDataZip) . ' scandata.xml';
+            exec($cmd, $output, $retval);
+            if ($retval != 0) {
+                $this->BRFatal("Could not unzip ScanData!");
+            }
+
+            $dump = join("\n", $output);
+            $scanData = simplexml_load_string($dump);
+        } else if (file_exists("$itemPath/scandata.xml")) {
+            // For e.g. Scribe v.0 books!
+            $scanData = simplexml_load_file("$itemPath/scandata.xml");
+        } else {
+            $this->BRFatal("ScanData file not found!");
+        }
+
+        $metaDataFile = "$itemPath/{$id}_meta.xml";
+        if (!file_exists($metaDataFile)) {
+            $this->BRFatal("MetaData file not found!");
+        }
+        $metaData = simplexml_load_file($metaDataFile);
+
+        // The simplexml loaders return false on unparsable XML
+        if (false === $scanData || false === $metaData) {
+            $this->BRFatal("Could not parse item metadata!");
+        }
+
+        /* Find pages by type */
+        $titleLeaf = '';
+        $coverLeafs = array();
+        foreach ($scanData->pageData->page as $page) {
+            if (("Title Page" == $page->pageType) || ("Title" == $page->pageType)) {
+                if ('' == $titleLeaf) {
+                    // not already set
+                    $titleLeaf = "{$page['leafNum']}";
+                }
+            }
+
+            if (('Cover' == $page->pageType) || ('Cover Page' == $page->pageType)) {
+                array_push($coverLeafs, $page['leafNum']);
+            }
+        }
+
+        // These arrays map accessible page index numbers to width, height, scanned leaf numbers
+        // and page number strings (NB: these may not be unique)
+        $pageWidths = array();
+        $pageHeights = array();
+        $leafNums = array();
+        $pageNums = array(); // must exist even when no page is accessible
+        $i=0;
+        foreach ($scanData->pageData->page as $page) {
+            if ($this->shouldAddPage($page)) {
+                $pageWidths[$i] = intval($page->cropBox->w);
+                $pageHeights[$i] = intval($page->cropBox->h);
+                $leafNums[$i] = intval($page['leafNum']);
+                $pageNums[$i] = $page->pageNumber . '';
+                $i++;
+            }
+        }
+
+        # Load some values from meta.xml
+        $pageProgression = 'lr'; // default
+        if ('' != $metaData->{'page-progression'}) {
+          $pageProgression = $metaData->{"page-progression"};
+        }
+
+        // General metadata
+        $response['title'] = $metaData->title . ''; // XXX renamed
+        $response['numPages'] = count($pageNums); // XXX renamed
+        if ('' != $titleLeaf) {
+            $response['titleLeaf'] = $titleLeaf; // XXX change to titleIndex - do leaf mapping here
+            $titleIndex = $this->indexForLeaf($titleLeaf, $leafNums);
+            if ($titleIndex !== NULL) {
+                $response['titleIndex'] = intval($titleIndex);
+            }
+        }
+        $response['url'] = "http://www.archive.org/details/$id";
+        $response['pageProgression'] = $pageProgression . '';
+        $response['pageWidths'] = $pageWidths;
+        $response['pageHeights'] = $pageHeights;
+        $response['pageNums'] = $pageNums;
+
+        // Internet Archive specific
+        $response['itemId'] = $id; // XXX renamed
+        $response['bookId'] = $subPrefix;  // XXX renamed
+        $response['zip'] = $imageStackFile;
+        $response['server'] = $server;
+        $response['imageFormat'] = $imageFormat;
+        $response['archiveFormat'] = $archiveFormat;
+        $response['leafNums'] = $leafNums;
+
+        // URL to title image
+        if ('' != $titleLeaf) {
+            $response['titleImage'] = $this->imageURL($titleLeaf, $response);
+        }
+
+        if (count($coverLeafs) > 0) {
+            $coverIndices = array();
+            $coverImages = array();
+            foreach ($coverLeafs as $key => $leafNum) {
+                array_push($coverIndices, $this->indexForLeaf($leafNum, $leafNums));
+                array_push($coverImages, $this->imageUrl($leafNum, $response));
+            }
+
+            $response['coverIndices'] = $coverIndices;
+            $response['coverImages'] = $coverImages;
+        }
+
+        // Determine "preview" image, which may be the cover, title, or first page
+        if (array_key_exists('titleImage', $response)) {
+            // Use title image if one was asserted
+            $previewImage = $response['titleImage'];
+        } else if (array_key_exists('coverImages', $response)) {
+            // Try for the cover page
+            $previewImage = $response['coverImages'][0];
+        } else {
+            // Neither title nor cover asserted, use first page
+            $previewImage = $this->imageURL(0, $response);
+        }
+        $response['previewImage'] = $previewImage;
+
+        return $response;
+    }
+    
+    // Sends $metadata to the client as JSON.  When the request supplies a
+    // "callback" parameter the payload is wrapped as JSONP instead; the
+    // callback must pass isValidCallback() or the request is aborted.
+    function emitResponse($metadata) {
+        // Absent callback means plain JSON; avoid an undefined-index notice
+        $callback = isset($_REQUEST['callback']) ? $_REQUEST['callback'] : '';
+
+        $contentType = 'application/json'; // default
+        if ($callback) {
+            if (! $this->isValidCallback($callback) ) {
+                $this->BRfatal("Invalid callback");
+            }
+            $contentType = 'text/javascript'; // JSONP is not JSON
+        }
+
+        header('Content-type: ' . $contentType . ';charset=UTF-8');
+        header('Access-Control-Allow-Origin: *'); // allow cross-origin requests
+
+        // JSONP wraps the payload in a call to the callback
+        $json = json_encode($metadata);
+        print $callback ? $callback . '( ' . $json . ' );' : $json;
+    }
+    
+    function BRFatal($string) {
+        // Emits alert(<message>) and ends the request; json_encode() escapes the message for JS ($$$ TODO log error)
+        echo 'alert(' . json_encode($string) . ")\n";
+        die(-1);
+    }
+    
+    // Decides whether a page from the scan data belongs in the access formats.
+    // A page is left out only when it carries an addToAccessFormats element
+    // whose text is "false" (case and surrounding whitespace are ignored).
+    function shouldAddPage($page) {
+        if (!isset($page->addToAccessFormats)) {
+            // No assertion at all: assume the page should be added
+            return true;
+        }
+
+        $assertion = strtolower(trim($page->addToAccessFormats));
+        $excluded = ("false" == $assertion);
+        return !$excluded;
+    }
+    
+    // Locates the processed single-page image stack for a sub-item.
+    // Returns array('imageFormat' => ..., 'archiveFormat' => ..., 'imageStackFile' => ...)
+    // from the loaded files XML; every field is 'unknown' when no file matches.
+    function findImageStack($subPrefix, $filesData) {
+        // Format label as it appears in the files XML => file name extension
+        $extensionByImage = array('JP2' => 'jp2', 'TIFF' => 'tif', 'JPEG' => 'jpg');
+        $extensionByArchive = array('ZIP' => 'zip', 'Tar' => 'tar');
+        // $$$ Currently only return processed images
+        $stackPattern = '/Single Page (Processed) ('
+            . implode('|', array_keys($extensionByImage)) . ') ('
+            . implode('|', array_keys($extensionByArchive)) . ')/';
+
+        foreach ($filesData->file as $file) {
+            if (strpos($file['name'], $subPrefix) !== 0 || !preg_match($stackPattern, $file->format, $found)) {
+                continue; // name lacks the sub-item prefix, or format is not a processed stack
+            }
+
+            // Make sure we have a regular image stack
+            $imageFormat = $extensionByImage[$found[2]];
+            if (strpos($file['name'], $subPrefix . '_' . $imageFormat) === 0) {
+                return array('imageFormat' => $imageFormat,
+                             'archiveFormat' => $extensionByArchive[$found[3]],
+                             'imageStackFile' => $file['name']);
+            }
+        }
+
+        return array('imageFormat' => 'unknown', 'archiveFormat' => 'unknown', 'imageStackFile' => 'unknown');
+    }
+    
+    function isValidCallback($identifier) {
+        // Accepts a plain JS identifier only; \z (unlike $) also rejects a trailing newline
+        return is_string($identifier) && preg_match('/^[a-zA-Z_$][a-zA-Z0-9_$]*\z/', $identifier) == 1;
+    }
+    
+    // Maps a scanned leaf number to its position in $leafNums.
+    // Returns the matching key, or NULL when the leaf is not in the list.
+    function indexForLeaf($leafNum, $leafNums) {
+        // array_search() compares loosely here, so a numeric string such as
+        // '7' still finds the integer 7.
+        $position = array_search($leafNum, $leafNums);
+        return (FALSE === $position) ? NULL : $position;
+    }
+    
+    // Builds the image URL for leaf $leafNum from $metadata (keys read: server, zip,
+    // bookId, imageFormat).  $scale/$rotate are optional and appended only when given.
+    // "Under the hood", non-public, dynamically changing (achtung!) image URLs currently look like:
+    // http://{server}/BookReader/BookReaderImages.php?zip={zipPath}&file={filePath}&scale={scale}&rotate={rotate}
+    // e.g. http://ia311213.us.archive.org/BookReader/BookReaderImages.php?zip=/0/items/coloritsapplicat00andriala/coloritsapplicat00andriala_jp2.zip&file=coloritsapplicat00andriala_jp2/coloritsapplicat00andriala_0009.jp2&scale=8&rotate=0
+    function imageURL($leafNum, $metadata, $scale = null, $rotate = null) {
+        $filePath = $this->imageFilePath($leafNum, $metadata['bookId'], $metadata['imageFormat']);
+        $url = 'http://' . $metadata['server'] . '/BookReader/BookReaderImages.php?zip=' . $metadata['zip'] . '&file=' . $filePath;
+
+        // defined() tests named constants, so the old checks never fired; test for null
+        if ($scale !== null) {
+            $url .= '&scale=' . $scale;
+        }
+        if ($rotate !== null) {
+            $url .= '&rotate=' . $rotate;
+        }
+        return $url;
+    }
+    
+    function imageFilePath($leafNum, $bookId, $format) { // <bookId>_<format>/<bookId>_<leaf zero-padded to 4 digits>.<format>
+        return vsprintf("%s_%s/%s_%04d.%s", array($bookId, $format, $bookId, intval($leafNum), $format));
+    }
+    
+    // Handles one request: builds the metadata described by $requestEnv and emits it.
+    function processRequest($requestEnv) {
+        $id = $requestEnv['itemId']; // XXX renamed
+        $itemPath = $requestEnv['itemPath'];
+        $bookId = $requestEnv['bookId']; // XXX renamed
+        $server = $requestEnv['server'];
+        // On a dev vhost served out of a home dir, point to JSIA in that user's public_html
+        // $$$ TODO consolidate this logic
+        foreach (array('/~mang', '/~testflip') as $homePrefix) {
+            if (strpos($_SERVER["REQUEST_URI"], $homePrefix) === 0) {
+                $server .= ':80' . $homePrefix;
+                break;
+            }
+        }
+        $this->emitResponse( $this->buildMetadata($id, $itemPath, $bookId, $server) );
+    }
+}
+
+?>
diff --git a/BookReaderIA/datanode/BookReaderMeta.php b/BookReaderIA/datanode/BookReaderMeta.php

deleted file mode 100644 (file)

index 85096cc..0000000
--- a/BookReaderIA/datanode/BookReaderMeta.php
+++ /dev/null
@@ -1,336 +0,0 @@
-<?
-/*
-
-Builds metadata about a book on the Internet Archive in json(p) format so that the book
-can be accessed by other software including the Internet Archive BookReader.
-
-Michael Ang <http://github.com/mangtronix>
-
-Copyright (c) 2008-2010 Internet Archive. Software license AGPL version 3.
-
-This file is part of BookReader.
-
-    BookReader is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Affero General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    BookReader is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Affero General Public License for more details.
-
-    You should have received a copy of the GNU Affero General Public License
-    along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-class BookReaderMeta {
-
-    // Builds metadata object (to be encoded as JSON)
-    function buildMetadata($id, $itemPath, $bookId, $server) {
-    
-        $response = array();
-        
-        if (! $subPrefix) {
-            $subPrefix = $id;
-        }
-        $subItemPath = $itemPath . '/' . $subPrefix;
-        
-        if ("" == $id) {
-            $this->BRFatal("No identifier specified!");
-        }
-        
-        if ("" == $itemPath) {
-            $this->BRFatal("No itemPath specified!");
-        }
-        
-        if ("" == $server) {
-            $this->BRFatal("No server specified!");
-        }
-        
-        if (!preg_match("|^/\d+/items/{$id}$|", $itemPath)) {
-            $this->BRFatal("Bad id!");
-        }
-        
-        // XXX check here that subitem is okay
-        
-        $filesDataFile = "$itemPath/${id}_files.xml";
-        
-        if (file_exists($filesDataFile)) {
-            $filesData = simplexml_load_file("$itemPath/${id}_files.xml");
-        } else {
-            $this->BRfatal("File metadata not found!");
-        }
-        
-        $imageStackInfo = $this->findImageStack($subPrefix, $filesData);
-        if ($imageStackInfo['imageFormat'] == 'unknown') {
-            $this->BRfatal('Couldn\'t find image stack');
-        }
-        
-        $imageFormat = $imageStackInfo['imageFormat'];
-        $archiveFormat = $imageStackInfo['archiveFormat'];
-        $imageStackFile = $itemPath . "/" . $imageStackInfo['imageStackFile'];
-        
-        if ("unknown" == $imageFormat) {
-          $this->BRfatal("Unknown image format");
-        }
-        
-        if ("unknown" == $archiveFormat) {
-          $this->BRfatal("Unknown archive format");
-        }
-        
-        
-        $scanDataFile = "${subItemPath}_scandata.xml";
-        $scanDataZip  = "$itemPath/scandata.zip";
-        if (file_exists($scanDataFile)) {
-            $scanData = simplexml_load_file($scanDataFile);
-        } else if (file_exists($scanDataZip)) {
-            $cmd  = 'unzip -p ' . escapeshellarg($scanDataZip) . ' scandata.xml';
-            exec($cmd, $output, $retval);
-            if ($retval != 0) {
-                $this->BRFatal("Could not unzip ScanData!");
-            }
-            
-            $dump = join("\n", $output);
-            $scanData = simplexml_load_string($dump);
-        } else if (file_exists("$itemPath/scandata.xml")) {
-            // For e.g. Scribe v.0 books!
-            $scanData = simplexml_load_file("$itemPath/scandata.xml");
-        } else {
-            $this->BRFatal("ScanData file not found!");
-        }
-        
-        $metaDataFile = "$itemPath/{$id}_meta.xml";
-        if (!file_exists($metaDataFile)) {
-            $this->BRFatal("MetaData file not found!");
-        }
-        
-        
-        $metaData = simplexml_load_file($metaDataFile);
-        
-        /* Find pages by type */
-        $titleLeaf = '';
-        $coverLeafs = array();
-        foreach ($scanData->pageData->page as $page) {
-            if (("Title Page" == $page->pageType) || ("Title" == $page->pageType)) {
-                if ('' == $titleLeaf) {
-                    // not already set
-                    $titleLeaf = "{$page['leafNum']}";
-                }
-            }
-            
-            if (('Cover' == $page->pageType) || ('Cover Page' == $page->pageType)) {
-                array_push($coverLeafs, $page['leafNum']);
-            }
-        }
-        
-        // These arrays map accessible page index numbers to width, height, scanned leaf numbers
-        // and page number strings (NB: these may not be unique)
-        $pageWidths = array();
-        $pageHeights = array();
-        $leafNums = array();
-        $i=0;
-        $totalHeight = 0;
-        foreach ($scanData->pageData->page as $page) {
-            if ($this->shouldAddPage($page)) {
-                $pageWidths[$i] = intval($page->cropBox->w);
-                $pageHeights[$i] = intval($page->cropBox->h);
-                $totalHeight += intval($page->cropBox->h/4) + 10;
-                $leafNums[$i] = intval($page['leafNum']);
-                $pageNums[$i] = $page->pageNumber . '';
-                $i++;
-            }
-        }
-                
-        # Load some values from meta.xml
-        $pageProgression = 'lr'; // default
-        if ('' != $metaData->{'page-progression'}) {
-          $pageProgression = $metaData->{"page-progression"};
-        }
-        
-        // General metadata
-        $response['title'] = $metaData->title . ''; // XXX renamed
-        $response['numPages'] = count($pageNums); // XXX renamed    
-        if ('' != $titleLeaf) {
-            $response['titleLeaf'] = $titleLeaf; // XXX change to titleIndex - do leaf mapping here
-            $titleIndex = $this->indexForLeaf($titleLeaf, $leafNums);
-            if ($titleIndex !== NULL) {
-                $response['titleIndex'] = intval($titleIndex);
-            }
-        }
-        $response['url'] = "http://www.archive.org/details/$id";
-        $response['pageProgression'] = $pageProgression . '';
-        $response['pageWidths'] = $pageWidths;
-        $response['pageHeights'] = $pageHeights;
-        $response['pageNums'] = $pageNums;
-        
-        // Internet Archive specific
-        $response['itemId'] = $id; // XXX renamed
-        $response['bookId'] = $subPrefix;  // XXX renamed
-        $response['zip'] = $imageStackFile;
-        $response['server'] = $server;
-        $response['imageFormat'] = $imageFormat;
-        $response['archiveFormat'] = $archiveFormat;
-        $response['leafNums'] = $leafNums;
-        
-        // URL to title image
-        if ('' != $titleLeaf) {
-            $response['titleImage'] = $this->imageURL($titleLeaf, $response);
-        }
-        
-        if (count($coverLeafs) > 0) {
-            $coverIndices = array();
-            $coverImages = array();
-            foreach ($coverLeafs as $key => $leafNum) {
-                array_push($coverIndices, $this->indexForLeaf($leafNum, $leafNums));
-                array_push($coverImages, $this->imageUrl($leafNum, $response));
-            }
-            
-            $response['coverIndices'] = $coverIndices;
-            $response['coverImages'] = $coverImages;
-        }
-        
-        // Determine "preview" image, which may be the cover, title, or first page
-        if (array_key_exists('titleImage', $response)) {
-            // Use title image if was assert
-            $previewImage = $response['titleImage'];
-        } else if (array_key_exists('coverImages', $response)) {
-            // Try for the cover page
-            $previewImage = $response['coverImages'][0];
-        } else {
-            // Neither title nor cover asserted, use first page
-            $previewImage = $this->imageURL(0, $response);
-        }
-        $response['previewImage'] = $previewImage;
-        
-        return $response;
-    }
-    
-    // Sends $metadata to the client as JSON, or as JSONP when the request
-    // carries a "callback" parameter.
-    //
-    // $metadata - value to serialize with json_encode()
-    //
-    // Emits the Content-type and Access-Control-Allow-Origin headers and then
-    // prints the body. An invalid callback name is reported through BRFatal().
-    function emitResponse($metadata) {
-        // The callback parameter is optional; reading it unguarded raises an
-        // undefined-index notice on plain JSON requests.
-        $callback = isset($_REQUEST['callback']) ? $_REQUEST['callback'] : '';
-        
-        $contentType = 'application/json'; // default
-        if ($callback) {
-            // A non-string value (e.g. callback[]=x) can never be a valid
-            // identifier, so reject it before it reaches the regex check.
-            if (! is_string($callback) || ! $this->isValidCallback($callback) ) {
-                $this->BRFatal("Invalid callback");
-            }
-            $contentType = 'text/javascript'; // JSONP is not JSON
-        }
-        
-        header('Content-type: ' . $contentType . ';charset=UTF-8');
-        header('Access-Control-Allow-Origin: *'); // allow cross-origin requests
-        
-        if ($callback) {
-            print $callback . '( ';
-        }
-        print json_encode($metadata);
-        if ($callback) {
-            print ' );';
-        }
-    }
-    
-    // Reports a fatal error to the client as a JavaScript alert() call and
-    // terminates the script.
-    //
-    // $string - human-readable error message
-    function BRFatal($string) {
-        // $$$ TODO log error
-        // json_encode() produces a quoted, escaped JavaScript string literal,
-        // so quotes, backslashes or newlines in the message cannot break out
-        // of the alert() argument.
-        echo 'alert(' . json_encode((string) $string) . ")\n";
-        die(-1);
-    }
-    
-    // Decides whether a page should be added, based on its information in
-    // the metadata.
-    //
-    // A page is rejected only when its addToAccessFormats value is "false"
-    // (compared case-insensitively, ignoring surrounding whitespace). A page
-    // that carries no such assertion is added.
-    function shouldAddPage($page) {
-        if (!isset($page->addToAccessFormats)) {
-            return true;
-        }
-        
-        $assertion = strtolower(trim($page->addToAccessFormats));
-        return "false" != $assertion;
-    }
-    
-    // Finds the processed single-page image stack for a sub-item.
-    //
-    // $subPrefix - sub-item prefix that the stack's file name must begin with
-    // $filesData - loaded xml data whose ->file entries are scanned in order
-    //
-    // Returns array('imageFormat' => , 'archiveFormat' => , 'imageStackFile' => )
-    // for the first matching entry. All three values are 'unknown' when no
-    // entry matches.
-    function findImageStack($subPrefix, $filesData) {
-        $imageFormats = array('JP2' => 'jp2', 'TIFF' => 'tif', 'JPEG' => 'jpg');
-        $archiveFormats = array('ZIP' => 'zip', 'Tar' => 'tar');
-        
-        // $$$ Currently only return processed images
-        $imageStackRegex = '/Single Page (Processed) ('
-            . implode('|', array_keys($imageFormats))
-            . ') ('
-            . implode('|', array_keys($archiveFormats))
-            . ')/';
-        
-        foreach ($filesData->file as $entry) {
-            $name = $entry['name'];
-            if (strpos($name, $subPrefix) !== 0) {
-                continue; // name does not begin with the sub-prefix
-            }
-            if (!preg_match($imageStackRegex, $entry->format, $groups)) {
-                continue; // not a processed single-page stack format
-            }
-            
-            // Make sure we have a regular image stack, i.e. the name starts
-            // with "<subPrefix>_<image extension>"
-            $extension = $imageFormats[$groups[2]];
-            if (strpos($name, $subPrefix . '_' . $extension) === 0) {
-                return array('imageFormat' => $extension,
-                             'archiveFormat' => $archiveFormats[$groups[3]],
-                             'imageStackFile' => $name);
-            }
-        }
-        
-        return array('imageFormat' => 'unknown', 'archiveFormat' => 'unknown', 'imageStackFile' => 'unknown');
-    }
-    
-    // Checks that a JSONP callback name is a plain JavaScript identifier:
-    // a letter, underscore or dollar sign followed by letters, digits,
-    // underscores or dollar signs.
-    //
-    // $identifier - callback name taken from the request
-    //
-    // Returns true when the whole value is such an identifier, false otherwise.
-    function isValidCallback($identifier) {
-        if (!is_string($identifier)) {
-            return false;
-        }
-        // The D modifier makes "$" match only at the very end of the subject.
-        // Without it a value with a trailing newline would be accepted.
-        $pattern = '/^[a-zA-Z_$][a-zA-Z0-9_$]*$/D';
-        return preg_match($pattern, $identifier) === 1;
-    }
-    
-    // Looks up the position of a leaf number within the list of leaf numbers.
-    //
-    // $leafNum  - leaf number to look for
-    // $leafNums - array of leaf numbers to search
-    //
-    // Returns the key reported by array_search(), or NULL when the leaf
-    // number is not in the array.
-    function indexForLeaf($leafNum, $leafNums) {
-        $position = array_search($leafNum, $leafNums);
-        return ($position === FALSE) ? NULL : $position;
-    }
-    
-    // Builds the URL of the image for one leaf.
-    //
-    // $leafNum  - leaf number of the page
-    // $metadata - array providing the 'bookId', 'imageFormat', 'server' and
-    //             'zip' entries used to build the URL
-    // $scale    - optional value for the scale= parameter; omitted when NULL
-    // $rotate   - optional value for the rotate= parameter; omitted when NULL
-    //
-    // Returns the URL as a string.
-    function imageURL($leafNum, $metadata, $scale = NULL, $rotate = NULL) {
-        // "Under the hood", non-public, dynamically changing (achtung!) image URLs currently look like:
-        // http://{server}/BookReader/BookReaderImages.php?zip={zipPath}&file={filePath}&scale={scale}&rotate={rotate}
-        // e.g. http://ia311213.us.archive.org/BookReader/BookReaderImages.php?zip=/0/items/coloritsapplicat00andriala/coloritsapplicat00andriala_jp2.zip&file=coloritsapplicat00andriala_jp2/coloritsapplicat00andriala_0009.jp2&scale=8&rotate=0
-        
-        $filePath = $this->imageFilePath($leafNum, $metadata['bookId'], $metadata['imageFormat']);
-        $url = 'http://' . $metadata['server'] . '/BookReader/BookReaderImages.php?zip=' . $metadata['zip'] . '&file=' . $filePath;
-        
-        // Callers may pass only the leaf and the metadata, so scale and rotate
-        // default to NULL. defined() tests for a named constant, not for a
-        // supplied argument, so the check is made against NULL instead.
-        if ($scale !== NULL) {
-            $url .= '&scale=' . $scale;
-        }
-        if ($rotate !== NULL) {
-            $url .= '&rotate=' . $rotate;
-        }
-        
-        return $url;
-    }
-    
-    // Builds the path of a leaf's image file inside the image stack:
-    // "{bookId}_{format}/{bookId}_{leaf number, zero-padded to 4 digits}.{format}"
-    //
-    // $leafNum - leaf number; converted with intval()
-    // $bookId  - book identifier used in the directory and file names
-    // $format  - image format, used as directory suffix and file extension
-    function imageFilePath($leafNum, $bookId, $format) {
-        $directory = $bookId . '_' . $format;
-        $fileName = sprintf('%s_%04d.%s', $bookId, intval($leafNum), $format);
-        return $directory . '/' . $fileName;
-    }
-    
-    // Handles one metadata request: builds the metadata for the requested
-    // item and emits it as the response.
-    //
-    // $requestEnv - array with the 'itemId', 'itemPath', 'bookId' and 'server'
-    //               entries describing the request
-    function processRequest($requestEnv) {
-        $itemId = $requestEnv['itemId']; // XXX renamed
-        $path = $requestEnv['itemPath'];
-        $subItem = $requestEnv['bookId']; // XXX renamed
-        $host = $requestEnv['server'];
-        
-        // Check if we're on a dev vhost and point to JSIA in the user's public_html on the datanode
-        // $$$ TODO consolidate this logic
-        $requestUri = $_SERVER["REQUEST_URI"];
-        foreach (array('/~mang', '/~testflip') as $homeDir) {
-            if (strpos($requestUri, $homeDir) === 0) { // Serving out of home dir
-                $host .= ':80' . $homeDir;
-                break;
-            }
-        }
-        
-        $metadata = $this->buildMetadata($itemId, $path, $subItem, $host);
-        $this->emitResponse($metadata);
-    }
-}
-
-?>
author	Michael Ang <mang@archive.org>
	Tue, 27 Apr 2010 18:54:50 +0000 (18:54 +0000)
committer	Michael Ang <mang@archive.org>
	Tue, 27 Apr 2010 18:54:50 +0000 (18:54 +0000)
BookReaderIA/datanode/BookReaderImages.inc.php	[new file with mode: 0644]	patch \| blob
BookReaderIA/datanode/BookReaderImages.php	[deleted file]	patch \| blob \| history
BookReaderIA/datanode/BookReaderMeta.inc.php	[new file with mode: 0644]	patch \| blob
BookReaderIA/datanode/BookReaderMeta.php	[deleted file]	patch \| blob \| history