From: Michael Ang <mang@archive.org>
Date: Wed, 20 Jan 2010 23:58:51 +0000 (+0000)
Subject: Determine image stack format and filename by loading _files.xml
X-Git-Url: http://git.rot13.org/?a=commitdiff_plain;h=2434a662ca02c528d3022e749a8e73333f69e17a;p=bookreader.git

Determine image stack format and filename by loading _files.xml
---

diff --git a/BookReaderIA/datanode/BookReaderImages.php b/BookReaderIA/datanode/BookReaderImages.php
index 1adcfbb..d2192ae 100644
--- a/BookReaderIA/datanode/BookReaderImages.php
+++ b/BookReaderIA/datanode/BookReaderImages.php
@@ -52,6 +52,7 @@ function getImageSizeAndDepth($zipPath, $file)
 {
     global $exiftool;
     
+    # $$$ will exiftool work for *all* of our images?
     $cmd = getUnzipCommand($zipPath, $file)
         . ' | '. $exiftool . ' -s -s -s -ImageWidth -ImageHeight -BitsPerComponent -';
     exec($cmd, $output);
@@ -64,8 +65,6 @@ function getImageSizeAndDepth($zipPath, $file)
     return $retval;
 }
 
-getImageSizeAndDepth($zipPath, $file); // XXX
-
 // Unfortunately kakadu requires us to know a priori if the
 // output file should be .ppm or .pgm.  By decompressing to
 // .bmp kakadu will write a file we can consistently turn into
@@ -194,10 +193,10 @@ $cmd = $unzipCmd . $decompressCmd . $compressCmd;
 
 //print $cmd;
 
+
 header('Content-type: ' . $MIMES[$ext]);
 header('Cache-Control: max-age=15552000');
-
-passthru ($cmd);
+passthru ($cmd); # cmd returns image data
 
 if (isset($tempFile)) {
   unlink($tempFile);
diff --git a/BookReaderIA/datanode/BookReaderJSIA.php b/BookReaderIA/datanode/BookReaderJSIA.php
index e2c1528..1a9ac10 100755
--- a/BookReaderIA/datanode/BookReaderJSIA.php
+++ b/BookReaderIA/datanode/BookReaderJSIA.php
@@ -54,22 +54,32 @@ if (!preg_match("|^/[0-3]/items/{$id}$|", $itemPath)) {
 
 // XXX check here that subitem is okay
 
-$imageFormat = 'unknown';
-$zipFile = "${subItemPath}_jp2.zip";
+$filesDataFile = "$itemPath/${id}_files.xml";
 
-if (file_exists($zipFile)) {
-    $imageFormat = 'jp2';
+if (file_exists($filesDataFile)) {
+    $filesData = simplexml_load_file("$itemPath/${id}_files.xml");
 } else {
-  $zipFile = "${subItemPath}_tif.zip";
-  if (file_exists($zipFile)) {
-    $imageFormat = 'tif';
-  }
-} // $$$ check here for tar image stack
+    BRfatal("File metadata not found!");
+}
+
+$imageStackInfo = findImageStack($subPrefix, $filesData); // look up the image stack entry in the loaded _files.xml data
+if ($imageStackInfo['imageFormat'] == 'unknown') {
+    BRfatal('Could not find image stack'); // no apostrophe: BRfatal echoes the text unescaped inside alert('...')
+}
+
+$imageFormat = $imageStackInfo['imageFormat'];
+$archiveFormat = $imageStackInfo['archiveFormat'];
+$imageStackFile = $itemPath . "/" . $imageStackInfo['imageStackFile'];
 
 if ("unknown" == $imageFormat) {
   BRfatal("Unknown image format");
 }
 
+if ("unknown" == $archiveFormat) {
+  BRfatal("Unknown archive format");
+}
+
+
 $scanDataFile = "${subItemPath}_scandata.xml";
 $scanDataZip  = "$itemPath/scandata.zip";
 if (file_exists($scanDataFile)) {
@@ -362,13 +372,14 @@ br.pageNums = [
 br.numLeafs = br.pageW.length;
 
 br.bookId   = '<?echo $id;?>';
-br.zip      = '<?echo $zipFile;?>';
+br.zip      = '<?echo $imageStackFile;?>';
 br.subPrefix = '<?echo $subPrefix;?>';
 br.server   = '<?echo $server;?>';
 br.bookTitle= '<?echo preg_replace("/\'/", "\\'", $metaData->title);?>';
 br.bookPath = '<?echo $subItemPath;?>';
 br.bookUrl  = '<?echo "http://www.archive.org/details/$id";?>';
 br.imageFormat = '<?echo $imageFormat;?>';
+br.archiveFormat = '<?echo $archiveFormat;?>';
 
 <?
 
@@ -422,6 +433,7 @@ br.init();
 
 
 function BRFatal($string) {
+    // Emits a JavaScript alert() carrying $string, then stops the script. $$$ TODO log error. NOTE(review): $string is not escaped, so a message containing ' produces invalid JavaScript
     echo "alert('$string')\n";
     die(-1);
 }
@@ -440,4 +452,28 @@ function shouldAddPage($page) {
     return true;
 }
 
+// Returns array('imageFormat' => ..., 'archiveFormat' => ..., 'imageStackFile' => ...) for the first matching <file> entry, given a sub-item prefix and loaded _files.xml data
+function findImageStack($subPrefix, $filesData) {
+    // The two maps translate the format words found in a <file>'s <format> text into short codes
+    // $$$ Add jpeg here
+    $imageFormats = array('TIFF' => 'tif', 'JP2' => 'jp2');
+    $archiveFormats = array('ZIP' => 'zip', 'Tar' => 'tar');
+    $imageGroup = implode('|', array_keys($imageFormats));
+    $archiveGroup = implode('|', array_keys($archiveFormats));
+    $imageStackRegex = "/Single Page (Processed|Original) (${imageGroup}) (${archiveGroup})/";
+    // Capture group 2 is the image format key and group 3 is the archive format key
+    foreach ($filesData->file as $file) {        
+        if (strpos($file['name'], $subPrefix) === 0) { // subprefix matches beginning    
+            if (preg_match($imageStackRegex, $file->format, $matches)) {
+                return array('imageFormat' => $imageFormats[$matches[2]],
+                             'archiveFormat' => $archiveFormats[$matches[3]],
+                             'imageStackFile' => $file['name']);
+            }
+        }
+    }
+    // No entry matched: every field carries the sentinel string 'unknown'
+    return array('imageFormat' => 'unknown', 'archiveFormat' => 'unknown', 'imageStackFile' => 'unknown');
+        
+}
+
 ?>
diff --git a/BookReaderIA/inc/BookReader.inc b/BookReaderIA/inc/BookReader.inc
index d61de5e..f3c1b7a 100644
--- a/BookReaderIA/inc/BookReader.inc
+++ b/BookReaderIA/inc/BookReader.inc
@@ -19,12 +19,12 @@ class BookReader
     // $$$ TODO add support for jpg and tar stacks
     // https://bugs.edge.launchpad.net/gnubook/+bug/323003
     // https://bugs.edge.launchpad.net/gnubook/+bug/385397
-    $imageFormatRegex = '@' . preg_quote($prefix, '@') . '_(jp2|tif)\.zip$@';
+    $imageFormatRegex = '@' . preg_quote($prefix, '@') . '_(jp2|tif)\.(zip|tar)$@';
     
     $baseLength = strlen($item->metadataGrabber->mainDir . '/');
     foreach ($item->getFiles() as $location => $fileInfo) {
         $filename = substr($location, $baseLength);
-
+        
         if ($checkOldScandata) {
             if ($filename == 'scandata.xml' || $filename == 'scandata.zip') {
                 $foundScandata = $filename;
@@ -39,7 +39,7 @@ class BookReader
             $foundImageStack = $filename;
         }
     }
-        
+    
     if ($foundScandata && $foundImageStack) {
         return true;
     }