Determine image stack format and filename by loading _files.xml

author Michael Ang <mang@archive.org>

Wed, 20 Jan 2010 23:58:51 +0000 (23:58 +0000)

committer Michael Ang <mang@archive.org>

Wed, 20 Jan 2010 23:58:51 +0000 (23:58 +0000)
author Michael Ang <mang@archive.org>
Wed, 20 Jan 2010 23:58:51 +0000 (23:58 +0000)
committer Michael Ang <mang@archive.org>
Wed, 20 Jan 2010 23:58:51 +0000 (23:58 +0000)
diff --git a/BookReaderIA/datanode/BookReaderImages.php b/BookReaderIA/datanode/BookReaderImages.php

index 1adcfbb..d2192ae 100644 (file)
--- a/BookReaderIA/datanode/BookReaderImages.php
+++ b/BookReaderIA/datanode/BookReaderImages.php
@@ -52,6 +52,7 @@ function getImageSizeAndDepth($zipPath, $file)
  {
      global $exiftool;
      
+    # $$$ will exiftool work for *all* of our images?
      $cmd = getUnzipCommand($zipPath, $file)
          . ' | '. $exiftool . ' -s -s -s -ImageWidth -ImageHeight -BitsPerComponent -';
      exec($cmd, $output);
@@ -64,8 +65,6 @@ function getImageSizeAndDepth($zipPath, $file)
      return $retval;
  }
  
-getImageSizeAndDepth($zipPath, $file); // XXX
-
  // Unfortunately kakadu requires us to know a priori if the
  // output file should be .ppm or .pgm.  By decompressing to
  // .bmp kakadu will write a file we can consistently turn into
@@ -194,10 +193,10 @@ $cmd = $unzipCmd . $decompressCmd . $compressCmd;
  
  //print $cmd;
  
+
  header('Content-type: ' . $MIMES[$ext]);
  header('Cache-Control: max-age=15552000');
-
-passthru ($cmd);
+passthru ($cmd); # cmd returns image data
  
  if (isset($tempFile)) {
    unlink($tempFile);
diff --git a/BookReaderIA/datanode/BookReaderJSIA.php b/BookReaderIA/datanode/BookReaderJSIA.php

index e2c1528..1a9ac10 100755 (executable)
--- a/BookReaderIA/datanode/BookReaderJSIA.php
+++ b/BookReaderIA/datanode/BookReaderJSIA.php
@@ -54,22 +54,32 @@ if (!preg_match("|^/[0-3]/items/{$id}$|", $itemPath)) {
  
  // XXX check here that subitem is okay
  
-$imageFormat = 'unknown';
-$zipFile = "${subItemPath}_jp2.zip";
+$filesDataFile = "$itemPath/${id}_files.xml";
  
-if (file_exists($zipFile)) {
-    $imageFormat = 'jp2';
+if (file_exists($filesDataFile)) {
+    $filesData = simplexml_load_file("$itemPath/${id}_files.xml");
  } else {
-  $zipFile = "${subItemPath}_tif.zip";
-  if (file_exists($zipFile)) {
-    $imageFormat = 'tif';
-  }
-} // $$$ check here for tar image stack
+    BRfatal("File metadata not found!");
+}
+
+$imageStackInfo = findImageStack($subPrefix, $filesData);
+if ($imageStackInfo['imageFormat'] == 'unknown') {
+    BRfatal('Couldn\'t find image stack');
+}
+
+$imageFormat = $imageStackInfo['imageFormat'];
+$archiveFormat = $imageStackInfo['archiveFormat'];
+$imageStackFile = $itemPath . "/" . $imageStackInfo['imageStackFile'];
  
  if ("unknown" == $imageFormat) {
    BRfatal("Unknown image format");
  }
  
+if ("unknown" == $archiveFormat) {
+  BRfatal("Unknown archive format");
+}
+
+
  $scanDataFile = "${subItemPath}_scandata.xml";
  $scanDataZip  = "$itemPath/scandata.zip";
  if (file_exists($scanDataFile)) {
@@ -362,13 +372,14 @@ br.pageNums = [
  br.numLeafs = br.pageW.length;
  
  br.bookId   = '<?echo $id;?>';
-br.zip      = '<?echo $zipFile;?>';
+br.zip      = '<?echo $imageStackFile;?>';
  br.subPrefix = '<?echo $subPrefix;?>';
  br.server   = '<?echo $server;?>';
  br.bookTitle= '<?echo preg_replace("/\'/", "\\'", $metaData->title);?>';
  br.bookPath = '<?echo $subItemPath;?>';
  br.bookUrl  = '<?echo "http://www.archive.org/details/$id";?>';
  br.imageFormat = '<?echo $imageFormat;?>';
+br.archiveFormat = '<?echo $archiveFormat;?>';
  
  <?
  
@@ -422,6 +433,7 @@ br.init();
  
  
  function BRFatal($string) {
+    // $$$ TODO log error
      echo "alert('$string')\n";
      die(-1);
  }
@@ -440,4 +452,28 @@ function shouldAddPage($page) {
      return true;
  }
  
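+// Returns array('imageFormat' => ..., 'archiveFormat' => ..., 'imageStackFile' => ...) for the first matching image stack, given a sub-item prefix and loaded xml data; every value is 'unknown' if none matches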
+function findImageStack($subPrefix, $filesData) {
+
+    // $$$ Add jpeg here
+    $imageFormats = array('TIFF' => 'tif', 'JP2' => 'jp2');
+    $archiveFormats = array('ZIP' => 'zip', 'Tar' => 'tar');
+    $imageGroup = implode('|', array_keys($imageFormats));
+    $archiveGroup = implode('|', array_keys($archiveFormats));
+    $imageStackRegex = "/Single Page (Processed|Original) (${imageGroup}) (${archiveGroup})/";
+        
+    foreach ($filesData->file as $file) {        
+        if (strpos($file['name'], $subPrefix) === 0) { // subprefix matches beginning    
+            if (preg_match($imageStackRegex, $file->format, $matches)) {
+                return array('imageFormat' => $imageFormats[$matches[2]],
+                             'archiveFormat' => $archiveFormats[$matches[3]],
+                             'imageStackFile' => $file['name']);
+            }
+        }
+    }
+    
+    return array('imageFormat' => 'unknown', 'archiveFormat' => 'unknown', 'imageStackFile' => 'unknown');
+        
+}
+
  ?>
diff --git a/BookReaderIA/inc/BookReader.inc b/BookReaderIA/inc/BookReader.inc

index d61de5e..f3c1b7a 100644 (file)
--- a/BookReaderIA/inc/BookReader.inc
+++ b/BookReaderIA/inc/BookReader.inc
@@ -19,12 +19,12 @@ class BookReader
      // $$$ TODO add support for jpg and tar stacks
      // https://bugs.edge.launchpad.net/gnubook/+bug/323003
      // https://bugs.edge.launchpad.net/gnubook/+bug/385397
-    $imageFormatRegex = '@' . preg_quote($prefix, '@') . '_(jp2|tif)\.zip$@';
+    $imageFormatRegex = '@' . preg_quote($prefix, '@') . '_(jp2|tif)\.(zip|tar)$@';
      
      $baseLength = strlen($item->metadataGrabber->mainDir . '/');
      foreach ($item->getFiles() as $location => $fileInfo) {
          $filename = substr($location, $baseLength);
-
+        
          if ($checkOldScandata) {
              if ($filename == 'scandata.xml' || $filename == 'scandata.zip') {
                  $foundScandata = $filename;
@@ -39,7 +39,7 @@ class BookReader
              $foundImageStack = $filename;
          }
      }
-        
+    
      if ($foundScandata && $foundImageStack) {
          return true;
      }
author	Michael Ang <mang@archive.org>
	Wed, 20 Jan 2010 23:58:51 +0000 (23:58 +0000)
committer	Michael Ang <mang@archive.org>
	Wed, 20 Jan 2010 23:58:51 +0000 (23:58 +0000)
BookReaderIA/datanode/BookReaderImages.php		patch | blob | history
BookReaderIA/datanode/BookReaderJSIA.php		patch | blob | history
BookReaderIA/inc/BookReader.inc		patch | blob | history