Refactor BookReaderImages and BookReaderMeta. New BookReaderPreview to provide acces...

[bookreader.git] / BookReaderIA / datanode / BookReaderJSIA.php
diff --git a/BookReaderIA/datanode/BookReaderJSIA.php b/BookReaderIA/datanode/BookReaderJSIA.php

old mode 100755 (executable)

new mode 100644 (file)

index e2c1528..e549d44
--- a/BookReaderIA/datanode/BookReaderJSIA.php
+++ b/BookReaderIA/datanode/BookReaderJSIA.php
@@ -48,28 +48,38 @@ if ("" == $server) {
      BRFatal("No server specified!");
  }
  
-if (!preg_match("|^/[0-3]/items/{$id}$|", $itemPath)) {
+if (!preg_match("|^/\d+/items/{$id}$|", $itemPath)) {
      BRFatal("Bad id!");
  }
  
  // XXX check here that subitem is okay
  
-$imageFormat = 'unknown';
-$zipFile = "${subItemPath}_jp2.zip";
+$filesDataFile = "$itemPath/${id}_files.xml";
  
-if (file_exists($zipFile)) {
-    $imageFormat = 'jp2';
+if (file_exists($filesDataFile)) {
+    $filesData = simplexml_load_file("$itemPath/${id}_files.xml");
  } else {
-  $zipFile = "${subItemPath}_tif.zip";
-  if (file_exists($zipFile)) {
-    $imageFormat = 'tif';
-  }
-} // $$$ check here for tar image stack
+    BRfatal("File metadata not found!");
+}
+
+$imageStackInfo = findImageStack($subPrefix, $filesData);
+if ($imageStackInfo['imageFormat'] == 'unknown') {
+    BRfatal('Couldn\'t find image stack');
+}
+
+$imageFormat = $imageStackInfo['imageFormat'];
+$archiveFormat = $imageStackInfo['archiveFormat'];
+$imageStackFile = $itemPath . "/" . $imageStackInfo['imageStackFile'];
  
  if ("unknown" == $imageFormat) {
    BRfatal("Unknown image format");
  }
  
+if ("unknown" == $archiveFormat) {
+  BRfatal("Unknown archive format");
+}
+
+
  $scanDataFile = "${subItemPath}_scandata.xml";
  $scanDataZip  = "$itemPath/scandata.zip";
  if (file_exists($scanDataFile)) {
@@ -214,6 +224,16 @@ br.getPageNum = function(index) {
      }
  }
  
+// Single images in the Internet Archive scandata.xml metadata are (somewhat incorrectly)
+// given a "leaf" number.  Some of these images from the scanning process should not
+// be displayed in the BookReader (for example colour calibration cards).  Since some
+// of the scanned images will not be displayed in the BookReader (those marked with
+// addToAccessFormats false in the scandata.xml) leaf numbers and BookReader page
+// indexes are generally not the same.  This function returns the BookReader page
+// index given a scanned leaf number.
+//
+// This function is used, for example, to map between search results (that use the
+// leaf numbers) and the displayed pages in the BookReader.
  br.leafNumToIndex = function(leafNum) {
      for (var index = 0; index < this.leafMap.length; index++) {
          if (this.leafMap[index] == leafNum) {
@@ -301,7 +321,7 @@ br.getEmbedCode = function() {
      return "<iframe src='" + this.getEmbedURL() + "' width='480px' height='430px'></iframe>";
  }
  
-br.pageW =             [
+br.pageW =  [
              <?
              $i=0;
              foreach ($scanData->pageData->page as $page) {
@@ -314,7 +334,7 @@ br.pageW =          [
              ?>
              ];
  
-br.pageH =             [
+br.pageH =  [
              <?
              $totalHeight = 0;
              $i=0;            
@@ -362,13 +382,14 @@ br.pageNums = [
  br.numLeafs = br.pageW.length;
  
  br.bookId   = '<?echo $id;?>';
-br.zip      = '<?echo $zipFile;?>';
+br.zip      = '<?echo $imageStackFile;?>';
  br.subPrefix = '<?echo $subPrefix;?>';
  br.server   = '<?echo $server;?>';
  br.bookTitle= '<?echo preg_replace("/\'/", "\\'", $metaData->title);?>';
  br.bookPath = '<?echo $subItemPath;?>';
  br.bookUrl  = '<?echo "http://www.archive.org/details/$id";?>';
  br.imageFormat = '<?echo $imageFormat;?>';
+br.archiveFormat = '<?echo $archiveFormat;?>';
  
  <?
  
@@ -422,6 +443,7 @@ br.init();
  
  
  function BRFatal($string) {
+    // $$$ TODO log error. NOTE(review): $string is interpolated unescaped into a single-quoted JS literal below, so a message containing ' (e.g. "Couldn't find image stack") breaks the emitted alert() call.
      echo "alert('$string')\n";
      die(-1);
  }
@@ -440,4 +462,34 @@ function shouldAddPage($page) {
      return true;
  }
  
+// Returns array('imageFormat' => ..., 'archiveFormat' => ..., 'imageStackFile' => ...) for the first matching image stack, given a sub-item prefix and the loaded files XML data; all three values are 'unknown' when nothing matches.
+function findImageStack($subPrefix, $filesData) {
+
+    // $$$ Maps the format names matched against each file's format field to file extensions. NOTE(review): the loop below returns the first matching file in $filesData order, so the order of these arrays does not appear to decide which format is returned - confirm.
+    $imageFormats = array('JP2' => 'jp2', 'TIFF' => 'tif', 'JPEG' => 'jpg');
+    $archiveFormats = array('ZIP' => 'zip', 'Tar' => 'tar');
+    $imageGroup = implode('|', array_keys($imageFormats));
+    $archiveGroup = implode('|', array_keys($archiveFormats));
+    // $$$ Currently only processed image stacks are matched. The parentheses around "Processed" form capture group 1, which is why the image and archive formats are read from $matches[2] and $matches[3].
+    $imageStackRegex = "/Single Page (Processed) (${imageGroup}) (${archiveGroup})/";
+        
+    foreach ($filesData->file as $file) {        
+        if (strpos($file['name'], $subPrefix) === 0) { // file name starts with the sub-item prefix
+            if (preg_match($imageStackRegex, $file->format, $matches)) {
+            
+                // Make sure we have a regular image stack, i.e. the file name starts with the sub-item prefix followed by '_' and the image extension
+                $imageFormat = $imageFormats[$matches[2]];
+                if (strpos($file['name'], $subPrefix . '_' . $imageFormat) === 0) {            
+                    return array('imageFormat' => $imageFormat,
+                                 'archiveFormat' => $archiveFormats[$matches[3]],
+                                 'imageStackFile' => $file['name']);
+                }
+            }
+        }
+    }
+    
+    return array('imageFormat' => 'unknown', 'archiveFormat' => 'unknown', 'imageStackFile' => 'unknown');
+        
+}
+
  ?>