Work in progress on retrieving table of contents from Open Library

[bookreader.git] / BookReaderIA / datanode / BookReaderJSIA.php
diff --git a/BookReaderIA/datanode/BookReaderJSIA.php b/BookReaderIA/datanode/BookReaderJSIA.php

old mode 100755 (executable)

new mode 100644 (file)

index e2c1528..8b48238
--- a/BookReaderIA/datanode/BookReaderJSIA.php
+++ b/BookReaderIA/datanode/BookReaderJSIA.php
@@ -23,10 +23,17 @@ $itemPath = $_REQUEST['itemPath'];
  $subPrefix = $_REQUEST['subPrefix'];
  $server = $_REQUEST['server'];
  
+// $$$mang this code has been refactored into BookReaderMeta.inc.php for use e.g. by
+//         BookReaderPreview.php and BookReaderImages.php.  The code below should be
+//         taken out and replaced by calls into BookReaderMeta
+
  // Check if we're on a dev vhost and point to JSIA in the user's public_html on the datanode
+
  // $$$ TODO consolidate this logic
  if (strpos($_SERVER["REQUEST_URI"], "/~mang") === 0) { // Serving out of home dir
      $server .= ':80/~mang';
+} else if (strpos($_SERVER["REQUEST_URI"], "/~rkumar") === 0) { // Serving out of home dir
+    $server .= ':80/~rkumar';
  } else if (strpos($_SERVER["REQUEST_URI"], "/~testflip") === 0) { // Serving out of home dir
      $server .= ':80/~testflip';
  }
@@ -48,28 +55,38 @@ if ("" == $server) {
      BRFatal("No server specified!");
  }
  
-if (!preg_match("|^/[0-3]/items/{$id}$|", $itemPath)) {
+if (!preg_match("|^/\d+/items/{$id}$|", $itemPath)) {
      BRFatal("Bad id!");
  }
  
  // XXX check here that subitem is okay
  
-$imageFormat = 'unknown';
-$zipFile = "${subItemPath}_jp2.zip";
+$filesDataFile = "$itemPath/${id}_files.xml";
  
-if (file_exists($zipFile)) {
-    $imageFormat = 'jp2';
+if (file_exists($filesDataFile)) {
+    $filesData = simplexml_load_file("$itemPath/${id}_files.xml");
  } else {
-  $zipFile = "${subItemPath}_tif.zip";
-  if (file_exists($zipFile)) {
-    $imageFormat = 'tif';
-  }
-} // $$$ check here for tar image stack
+    BRfatal("File metadata not found!");
+}
+
+$imageStackInfo = findImageStack($subPrefix, $filesData);
+if ($imageStackInfo['imageFormat'] == 'unknown') {
+    BRfatal('Couldn\'t find image stack');
+}
+
+$imageFormat = $imageStackInfo['imageFormat'];
+$archiveFormat = $imageStackInfo['archiveFormat'];
+$imageStackFile = $itemPath . "/" . $imageStackInfo['imageStackFile'];
  
  if ("unknown" == $imageFormat) {
    BRfatal("Unknown image format");
  }
  
+if ("unknown" == $archiveFormat) {
+  BRfatal("Unknown archive format");
+}
+
+
  $scanDataFile = "${subItemPath}_scandata.xml";
  $scanDataZip  = "$itemPath/scandata.zip";
  if (file_exists($scanDataFile)) {
@@ -214,6 +231,16 @@ br.getPageNum = function(index) {
      }
  }
  
+// Single images in the Internet Archive scandata.xml metadata are (somewhat incorrectly)
+// given a "leaf" number.  Some of these images from the scanning process should not
+// be displayed in the BookReader (for example colour calibration cards).  Since some
+// of the scanned images will not be displayed in the BookReader (those marked with
+// addToAccessFormats false in the scandata.xml) leaf numbers and BookReader page
+// indexes are generally not the same.  This function returns the BookReader page
+// index given a scanned leaf number.
+//
+// This function is used, for example, to map between search results (that use the
+// leaf numbers) and the displayed pages in the BookReader.
  br.leafNumToIndex = function(leafNum) {
      for (var index = 0; index < this.leafMap.length; index++) {
          if (this.leafMap[index] == leafNum) {
@@ -301,7 +328,36 @@ br.getEmbedCode = function() {
      return "<iframe src='" + this.getEmbedURL() + "' width='480px' height='430px'></iframe>";
  }
  
-br.pageW =             [
+// getOpenLibraryJSON
+// Looks up this book's Open Library edition by ocaid, then by source_records, via
+// JSONP.  callback(edition) gets the first match; it is not called if none is found.
+br.getOpenLibraryJSON = function(callback) {
+    var queryURL = 'http://openlibrary.org/query.json?type=/type/edition&*=';
+    var bookId = encodeURIComponent(br.bookId);
+
+    // Runs one query; passes the first result to callback, else calls notFound if given
+    var lookup = function(params, notFound) {
+        $.ajax({
+            url: queryURL + params,
+            dataType: 'jsonp',
+            success: function(data) {
+                if (data && data.length > 0) {
+                    callback(data[0]);
+                } else if (notFound) {
+                    notFound();
+                }
+            }
+        });
+    };
+
+    // Deliberately silent on a miss: console is not defined in every browser
+    lookup('&ocaid=' + bookId, function() {
+        // Nothing found via ocaid -- try the source record
+        lookup('&source_records=ia:' + bookId);
+    });
+}
+
+br.pageW =  [
              <?
              $i=0;
              foreach ($scanData->pageData->page as $page) {
@@ -314,7 +370,7 @@ br.pageW =          [
              ?>
              ];
  
-br.pageH =             [
+br.pageH =  [
              <?
              $totalHeight = 0;
              $i=0;            
@@ -362,13 +418,14 @@ br.pageNums = [
  br.numLeafs = br.pageW.length;
  
  br.bookId   = '<?echo $id;?>';
-br.zip      = '<?echo $zipFile;?>';
+br.zip      = '<?echo $imageStackFile;?>';
  br.subPrefix = '<?echo $subPrefix;?>';
  br.server   = '<?echo $server;?>';
  br.bookTitle= '<?echo preg_replace("/\'/", "\\'", $metaData->title);?>';
  br.bookPath = '<?echo $subItemPath;?>';
  br.bookUrl  = '<?echo "http://www.archive.org/details/$id";?>';
  br.imageFormat = '<?echo $imageFormat;?>';
+br.archiveFormat = '<?echo $archiveFormat;?>';
  
  <?
  
@@ -422,6 +479,7 @@ br.init();
  
  
  function BRFatal($string) {
+    // $$$ TODO log error
      echo "alert('$string')\n";
      die(-1);
  }
@@ -440,4 +498,62 @@ function shouldAddPage($page) {
      return true;
  }
  
+// Finds the processed single-page image stack for a sub-item in the item's files metadata.
+//
+// $subPrefix - sub-item prefix; only files whose name begins with it are considered
+// $filesData - parsed <id>_files.xml (as returned by simplexml_load_file)
+//
+// Returns array('imageFormat' => 'jp2'|'tif'|'jpg',
+//               'archiveFormat' => 'zip'|'tar',
+//               'imageStackFile' => file name of the archive, as a string)
+// or the same three keys all set to 'unknown' when no suitable image stack is listed.
+function findImageStack($subPrefix, $filesData) {
+
+    // $$$ The order of the image formats determines which will be returned first
+    $imageFormats = array('JP2' => 'jp2', 'TIFF' => 'tif', 'JPEG' => 'jpg');
+    $archiveFormats = array('ZIP' => 'zip', 'Tar' => 'tar');
+    $imageGroup = implode('|', array_keys($imageFormats));
+    $archiveGroup = implode('|', array_keys($archiveFormats));
+    // $$$ Currently only return processed images
+    $imageStackRegex = "/Single Page (Processed) ({$imageGroup}) ({$archiveGroup})/";
+
+    // Rank of each image format by its position in $imageFormats (lower is preferred)
+    $formatRank = array_flip(array_values($imageFormats));
+
+    $best = array('imageFormat' => 'unknown', 'archiveFormat' => 'unknown', 'imageStackFile' => 'unknown');
+    $bestRank = count($formatRank);
+
+    // simplexml_load_file() returns false when the XML could not be parsed
+    if (!is_object($filesData)) {
+        return $best;
+    }
+
+    foreach ($filesData->file as $file) {
+        // Cast once so string functions see plain strings rather than SimpleXMLElements
+        $name = (string) $file['name'];
+        if (strpos($name, $subPrefix) !== 0) { // subprefix must match beginning
+            continue;
+        }
+        if (!preg_match($imageStackRegex, (string) $file->format, $matches)) {
+            continue;
+        }
+
+        // Make sure we have a regular image stack
+        $imageFormat = $imageFormats[$matches[2]];
+        if (strpos($name, $subPrefix . '_' . $imageFormat) !== 0) {
+            continue;
+        }
+
+        // Keep scanning: a later file may use a more preferred format than this one
+        if ($formatRank[$imageFormat] < $bestRank) {
+            $bestRank = $formatRank[$imageFormat];
+            $best = array('imageFormat' => $imageFormat,
+                          'archiveFormat' => $archiveFormats[$matches[3]],
+                          'imageStackFile' => $name);
+        }
+    }
+
+    return $best;
+}
+
  ?>