BookReaderIA/inc/FlipSearchMap.inc

   1 <?php
   2
   3 /**
   4  * This here puppy is a bandaid on top of the existing flip book ajax
   5  * javascript/php implementation to incorporate the ability to exclude
   6  * add-to-access=false pages from the flip book and still allow search
   7  * functionality to work properly.
   8  *
   9  * We anticipate a bigger and better flip reader to come around soon so we
  10  * didn't jump in and re-engineer the internals at this juncture
  11  *
  12  * The basic idea is we build a map between the new indexes of a sequential
  13  * ordering of all add-to-access=true pages and the existing indices
  14  * associated with the actual image, scandata, and djvu.xml pages. We use said
  15  * map when we (1) construct (and thus renumber and restructure) the flip.zip
  16  * image sequence and (2) when we associate search results with one such flip
  17  * book image
  18  *
  19  *
  20  **/
  21
  22 class FlipSearchMap {
  23
  24   private $flipToLeafMap;
  25   private $leafToFlipMap;
  26   private $identifier;
  27   private $count;
  28   private $startIndex;
  29   private $useScandata;
  30   # $scandata is a DomDocument object
  31   public function __construct($scandata=null,$identifier=null) {
  32     $this->useScandata = isset($scandata);
  33     if (!$this->useScandata) return;
  34
  35     $this->identifier = $identifier;
  36     $xpath = new DOMXPath($scandata);
  37     $xpath->registerNamespace("s", "http://archive.org/scribe/xml");
  38     $squery = '//s:book/s:pageData/s:page';
  39     $query = '//book/pageData/page';
  40     $pages = $xpath->query($squery);
  41
  42     if($pages->length == 0) {
  43       $pages = $xpath->query($query);//Hack, xmlns not always supplied in scandata
  44     }
  45     $index = null;
  46     foreach ($pages as $page) { // Find the first access format page
  47
  48       $access = strtolower(trim($page->getElementsByTagName('addToAccessFormats')->item(0)->nodeValue));
  49
  50       if("true" != $access) {
  51         continue;
  52       }
  53
  54       $elems = $page->getElementsByTagName('handSide');
  55       $hand = (($elems->length == 0) ? 'none' : strtolower(trim($elems->item(0)->nodeValue)));
  56
  57       if     ("left" == $hand)
  58         { $index = 0; break; }
  59       elseif ("right" == $hand)
  60         { $index = 1; break; }
  61       else
  62         { $index = 0; break;
  63         }
  64     }
  65
  66     if(!isset($index)) {
  67       //echo "Warning: No addToAccessFormats=true in scandata, using all leafs\n";
  68       $index = 2;//The javascript bookreader must start at 2
  69     }
  70     $this->startIndex = $index;
  71     $this->flipToLeafMap = array(null,null,null);
  72     $this->leafToFlipMap = array();
  73
  74     foreach ($pages as $page) {
  75       $access = strtolower(trim($page->getElementsByTagName('addToAccessFormats')->item(0)->nodeValue));
  76       $this->leafToFlipMap[$page->getAttribute('leafNum')] = null;
  77       if("true" == $access) {
  78         //echo "Flip $index is leaf {$page->getAttribute('leafNum')} on {$page->getElementsByTagName('handSide')->item(0)->nodeValue}\n";
  79         $this->leafToFlipMap[$page->getAttribute('leafNum')] = sprintf("%04d", $index);
  80         $this->flipToLeafMap[$index++] = intval($page->getAttribute('leafNum'));
  81         $this->count++;
  82         //echo "Pagecount {$this->getPageCount()}\n";
  83       }
  84     }
  85   }
  86
  87   function useScandata() {return   $this->useScandata;}
  88   function getPageCount() {return $this->count;}
  89   function getStartIndex() {return $this->startIndex;}
  90   function getAllMappedLeaves() {
  91     // array_slice to skip non-access pages at the beginning, array_filter to remove any nulls
  92     // that remain from initialization of the map, and array_values to resequence the keys
  93     return array_values(array_filter(array_slice($this->flipToLeafMap, $this->startIndex),
  94                                      'is_numeric'));
  95   }
  96
  97   function flipToLeaf($i) {
  98     if($i < $this->startIndex || $i > $this->getPageCount() + $this->startIndex)
  99       throw new Exception("Flip map index $i out of bounds");
 100     return $this->flipToLeafMap[$i];
 101   }
 102
 103   /**
 104    * This takes the result of the flipbook_search.php call, and drops
 105    * non-access pages and remaps the page indexes
 106    **/
 107   function remapSearch($result) {
 108     if(!$this->useScandata) return $result;
 109     $naming = new Naming($this->identifier);
 110     $dom = new DomDocument();
 111     $dom->loadXML($result);
 112     $xpath = new DOMXPath($dom);
 113     $query = '//PAGE';
 114     $pages = $xpath->query($query);
 115     foreach($pages as $page) {
 116       $name = $page->getAttribute('file');
 117       $leaf = intval($naming->imageIndex('DJVU_XML',$name));
 118       $idx = $this->leafToFlipMap[$leaf];
 119       if(isset($idx)) {
 120         //echo "Reverse mapping $leaf as $idx\n";
 121         $page->setAttribute('file',"_$idx");//The flipbook js regex looks for the "_"
 122       }
 123       else {
 124         $page->parentNode->removeChild($page);
 125       }
 126     }
 127     return $dom->saveXML();
 128
 129   }
 130   /**
 131    * A utility for creating the FlipSearchMap from within flipbook_search.php.
 132    * $searchUrl is provided to flipbook_search.php by the flipbook.php ajax
 133    * interface. We decode it, find the scandata, and create a FlipSearchMap
 134    * from it.
 135    **/
 136   static function buildSearchMap($searchUrl) {
 137     $unzip    = configGetValue('bin-unzip');
 138     $url = urldecode($searchUrl);
 139     //Another way is for the javascript code to pass in server, itemdir, and identifier directly
 140     //For now we'll parse the $url passed us.
 141     #if (!preg_match('|http://\w+.archive.org(/[0-9]+/items/\w+)/(\w+)_djvu.xml$|', $url, $match))
 142     #if (!preg_match('|(\w+)/(\w+)_djvu.xml$|', $url, $match))
 143     if (!preg_match('|(/[0-9]+/items/[\w-]+)/([\w-]+)_djvu.xml$|', $url, $match))
 144       fatal("Can't get server and identifier from url $url");
 145     $bookDir = $match[1];
 146     $identifier = $match[2];
 147     //Look for the marker indicating a "new" flipbook with scandata usage.
 148     $naming = new Naming($identifier);
 149     $zipName = $naming->zipName('FLIP_JPG');
 150     $handle = popen("$unzip -p $bookDir/$zipName config.xml",'r');
 151     $config = fgets($handle);
 152     pclose($handle);
 153     if("" != $config) {
 154       $dom = Scandata::getScandataDomDocument($bookDir,$identifier);
 155       return new FlipSearchMap($dom,$identifier);
 156     }
 157     else {
 158       return new FlipSearchMap();
 159     }
 160 }
 161
 162 }
 163
 164 ?>