Merge commit 'ol/newui' into newui

author rajbot <raj@archive.org>

Tue, 12 Oct 2010 21:38:47 +0000 (21:38 +0000)

committer rajbot <raj@archive.org>

Tue, 12 Oct 2010 21:38:47 +0000 (21:38 +0000)
author rajbot <raj@archive.org>
Tue, 12 Oct 2010 21:38:47 +0000 (21:38 +0000)
committer rajbot <raj@archive.org>
Tue, 12 Oct 2010 21:38:47 +0000 (21:38 +0000)
diff --git a/BookReader/BookReader.css b/BookReader/BookReader.css

index 2f8194d..4774b81 100644 (file)
--- a/BookReader/BookReader.css
+++ b/BookReader/BookReader.css
@@ -678,3 +678,12 @@ div#BRzoombtn {
      left: 0;
      background: url("images/icon_zoomer.png");
  }
+
+.BRttsPopUp {
+    position: absolute;
+    background-color: #E6E4E1;
+    background-image: url(images/progressbar.gif);
+    background-repeat:no-repeat;
+    font-size: 0.8em; 
+    z-index: 3;    
+}
diff --git a/BookReader/BookReader.js b/BookReader/BookReader.js

index abc931b..36379a1 100644 (file)
--- a/BookReader/BookReader.js
+++ b/BookReader/BookReader.js
@@ -123,6 +123,14 @@ function BookReader() {
          // embed/share ui
          // info ui
      };
+
+    // Text-to-Speech params
+    this.ttsPlaying     = false;
+    this.ttsIndex       = null;  //leaf index
+    this.ttsPosition    = -1;    //chunk (paragraph) number
+    this.ttsBuffering   = false;
+    this.ttsPoller      = null;
+    this.ttsFormat      = null;
      
      return this;
  };
@@ -1297,6 +1305,7 @@ BookReader.prototype.switchMode = function(mode) {
      }
  
      this.autoStop();
+    this.ttsStop();
      this.removeSearchHilites();
  
      this.mode = mode;
@@ -1579,12 +1588,14 @@ BookReader.prototype.prepareTwoPagePopUp = function() {
  
      $(this.leafEdgeL).bind('click', this, function(e) { 
          e.data.autoStop();
+        e.data.ttsStop();
          var jumpIndex = e.data.jumpIndexForLeftEdgePageX(e.pageX);
          e.data.jumpToIndex(jumpIndex);
      });
  
      $(this.leafEdgeR).bind('click', this, function(e) { 
          e.data.autoStop();
+        e.data.ttsStop();
          var jumpIndex = e.data.jumpIndexForRightEdgePageX(e.pageX);
          e.data.jumpToIndex(jumpIndex);    
      });
@@ -2323,6 +2334,7 @@ BookReader.prototype.setMouseHandlers2UP = function() {
      this.setClickHandler2UP( this.prefetchedImgs[this.twoPage.currentIndexL],
          { self: this },
          function(e) {
+            e.data.self.ttsStop();
              e.data.self.left();
              e.preventDefault();
          }
@@ -2331,6 +2343,7 @@ BookReader.prototype.setMouseHandlers2UP = function() {
      this.setClickHandler2UP( this.prefetchedImgs[this.twoPage.currentIndexR],
          { self: this },
          function(e) {
+            e.data.self.ttsStop();
              e.data.self.right();
              e.preventDefault();
          }
@@ -2843,28 +2856,7 @@ BookReader.prototype.updateSearchHilites2UP = function() {
                  //console.log('appending ' + key);
              }
  
-            // We calculate the reduction factor for the specific page because it can be different
-            // for each page in the spread
-            var height = this._getPageHeight(key);
-            var width  = this._getPageWidth(key)
-            var reduce = this.twoPage.height/height;
-            var scaledW = parseInt(width*reduce);
-            
-            var gutter = this.twoPageGutter();
-            var pageL;
-            if ('L' == this.getPageSide(key)) {
-                pageL = gutter-scaledW;
-            } else {
-                pageL = gutter;
-            }
-            var pageT  = this.twoPageTop();
-            
-            $(result.div).css({
-                width:  (result.r-result.l)*reduce + 'px',
-                height: (result.b-result.t)*reduce + 'px',
-                left:   pageL+(result.l)*reduce + 'px',
-                top:    pageT+(result.t)*reduce +'px'
-            });
+            this.setHilightCss2UP(result.div, key, result.l, result.r, result.t, result.b);
  
          } else {
              //console.log(key + ' not displayed');
@@ -2877,6 +2869,35 @@ BookReader.prototype.updateSearchHilites2UP = function() {
      }
  }
  
+// setHilightCss2UP()
+//______________________________________________________________________________
+//position calculation shared between search and text-to-speech functions
+BookReader.prototype.setHilightCss2UP = function(div, index, left, right, top, bottom) {
+
+    // We calculate the reduction factor for the specific page because it can be different
+    // for each page in the spread
+    var height = this._getPageHeight(index);
+    var width  = this._getPageWidth(index)
+    var reduce = this.twoPage.height/height;
+    var scaledW = parseInt(width*reduce);
+    
+    var gutter = this.twoPageGutter();
+    var pageL;
+    if ('L' == this.getPageSide(index)) {
+        pageL = gutter-scaledW;
+    } else {
+        pageL = gutter;
+    }
+    var pageT  = this.twoPageTop();
+    
+    $(div).css({
+        width:  (right-left)*reduce + 'px',
+        height: (bottom-top)*reduce + 'px',
+        left:   pageL+left*reduce + 'px',
+        top:    pageT+top*reduce +'px'
+    });
+}
+
  // removeSearchHilites()
  //______________________________________________________________________________
  BookReader.prototype.removeSearchHilites = function() {
@@ -2970,6 +2991,12 @@ BookReader.prototype.updatePrintFrame = function(delta) {
  // showEmbedCode()
  //______________________________________________________________________________
  BookReader.prototype.showEmbedCode = function() {
+    if (null != this.embedPopup) { // check if already showing
+        return;
+    }
+    this.autoStop();
+    this.ttsStop();
+
      this.embedPopup = document.createElement("div");
      $(this.embedPopup).css({
          position: 'absolute',
@@ -3720,6 +3747,11 @@ BookReader.prototype.bindToolbarNavHandlers = function(jToolbar) {
          self.rightmost();
          return false;
      });
+
+    jToolbar.find('.read').click(function(e) {
+        self.ttsToggle();
+        return false;
+    });
      
      // $$$mang cleanup
      $('#BRzoomer .zoom_in').bind('click', function() {
@@ -4143,6 +4175,8 @@ BookReader.prototype.startLocationPolling = function() {
              if (newHash != self.oldUserHash) { // Only process new user hash once
                  //console.log('url change detected ' + self.oldLocationHash + " -> " + newHash);
                  
+                self.ttsStop();
+                
                  // Queue change if animating
                  if (self.animating) {
                      self.autoStop();
@@ -4304,3 +4338,413 @@ BookReader.util = {
      }
      // The final property here must NOT have a comma after it - IE7
  }
+
+
+// ttsToggle(): start read-aloud when it is stopped, stop it when it is playing
+//______________________________________________________________________________
+BookReader.prototype.ttsToggle = function () {
+    if (false == this.ttsPlaying) {
+        if(soundManager.supported()) {
+            this.ttsStart();
+        } else {
+            soundManager.onready(function(oStatus) { //not usable yet: defer the start until soundManager reports its status
+              if (oStatus.success) {
+                this.ttsStart();
+              } else {
+                alert('Could not load SoundManager2, possibly due to FlashBlock. Audio playback is disabled');
+              }
+            }, this); //the BookReader is passed as the callback scope, which is why 'this' is used inside
+        }
+    } else {
+        this.ttsStop();
+    }
+}
+
+// ttsStart()
+//______________________________________________________________________________
+BookReader.prototype.ttsStart = function () {
+    if (soundManager.debugMode) console.log('starting readAloud');
+    if (this.constModeThumb == this.mode) this.switchMode(this.constMode1up);
+    
+    this.ttsPlaying = true;
+    this.ttsIndex = this.currentIndex();
+    this.ttsFormat = 'mp3';
+    if ($.browser.mozilla) {
+        this.ttsFormat = 'ogg';
+    }
+    this.ttsGetText(this.ttsIndex, 'ttsStartCB');    
+}
+
+// ttsStop(): halt playback, remove hilites/popup, and reset all text-to-speech state (no-op if not playing)
+//______________________________________________________________________________
+BookReader.prototype.ttsStop = function () {
+    if (false == this.ttsPlaying) return;
+
+    if (soundManager.debugMode) console.log('stopping readaloud');
+    soundManager.stopAll();
+    soundManager.destroySound('chunk'+this.ttsIndex+'-'+this.ttsPosition);
+    this.ttsRemoveHilites();
+    this.ttsRemovePopup();
+
+    this.ttsPlaying     = false;
+    this.ttsIndex       = null;  //leaf index
+    this.ttsPosition    = -1;    //chunk (paragraph) number
+    this.ttsBuffering   = false;
+    clearInterval(this.ttsPoller); this.ttsPoller = null; //cancel a pending buffering poll, otherwise it fires after stop and restarts prefetch/playback
+}
+
+// ttsGetText()
+//______________________________________________________________________________
+BookReader.prototype.ttsGetText = function(index, callback) {
+    var url = 'http://'+this.server+'/BookReader/BookReaderGetTextWrapper.php?path='+this.bookPath+'_djvu.xml&page='+index;    
+    this.ttsAjax = $.ajax({url:url, dataType:'jsonp', jsonpCallback:callback});
+}
+
+// ttsStartCB(): text-to-speech callback; stores the fetched chunks for ttsIndex and starts playing them
+//______________________________________________________________________________
+BookReader.prototype.ttsStartCB = function (data) {
+    if (soundManager.debugMode)  console.log('ttsStartCB got data: ' + data);
+    this.ttsChunks = data;
+    this.ttsHilites = [];
+
+    //deal with the page being blank
+    if (0 == data.length) {
+        if (soundManager.debugMode) console.log('first page is blank!');
+        if(this.ttsAdvance(true)) {
+            this.ttsGetText(this.ttsIndex, 'ttsStartCB');
+        }
+        return;
+    }
+
+    this.ttsShowPopup();
+
+    ///// whileloading: broken on safari
+    ///// onload fires on safari, but *after* the sound starts playing..
+    this.ttsPosition = -1;
+    var snd = soundManager.createSound({
+     id: 'chunk'+this.ttsIndex+'-0',
+     //url: 'http://home.us.archive.org/~rkumar/arctic.ogg',
+     url: 'http://'+this.server+'/BookReader/BookReaderGetTTS.php?string=' + encodeURIComponent(data[0][0]) + '&format=.'+this.ttsFormat, //encodeURIComponent (not escape) so non-ASCII text and '+' survive the query string; the .ogg is to trick SoundManager2 to use the HTML5 audio player
+     whileloading: function(){if (this.bytesLoaded == this.bytesTotal) this.br.ttsRemovePopup();}, //onload never fires in FF...
+     onload: function(){this.br.ttsRemovePopup();} //whileloading never fires in safari...
+    });
+    snd.br = this; //the two callbacks above reach the BookReader through this property
+    snd.load();
+
+    this.ttsNextChunk();
+}
+
+// ttsShowPopup(): create the BRttsPopUp div below the toolbar, at the right edge of #BookReader
+//______________________________________________________________________________
+BookReader.prototype.ttsShowPopup = function() {
+    if (soundManager.debugMode) console.log('ttsShowPopup index='+this.ttsIndex+' pos='+this.ttsPosition);
+
+    this.popup = document.createElement("div");
+    $(this.popup).css({
+        top:      $('#BRtoolbar').height() + 'px',
+        left:     $('#BookReader').width()-220 + 'px',
+        width:    '220px',
+        height:   '20px'
+    }).attr('className', 'BRttsPopUp').appendTo('#BookReader'); //no comma after the final property above - IE7
+
+    var htmlStr =  '&nbsp;'; //declared locally; it was an implicit global
+
+    this.popup.innerHTML = htmlStr;
+}
+
+// ttsRemovePopup
+//______________________________________________________________________________
+BookReader.prototype.ttsRemovePopup = function() {
+    $(this.popup).remove(); 
+    this.popup=null;
+}
+
+// ttsNextPageCB
+//______________________________________________________________________________
+BookReader.prototype.ttsNextPageCB = function (data) {
+    this.ttsNextChunks = data;
+    if (soundManager.debugMode) console.log('preloaded next chunks.. data is ' + data);
+    
+    if (true == this.ttsBuffering) {
+        if (soundManager.debugMode) console.log('ttsNextPageCB: ttsBuffering is true');
+        this.ttsBuffering = false;
+    }
+}
+
+// ttsLoadChunk(): create and start loading the sound 'chunk<page>-<pos>' that speaks the given string
+//______________________________________________________________________________
+BookReader.prototype.ttsLoadChunk = function (page, pos, string) {
+    var snd = soundManager.createSound({
+     id: 'chunk'+page+'-'+pos,
+     url: 'http://'+this.server+'/BookReader/BookReaderGetTTS.php?string=' + encodeURIComponent(string) + '&format=.'+this.ttsFormat //encodeURIComponent (not escape) so non-ASCII text and '+' survive the query string; the .ogg is to trick SoundManager2 to use the HTML5 audio player
+    });
+    snd.br = this;
+    snd.load();
+}
+
+
+// ttsNextChunk()
+//______________________________________________________________________________
+// This function is split into two parts: ttsNextChunk gets run before page flip animation
+// and ttsNextChunkPhase2 gets run after page flip animation.
+// If a page flip is necessary, ttsAdvance() will return false so Phase2 isn't
+// called. Instead, this.animationFinishedCallback is set, so that Phase2
+// continues after animation is finished.
+
+BookReader.prototype.ttsNextChunk = function () {
+    if (soundManager.debugMode) console.log('nextchunk pos=' + this.ttsPosition);
+    
+    if (-1 != this.ttsPosition) {
+        soundManager.destroySound('chunk'+this.ttsIndex+'-'+this.ttsPosition);
+    }
+
+    this.ttsRemoveHilites(); //remove old hilights
+        
+    var moreToPlay = this.ttsAdvance();
+    
+    if (moreToPlay) {
+        this.ttsNextChunkPhase2();
+    }    
+    
+    //This function is called again when ttsPlay() has finished playback.
+    //If the next chunk of text has not yet finished loading, ttsPlay()
+    //will start polling until the next chunk is ready.
+}
+
+// ttsNextChunkPhase2()
+//______________________________________________________________________________
+// page flip animation has now completed
+BookReader.prototype.ttsNextChunkPhase2 = function () {
+    if (null == this.ttsChunks) {
+        alert('error: ttsChunks is null?'); //TODO
+        return;
+    }
+    
+    if (0 == this.ttsChunks.length) {
+        if (soundManager.debugMode) console.log('ttsNextChunk2: ttsChunks.length is zero.. hacking...');
+        this.ttsStartCB(this.ttsChunks);
+        return;
+    }
+    
+    if (soundManager.debugMode) console.log('next chunk is ' + this.ttsPosition);  
+
+    //prefetch next page of text
+    if (0 == this.ttsPosition) {
+        if (this.ttsIndex<(this.numLeafs-1)) {
+            this.ttsGetText(this.ttsIndex+1, 'ttsNextPageCB');
+        }
+    }
+    
+    this.ttsPrefetchAudio();
+    
+    this.ttsPlay();
+}
+
+// ttsAdvance()
+//______________________________________________________________________________
+// 1. advance ttsPosition
+// 2. if necessary, advance ttsIndex, and copy ttsNextChunks to ttsChunks
+// 3. if necessary, flip to current page, or scroll so chunk is visible
+// 4. do something smart if ttsNextChunks has not yet finished preloading (TODO)
+// 5. stop playing at end of book
+
+BookReader.prototype.ttsAdvance = function (starting) {
+    this.ttsPosition++;
+
+    if (this.ttsPosition >= this.ttsChunks.length) {
+        
+        if (this.ttsIndex == (this.numLeafs-1)) {
+            if (soundManager.debugMode) console.log('tts stop');
+            return false;
+        } else {
+            if ((null != this.ttsNextChunks) || (starting)) {
+                if (soundManager.debugMode) console.log('moving to next page!');
+                this.ttsIndex++;
+                this.ttsPosition = 0;
+                this.ttsChunks = this.ttsNextChunks;
+                this.ttsNextChunks = null;
+
+                //A page flip might be necessary. This code is confusing since
+                //ttsNextChunks might be null if we are starting on a blank page.
+                if (2 == this.mode) {
+                    if ((this.ttsIndex != this.twoPage.currentIndexL) && (this.ttsIndex != this.twoPage.currentIndexR)) {
+                        if (!starting) {
+                            this.animationFinishedCallback = this.ttsNextChunkPhase2;
+                            this.next();
+                            return false;
+                        } else {
+                            this.next();
+                            return true;
+                        }
+                    } else {
+                        return true;
+                    }
+                }
+            } else {
+                if (soundManager.debugMode) console.log('ttsAdvance: ttsNextChunks is null');
+                return false; 
+            }
+        }
+    }
+        
+    return true;
+}
+
+// ttsPrefetchAudio()
+//______________________________________________________________________________
+BookReader.prototype.ttsPrefetchAudio = function () {
+
+    if(false != this.ttsBuffering) {
+        alert('TTS Error: prefetch() called while content still buffering!');
+        return;
+    }    
+
+    //preload next chunk
+    var nextPos = this.ttsPosition+1;
+    if (nextPos < this.ttsChunks.length) {     
+        this.ttsLoadChunk(this.ttsIndex, nextPos, this.ttsChunks[nextPos][0]);
+    } else {
+        //for a short page, the preload might not have returned yet..
+        if (soundManager.debugMode) console.log('preloading chunk 0 from next page, index='+(this.ttsIndex+1));
+        if (null != this.ttsNextChunks) {
+            if (0 != this.ttsNextChunks.length) {
+                this.ttsLoadChunk(this.ttsIndex+1, 0, this.ttsNextChunks[0][0]);        
+            } else {
+                if (soundManager.debugMode) console.log('prefetchAudio(): ttsNextChunks is zero length!');
+            }
+        } else {
+            if (soundManager.debugMode) console.log('ttsNextChunks is null, not preloading next page');
+            this.ttsBuffering = true;
+        }
+    }
+
+}
+
+// ttsPlay(): hilite the current chunk, scroll it into view (1up), and play its audio
+//______________________________________________________________________________
+BookReader.prototype.ttsPlay = function () {
+    var self = this; //the onfinish callbacks below must reach this instance rather than a global named 'br'
+    var chunk = this.ttsChunks[this.ttsPosition];
+    if (soundManager.debugMode) {
+        console.log('ttsPlay position = ' + this.ttsPosition);
+        console.log('chunk = ' + chunk);
+        console.log(this.ttsChunks);
+    }
+
+    //add new hilights
+    if (2 == this.mode) {
+        this.ttsHilite2UP(chunk);
+    } else {
+        this.ttsHilite1UP(chunk);
+    }
+
+    this.ttsScrollToChunk(chunk);
+
+    //play current chunk
+    if (false == this.ttsBuffering) {
+        soundManager.play('chunk'+this.ttsIndex+'-'+this.ttsPosition,{onfinish:function(){self.ttsNextChunk();}});
+    } else { //text for the next page has not arrived yet, so poll for it once this chunk finishes
+        soundManager.play('chunk'+this.ttsIndex+'-'+this.ttsPosition,{onfinish:function(){self.ttsStartPolling();}});
+    }
+}
+
+// ttsScrollToChunk()
+//______________________________________________________________________________
+BookReader.prototype.ttsScrollToChunk = function(chunk) {
+    if (this.constMode1up != this.mode) return;
+
+    var leafTop = 0;
+    var h;
+    var i;
+    for (i=0; i<this.ttsIndex; i++) {
+        h = parseInt(this._getPageHeight(i)/this.reduce); 
+        leafTop += h + this.padding;
+    }
+    
+    var chunkTop = chunk[1][3]; //coords are in l,b,r,t order
+    var chunkBot = chunk[chunk.length-1][1];
+    
+    var topOfFirstChunk = leafTop + chunkTop/this.reduce;
+    var botOfLastChunk  = leafTop + chunkBot/this.reduce;
+    
+    if (soundManager.debugMode) console.log('leafTop = ' + leafTop + ' topOfFirstChunk = ' + topOfFirstChunk + ' botOfLastChunk = ' + botOfLastChunk);
+
+    var containerTop = $('#BRcontainer').attr('scrollTop');
+    var containerBot = containerTop + $('#BRcontainer').height();
+    if (soundManager.debugMode) console.log('containerTop = ' + containerTop + ' containerBot = ' + containerBot);
+
+    if ((topOfFirstChunk < containerTop) || (botOfLastChunk > containerBot)) {
+        //jumpToIndex scrolls so that chunkTop is centered.. we want chunkTop at the top
+        //this.jumpToIndex(this.ttsIndex, null, chunkTop);
+        $('#BRcontainer').animate({scrollTop: topOfFirstChunk},'fast');            
+    }    
+}
+
+// ttsHilite1UP()
+//______________________________________________________________________________
+BookReader.prototype.ttsHilite1UP = function(chunk) {
+    var i;
+    for (i=1; i<chunk.length; i++) {
+        //each rect is an array of l,b,r,t coords (djvu.xml ordering...)       
+        var l = chunk[i][0];
+        var b = chunk[i][1];
+        var r = chunk[i][2];
+        var t = chunk[i][3];
+        
+        var div = document.createElement('div');
+        this.ttsHilites.push(div);        
+        $(div).attr('className', 'BookReaderSearchHilite').appendTo('#pagediv'+this.ttsIndex);
+
+        $(div).css({
+            width:  (r-l)/this.reduce + 'px',
+            height: (b-t)/this.reduce + 'px',
+            left:   l/this.reduce + 'px',
+            top:    t/this.reduce +'px'
+        });
+    }
+
+}
+
+// ttsHilite2UP()
+//______________________________________________________________________________
+BookReader.prototype.ttsHilite2UP = function (chunk) {
+    var i;
+    for (i=1; i<chunk.length; i++) {
+        //each rect is an array of l,b,r,t coords (djvu.xml ordering...)       
+        var l = chunk[i][0];
+        var b = chunk[i][1];
+        var r = chunk[i][2];
+        var t = chunk[i][3];
+        
+        var div = document.createElement('div');
+        this.ttsHilites.push(div);        
+        $(div).attr('className', 'BookReaderSearchHilite').css('zIndex', 3).appendTo('#BRtwopageview');
+        this.setHilightCss2UP(div, this.ttsIndex, l, r, t, b);        
+    }
+}
+
+// ttsRemoveHilites()
+//______________________________________________________________________________
+BookReader.prototype.ttsRemoveHilites = function (chunk) {
+    $(this.ttsHilites).remove();
+    this.ttsHilites = [];
+}
+
+// ttsStartPolling()
+//______________________________________________________________________________
+// Play of the current chunk has ended, but the next chunk has not yet been loaded.
+// We need to wait for the text for the next page to be loaded, so we can
+// load the next audio chunk
+BookReader.prototype.ttsStartPolling = function () {
+    if (soundManager.debugMode) console.log('Starting the TTS poller...');
+    var self = this;
+    this.ttsPoller=setInterval(function(){
+        if (self.ttsBuffering) {return;}
+        
+        if (soundManager.debugMode) console.log('TTS buffering finished!');
+        clearInterval(self.ttsPoller);
+        self.ttsPoller = null;
+        self.ttsPrefetchAudio();
+        self.ttsNextChunk();
+    },500);    
+}
diff --git a/BookReader/images/progressbar.gif b/BookReader/images/progressbar.gif

new file mode 100644 (file)

index 0000000..d84f653

Binary files /dev/null and b/BookReader/images/progressbar.gif differ
diff --git a/BookReaderIA/datanode/BookReaderGetTTS.php b/BookReaderIA/datanode/BookReaderGetTTS.php

new file mode 100644 (file)

index 0000000..0597ecc
--- /dev/null
+++ b/BookReaderIA/datanode/BookReaderGetTTS.php
@@ -0,0 +1,41 @@
+<?
+
+/*
+Copyright(c)2008-2010 Internet Archive. Software license AGPL version 3.
+
+This file is part of BookReader.
+
+    BookReader is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    BookReader is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
+    
+    The BookReader source is hosted at http://github.com/openlibrary/bookreader/
+*/
+
+$useMP3 = true;
+if ('.ogg' == $_GET['format']) {
+    $useMP3 = false;
+}
+
+$cmd = 'echo ' . escapeshellarg($_GET['string']);
+#$cmd .= ' | /home/rkumar/dev/festival/build/festival/bin/text2wave -eval "(voice_cmu_us_slt_arctic_hts)"';
+$cmd .= ' | /home/rkumar/petabox/sw/bin/text2wave';
+if ($useMP3) {
+    header('Content-Type: audio/mpeg');
+    $cmd .= ' |ffmpeg -i - -f mp3 -';
+} else {
+    header('Content-Type: application/ogg');
+    $cmd .= ' |oggenc --quiet -';
+}
+
+passthru($cmd);
+?>
diff --git a/BookReaderIA/datanode/BookReaderGetText.py b/BookReaderIA/datanode/BookReaderGetText.py

new file mode 100644 (file)

index 0000000..7fef6e2
--- /dev/null
+++ b/BookReaderIA/datanode/BookReaderGetText.py
@@ -0,0 +1,242 @@
+#!/usr/bin/python
+
+# Copyright(c)2008-2010 Internet Archive. Software license AGPL version 3.
+# 
+# This file is part of BookReader.
+# 
+#     BookReader is free software: you can redistribute it and/or modify
+#     it under the terms of the GNU Affero General Public License as published by
+#     the Free Software Foundation, either version 3 of the License, or
+#     (at your option) any later version.
+# 
+#     BookReader is distributed in the hope that it will be useful,
+#     but WITHOUT ANY WARRANTY; without even the implied warranty of
+#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#     GNU Affero General Public License for more details.
+# 
+#     You should have received a copy of the GNU Affero General Public License
+#     along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
+#     
+#     The BookReader source is hosted at http://github.com/openlibrary/bookreader/
+
+
+#watch out for blank lines (<LINE></LINE>)
+
+from lxml import etree
+import sys
+import re
+import json
+import re
+
+from windowed_iterator import windowed_iterator
+from diff_match_patch import diff_match_patch
+
+minWordsInBlock = 25
+maxWordsInBlock = 50
+
+# Header/Footer detection parameters
+
+# 'Window' of neighboring pages to check for similar text that may
+# mark headers / footers
+windowsize = 10
+
+# Weights to assign to potential headers / footers.
+# len(weights) should be even.
+weights = (1.0, .75,
+           .75, 1.0)
+# weights = (1.0, .75, .5,
+#            .5, .75, 1.0)
+
+# allow potential headers/footers with this length difference
+max_length_difference = 4
+
+dmp = diff_match_patch()
+dmp.Match_Distance = 2 # number of prepended characters allowed before match
+dmp.Match_Threshold = .5 # 0 to 1 ... higher => more fanciful matches,
+                         # slower execution.
+
+# minimum match score for a line to be considered a header or footer.
+min_score = .9
+
+
+def guess_hfs(page, pages):
+    """ Given a page and a 'windowed iterator' giving access to neighboring
+    pages, return a dict containing likely header/footer lines on that page.
+
+    A line is considered a likely header/footer if it's near the
+    start/end of the page, and if it is textually similar to the same
+    line on neighboring pages.
+    """
+    
+    result = {}
+    
+    hf_candidates = get_hf_candidates(page)
+    neighbor_info = {}
+    for i in range(len(weights)):
+        if hf_candidates[i] is None:
+            continue
+        score = 0
+        for neighbor_page in pages.neighbors():
+            if neighbor_page in neighbor_info:
+                neighbor_candidates = neighbor_info[neighbor_page]
+            else:
+                neighbor_candidates = get_hf_candidates(neighbor_page)
+                neighbor_info[neighbor_page] = neighbor_candidates
+            if neighbor_candidates[i] is None:
+                continue
+            text = hf_candidates[i][1]
+            neighbor_text = neighbor_candidates[i][1]
+            if abs(len(text) - len(neighbor_text)) > max_length_difference:
+                continue
+            
+            matchstart = dmp.match_main(hf_candidates[i][1],
+                                        neighbor_candidates[i][1], 0)
+            if matchstart != -1:
+                score += weights[i]
+            if score > min_score:
+                result[hf_candidates[i][0]] = True
+                break
+    return result
+
+        
+def simplify_line_text(line):
+    text = etree.tostring(line, method='text', encoding=unicode).lower();
+    # collapse numbers (roman too) to '@' so headers will be more
+    # similar from page to page
+    text = re.sub(r'[ivx\d]', r'@', text)
+    text = re.sub(r'\s+', r' ', text)
+    return text
+
+
+def get_hf_candidates(page):
+    result = []
+    hfwin = len(weights) / 2
+    lines = [line for line in page.findall('.//LINE')]
+    for i in range(hfwin) + range(-hfwin, 0):
+        if abs(i) < len(lines):
+            result.append((lines[i], simplify_line_text(lines[i])))
+        else:
+            result.append(None)
+    return result
+
+
+def main(args):
+    path = args[0]
+    pageNum = int(args[1])
+    callback = args[2]
+
+    if not re.match('^/\d{1,2}/items/.+_djvu.xml$', path):
+        sys.exit(-1);
+    
+    if ('ttsNextPageCB' != callback):
+        callback = 'ttsStartCB'
+
+    f = open(path)
+    context = etree.iterparse(f, tag='OBJECT')
+    def drop_event(iter):
+        for event, page in iter:
+            yield page
+    pages = drop_event(context)
+    def clear_page(page):
+        page.clear()
+    pages = windowed_iterator(pages, windowsize, clear_page)
+    for i, page in enumerate(pages):
+        if i == pageNum:
+            break
+    hfs = guess_hfs(page, pages)
+
+    lines = page.findall('.//LINE')
+    
+    #print 'got %s .//lines' % len(lines)
+
+    textBlocks = []
+    block = ''
+    rects = []
+
+    numWords = 0
+
+    for line in lines:
+        # skip headers/footers
+        if line in hfs:
+            continue
+
+        top = sys.maxint
+        left = sys.maxint
+        right = -1
+        bottom = -1
+
+        numWordsInLine = 0
+
+        words = line.findall('.//WORD')
+
+        #print 'at start of line, rects ='
+        #print rects
+
+        for word in words:
+
+            numWordsInLine += 1
+
+            text = word.text
+            #print 'got text ' + text
+
+            coords = word.get('coords').split(',') #l,b,r,t
+            coords = map(int, coords)
+
+            if int(coords[0]) < left:
+                left = coords[0]
+
+            if coords[1] > bottom:
+                bottom = coords[1]
+
+            if coords[2] > right:
+                right = coords[2]
+
+            if coords[3] < top:
+                top = coords[3] 
+
+            block += word.text + ' '
+            numWords += 1
+
+            if text.endswith('.') and (numWords>minWordsInBlock):
+                #print 'end of block with numWords=%d' % numWords
+                #print 'block = ' + block
+
+                rects.append([left, bottom, right, top])            
+
+                #textBlocks.append(block.strip())
+                rects.insert(0, block.strip())            
+                textBlocks.append(rects)
+                block = ''
+                rects = []
+                numWords = 0
+                numWordsInLine = 0
+                top = sys.maxint
+                left = sys.maxint
+                right = -1
+                bottom = -1
+
+        #end of line
+        if numWordsInLine > 0:
+            rects.append([left, bottom, right, top])
+
+        if numWords>maxWordsInBlock:
+            #textBlocks.append(block.strip())        
+            rects.insert(0, block.strip())            
+            textBlocks.append(rects)        
+            block = ''
+            numWords = 0
+            rects = []        
+
+        #print 'at end of line, rects ='
+        #print rects
+
+    if '' != block:
+        #textBlocks.append(block.strip())
+        rects.insert(0, block.strip())            
+        textBlocks.append(rects)
+
+    print 'br.%s(%s);' % (callback, json.dumps(textBlocks))
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff --git a/BookReaderIA/datanode/BookReaderGetTextWrapper.php b/BookReaderIA/datanode/BookReaderGetTextWrapper.php

new file mode 100644 (file)

index 0000000..8e3fd25
--- /dev/null
+++ b/BookReaderIA/datanode/BookReaderGetTextWrapper.php
@@ -0,0 +1,30 @@
+<?
+
+/*
+Copyright(c)2008-2010 Internet Archive. Software license AGPL version 3.
+
+This file is part of BookReader.
+
+    BookReader is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    BookReader is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
+    
+    The BookReader source is hosted at http://github.com/openlibrary/bookreader/
+*/
+
+//$env = 'LD_LIBRARY_PATH=/petabox/sw/lib/lxml/lib PYTHONPATH=/petabox/sw/lib/lxml/lib/python2.5/site-packages:$PYTHONPATH';
+
+// Read the request parameters without raising undefined-index notices; a
+// missing parameter is passed on as an empty argument, as before.
+$path     = isset($_GET['path'])     ? $_GET['path']     : '';
+$page     = isset($_GET['page'])     ? $_GET['page']     : '';
+$callback = isset($_GET['callback']) ? $_GET['callback'] : '';
+
+// BookReaderGetText.py prints the callback verbatim into the JavaScript
+// response ("br.<callback>(...)"), so only accept a plain, optionally dotted,
+// identifier. Shell escaping alone does not stop script text being reflected.
+if (!preg_match('/^[A-Za-z_$][A-Za-z0-9_$]*(\.[A-Za-z_$][A-Za-z0-9_$]*)*$/', $callback)) {
+    header('HTTP/1.1 400 Bad Request');
+    header('Content-Type: text/plain');
+    echo 'Invalid callback';
+    exit;
+}
+
+// Every argument is shell-escaped before being handed to the Python script.
+$path     = escapeshellarg($path);
+$page     = escapeshellarg($page);
+$callback = escapeshellarg($callback);
+header('Content-Type: application/javascript');
+passthru("python BookReaderGetText.py $path $page $callback");
+?>
diff --git a/BookReaderIA/datanode/BookReaderJSIA.php b/BookReaderIA/datanode/BookReaderJSIA.php

index 5418573..ee71384 100644 (file)
--- a/BookReaderIA/datanode/BookReaderJSIA.php
+++ b/BookReaderIA/datanode/BookReaderJSIA.php
@@ -18,6 +18,8 @@ This file is part of BookReader.
      along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
  */
  
+header('Content-Type: application/javascript');
+
  $id = $_REQUEST['id'];
  $itemPath = $_REQUEST['itemPath'];
  $subPrefix = $_REQUEST['subPrefix'];
diff --git a/BookReaderIA/datanode/diff_match_patch.py b/BookReaderIA/datanode/diff_match_patch.py

new file mode 100644 (file)

index 0000000..039f475
--- /dev/null
+++ b/BookReaderIA/datanode/diff_match_patch.py
@@ -0,0 +1,1859 @@
+#!/usr/bin/python2.4
+
+"""Diff Match and Patch
+
+Copyright 2006 Google Inc.
+http://code.google.com/p/google-diff-match-patch/
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+"""Functions for diff, match and patch.
+
+Computes the difference between two texts to create a patch.
+Applies the patch onto another text, allowing for errors.
+"""
+
+__author__ = 'fraser@google.com (Neil Fraser)'
+
+import math
+import time
+import urllib
+import re
+
+class diff_match_patch:
+  """Class containing the diff, match and patch methods.
+
+  Also contains the behaviour settings.
+  """
+
+  def __init__(self):
+    """Inits a diff_match_patch object with default settings.
+    Redefine these in your program to override the defaults.
+
+    Every setting is a plain instance attribute, so it can be reassigned on
+    an individual object after construction.
+    """
+
+    # Number of seconds to map a diff before giving up (0 for infinity).
+    self.Diff_Timeout = 1.0
+    # Cost of an empty edit operation in terms of edit characters.
+    self.Diff_EditCost = 4
+    # The size beyond which the double-ended diff activates.
+    # Double-ending is twice as fast, but less accurate.
+    self.Diff_DualThreshold = 32
+    # At what point is no match declared (0.0 = perfection, 1.0 = very loose).
+    self.Match_Threshold = 0.5
+    # How far to search for a match (0 = exact location, 1000+ = broad match).
+    # A match this many characters away from the expected location will add
+    # 1.0 to the score (0.0 is a perfect match).
+    self.Match_Distance = 1000
+    # When deleting a large block of text (over ~64 characters), how close does
+    # the contents have to match the expected contents. (0.0 = perfection,
+    # 1.0 = very loose).  Note that Match_Threshold controls how closely the
+    # end points of a delete need to match.
+    self.Patch_DeleteThreshold = 0.5
+    # Chunk size for context length.
+    self.Patch_Margin = 4
+
+    # How many bits in a number?
+    # Python has no maximum, thus to disable patch splitting set to 0.
+    # However to avoid long patches in certain pathological cases, use 32.
+    # Multiple short patches (using native ints) are much faster than long ones.
+    self.Match_MaxBits = 32
+
+  #  DIFF FUNCTIONS
+
+  # The data structure representing a diff is an array of tuples:
+  # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
+  # which means: delete "Hello", add "Goodbye" and keep " world."
+  DIFF_DELETE = -1
+  DIFF_INSERT = 1
+  DIFF_EQUAL = 0
+
+  def diff_main(self, text1, text2, checklines=True):
+    """Find the differences between two texts.  Simplifies the problem by
+      stripping any common prefix or suffix off the texts before diffing.
+
+    Args:
+      text1: Old string to be diffed.
+      text2: New string to be diffed.
+      checklines: Optional speedup flag.  If present and false, then don't run
+        a line-level diff first to identify the changed areas.
+        Defaults to true, which does a faster, slightly less optimal diff.
+
+    Returns:
+      Array of changes.
+
+    Raises:
+      ValueError: If text1 or text2 is None.
+    """
+
+    # Check for null inputs.
+    if text1 == None or text2 == None:
+      raise ValueError("Null inputs. (diff_main)")
+
+    # Check for equality (speedup).
+    if text1 == text2:
+      return [(self.DIFF_EQUAL, text1)]
+
+    # Trim off common prefix (speedup).
+    commonlength = self.diff_commonPrefix(text1, text2)
+    commonprefix = text1[:commonlength]
+    text1 = text1[commonlength:]
+    text2 = text2[commonlength:]
+
+    # Trim off common suffix (speedup).
+    commonlength = self.diff_commonSuffix(text1, text2)
+    if commonlength == 0:
+      # Handled separately because text1[-0:] would be the whole string.
+      commonsuffix = ''
+    else:
+      commonsuffix = text1[-commonlength:]
+      text1 = text1[:-commonlength]
+      text2 = text2[:-commonlength]
+
+    # Compute the diff on the middle block.
+    diffs = self.diff_compute(text1, text2, checklines)
+
+    # Restore the prefix and suffix.
+    if commonprefix:
+      diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
+    if commonsuffix:
+      diffs.append((self.DIFF_EQUAL, commonsuffix))
+    # Tidy the list in place before returning it.
+    self.diff_cleanupMerge(diffs)
+    return diffs
+
+  def diff_compute(self, text1, text2, checklines):
+    """Find the differences between two texts.  Assumes that the texts do not
+      have any common prefix or suffix.
+
+    Args:
+      text1: Old string to be diffed.
+      text2: New string to be diffed.
+      checklines: Speedup flag.  If false, then don't run a line-level diff
+        first to identify the changed areas.
+        If true, then run a faster, slightly less optimal diff.
+
+    Returns:
+      Array of changes.
+    """
+    if not text1:
+      # Just add some text (speedup).
+      return [(self.DIFF_INSERT, text2)]
+
+    if not text2:
+      # Just delete some text (speedup).
+      return [(self.DIFF_DELETE, text1)]
+
+    if len(text1) > len(text2):
+      (longtext, shorttext) = (text1, text2)
+    else:
+      (shorttext, longtext) = (text1, text2)
+    i = longtext.find(shorttext)
+    if i != -1:
+      # Shorter text is inside the longer text (speedup).
+      diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
+               (self.DIFF_INSERT, longtext[i + len(shorttext):])]
+      # Swap insertions for deletions if diff is reversed.
+      if len(text1) > len(text2):
+        diffs[0] = (self.DIFF_DELETE, diffs[0][1])
+        diffs[2] = (self.DIFF_DELETE, diffs[2][1])
+      return diffs
+    longtext = shorttext = None  # Garbage collect.
+
+    # Check to see if the problem can be split in two.
+    hm = self.diff_halfMatch(text1, text2)
+    if hm:
+      # A half-match was found, sort out the return data.
+      (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
+      # Send both pairs off for separate processing.
+      diffs_a = self.diff_main(text1_a, text2_a, checklines)
+      diffs_b = self.diff_main(text1_b, text2_b, checklines)
+      # Merge the results.
+      return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
+
+    # Perform a real diff.
+    if checklines and (len(text1) < 100 or len(text2) < 100):
+      checklines = False  # Too trivial for the overhead.
+    if checklines:
+      # Scan the text on a line-by-line basis first.
+      (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
+
+    diffs = self.diff_map(text1, text2)
+    # diff_map returns None on timeout or when it finds no path; fall back to
+    # replacing the whole of text1 with the whole of text2.
+    if not diffs:  # No acceptable result.
+      diffs = [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
+    if checklines:
+      # Convert the diff back to original text.
+      self.diff_charsToLines(diffs, linearray)
+      # Eliminate freak matches (e.g. blank lines)
+      self.diff_cleanupSemantic(diffs)
+
+      # Rediff any replacement blocks, this time character-by-character.
+      # Add a dummy entry at the end.
+      diffs.append((self.DIFF_EQUAL, ''))
+      pointer = 0
+      count_delete = 0
+      count_insert = 0
+      text_delete = ''
+      text_insert = ''
+      while pointer < len(diffs):
+        if diffs[pointer][0] == self.DIFF_INSERT:
+          count_insert += 1
+          text_insert += diffs[pointer][1]
+        elif diffs[pointer][0] == self.DIFF_DELETE:
+          count_delete += 1
+          text_delete += diffs[pointer][1]
+        elif diffs[pointer][0] == self.DIFF_EQUAL:
+          # Upon reaching an equality, check for prior redundancies.
+          if count_delete >= 1 and count_insert >= 1:
+            # Delete the offending records and add the merged ones.
+            a = self.diff_main(text_delete, text_insert, False)
+            # Swap the run of delete/insert records just before this equality
+            # for the character-level diff, then move pointer back onto the
+            # equality, whose index shifted with the replacement.
+            diffs[pointer - count_delete - count_insert : pointer] = a
+            pointer = pointer - count_delete - count_insert + len(a)
+          count_insert = 0
+          count_delete = 0
+          text_delete = ''
+          text_insert = ''
+
+        pointer += 1
+
+      diffs.pop()  # Remove the dummy entry at the end.
+    return diffs
+
+  def diff_linesToChars(self, text1, text2):
+    """Split two texts into an array of strings.  Reduce the texts to a string
+    of hashes where each Unicode character represents one line.
+
+    Args:
+      text1: First string.
+      text2: Second string.
+
+    Returns:
+      Three element tuple, containing the encoded text1, the encoded text2 and
+      the array of unique strings.  The zeroth element of the array of unique
+      strings is intentionally blank.
+    """
+    lineArray = []  # e.g. lineArray[4] == "Hello\n"
+    lineHash = {}   # e.g. lineHash["Hello\n"] == 4
+
+    # "\x00" is a valid character, but various debuggers don't like it.
+    # So we'll insert a junk entry to avoid generating a null character.
+    lineArray.append('')
+
+    def diff_linesToCharsMunge(text):
+      """Split a text into an array of strings.  Reduce the texts to a string
+      of hashes where each Unicode character represents one line.
+      Modifies lineArray and lineHash through being a closure.
+
+      Args:
+        text: String to encode.
+
+      Returns:
+        Encoded string.
+      """
+      chars = []
+      # Walk the text, pulling out a substring for each line.
+      # text.split('\n') would temporarily double our memory footprint.
+      # Modifying text would create many large strings to garbage collect.
+      lineStart = 0
+      lineEnd = -1
+      while lineEnd < len(text) - 1:
+        lineEnd = text.find('\n', lineStart)
+        if lineEnd == -1:
+          # No further newline: the rest of the text is the final line.
+          lineEnd = len(text) - 1
+        # Each line keeps its trailing newline, when it has one.
+        line = text[lineStart:lineEnd + 1]
+        lineStart = lineEnd + 1
+
+        if line in lineHash:
+          chars.append(unichr(lineHash[line]))
+        else:
+          lineArray.append(line)
+          lineHash[line] = len(lineArray) - 1
+          chars.append(unichr(len(lineArray) - 1))
+      return "".join(chars)
+
+    # Both texts share lineArray/lineHash, so identical lines get the same code.
+    chars1 = diff_linesToCharsMunge(text1)
+    chars2 = diff_linesToCharsMunge(text2)
+    return (chars1, chars2, lineArray)
+
+  def diff_charsToLines(self, diffs, lineArray):
+    """Rehydrate the text in a diff from a string of line hashes to real lines
+    of text.
+
+    The diffs list is rewritten in place; nothing is returned.
+
+    Args:
+      diffs: Array of diff tuples.
+      lineArray: Array of unique strings.
+    """
+    for x in xrange(len(diffs)):
+      text = []
+      # Each character's code point is an index into lineArray.
+      for char in diffs[x][1]:
+        text.append(lineArray[ord(char)])
+      diffs[x] = (diffs[x][0], "".join(text))
+
+  def diff_map(self, text1, text2):
+    """Explore the intersection points between the two texts.
+
+    Args:
+      text1: Old string to be diffed.
+      text2: New string to be diffed.
+
+    Returns:
+      Array of diff tuples or None if no diff available.  None is returned
+      when Diff_Timeout (if greater than 0) elapses, or when the search loop
+      finishes without the paths completing or meeting.
+    """
+
+    # Unlike in most languages, Python counts time in seconds.
+    s_end = time.time() + self.Diff_Timeout  # Don't run for too long.
+    # Cache the text lengths to prevent multiple calls.
+    text1_length = len(text1)
+    text2_length = len(text2)
+    max_d = text1_length + text2_length - 1
+    # Only search from both ends when the problem is large enough.
+    doubleEnd = self.Diff_DualThreshold * 2 < max_d
+    # Python efficiency note: (x << 32) + y is the fastest way to combine
+    # x and y into a single hashable value.  Tested in Python 2.5.
+    # It is unclear why it is faster for v_map[d] to be indexed with an
+    # integer whereas footsteps is indexed with a string.
+    v_map1 = []
+    v_map2 = []
+    v1 = {}
+    v2 = {}
+    # Seed entries: on the first step (d == 0, k == 0) the loops read v[k + 1].
+    v1[1] = 0
+    v2[1] = 0
+    # Maps a position key (as a string) to the step d at which it was visited.
+    # Depending on `front`, one path records its positions here and the other
+    # path looks them up to detect that the two have met.
+    footsteps = {}
+    done = False
+    # If the total number of characters is odd, then the front path will
+    # collide with the reverse path.
+    front = (text1_length + text2_length) % 2
+    for d in xrange(max_d):
+      # Bail out if timeout reached.
+      if self.Diff_Timeout > 0 and time.time() > s_end:
+        return None
+
+      # Walk the front path one step.
+      v_map1.append({})
+      for k in xrange(-d, d + 1, 2):
+        if k == -d or k != d and v1[k - 1] < v1[k + 1]:
+          x = v1[k + 1]
+        else:
+          x = v1[k - 1] + 1
+        y = x - k
+        if doubleEnd:
+          footstep = str((x << 32) + y)
+          if front and footstep in footsteps:
+            done = True
+          if not front:
+            footsteps[footstep] = d
+
+        # Follow the run of matching characters from (x, y).
+        while (not done and x < text1_length and y < text2_length and
+               text1[x] == text2[y]):
+          x += 1
+          y += 1
+          if doubleEnd:
+            footstep = str((x << 32) + y)
+            if front and footstep in footsteps:
+              done = True
+            if not front:
+              footsteps[footstep] = d
+
+        v1[k] = x
+        v_map1[d][(x << 32) + y] = True
+        if x == text1_length and y == text2_length:
+          # Reached the end in single-path mode.
+          return self.diff_path1(v_map1, text1, text2)
+        elif done:
+          # Front path ran over reverse path.
+          v_map2 = v_map2[:footsteps[footstep] + 1]
+          a = self.diff_path1(v_map1, text1[:x], text2[:y])
+          b = self.diff_path2(v_map2, text1[x:], text2[y:])
+          return a + b
+
+      if doubleEnd:
+        # Walk the reverse path one step.
+        v_map2.append({})
+        for k in xrange(-d, d + 1, 2):
+          if k == -d or k != d and v2[k - 1] < v2[k + 1]:
+            x = v2[k + 1]
+          else:
+            x = v2[k - 1] + 1
+          y = x - k
+          # Here x and y count from the ends of the texts; the key is built
+          # from (text1_length - x, text2_length - y) so that it is comparable
+          # with the keys written by the front path.
+          footstep = str((text1_length - x << 32) + text2_length - y)
+          if not front and footstep in footsteps:
+            done = True
+          if front:
+            footsteps[footstep] = d
+          while (not done and x < text1_length and y < text2_length and
+                 text1[-x - 1] == text2[-y - 1]):
+            x += 1
+            y += 1
+            footstep = str((text1_length - x << 32) + text2_length - y)
+            if not front and footstep in footsteps:
+              done = True
+            if front:
+              footsteps[footstep] = d
+
+          v2[k] = x
+          v_map2[d][(x << 32) + y] = True
+          if done:
+            # Reverse path ran over front path.
+            v_map1 = v_map1[:footsteps[footstep] + 1]
+            a = self.diff_path1(v_map1, text1[:text1_length - x],
+                                text2[:text2_length - y])
+            b = self.diff_path2(v_map2, text1[text1_length - x:],
+                                text2[text2_length - y:])
+            return a + b
+
+    # Number of diffs equals number of characters, no commonality at all.
+    return None
+
+  def diff_path1(self, v_map, text1, text2):
+    """Work from the middle back to the start to determine the path.
+
+    Args:
+      v_map: Array of paths.
+      text1: Old string fragment to be diffed.
+      text2: New string fragment to be diffed.
+
+    Returns:
+      Array of diff tuples.
+    """
+    path = []
+    x = len(text1)
+    y = len(text2)
+    last_op = None
+    # Walk the recorded steps backwards, starting at the second-to-last one.
+    for d in xrange(len(v_map) - 2, -1, -1):
+      while True:
+        # Keys have the form (x << 32) + y.  '-' binds tighter than '<<', so
+        # the test below looks up ((x - 1) << 32) + y, i.e. position (x-1, y).
+        if (x - 1 << 32) + y in v_map[d]:
+          x -= 1
+          # Consecutive identical operations are merged into the head tuple;
+          # text is prepended because the path is being built back to front.
+          if last_op == self.DIFF_DELETE:
+            path[0] = (self.DIFF_DELETE, text1[x] + path[0][1])
+          else:
+            path[:0] = [(self.DIFF_DELETE, text1[x])]
+          last_op = self.DIFF_DELETE
+          break
+        # Position (x, y-1).
+        elif (x << 32) + y - 1 in v_map[d]:
+          y -= 1
+          if last_op == self.DIFF_INSERT:
+            path[0] = (self.DIFF_INSERT, text2[y] + path[0][1])
+          else:
+            path[:0] = [(self.DIFF_INSERT, text2[y])]
+          last_op = self.DIFF_INSERT
+          break
+        else:
+          # Neither neighbour was recorded at this step: consume one equal
+          # character diagonally and test again (no break).
+          x -= 1
+          y -= 1
+          assert text1[x] == text2[y], ("No diagonal.  " +
+              "Can't happen. (diff_path1)")
+          if last_op == self.DIFF_EQUAL:
+            path[0] = (self.DIFF_EQUAL, text1[x] + path[0][1])
+          else:
+            path[:0] = [(self.DIFF_EQUAL, text1[x])]
+          last_op = self.DIFF_EQUAL
+    return path
+
+  def diff_path2(self, v_map, text1, text2):
+    """Work from the middle back to the end to determine the path.
+
+    Args:
+      v_map: Array of paths.
+      text1: Old string fragment to be diffed.
+      text2: New string fragment to be diffed.
+
+    Returns:
+      Array of diff tuples.
+    """
+    path = []
+    x = len(text1)
+    y = len(text2)
+    last_op = None
+    # Walk the recorded steps backwards, starting at the second-to-last one.
+    for d in xrange(len(v_map) - 2, -1, -1):
+      while True:
+        # Keys have the form (x << 32) + y.  '-' binds tighter than '<<', so
+        # the test below looks up ((x - 1) << 32) + y.
+        if (x - 1 << 32) + y in v_map[d]:
+          x -= 1
+          # Characters are read with negative indices (-x - 1 counts from the
+          # end of the text) and appended, so the path grows towards the end.
+          if last_op == self.DIFF_DELETE:
+            path[-1] = (self.DIFF_DELETE, path[-1][1] + text1[-x - 1])
+          else:
+            path.append((self.DIFF_DELETE, text1[-x - 1]))
+          last_op = self.DIFF_DELETE
+          break
+        elif (x << 32) + y - 1 in v_map[d]:
+          y -= 1
+          if last_op == self.DIFF_INSERT:
+            path[-1] = (self.DIFF_INSERT, path[-1][1] + text2[-y - 1])
+          else:
+            path.append((self.DIFF_INSERT, text2[-y - 1]))
+          last_op = self.DIFF_INSERT
+          break
+        else:
+          # Neither neighbour was recorded at this step: consume one equal
+          # character and test again (no break).
+          x -= 1
+          y -= 1
+          assert text1[-x - 1] == text2[-y - 1], ("No diagonal.  " +
+              "Can't happen. (diff_path2)")
+          if last_op == self.DIFF_EQUAL:
+            path[-1] = (self.DIFF_EQUAL, path[-1][1] + text1[-x - 1])
+          else:
+            path.append((self.DIFF_EQUAL, text1[-x - 1]))
+          last_op = self.DIFF_EQUAL
+    return path
+
+  def diff_commonPrefix(self, text1, text2):
+    """Determine the common prefix of two strings.
+
+    Args:
+      text1: First string.
+      text2: Second string.
+
+    Returns:
+      The number of characters common to the start of each string.
+    """
+    # Quick check for common null cases.
+    if not text1 or not text2 or text1[0] != text2[0]:
+      return 0
+    # Binary search.
+    # Performance analysis: http://neil.fraser.name/news/2007/10/09/
+    pointermin = 0
+    pointermax = min(len(text1), len(text2))
+    pointermid = pointermax
+    # Characters before pointerstart are already known to match, so each
+    # probe only compares the slice from pointerstart to pointermid.
+    pointerstart = 0
+    while pointermin < pointermid:
+      if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
+        pointermin = pointermid
+        pointerstart = pointermin
+      else:
+        pointermax = pointermid
+      pointermid = int((pointermax - pointermin) / 2 + pointermin)
+    return pointermid
+
+  def diff_commonSuffix(self, text1, text2):
+    """Determine the common suffix of two strings.
+
+    Args:
+      text1: First string.
+      text2: Second string.
+
+    Returns:
+      The number of characters common to the end of each string.
+    """
+    # Quick check for common null cases.
+    if not text1 or not text2 or text1[-1] != text2[-1]:
+      return 0
+    # Binary search.
+    # Performance analysis: http://neil.fraser.name/news/2007/10/09/
+    pointermin = 0
+    pointermax = min(len(text1), len(text2))
+    pointermid = pointermax
+    # The last pointerend characters are already known to match, so each
+    # probe leaves them out of the compared slice.
+    pointerend = 0
+    while pointermin < pointermid:
+      # The slice end is written as len(text) - pointerend rather than
+      # -pointerend because a slice end of -0 would give an empty slice.
+      if (text1[-pointermid:len(text1) - pointerend] ==
+          text2[-pointermid:len(text2) - pointerend]):
+        pointermin = pointermid
+        pointerend = pointermin
+      else:
+        pointermax = pointermid
+      pointermid = int((pointermax - pointermin) / 2 + pointermin)
+    return pointermid
+
+  def diff_halfMatch(self, text1, text2):
+    """Do the two texts share a substring which is at least half the length of
+    the longer text?
+
+    Args:
+      text1: First string.
+      text2: Second string.
+
+    Returns:
+      Five element Array, containing the prefix of text1, the suffix of text1,
+      the prefix of text2, the suffix of text2 and the common middle.  Or None
+      if there was no match.
+    """
+    if len(text1) > len(text2):
+      (longtext, shorttext) = (text1, text2)
+    else:
+      (shorttext, longtext) = (text1, text2)
+    if len(longtext) < 10 or len(shorttext) < 1:
+      return None  # Pointless.
+
+    def diff_halfMatchI(longtext, shorttext, i):
+      """Does a substring of shorttext exist within longtext such that the
+      substring is at least half the length of longtext?
+      Closure, but does not reference any external variables.
+
+      Args:
+        longtext: Longer string.
+        shorttext: Shorter string.
+        i: Start index of quarter length substring within longtext.
+
+      Returns:
+        Five element Array, containing the prefix of longtext, the suffix of
+        longtext, the prefix of shorttext, the suffix of shorttext and the
+        common middle.  Or None if there was no match.
+      """
+      # The '/' results here and below are used as slice bounds and indices,
+      # which relies on Python 2 integer division of ints.
+      seed = longtext[i:i + len(longtext) / 4]
+      best_common = ''
+      j = shorttext.find(seed)
+      while j != -1:
+        # Grow each occurrence of the seed in both directions.
+        prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
+        suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
+        if len(best_common) < suffixLength + prefixLength:
+          best_common = (shorttext[j - suffixLength:j] +
+              shorttext[j:j + prefixLength])
+          best_longtext_a = longtext[:i - suffixLength]
+          best_longtext_b = longtext[i + prefixLength:]
+          best_shorttext_a = shorttext[:j - suffixLength]
+          best_shorttext_b = shorttext[j + prefixLength:]
+        j = shorttext.find(seed, j + 1)
+
+      # The best_* names are only assigned once some match has been recorded.
+      # The outer function only calls this with len(longtext) >= 10, so the
+      # threshold is at least 5 and an empty best_common never reaches the
+      # return that reads them.
+      if len(best_common) >= len(longtext) / 2:
+        return (best_longtext_a, best_longtext_b,
+                best_shorttext_a, best_shorttext_b, best_common)
+      else:
+        return None
+
+    # First check if the second quarter is the seed for a half-match.
+    hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) / 4)
+    # Check again based on the third quarter.
+    hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) / 2)
+    if not hm1 and not hm2:
+      return None
+    elif not hm2:
+      hm = hm1
+    elif not hm1:
+      hm = hm2
+    else:
+      # Both matched.  Select the longest.
+      if len(hm1[4]) > len(hm2[4]):
+        hm = hm1
+      else:
+        hm = hm2
+
+    # A half-match was found, sort out the return data.
+    # hm is ordered (long_a, long_b, short_a, short_b, common); map it back to
+    # text1/text2 order according to which of the two was the longer text.
+    if len(text1) > len(text2):
+      (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
+    else:
+      (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
+    return (text1_a, text1_b, text2_a, text2_b, mid_common)
+
+  def diff_cleanupSemantic(self, diffs):
+    """Reduce the number of edits by eliminating semantically trivial
+    equalities.
+
+    The diffs list is modified in place; nothing is returned.
+
+    Args:
+      diffs: Array of diff tuples.
+    """
+    changes = False
+    equalities = []  # Stack of indices where equalities are found.
+    lastequality = None  # Text of the latest equality, diffs[equalities[-1]][1].
+    pointer = 0  # Index of current position.
+    length_changes1 = 0  # Number of chars that changed prior to the equality.
+    length_changes2 = 0  # Number of chars that changed after the equality.
+    while pointer < len(diffs):
+      if diffs[pointer][0] == self.DIFF_EQUAL:  # equality found
+        equalities.append(pointer)
+        length_changes1 = length_changes2
+        length_changes2 = 0
+        lastequality = diffs[pointer][1]
+      else:  # an insertion or deletion
+        length_changes2 += len(diffs[pointer][1])
+        # An equality no longer than the changes on either side of it is
+        # replaced by a delete plus an insert of the same text.
+        if (lastequality != None and (len(lastequality) <= length_changes1) and
+            (len(lastequality) <= length_changes2)):
+          # Duplicate record
+          diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
+          # Change second copy to insert.
+          diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
+              diffs[equalities[-1] + 1][1])
+          # Throw away the equality we just deleted.
+          equalities.pop()
+          # Throw away the previous equality (it needs to be reevaluated).
+          if len(equalities) != 0:
+            equalities.pop()
+          # Rewind to the last remaining equality, or to the start of the list
+          # (-1 becomes 0 after the increment below).
+          if len(equalities):
+            pointer = equalities[-1]
+          else:
+            pointer = -1
+          length_changes1 = 0  # Reset the counters.
+          length_changes2 = 0
+          lastequality = None
+          changes = True
+      pointer += 1
+
+    if changes:
+      self.diff_cleanupMerge(diffs)
+
+    self.diff_cleanupSemanticLossless(diffs)
+
+  def diff_cleanupSemanticLossless(self, diffs):
+    """Look for single edits surrounded on both sides by equalities
+    which can be shifted sideways to align the edit to a word boundary.
+    e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
+
+    The diffs list is modified in place; nothing is returned.
+
+    Args:
+      diffs: Array of diff tuples.
+    """
+
+    def diff_cleanupSemanticScore(one, two):
+      """Given two strings, compute a score representing whether the
+      internal boundary falls on logical boundaries.
+      Scores range from 5 (best) to 0 (worst).
+      Closure, but does not reference any external variables.
+
+      Args:
+        one: First string.
+        two: Second string.
+
+      Returns:
+        The score.
+      """
+      if not one or not two:
+        # Edges are the best.
+        return 5
+
+      # Each port of this function behaves slightly differently due to
+      # subtle differences in each language's definition of things like
+      # 'whitespace'.  Since this function's purpose is largely cosmetic,
+      # the choice has been made to use each language's native features
+      # rather than force total conformity.
+      score = 0
+      # The checks are nested, so each level is only reached when the
+      # previous one passed and the score accumulates one point per level.
+      # One point for non-alphanumeric.
+      if not one[-1].isalnum() or not two[0].isalnum():
+        score += 1
+        # Two points for whitespace.
+        if one[-1].isspace() or two[0].isspace():
+          score += 1
+          # Three points for line breaks.
+          if (one[-1] == "\r" or one[-1] == "\n" or
+              two[0] == "\r" or two[0] == "\n"):
+            score += 1
+            # Four points for blank lines.
+            if (re.search("\\n\\r?\\n$", one) or
+                re.match("^\\r?\\n\\r?\\n", two)):
+              score += 1
+      return score
+
+    pointer = 1
+    # Intentionally ignore the first and last element (don't need checking).
+    while pointer < len(diffs) - 1:
+      if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
+          diffs[pointer + 1][0] == self.DIFF_EQUAL):
+        # This is a single edit surrounded by equalities.
+        equality1 = diffs[pointer - 1][1]
+        edit = diffs[pointer][1]
+        equality2 = diffs[pointer + 1][1]
+
+        # First, shift the edit as far left as possible.
+        commonOffset = self.diff_commonSuffix(equality1, edit)
+        if commonOffset:
+          commonString = edit[-commonOffset:]
+          equality1 = equality1[:-commonOffset]
+          edit = commonString + edit[:-commonOffset]
+          equality2 = commonString + equality2
+
+        # Second, step character by character right, looking for the best fit.
+        bestEquality1 = equality1
+        bestEdit = edit
+        bestEquality2 = equality2
+        bestScore = (diff_cleanupSemanticScore(equality1, edit) +
+            diff_cleanupSemanticScore(edit, equality2))
+        while edit and equality2 and edit[0] == equality2[0]:
+          # Move one character from the front of the edit to the end of
+          # equality1, and one from the front of equality2 to the end of the
+          # edit.
+          equality1 += edit[0]
+          edit = edit[1:] + equality2[0]
+          equality2 = equality2[1:]
+          score = (diff_cleanupSemanticScore(equality1, edit) +
+              diff_cleanupSemanticScore(edit, equality2))
+          # The >= encourages trailing rather than leading whitespace on edits.
+          if score >= bestScore:
+            bestScore = score
+            bestEquality1 = equality1
+            bestEdit = edit
+            bestEquality2 = equality2
+
+        if diffs[pointer - 1][1] != bestEquality1:
+          # We have an improvement, save it back to the diff.
+          if bestEquality1:
+            diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
+          else:
+            # The leading equality became empty: remove it and step pointer
+            # back so that it still indexes the edit.
+            del diffs[pointer - 1]
+            pointer -= 1
+          diffs[pointer] = (diffs[pointer][0], bestEdit)
+          if bestEquality2:
+            diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
+          else:
+            # The trailing equality became empty: remove it.
+            del diffs[pointer + 1]
+            pointer -= 1
+      pointer += 1
+
+  def diff_cleanupEfficiency(self, diffs):
+    """Reduce the number of edits by eliminating operationally trivial
+    equalities.
+
+    The diffs list is modified in place; nothing is returned.
+
+    Args:
+      diffs: Array of diff tuples.
+    """
+    changes = False
+    equalities = []  # Stack of indices where equalities are found.
+    lastequality = ''  # Text of the latest candidate, diffs[equalities[-1]][1].
+    pointer = 0  # Index of current position.
+    pre_ins = False  # Is there an insertion operation before the last equality.
+    pre_del = False  # Is there a deletion operation before the last equality.
+    post_ins = False  # Is there an insertion operation after the last equality.
+    post_del = False  # Is there a deletion operation after the last equality.
+    while pointer < len(diffs):
+      if diffs[pointer][0] == self.DIFF_EQUAL:  # equality found
+        if (len(diffs[pointer][1]) < self.Diff_EditCost and
+            (post_ins or post_del)):
+          # Candidate found.
+          equalities.append(pointer)
+          pre_ins = post_ins
+          pre_del = post_del
+          lastequality = diffs[pointer][1]
+        else:
+          # Not a candidate, and can never become one.
+          equalities = []
+          lastequality = ''
+
+        post_ins = post_del = False
+      else:  # an insertion or deletion
+        if diffs[pointer][0] == self.DIFF_DELETE:
+          post_del = True
+        else:
+          post_ins = True
+
+        # Five types to be split:
+        # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
+        # <ins>A</ins>X<ins>C</ins><del>D</del>
+        # <ins>A</ins><del>B</del>X<ins>C</ins>
+        # <del>A</del>X<ins>C</ins><del>D</del>
+        # <ins>A</ins><del>B</del>X<del>C</del>
+
+        # The four flags are booleans; adding them counts how many are set.
+        if lastequality and ((pre_ins and pre_del and post_ins and post_del) or
+                             ((len(lastequality) < self.Diff_EditCost / 2) and
+                              (pre_ins + pre_del + post_ins + post_del) == 3)):
+          # Duplicate record
+          diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
+          # Change second copy to insert.
+          diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
+              diffs[equalities[-1] + 1][1])
+          equalities.pop()  # Throw away the equality we just deleted
+          lastequality = ''
+          if pre_ins and pre_del:
+            # No changes made which could affect previous entry, keep going.
+            post_ins = post_del = True
+            equalities = []
+          else:
+            if len(equalities):
+              equalities.pop()  # Throw away the previous equality
+            # Rewind to the last remaining candidate, or to the start of the
+            # list (-1 becomes 0 after the increment below).
+            if len(equalities):
+              pointer = equalities[-1]
+            else:
+              pointer = -1
+            post_ins = post_del = False
+          changes = True
+      pointer += 1
+
+    if changes:
+      self.diff_cleanupMerge(diffs)
+
+  def diff_cleanupMerge(self, diffs):
+    """Reorder and merge like edit sections.  Merge equalities.
+    Any edit section can move as long as it doesn't cross an equality.
+
+    The list is modified in place and nothing is returned.  A first pass
+    collapses each run of insertions/deletions into at most one deletion
+    followed by one insertion and joins adjacent equalities; a second pass
+    slides single edits sideways over a neighbouring equality.  If the
+    second pass changed anything, the method calls itself again.
+
+    Args:
+      diffs: Array of diff tuples.
+    """
+    diffs.append((self.DIFF_EQUAL, ''))  # Add a dummy entry at the end.
+    pointer = 0
+    # Number and concatenated text of the edits seen since the last equality.
+    count_delete = 0
+    count_insert = 0
+    text_delete = ''
+    text_insert = ''
+    while pointer < len(diffs):
+      if diffs[pointer][0] == self.DIFF_INSERT:
+        count_insert += 1
+        text_insert += diffs[pointer][1]
+        pointer += 1
+      elif diffs[pointer][0] == self.DIFF_DELETE:
+        count_delete += 1
+        text_delete += diffs[pointer][1]
+        pointer += 1
+      elif diffs[pointer][0] == self.DIFF_EQUAL:
+        # Upon reaching an equality, check for prior redundancies.
+        if count_delete != 0 or count_insert != 0:
+          if count_delete != 0 and count_insert != 0:
+            # Factor out any common prefixes.
+            commonlength = self.diff_commonPrefix(text_insert, text_delete)
+            if commonlength != 0:
+              # x is the index of the entry just before the run of edits.
+              x = pointer - count_delete - count_insert - 1
+              if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
+                # Append the shared prefix to the preceding equality.
+                diffs[x] = (diffs[x][0], diffs[x][1] +
+                            text_insert[:commonlength])
+              else:
+                # Otherwise put the shared prefix into a new equality at the
+                # front of the list; this shifts every index by one.
+                diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))
+                pointer += 1
+              text_insert = text_insert[commonlength:]
+              text_delete = text_delete[commonlength:]
+            # Factor out any common suffixes.
+            commonlength = self.diff_commonSuffix(text_insert, text_delete)
+            if commonlength != 0:
+              # Prepend the shared suffix to the equality at the pointer.
+              diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
+                  diffs[pointer][1])
+              text_insert = text_insert[:-commonlength]
+              text_delete = text_delete[:-commonlength]
+          # Delete the offending records and add the merged ones.
+          if count_delete == 0:
+            diffs[pointer - count_insert : pointer] = [
+                (self.DIFF_INSERT, text_insert)]
+          elif count_insert == 0:
+            diffs[pointer - count_delete : pointer] = [
+                (self.DIFF_DELETE, text_delete)]
+          else:
+            diffs[pointer - count_delete - count_insert : pointer] = [
+                (self.DIFF_DELETE, text_delete),
+                (self.DIFF_INSERT, text_insert)]
+          # Reposition the pointer just past the equality that ended the
+          # run: start of the run, plus one per merged record, plus one.
+          pointer = pointer - count_delete - count_insert + 1
+          if count_delete != 0:
+            pointer += 1
+          if count_insert != 0:
+            pointer += 1
+        elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
+          # Merge this equality with the previous one.
+          diffs[pointer - 1] = (diffs[pointer - 1][0],
+                                diffs[pointer - 1][1] + diffs[pointer][1])
+          del diffs[pointer]
+        else:
+          pointer += 1
+
+        # An equality always ends the current run of edits.
+        count_insert = 0
+        count_delete = 0
+        text_delete = ''
+        text_insert = ''
+
+    if diffs[-1][1] == '':
+      diffs.pop()  # Remove the dummy entry at the end.
+
+    # Second pass: look for single edits surrounded on both sides by equalities
+    # which can be shifted sideways to eliminate an equality.
+    # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
+    changes = False
+    pointer = 1
+    # Intentionally ignore the first and last element (don't need checking).
+    while pointer < len(diffs) - 1:
+      if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
+          diffs[pointer + 1][0] == self.DIFF_EQUAL):
+        # This is a single edit surrounded by equalities.
+        if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
+          # Shift the edit over the previous equality.
+          diffs[pointer] = (diffs[pointer][0],
+              diffs[pointer - 1][1] +
+              diffs[pointer][1][:-len(diffs[pointer - 1][1])])
+          diffs[pointer + 1] = (diffs[pointer + 1][0],
+                                diffs[pointer - 1][1] + diffs[pointer + 1][1])
+          del diffs[pointer - 1]
+          changes = True
+        elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
+          # Shift the edit over the next equality.
+          diffs[pointer - 1] = (diffs[pointer - 1][0],
+                                diffs[pointer - 1][1] + diffs[pointer + 1][1])
+          diffs[pointer] = (diffs[pointer][0],
+              diffs[pointer][1][len(diffs[pointer + 1][1]):] +
+              diffs[pointer + 1][1])
+          del diffs[pointer + 1]
+          changes = True
+      pointer += 1
+
+    # If shifts were made, the diff needs reordering and another shift sweep.
+    if changes:
+      self.diff_cleanupMerge(diffs)
+
+  def diff_xIndex(self, diffs, loc):
+    """loc is a location in text1, compute and return the equivalent location
+    in text2.  e.g. "The cat" vs "The big cat", 1->1, 5->8
+
+    Args:
+      diffs: Array of diff tuples.
+      loc: Location within text1.
+
+    Returns:
+      Location within text2.
+    """
+    chars1 = 0
+    chars2 = 0
+    last_chars1 = 0
+    last_chars2 = 0
+    for x in xrange(len(diffs)):
+      (op, text) = diffs[x]
+      if op != self.DIFF_INSERT:  # Equality or deletion.
+        chars1 += len(text)
+      if op != self.DIFF_DELETE:  # Equality or insertion.
+        chars2 += len(text)
+      if chars1 > loc:  # Overshot the location.
+        break
+      last_chars1 = chars1
+      last_chars2 = chars2
+
+    if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
+      # The location was deleted.
+      return last_chars2
+    # Add the remaining len(character).
+    return last_chars2 + (loc - last_chars1)
+
+  def diff_prettyHtml(self, diffs):
+    """Convert a diff array into a pretty HTML report.
+
+    Args:
+      diffs: Array of diff tuples.
+
+    Returns:
+      HTML representation.
+    """
+    html = []
+    i = 0
+    for (op, data) in diffs:
+      text = (data.replace("&", "&amp;").replace("<", "&lt;")
+                 .replace(">", "&gt;").replace("\n", "&para;<BR>"))
+      if op == self.DIFF_INSERT:
+        html.append("<INS STYLE=\"background:#E6FFE6;\" TITLE=\"i=%i\">%s</INS>"
+            % (i, text))
+      elif op == self.DIFF_DELETE:
+        html.append("<DEL STYLE=\"background:#FFE6E6;\" TITLE=\"i=%i\">%s</DEL>"
+            % (i, text))
+      elif op == self.DIFF_EQUAL:
+        html.append("<SPAN TITLE=\"i=%i\">%s</SPAN>" % (i, text))
+      if op != self.DIFF_DELETE:
+        i += len(data)
+    return "".join(html)
+
+  def diff_text1(self, diffs):
+    """Compute and return the source text (all equalities and deletions).
+
+    Args:
+      diffs: Array of diff tuples.
+
+    Returns:
+      Source text.
+    """
+    text = []
+    for (op, data) in diffs:
+      if op != self.DIFF_INSERT:
+        text.append(data)
+    return "".join(text)
+
+  def diff_text2(self, diffs):
+    """Compute and return the destination text (all equalities and insertions).
+
+    Args:
+      diffs: Array of diff tuples.
+
+    Returns:
+      Destination text.
+    """
+    text = []
+    for (op, data) in diffs:
+      if op != self.DIFF_DELETE:
+        text.append(data)
+    return "".join(text)
+
+  def diff_levenshtein(self, diffs):
+    """Compute the Levenshtein distance; the number of inserted, deleted or
+    substituted characters.
+
+    Args:
+      diffs: Array of diff tuples.
+
+    Returns:
+      Number of changes.
+    """
+    levenshtein = 0
+    insertions = 0
+    deletions = 0
+    for (op, data) in diffs:
+      if op == self.DIFF_INSERT:
+        insertions += len(data)
+      elif op == self.DIFF_DELETE:
+        deletions += len(data)
+      elif op == self.DIFF_EQUAL:
+        # A deletion and an insertion is one substitution.
+        levenshtein += max(insertions, deletions)
+        insertions = 0
+        deletions = 0
+    levenshtein += max(insertions, deletions)
+    return levenshtein
+
+  def diff_toDelta(self, diffs):
+    """Crush the diff into an encoded string which describes the operations
+    required to transform text1 into text2.
+    E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
+    Operations are tab-separated.  Inserted text is escaped using %xx notation.
+
+    Args:
+      diffs: Array of diff tuples.
+
+    Returns:
+      Delta text.
+    """
+    text = []
+    for (op, data) in diffs:
+      if op == self.DIFF_INSERT:
+        # High ascii will raise UnicodeDecodeError.  Use Unicode instead.
+        data = data.encode("utf-8")
+        text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# "))
+      elif op == self.DIFF_DELETE:
+        text.append("-%d" % len(data))
+      elif op == self.DIFF_EQUAL:
+        text.append("=%d" % len(data))
+    return "\t".join(text)
+
+  def diff_fromDelta(self, text1, delta):
+    """Given the original text1, and an encoded string which describes the
+    operations required to transform text1 into text2, compute the full diff.
+
+    Args:
+      text1: Source string for the diff.
+      delta: Delta text.
+
+    Returns:
+      Array of diff tuples.
+
+    Raises:
+      ValueError: If invalid input.
+    """
+    if type(delta) == unicode:
+      # Deltas should be composed of a subset of ascii chars, Unicode not
+      # required.  If this encode raises UnicodeEncodeError, delta is invalid.
+      delta = delta.encode("ascii")
+    diffs = []
+    pointer = 0  # Cursor in text1
+    tokens = delta.split("\t")
+    for token in tokens:
+      if token == "":
+        # Blank tokens are ok (from a trailing \t).
+        continue
+      # Each token begins with a one character parameter which specifies the
+      # operation of this token (delete, insert, equality).
+      param = token[1:]
+      if token[0] == "+":
+        param = urllib.unquote(param).decode("utf-8")
+        diffs.append((self.DIFF_INSERT, param))
+      elif token[0] == "-" or token[0] == "=":
+        try:
+          n = int(param)
+        except ValueError:
+          raise ValueError("Invalid number in diff_fromDelta: " + param)
+        if n < 0:
+          raise ValueError("Negative number in diff_fromDelta: " + param)
+        text = text1[pointer : pointer + n]
+        pointer += n
+        if token[0] == "=":
+          diffs.append((self.DIFF_EQUAL, text))
+        else:
+          diffs.append((self.DIFF_DELETE, text))
+      else:
+        # Anything else is an error.
+        raise ValueError("Invalid diff operation in diff_fromDelta: " +
+            token[0])
+    if pointer != len(text1):
+      raise ValueError(
+          "Delta length (%d) does not equal source text length (%d)." %
+         (pointer, len(text1)))
+    return diffs
+
+  #  MATCH FUNCTIONS
+
+  def match_main(self, text, pattern, loc):
+    """Locate the best instance of 'pattern' in 'text' near 'loc'.
+
+    Args:
+      text: The text to search.
+      pattern: The pattern to search for.
+      loc: The location to search around.
+
+    Returns:
+      Best match index or -1.
+    """
+    # Check for null inputs.
+    if text == None or pattern == None:
+      raise ValueError("Null inputs. (match_main)")
+
+    loc = max(0, min(loc, len(text)))
+    if text == pattern:
+      # Shortcut (potentially not guaranteed by the algorithm)
+      return 0
+    elif not text:
+      # Nothing to match.
+      return -1
+    elif text[loc:loc + len(pattern)] == pattern:
+      # Perfect match at the perfect spot!  (Includes case of null pattern)
+      return loc
+    else:
+      # Do a fuzzy compare.
+      match = self.match_bitap(text, pattern, loc)
+      return match
+
+  def match_bitap(self, text, pattern, loc):
+    """Locate the best instance of 'pattern' in 'text' near 'loc' using the
+    Bitap algorithm.
+
+    Candidates are scored by match_bitapScore (error ratio plus distance
+    from loc); the search starts with zero errors and allows one more error
+    per outer iteration, keeping the best-scoring position that does not
+    exceed the current threshold (initially self.Match_Threshold).
+
+    Args:
+      text: The text to search.
+      pattern: The pattern to search for.
+      loc: The location to search around.
+
+    Returns:
+      Best match index or -1.
+    """
+    # Python doesn't have a maxint limit, so ignore this check.
+    #if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:
+    #  raise ValueError("Pattern too long for this application.")
+
+    # Initialise the alphabet.
+    s = self.match_alphabet(pattern)
+
+    def match_bitapScore(e, x):
+      """Compute and return the score for a match with e errors and x location.
+      Accesses loc and pattern through being a closure.
+
+      Args:
+        e: Number of errors in match.
+        x: Location of match.
+
+      Returns:
+        Overall score for match (0.0 = good, 1.0 = bad).
+      """
+      accuracy = float(e) / len(pattern)
+      proximity = abs(loc - x)
+      if not self.Match_Distance:
+        # Dodge divide by zero error.
+        # With no distance allowance, any offset from loc scores 1.0.
+        return proximity and 1.0 or accuracy
+      return accuracy + (proximity / float(self.Match_Distance))
+
+    # Highest score beyond which we give up.
+    score_threshold = self.Match_Threshold
+    # Is there a nearby exact match? (speedup)
+    best_loc = text.find(pattern, loc)
+    if best_loc != -1:
+      score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
+      # What about in the other direction? (speedup)
+      best_loc = text.rfind(pattern, loc + len(pattern))
+      if best_loc != -1:
+        score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
+
+    # Initialise the bit arrays.
+    # matchmask has only the highest pattern bit set; match_alphabet assigns
+    # that bit to the first pattern character.
+    matchmask = 1 << (len(pattern) - 1)
+    best_loc = -1
+
+    bin_max = len(pattern) + len(text)
+    # Empty initialization added to appease pychecker.
+    last_rd = None
+    for d in xrange(len(pattern)):
+      # Scan for the best match each iteration allows for one more error.
+      # Run a binary search to determine how far from 'loc' we can stray at
+      # this error level.
+      bin_min = 0
+      bin_mid = bin_max
+      while bin_min < bin_mid:
+        if match_bitapScore(d, loc + bin_mid) <= score_threshold:
+          bin_min = bin_mid
+        else:
+          bin_max = bin_mid
+        # NOTE: relies on Python 2 integer division of ints.
+        bin_mid = (bin_max - bin_min) / 2 + bin_min
+
+      # Use the result from this iteration as the maximum for the next.
+      bin_max = bin_mid
+      start = max(1, loc - bin_mid + 1)
+      finish = min(loc + bin_mid, len(text)) + len(pattern)
+
+      # rd is a list (range() returns a list under Python 2) with slots
+      # 0..finish, plus one extra slot at index finish + 1 holding the
+      # initial state for this error level.
+      rd = range(finish + 1)
+      rd.append((1 << d) - 1)
+      # Walk the text backwards; j is a 1-based text position.
+      for j in xrange(finish, start - 1, -1):
+        if len(text) <= j - 1:
+          # Out of range.
+          charMatch = 0
+        else:
+          charMatch = s.get(text[j - 1], 0)
+        if d == 0:  # First pass: exact match.
+          rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
+        else:  # Subsequent passes: fuzzy match.
+          rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | (
+              ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
+        if rd[j] & matchmask:
+          score = match_bitapScore(d, j - 1)
+          # This match will almost certainly be better than any existing match.
+          # But check anyway.
+          if score <= score_threshold:
+            # Told you so.
+            score_threshold = score
+            best_loc = j - 1
+            if best_loc > loc:
+              # When passing loc, don't exceed our current distance from loc.
+              start = max(1, 2 * loc - best_loc)
+            else:
+              # Already passed loc, downhill from here on in.
+              break
+      # No hope for a (better) match at greater error levels.
+      if match_bitapScore(d + 1, loc) > score_threshold:
+        break
+      # Keep this level's bit array for the next, more tolerant, level.
+      last_rd = rd
+    return best_loc
+
+  def match_alphabet(self, pattern):
+    """Initialise the alphabet for the Bitap algorithm.
+
+    Args:
+      pattern: The text to encode.
+
+    Returns:
+      Hash of character locations.
+    """
+    s = {}
+    for char in pattern:
+      s[char] = 0
+    for i in xrange(len(pattern)):
+      s[pattern[i]] |= 1 << (len(pattern) - i - 1)
+    return s
+
+  #  PATCH FUNCTIONS
+
+  def patch_addContext(self, patch, text):
+    """Increase the context until it is unique,
+    but don't let the pattern expand beyond Match_MaxBits.
+
+    Args:
+      patch: The patch to grow.
+      text: Source text.
+    """
+    if len(text) == 0:
+      return
+    pattern = text[patch.start2 : patch.start2 + patch.length1]
+    padding = 0
+
+    # Look for the first and last matches of pattern in text.  If two different
+    # matches are found, increase the pattern length.
+    while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits ==
+        0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin -
+        self.Patch_Margin)):
+      padding += self.Patch_Margin
+      pattern = text[max(0, patch.start2 - padding) :
+                     patch.start2 + patch.length1 + padding]
+    # Add one chunk for good luck.
+    padding += self.Patch_Margin
+
+    # Add the prefix.
+    prefix = text[max(0, patch.start2 - padding) : patch.start2]
+    if prefix:
+      patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
+    # Add the suffix.
+    suffix = text[patch.start2 + patch.length1 :
+                  patch.start2 + patch.length1 + padding]
+    if suffix:
+      patch.diffs.append((self.DIFF_EQUAL, suffix))
+
+    # Roll back the start points.
+    patch.start1 -= len(prefix)
+    patch.start2 -= len(prefix)
+    # Extend lengths.
+    patch.length1 += len(prefix) + len(suffix)
+    patch.length2 += len(prefix) + len(suffix)
+
+  def patch_make(self, a, b=None, c=None):
+    """Compute a list of patches to turn text1 into text2.
+    Use diffs if provided, otherwise compute it ourselves.
+    There are four ways to call this function, depending on what data is
+    available to the caller:
+    Method 1:
+    a = text1, b = text2
+    Method 2:
+    a = diffs
+    Method 3 (optimal):
+    a = text1, b = diffs
+    Method 4 (deprecated, use method 3):
+    a = text1, b = text2, c = diffs
+
+    Args:
+      a: text1 (methods 1,3,4) or Array of diff tuples for text1 to
+          text2 (method 2).
+      b: text2 (methods 1,4) or Array of diff tuples for text1 to
+          text2 (method 3) or undefined (method 2).
+      c: Array of diff tuples for text1 to text2 (method 4) or
+          undefined (methods 1,2,3).
+
+    Returns:
+      Array of patch objects.
+
+    Raises:
+      ValueError: If the argument types match none of the four call formats.
+    """
+    text1 = None
+    diffs = None
+    # Note that texts may arrive as 'str' or 'unicode'.
+    if isinstance(a, basestring) and isinstance(b, basestring) and c is None:
+      # Method 1: text1, text2
+      # Compute diffs from text1 and text2.
+      text1 = a
+      diffs = self.diff_main(text1, b, True)
+      # Cleanup is only attempted when there are more than two diffs.
+      if len(diffs) > 2:
+        self.diff_cleanupSemantic(diffs)
+        self.diff_cleanupEfficiency(diffs)
+    elif isinstance(a, list) and b is None and c is None:
+      # Method 2: diffs
+      # Compute text1 from diffs.
+      diffs = a
+      text1 = self.diff_text1(diffs)
+    elif isinstance(a, basestring) and isinstance(b, list) and c is None:
+      # Method 3: text1, diffs
+      text1 = a
+      diffs = b
+    elif (isinstance(a, basestring) and isinstance(b, basestring) and
+          isinstance(c, list)):
+      # Method 4: text1, text2, diffs
+      # text2 is not used.
+      text1 = a
+      diffs = c
+    else:
+      raise ValueError("Unknown call format to patch_make.")
+
+    if not diffs:
+      return []  # Get rid of the None case.
+    patches = []
+    patch = patch_obj()
+    char_count1 = 0  # Number of characters into the text1 string.
+    char_count2 = 0  # Number of characters into the text2 string.
+    prepatch_text = text1  # Recreate the patches to determine context info.
+    postpatch_text = text1
+    for x in xrange(len(diffs)):
+      (diff_type, diff_text) = diffs[x]
+      if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
+        # A new patch starts here.
+        patch.start1 = char_count1
+        patch.start2 = char_count2
+      if diff_type == self.DIFF_INSERT:
+        # Insertion
+        patch.diffs.append(diffs[x])
+        patch.length2 += len(diff_text)
+        # Apply the insertion to the running post-patch text.
+        postpatch_text = (postpatch_text[:char_count2] + diff_text +
+                          postpatch_text[char_count2:])
+      elif diff_type == self.DIFF_DELETE:
+        # Deletion.
+        patch.length1 += len(diff_text)
+        patch.diffs.append(diffs[x])
+        # Apply the deletion to the running post-patch text.
+        postpatch_text = (postpatch_text[:char_count2] +
+                          postpatch_text[char_count2 + len(diff_text):])
+      elif (diff_type == self.DIFF_EQUAL and
+            len(diff_text) <= 2 * self.Patch_Margin and
+            len(patch.diffs) != 0 and len(diffs) != x + 1):
+        # Small equality inside a patch.
+        # (Only when a patch is open and this is not the final diff.)
+        patch.diffs.append(diffs[x])
+        patch.length1 += len(diff_text)
+        patch.length2 += len(diff_text)
+
+      if (diff_type == self.DIFF_EQUAL and
+          len(diff_text) >= 2 * self.Patch_Margin):
+        # Time for a new patch.
+        if len(patch.diffs) != 0:
+          self.patch_addContext(patch, prepatch_text)
+          patches.append(patch)
+          patch = patch_obj()
+          # Unlike Unidiff, our patch lists have a rolling context.
+          # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
+          # Update prepatch text & pos to reflect the application of the
+          # just completed patch.
+          prepatch_text = postpatch_text
+          char_count1 = char_count2
+
+      # Update the current character count.
+      if diff_type != self.DIFF_INSERT:
+        char_count1 += len(diff_text)
+      if diff_type != self.DIFF_DELETE:
+        char_count2 += len(diff_text)
+
+    # Pick up the leftover patch if not empty.
+    if len(patch.diffs) != 0:
+      self.patch_addContext(patch, prepatch_text)
+      patches.append(patch)
+    return patches
+
+  def patch_deepCopy(self, patches):
+    """Given an array of patches, return another array that is identical.
+
+    Args:
+      patches: Array of patch objects.
+
+    Returns:
+      Array of patch objects.
+    """
+    patchesCopy = []
+    for patch in patches:
+      patchCopy = patch_obj()
+      # No need to deep copy the tuples since they are immutable.
+      patchCopy.diffs = patch.diffs[:]
+      patchCopy.start1 = patch.start1
+      patchCopy.start2 = patch.start2
+      patchCopy.length1 = patch.length1
+      patchCopy.length2 = patch.length2
+      patchesCopy.append(patchCopy)
+    return patchesCopy
+
+  def patch_apply(self, patches, text):
+    """Merge a set of patches onto the text.  Return a patched text, as well
+    as a list of true/false values indicating which patches were applied.
+
+    The caller's patches are not modified: the method works on a deep copy,
+    which it pads (patch_addPadding) and splits (patch_splitMax) before
+    locating each patch in the text with match_main.
+
+    Args:
+      patches: Array of patch objects.
+      text: Old text.
+
+    Returns:
+      Two element Array, containing the new text and an array of boolean values.
+    """
+    if not patches:
+      return (text, [])
+
+    # Deep copy the patches so that no changes are made to originals.
+    patches = self.patch_deepCopy(patches)
+
+    # Surround the text with the same padding that was added to the patches.
+    nullPadding = self.patch_addPadding(patches)
+    text = nullPadding + text + nullPadding
+    self.patch_splitMax(patches)
+
+    # delta keeps track of the offset between the expected and actual location
+    # of the previous patch.  If there are patches expected at positions 10 and
+    # 20, but the first patch was found at 12, delta is 2 and the second patch
+    # has an effective expected position of 22.
+    delta = 0
+    results = []
+    for patch in patches:
+      expected_loc = patch.start2 + delta
+      text1 = self.diff_text1(patch.diffs)
+      # end_loc stays -1 unless the oversized-pattern branch below sets it.
+      end_loc = -1
+      if len(text1) > self.Match_MaxBits:
+        # patch_splitMax will only provide an oversized pattern in the case of
+        # a monster delete.
+        # Match the first and the last Match_MaxBits characters separately.
+        start_loc = self.match_main(text, text1[:self.Match_MaxBits],
+                                    expected_loc)
+        if start_loc != -1:
+          end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
+              expected_loc + len(text1) - self.Match_MaxBits)
+          if end_loc == -1 or start_loc >= end_loc:
+            # Can't find valid trailing context.  Drop this patch.
+            start_loc = -1
+      else:
+        start_loc = self.match_main(text, text1, expected_loc)
+      if start_loc == -1:
+        # No match found.  :(
+        results.append(False)
+        # Subtract the delta for this failed patch from subsequent patches.
+        delta -= patch.length2 - patch.length1
+      else:
+        # Found a match.  :)
+        results.append(True)
+        delta = start_loc - expected_loc
+        # text2 is the stretch of the current text the patch was matched to.
+        if end_loc == -1:
+          text2 = text[start_loc : start_loc + len(text1)]
+        else:
+          text2 = text[start_loc : end_loc + self.Match_MaxBits]
+        if text1 == text2:
+          # Perfect match, just shove the replacement text in.
+          text = (text[:start_loc] + self.diff_text2(patch.diffs) +
+                      text[start_loc + len(text1):])
+        else:
+          # Imperfect match.
+          # Run a diff to get a framework of equivalent indices.
+          diffs = self.diff_main(text1, text2, False)
+          if (len(text1) > self.Match_MaxBits and
+              self.diff_levenshtein(diffs) / float(len(text1)) >
+              self.Patch_DeleteThreshold):
+            # The end points match, but the content is unacceptably bad.
+            results[-1] = False
+          else:
+            self.diff_cleanupSemanticLossless(diffs)
+            # index1 counts characters of the patch's own text consumed so
+            # far; diff_xIndex maps it into the matched text.
+            index1 = 0
+            for (op, data) in patch.diffs:
+              if op != self.DIFF_EQUAL:
+                index2 = self.diff_xIndex(diffs, index1)
+              if op == self.DIFF_INSERT:  # Insertion
+                text = text[:start_loc + index2] + data + text[start_loc +
+                                                               index2:]
+              elif op == self.DIFF_DELETE:  # Deletion
+                text = text[:start_loc + index2] + text[start_loc +
+                    self.diff_xIndex(diffs, index1 + len(data)):]
+              if op != self.DIFF_DELETE:
+                index1 += len(data)
+    # Strip the padding off.
+    # NOTE(review): if nullPadding were empty (Patch_Margin == 0) this slice
+    # would be text[0:-0], i.e. an empty string -- confirm Patch_Margin is
+    # always positive.
+    text = text[len(nullPadding):-len(nullPadding)]
+    return (text, results)
+
+  def patch_addPadding(self, patches):
+    """Add some padding on text start and end so that edges can match
+    something.  Intended to be called only from within patch_apply.
+
+    Args:
+      patches: Array of patch objects.
+
+    Returns:
+      The padding string added to each side.
+    """
+    paddingLength = self.Patch_Margin
+    nullPadding = ""
+    for x in xrange(1, paddingLength + 1):
+      nullPadding += chr(x)
+
+    # Bump all the patches forward.
+    for patch in patches:
+      patch.start1 += paddingLength
+      patch.start2 += paddingLength
+
+    # Add some padding on start of first diff.
+    patch = patches[0]
+    diffs = patch.diffs
+    if not diffs or diffs[0][0] != self.DIFF_EQUAL:
+      # Add nullPadding equality.
+      diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
+      patch.start1 -= paddingLength  # Should be 0.
+      patch.start2 -= paddingLength  # Should be 0.
+      patch.length1 += paddingLength
+      patch.length2 += paddingLength
+    elif paddingLength > len(diffs[0][1]):
+      # Grow first equality.
+      extraLength = paddingLength - len(diffs[0][1])
+      newText = nullPadding[len(diffs[0][1]):] + diffs[0][1]
+      diffs[0] = (diffs[0][0], newText)
+      patch.start1 -= extraLength
+      patch.start2 -= extraLength
+      patch.length1 += extraLength
+      patch.length2 += extraLength
+
+    # Add some padding on end of last diff.
+    patch = patches[-1]
+    diffs = patch.diffs
+    if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
+      # Add nullPadding equality.
+      diffs.append((self.DIFF_EQUAL, nullPadding))
+      patch.length1 += paddingLength
+      patch.length2 += paddingLength
+    elif paddingLength > len(diffs[-1][1]):
+      # Grow last equality.
+      extraLength = paddingLength - len(diffs[-1][1])
+      newText = diffs[-1][1] + nullPadding[:extraLength]
+      diffs[-1] = (diffs[-1][0], newText)
+      patch.length1 += extraLength
+      patch.length2 += extraLength
+
+    return nullPadding
+
+  def patch_splitMax(self, patches):
+    """Look through the patches and break up any which are longer than the
+    maximum limit of the match algorithm.
+
+    Args:
+      patches: Array of patch objects.
+    """
+    if self.Match_MaxBits == 0:
+      return
+    for x in xrange(len(patches)):
+      if patches[x].length1 > self.Match_MaxBits:
+        bigpatch = patches[x]
+        # Remove the big old patch.
+        del patches[x]
+        x -= 1
+        patch_size = self.Match_MaxBits
+        start1 = bigpatch.start1
+        start2 = bigpatch.start2
+        precontext = ''
+        while len(bigpatch.diffs) != 0:
+          # Create one of several smaller patches.
+          patch = patch_obj()
+          empty = True
+          patch.start1 = start1 - len(precontext)
+          patch.start2 = start2 - len(precontext)
+          if precontext:
+            patch.length1 = patch.length2 = len(precontext)
+            patch.diffs.append((self.DIFF_EQUAL, precontext))
+
+          while (len(bigpatch.diffs) != 0 and
+                 patch.length1 < patch_size - self.Patch_Margin):
+            (diff_type, diff_text) = bigpatch.diffs[0]
+            if diff_type == self.DIFF_INSERT:
+              # Insertions are harmless.
+              patch.length2 += len(diff_text)
+              start2 += len(diff_text)
+              patch.diffs.append(bigpatch.diffs.pop(0))
+              empty = False
+            elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
+                patch.diffs[0][0] == self.DIFF_EQUAL and
+                len(diff_text) > 2 * patch_size):
+              # This is a large deletion.  Let it pass in one chunk.
+              patch.length1 += len(diff_text)
+              start1 += len(diff_text)
+              empty = False
+              patch.diffs.append((diff_type, diff_text))
+              del bigpatch.diffs[0]
+            else:
+              # Deletion or equality.  Only take as much as we can stomach.
+              diff_text = diff_text[:patch_size - patch.length1 -
+                                    self.Patch_Margin]
+              patch.length1 += len(diff_text)
+              start1 += len(diff_text)
+              if diff_type == self.DIFF_EQUAL:
+                patch.length2 += len(diff_text)
+                start2 += len(diff_text)
+              else:
+                empty = False
+
+              patch.diffs.append((diff_type, diff_text))
+              if diff_text == bigpatch.diffs[0][1]:
+                del bigpatch.diffs[0]
+              else:
+                bigpatch.diffs[0] = (bigpatch.diffs[0][0],
+                                     bigpatch.diffs[0][1][len(diff_text):])
+
+          # Compute the head context for the next patch.
+          precontext = self.diff_text2(patch.diffs)
+          precontext = precontext[-self.Patch_Margin:]
+          # Append the end context for this patch.
+          postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
+          if postcontext:
+            patch.length1 += len(postcontext)
+            patch.length2 += len(postcontext)
+            if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
+              patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
+                                 postcontext)
+            else:
+              patch.diffs.append((self.DIFF_EQUAL, postcontext))
+
+          if not empty:
+            x += 1
+            patches.insert(x, patch)
+
+  def patch_toText(self, patches):
+    """Take a list of patches and return a textual representation.
+
+    Args:
+      patches: Array of patch objects.
+
+    Returns:
+      Text representation of patches.
+    """
+    text = []
+    for patch in patches:
+      text.append(str(patch))
+    return "".join(text)
+
+  def patch_fromText(self, textline):
+    """Parse a textual representation of patches and return a list of patch
+    objects.
+
+    Args:
+      textline: Text representation of patches.
+
+    Returns:
+      Array of patch objects.
+
+    Raises:
+      ValueError: If invalid input.
+    """
+    if type(textline) == unicode:
+      # Patches should be composed of a subset of ascii chars, Unicode not
+      # required.  If this encode raises UnicodeEncodeError, patch is invalid.
+      textline = textline.encode("ascii")
+    patches = []
+    if not textline:
+      return patches
+    text = textline.split('\n')
+    while len(text) != 0:
+      m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
+      if not m:
+        raise ValueError("Invalid patch string: " + text[0])
+      patch = patch_obj()
+      patches.append(patch)
+      patch.start1 = int(m.group(1))
+      if m.group(2) == '':
+        patch.start1 -= 1
+        patch.length1 = 1
+      elif m.group(2) == '0':
+        patch.length1 = 0
+      else:
+        patch.start1 -= 1
+        patch.length1 = int(m.group(2))
+
+      patch.start2 = int(m.group(3))
+      if m.group(4) == '':
+        patch.start2 -= 1
+        patch.length2 = 1
+      elif m.group(4) == '0':
+        patch.length2 = 0
+      else:
+        patch.start2 -= 1
+        patch.length2 = int(m.group(4))
+
+      del text[0]
+
+      while len(text) != 0:
+        if text[0]:
+          sign = text[0][0]
+        else:
+          sign = ''
+        line = urllib.unquote(text[0][1:])
+        line = line.decode("utf-8")
+        if sign == '+':
+          # Insertion.
+          patch.diffs.append((self.DIFF_INSERT, line))
+        elif sign == '-':
+          # Deletion.
+          patch.diffs.append((self.DIFF_DELETE, line))
+        elif sign == ' ':
+          # Minor equality.
+          patch.diffs.append((self.DIFF_EQUAL, line))
+        elif sign == '@':
+          # Start of next patch.
+          break
+        elif sign == '':
+          # Blank line: nothing to record, just skip it.
+          pass
+        else:
+          # Unrecognized line prefix: the patch text is malformed.
+          raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))
+        del text[0]
+    return patches
+
+
+class patch_obj:
+  """Class representing one patch operation.
+  """
+
+  def __init__(self):
+    """Initializes with an empty list of diffs.
+    """
+    self.diffs = []
+    self.start1 = None
+    self.start2 = None
+    self.length1 = 0
+    self.length2 = 0
+
+  def __str__(self):
+    """Emulate GNU diff's format.
+    Header: @@ -382,8 +481,9 @@
+    Indices are printed as 1-based, not 0-based.
+
+    Returns:
+      The GNU diff string.
+    """
+    if self.length1 == 0:
+      coords1 = str(self.start1) + ",0"
+    elif self.length1 == 1:
+      coords1 = str(self.start1 + 1)
+    else:
+      coords1 = str(self.start1 + 1) + "," + str(self.length1)
+    if self.length2 == 0:
+      coords2 = str(self.start2) + ",0"
+    elif self.length2 == 1:
+      coords2 = str(self.start2 + 1)
+    else:
+      coords2 = str(self.start2 + 1) + "," + str(self.length2)
+    text = ["@@ -", coords1, " +", coords2, " @@\n"]
+    # Escape the body of the patch with %xx notation.
+    for (op, data) in self.diffs:
+      if op == diff_match_patch.DIFF_INSERT:
+        text.append("+")
+      elif op == diff_match_patch.DIFF_DELETE:
+        text.append("-")
+      elif op == diff_match_patch.DIFF_EQUAL:
+        text.append(" ")
+      # High ascii will raise UnicodeDecodeError.  Use Unicode instead.
+      data = data.encode("utf-8")
+      text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
+    return "".join(text)
diff --git a/BookReaderIA/datanode/windowed_iterator.py b/BookReaderIA/datanode/windowed_iterator.py

new file mode 100644 (file)

index 0000000..b1dc389
--- /dev/null
+++ b/BookReaderIA/datanode/windowed_iterator.py
@@ -0,0 +1,102 @@
+#!/usr/bin/python
+
+# Copyright(c)2008-2010 Internet Archive. Software license AGPL version 3.
+# 
+# This file is part of BookReader.
+# 
+#     BookReader is free software: you can redistribute it and/or modify
+#     it under the terms of the GNU Affero General Public License as published by
+#     the Free Software Foundation, either version 3 of the License, or
+#     (at your option) any later version.
+# 
+#     BookReader is distributed in the hope that it will be useful,
+#     but WITHOUT ANY WARRANTY; without even the implied warranty of
+#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#     GNU Affero General Public License for more details.
+# 
+#     You should have received a copy of the GNU Affero General Public License
+#     along with BookReader.  If not, see <http://www.gnu.org/licenses/>.
+#     
+#     The BookReader source is hosted at http://github.com/openlibrary/bookreader/
+
+from collections import deque
+import itertools
+
+class windowed_iterator:
+    """ Wrap an iterator so that we can see [window] neighbors
+    in either direction from the current item.
+
+    Items are stored in a deque of size 2*window + 1, where the latest
+    item is always in the middle position.
+
+    The supplied clear_callback() is called for items more than
+    [window] steps in the past.
+
+    """
+
+    # Todo? remove use of None as sentinel, to be able to represent
+    # iterators returning None.
+
+    def __init__(self, iterator, window, clear_callback=None):
+        self.iterator = iterator
+        # initialize deque with sentinel values
+        self.items = deque((None for i in range(window + 1)),
+                           window * 2 + 1)
+        self.window = window
+        self.clear_callback = clear_callback
+    def __iter__(self):
+        return self
+    def __repr__(self):
+        return str(self.items) + ' window: ' + str(self.window)
+    def clear(self):
+        for item in self.items:
+            if item and self.clear_callback is not None:
+                self.clear_callback(item)
+        self.items.clear()
+    def neighbor(self, delta):
+        if abs(delta) > self.window:
+            raise IndexError('Requested delta outside window')
+        while self.window + delta + 1 > len(self.items):
+            try:
+                self.items.append(self.iterator.next())
+            except StopIteration:
+                return None
+        return self.items[self.window + delta]
+    def neighbors(self, window=None, modtwo=False):
+        if window is None:
+            window = self.window
+        if window > self.window:
+            raise IndexError('Requested delta outside window')
+        for i in itertools.chain(range(-window, 0),
+                                  range(1, window + 1)):
+            if modtwo and i % 2 == 1:
+                continue
+            n = self.neighbor(i)
+            if n is not None:
+                yield n
+    def next(self):
+        nextitem = None
+        if len(self.items) == self.window + 1:
+            # elicit potential StopIteration before clearing/popping
+            nextitem = self.iterator.next()
+        if self.items[0] is not None and self.clear_callback is not None:
+            self.clear_callback(self.items[0])
+        self.items.popleft()
+        if nextitem is not None:
+            self.items.append(nextitem)
+        return self.items[self.window]
+
+
+if __name__ == '__main__':
+    def sample_gen():
+        for i in range(0, 10):
+            yield { 'num': i*i }
+
+    g = sample_gen()
+    c = windowed_iterator(g, 3)
+
+    for i, item in enumerate(c):
+        print 'item %s: %s' % (i, item)
+        # print c
+        if i in (1, 4, 6, 9):
+            print 'neighbors of item %s: %s' % (i, [n for n in c.neighbors(2)])
diff --git a/BookReaderIA/inc/BookReader.inc b/BookReaderIA/inc/BookReader.inc

index 630a28b..3fa885c 100644 (file)
--- a/BookReaderIA/inc/BookReader.inc
+++ b/BookReaderIA/inc/BookReader.inc
@@ -141,6 +141,13 @@ class BookReader
          <![endif]-->
      <script type="text/javascript" src="/bookreader/jquery.bt.min.js"></script>
      <script type="text/javascript" src="/bookreader/BookReader.js?v=<? echo($version); ?>"></script>
+    <script type="text/javascript" src="/bookreader/soundmanager/soundmanager2.js?v=<? echo($version); ?>"></script>
+    <script>
+        soundManager.debugMode = false;
+        soundManager.url = '/bookreader/soundmanager/swf/';       
+        soundManager.useHTML5Audio = true;
+        soundManager.flashVersion = 9; //flash 8 version of swf is buggy when calling play() on a sound that is still loading
+    </script>
  </head>
  <body style="background-color: ##939598;">
author	rajbot <raj@archive.org>
	Tue, 12 Oct 2010 21:38:47 +0000 (21:38 +0000)
committer	rajbot <raj@archive.org>
	Tue, 12 Oct 2010 21:38:47 +0000 (21:38 +0000)
BookReader/BookReader.css		patch \| blob \| history
BookReader/BookReader.js		patch \| blob \| history
BookReader/images/progressbar.gif	[new file with mode: 0644]	patch \| blob
BookReaderIA/datanode/BookReaderGetTTS.php	[new file with mode: 0644]	patch \| blob
BookReaderIA/datanode/BookReaderGetText.py	[new file with mode: 0644]	patch \| blob
BookReaderIA/datanode/BookReaderGetTextWrapper.php	[new file with mode: 0644]	patch \| blob
BookReaderIA/datanode/BookReaderJSIA.php		patch \| blob \| history
BookReaderIA/datanode/diff_match_patch.py	[new file with mode: 0644]	patch \| blob
BookReaderIA/datanode/windowed_iterator.py	[new file with mode: 0644]	patch \| blob
BookReaderIA/inc/BookReader.inc		patch \| blob \| history