X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=plack%2Flib%2FPlack%2FApp%2FBookReader.pm;h=72d265553c003663f6ba72600b5c02cfe6273d68;hb=e7bcdbc42e8c7bbe995673f1f1efafb4f0fe8865;hp=e12b1691613bbcffa1e19a5a81a3954babf57af0;hpb=f58d3a78c645f48f96e086f436a8992af916578e;p=bookreader.git

diff --git a/plack/lib/Plack/App/BookReader.pm b/plack/lib/Plack/App/BookReader.pm
index e12b169..72d2655 100644
--- a/plack/lib/Plack/App/BookReader.pm
+++ b/plack/lib/Plack/App/BookReader.pm
@@ -15,12 +15,14 @@ use File::Slurp;
 use JSON;
 use autodie;
 use Time::HiRes qw(time);
+use Encode;
 
 sub make_basedir {
 	my $path = shift;
 	return if -e $path;
 	$path =~ s{/[^/]+$}{} || die "no dir/file in $path";
-	File::Path::make_path $path;
+	warn "# make_basedir $path\n";
+	-e $path ? 0 : File::Path::make_path $path;
 }
 
 # Stolen from rack/directory.rb
@@ -60,13 +62,13 @@ my $reader_page = <<'PAGE';
     <title>%s</title>
     
     <link rel="stylesheet" type="text/css" href="/BookReader/BookReader.css"/>
-    <script type="text/javascript" src="http://www.archive.org/includes/jquery-1.4.2.min.js"></script>
-    <script type="text/javascript" src="http://www.archive.org/bookreader/jquery-ui-1.8.5.custom.min.js"></script>
+    <script type="text/javascript" src="http://archive.org/includes/jquery-1.4.2.min.js"></script>
+    <script type="text/javascript" src="http://archive.org/bookreader/jquery-ui-1.8.5.custom.min.js"></script>
 
-    <script type="text/javascript" src="http://www.archive.org/bookreader/dragscrollable.js"></script>
-    <script type="text/javascript" src="http://www.archive.org/bookreader/jquery.colorbox-min.js"></script>
-    <script type="text/javascript" src="http://www.archive.org/bookreader/jquery.ui.ipad.js"></script>
-    <script type="text/javascript" src="http://www.archive.org/bookreader/jquery.bt.min.js"></script>
+    <script type="text/javascript" src="http://archive.org/bookreader/dragscrollable.js"></script>
+    <script type="text/javascript" src="http://archive.org/bookreader/jquery.colorbox-min.js"></script>
+    <script type="text/javascript" src="http://archive.org/bookreader/jquery.ui.ipad.js"></script>
+    <script type="text/javascript" src="http://archive.org/bookreader/jquery.bt.min.js"></script>
 
     <script type="text/javascript" src="/BookReader/BookReader.js"></script>
 
@@ -95,64 +97,65 @@ var pages = %s;
 // Return the width of a given page.  Here we assume all images are 800 pixels wide
 br.getPageWidth = function(index) {
 	if ( ! pages[index] ) return;
-    return parseInt( pages[index][1] );
+	return parseInt( pages[index][1] );
 }
 
 // Return the height of a given page.  Here we assume all images are 1200 pixels high
 br.getPageHeight = function(index) {
 	if ( ! pages[index] ) return;
-    return parseInt( pages[index][2] );
+	return parseInt( pages[index][2] );
 }
 
 // We load the images from archive.org -- you can modify this function to retrieve images
 // using a different URL structure
 br.getPageURI = function(index, reduce, rotate) {
 	if ( ! pages[index] ) return;
-    // reduce and rotate are ignored in this simple implementation, but we
-    // could e.g. look at reduce and load images from a different directory
-    // or pass the information to an image server
-	var url = pages[index][0] + '?reduce='+reduce;
-	console.debug('getPageURI', index, reduce, rotate, url);
-    return url;
+	// rotate is ignored in this simple implementation; reduce is rounded to a
+	// power of two (see below) and passed to the server in the ?reduce= query
+	// parameter of the page image URL
+	var r = 1 << ( Math.ceil(reduce).toString(2).length - 1 ); // largest power of 2 <= ceil(reduce), e.g. 3 -> 2, 4 -> 4
+	var url = pages[index][0] + '?reduce='+r;
+	console.debug('getPageURI', index, reduce, r, rotate, url);
+	return url;
 }
 
 // Return which side, left or right, that a given page should be displayed on
 br.getPageSide = function(index) {
-    if (0 == (index & 0x1)) {
-        return 'R';
-    } else {
-        return 'L';
-    }
+	if (0 == (index & 0x1)) {
+		return 'R';
+	} else {
+		return 'L';
+	}
 }
 
 // This function returns the left and right indices for the user-visible
 // spread that contains the given index.  The return values may be
 // null if there is no facing page or the index is invalid.
 br.getSpreadIndices = function(pindex) {   
-    var spreadIndices = [null, null]; 
-    if ('rl' == this.pageProgression) {
-        // Right to Left
-        if (this.getPageSide(pindex) == 'R') {
-            spreadIndices[1] = pindex;
-            spreadIndices[0] = pindex + 1;
-        } else {
-            // Given index was LHS
-            spreadIndices[0] = pindex;
-            spreadIndices[1] = pindex - 1;
-        }
-    } else {
-        // Left to right
-        if (this.getPageSide(pindex) == 'L') {
-            spreadIndices[0] = pindex;
-            spreadIndices[1] = pindex + 1;
-        } else {
-            // Given index was RHS
-            spreadIndices[1] = pindex;
-            spreadIndices[0] = pindex - 1;
-        }
-    }
-    
-    return spreadIndices;
+	var spreadIndices = [null, null]; 
+	if ('rl' == this.pageProgression) {
+		// Right to Left
+		if (this.getPageSide(pindex) == 'R') {
+			spreadIndices[1] = pindex;
+			spreadIndices[0] = pindex + 1;
+		} else {
+			// Given index was LHS
+			spreadIndices[0] = pindex;
+			spreadIndices[1] = pindex - 1;
+		}
+	} else {
+		// Left to right
+		if (this.getPageSide(pindex) == 'L') {
+			spreadIndices[0] = pindex;
+			spreadIndices[1] = pindex + 1;
+		} else {
+			// Given index was RHS
+			spreadIndices[1] = pindex;
+			spreadIndices[0] = pindex - 1;
+		}
+	}
+	
+	return spreadIndices;
 }
 
 // For a given "accessible page index" return the page number in the book.
@@ -160,7 +163,7 @@ br.getSpreadIndices = function(pindex) {
 // For example, index 5 might correspond to "Page 1" if there is front matter such
 // as a title page and table of contents.
 br.getPageNum = function(index) {
-    return index+1;
+	return index+1;
 }
 
 // Total number of leafs
@@ -174,7 +177,7 @@ br.bookUrl  = '%s';
 br.imagesBaseURL = '/BookReader/images/';
 
 br.getEmbedCode = function(frameWidth, frameHeight, viewParams) {
-    return "Embed code not supported in bookreader demo.";
+	return "Embed code not supported in bookreader demo.";
 }
 
 // Let's go!
@@ -207,29 +210,33 @@ $('#btnSrch').hide();
 PAGE
 
 sub should_handle {
-    my($self, $file) = @_;
-    return -d $file || -f $file;
+	my($self, $file) = @_;
+	return -d $file || -f $file;
 }
 
 sub return_dir_redirect {
-    my ($self, $env) = @_;
-    my $uri = Plack::Request->new($env)->uri;
-    return [ 301,
-        [
-            'Location' => $uri . '/',
-            'Content-Type' => 'text/plain',
-            'Content-Length' => 8,
-        ],
-        [ 'Redirect' ],
-    ];
+	my ($self, $env) = @_;
+	my $uri = Plack::Request->new($env)->uri;
+	return [ 301,
+		[
+			'Location' => $uri . '/',
+			'Content-Type' => 'text/plain',
+			'Content-Length' => 8,
+		],
+		[ 'Redirect' ],
+	];
 }
 
-sub convert {
-	warn "# convert ",dump(@_);
+sub convert { gm('convert',@_) }
+sub montage { gm('montage',@_) }
+# gm( $command, @args ): run "gm $command @args" and log its duration; the last arg is the output file
+sub gm {
+	my $command = shift;
+	warn "# $command ",dump(@_);
 	my $t = time();
-	system 'gm', 'convert', @_;
+	system( 'gm', $command, @_ ) == 0 or warn "## gm $command failed: status $?\n"; # report, callers check the output file
 	$t = time() - $t;
-	warn sprintf("## created %d bytes in %.2f s %s\n", -s $_[-1], $t, $_[-1]);
+	warn sprintf("## $command %d bytes in %.2f s %s\n", ( -s $_[-1] ) || 0, $t, $_[-1]); # 0 when output is missing
 }
 
 sub longest_common_prefix {
@@ -252,27 +259,78 @@ sub sort_pages {
 	} @_;
 }
 
-sub convert_pdf {
-	my ($page, $cache_dir) = @_;
+sub convert_pdf_page {
+	my ($pdf, $page, $path) = @_;
+	my $t = time();
+	# one PDF page -> "$path.jpg": extract its embedded images into scratch dir $path, join them, remove $path
+	make_path $path;
+
+	warn "# pdfimages $page $pdf -> $path/\n";
+	system( 'pdfimages', '-f', $page, '-l', $page, '-q', '-j', '-p', $pdf, "$path/p" ) == 0 or warn "## pdfimages failed: status $?\n";
+
+	my @parts = ();
+	# readdir instead of glob: glob split on spaces!
+	opendir(my $dh, $path);
+	while ( defined( my $name = readdir $dh ) ) { # lexical entry: bare readdir sets global $_ only on perl >= 5.12
+		my $full = "$path/$name";
+		warn "## readdir $full\n";
+		next unless -f $full; # skip . ..
+		push @parts, $name;
+	}
+	closedir $dh;
+
+	die "can't find images for $pdf in $path" unless @parts;
+
+	@parts = sort_pages @parts;
 
+	my $image = "$path.jpg";
+
+	if ( @parts == 1 ) { # single image
+		my $part = "$path/$parts[0]";
+		convert( $part => $image );
+	} else { # several images: stack them vertically, one per row
+		my @full = map { "$path/$_" } @parts;
+		montage( @full, '-tile', '1x'.scalar(@full), '-geometry', '+1+1' => $image );
+	}
+
+	die "$image: $!" unless -r $image;
+
+	remove_tree $path;
+
+	$t = time() - $t;
+	warn sprintf("## page: %d in %.2f s for %s\n", $page, $t, $image);
+	return $image;
+}
+
+sub render_pdf_page {
+	my ( $pdf, $page, $path ) = @_;
 	my $t = time();
+	# render page $page of $pdf to a jpeg with pdftocairo, using $path as output prefix; returns the jpeg path
+	warn "# pdftocairo $pdf\n";
+	system('pdftocairo', '-jpeg', '-f', $page, '-l', $page, $pdf, $path) == 0 or warn "## pdftocairo failed: status $?\n";
+	# NOTE(review): assumes pdftocairo names its output <prefix>-NNN.jpg (3-digit page number) -- confirm for installed poppler
+	my $image = sprintf( '%s-%03d.jpg', $path, $page );
+
+	die "can't find $image: $!" unless -r $image;
+
 	$t = time() - $t;
-	warn sprintf("## %.2f s %s\n", $t);
+	warn sprintf("## page: %d in %.2f s for %s\n", $page, $t, $image);
+	return $image;
 }
 
 sub serve_path {
-    my($self, $env, $path, $fullpath) = @_;
+	my($self, $env, $path, $fullpath) = @_;
 
 	my $req = Plack::Request->new($env);
 
-    my $dir_url = $env->{SCRIPT_NAME} . $env->{PATH_INFO};
+	my $dir_url = $env->{SCRIPT_NAME} . $env->{PATH_INFO};
 	my @files = ();
 	my @page_files;
 
 	if ( -f $path && $path =~ s{/([^/]+\.pdf)$}{} ) {
 		push @page_files, $1;
 		warn "# single pdf: $path / $1\n";
-    } elsif (-f $path ) {
+	} elsif ( -f $path ) {
 
 		if ( my $reduce = $req->param('reduce') ) {
 			$reduce = int($reduce); # BookReader javascript somethimes returns float
@@ -286,12 +344,12 @@ sub serve_path {
 				convert( '-scale', ( 100 / $reduce ) .'%', $path => $cache_path );
 			}
 
-        	return $self->SUPER::serve_path($env, $cache_path, $fullpath);
+			return $self->SUPER::serve_path($env, $cache_path, $fullpath);
 
 		}
 
-        return $self->SUPER::serve_path($env, $path, $fullpath);
-     } elsif ( -d $path ) {
+		return $self->SUPER::serve_path($env, $path, $fullpath);
+	 } elsif ( -d $path ) {
 
 		if ($dir_url !~ m{/$}) {
 			return $self->return_dir_redirect($env);
@@ -333,64 +391,64 @@ sub serve_path {
 		warn "# page_files = ",dump( @page_files );
 	}
 
-    my $dir  = Plack::Util::encode_html( $env->{PATH_INFO} );
+	my $dir  = Plack::Util::encode_html( $env->{PATH_INFO} );
 	my $page = 'empty';
 
 	if ( $req->param('bookreader') ) {
 
-		my $pages; # []
-		my $pages_path = "cache/$dir_url/bookreader.json";
+		my $pages; # []:
+		my $pages_path = "meta/$dir_url/bookreader.json";
 		if ( -e $pages_path ) {
 			$pages = decode_json read_file $pages_path;
 		} else {
 			foreach my $page ( @page_files ) {
 				my $image = Graphics::Magick->new;
 				if ( $page =~ m/\.pdf$/ ) {
-					die "$path/$page: $!" unless -e "$path/$page";
-					my $cache_dir = "cache/$dir_url/$page/";
-					make_path $cache_dir;
-					warn "# pdfimages $path/$page -> $cache_dir";
-					system 'pdfimages', '-q', '-j', '-p', "$path/$page", $cache_dir;
-
-					my @pdf_pages = ();
-
-					# glob split on spaces!
-					opendir(my $dh, $cache_dir);
-					while (readdir($dh)) {
-						warn "## readdir = [$_]\n";
-						my $page = "$cache_dir/$_";
-						next unless -f $page; # skip . ..
-						push @pdf_pages, $page;
-					}
-					closedir $dh;
-
-					foreach $page ( sort_pages @pdf_pages ) {
-						if ( $page !~ m/\.jpg$/ ) {
-							convert( $page => $page . '.jpg' );
-							unlink $page;
-							$page .= '.jpg';
-						}
-
-						warn "## ping $page\n";
-						die "$page: $!" unless -r $page;
-						my ( $w, $h, $size, $format ) = $image->ping($page);
-						warn "## image size $w*$h $size $format $page\n";
-						push @$pages, [ "/$page", $w, $h ] if $w && $h;
+					die "$path/$page: $!" unless -r "$path/$page";
+					# list-form pipe open: the file name is never interpreted by a shell
+					my $info = do { open( my $fh, '-|', 'pdfinfo', "$path/$page" ); join '', <$fh> };
+					warn "# pdfinfo $path/$page\n$info\n";
+					my ( $pdf_pages ) = $info =~ m/Pages:\s*(\d+)/s; # undef when pdfinfo printed no page count
+					die "can't find number of pages for $path/$page in:\n$info\n" unless $pdf_pages;
+
+					my $cache_path = "cache/$dir_url/$page";
+					my $txt = "$cache_path.txt";
+					make_basedir $txt;
+					system('pdftotext', "$path/$page", $txt) == 0 or warn "## pdftotext failed: status $?\n";
+					warn "# pdftotext $txt ", -s $txt, " bytes\n";
+					# presumably pdftotext writes one form feed per page, so size == page count means no text layer -- confirm
+					my $is_bitmap = ( ( -s $txt ) || 0 ) == $pdf_pages;
+
+					$pdf_pages = $ENV{MAX_PAGES} if defined $ENV{MAX_PAGES} && $pdf_pages > $ENV{MAX_PAGES}; # FIXME
+
+					warn "DIAG: bitmap:$is_bitmap pdf_pages:$pdf_pages\n";
+
+					foreach my $nr ( 1 .. $pdf_pages ) {
+						my $page_url = $is_bitmap
+							? convert_pdf_page( "$path/$page", $nr, "$cache_path.$nr" )
+							: render_pdf_page(  "$path/$page", $nr, "$cache_path" )
+						;
+						warn "## ping $page_url\n";
+						my ( $w, $h, $size, $format ) = $image->ping($page_url);
+						warn "## image size $w*$h $size $format $page_url\n";
+						my $url = decode('utf-8',"/$page_url");
+						push @$pages, [ $url, $w, $h ] if $w && $h;
 					}
 
 				} else {
 					die "$path/$page: $!" unless -r "$path/$page";
 					my ( $w, $h, $size, $format ) = $image->ping("$path/$page");
 					warn "# image size $w*$h $size $format $path/$page\n";
-					push @$pages, [ "$dir_url/$page", $w, $h ] if $w && $h;
+					my $url = decode('utf-8',"$dir_url/$page");
+					push @$pages, [ $url, $w, $h ] if $w && $h;
 				}
 			}
 			make_basedir $pages_path;
-			write_file $pages_path => encode_json( $pages );
+			write_file $pages_path, encode_json( $pages );
 			warn "# created $pages_path ", -s $pages_path, " bytes\n";
 		}
 		warn "# pages = ",dump($pages);
-		$page = sprintf $reader_page, $dir, encode_json( $pages ), $dir, '..';
+		$page = sprintf $reader_page, $dir, encode_json( $pages ), $dir, $dir =~ m/\/$/ ? '..' : '.';
 
 	} else {
 
@@ -404,7 +462,7 @@ sub serve_path {
 
 	}
 
-    return [ 200, ['Content-Type' => 'text/html; charset=utf-8'], [ $page ] ];
+	return [ 200, ['Content-Type' => 'text/html; charset=utf-8'], [ $page ] ];
 }
 
 1;