X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=plack%2Flib%2FPlack%2FApp%2FBookReader.pm;h=72d265553c003663f6ba72600b5c02cfe6273d68;hb=e7bcdbc42e8c7bbe995673f1f1efafb4f0fe8865;hp=e12b1691613bbcffa1e19a5a81a3954babf57af0;hpb=f58d3a78c645f48f96e086f436a8992af916578e;p=bookreader.git diff --git a/plack/lib/Plack/App/BookReader.pm b/plack/lib/Plack/App/BookReader.pm index e12b169..72d2655 100644 --- a/plack/lib/Plack/App/BookReader.pm +++ b/plack/lib/Plack/App/BookReader.pm @@ -15,12 +15,14 @@ use File::Slurp; use JSON; use autodie; use Time::HiRes qw(time); +use Encode; sub make_basedir { my $path = shift; return if -e $path; $path =~ s{/[^/]+$}{} || die "no dir/file in $path"; - File::Path::make_path $path; + warn "# make_basedir $path\n"; + -e $path ? 0 : File::Path::make_path $path; } # Stolen from rack/directory.rb @@ -60,13 +62,13 @@ my $reader_page = <<'PAGE'; %s - - + + - - - - + + + + @@ -95,64 +97,65 @@ var pages = %s; // Return the width of a given page. Here we assume all images are 800 pixels wide br.getPageWidth = function(index) { if ( ! pages[index] ) return; - return parseInt( pages[index][1] ); + return parseInt( pages[index][1] ); } // Return the height of a given page. Here we assume all images are 1200 pixels high br.getPageHeight = function(index) { if ( ! pages[index] ) return; - return parseInt( pages[index][2] ); + return parseInt( pages[index][2] ); } // We load the images from archive.org -- you can modify this function to retrieve images // using a different URL structure br.getPageURI = function(index, reduce, rotate) { if ( ! pages[index] ) return; - // reduce and rotate are ignored in this simple implementation, but we - // could e.g. look at reduce and load images from a different directory - // or pass the information to an image server - var url = pages[index][0] + '?reduce='+reduce; - console.debug('getPageURI', index, reduce, rotate, url); - return url; + // reduce and rotate are ignored in this simple implementation, but we + // could e.g. look at reduce and load images from a different directory + // or pass the information to an image server + var r = 1 << ( Math.ceil(reduce).toString(2).length - 1 ); // reduce to nearest higher pow 2 + var url = pages[index][0] + '?reduce='+r; + console.debug('getPageURI', index, reduce, r, rotate, url); + return url; } // Return which side, left or right, that a given page should be displayed on br.getPageSide = function(index) { - if (0 == (index & 0x1)) { - return 'R'; - } else { - return 'L'; - } + if (0 == (index & 0x1)) { + return 'R'; + } else { + return 'L'; + } } // This function returns the left and right indices for the user-visible // spread that contains the given index. The return values may be // null if there is no facing page or the index is invalid. br.getSpreadIndices = function(pindex) { - var spreadIndices = [null, null]; - if ('rl' == this.pageProgression) { - // Right to Left - if (this.getPageSide(pindex) == 'R') { - spreadIndices[1] = pindex; - spreadIndices[0] = pindex + 1; - } else { - // Given index was LHS - spreadIndices[0] = pindex; - spreadIndices[1] = pindex - 1; - } - } else { - // Left to right - if (this.getPageSide(pindex) == 'L') { - spreadIndices[0] = pindex; - spreadIndices[1] = pindex + 1; - } else { - // Given index was RHS - spreadIndices[1] = pindex; - spreadIndices[0] = pindex - 1; - } - } - - return spreadIndices; + var spreadIndices = [null, null]; + if ('rl' == this.pageProgression) { + // Right to Left + if (this.getPageSide(pindex) == 'R') { + spreadIndices[1] = pindex; + spreadIndices[0] = pindex + 1; + } else { + // Given index was LHS + spreadIndices[0] = pindex; + spreadIndices[1] = pindex - 1; + } + } else { + // Left to right + if (this.getPageSide(pindex) == 'L') { + spreadIndices[0] = pindex; + spreadIndices[1] = pindex + 1; + } else { + // Given index was RHS + spreadIndices[1] = pindex; + spreadIndices[0] = pindex - 1; + } + } + + return spreadIndices; } // For a given "accessible page index" return the page number in the book. @@ -160,7 +163,7 @@ br.getSpreadIndices = function(pindex) { // For example, index 5 might correspond to "Page 1" if there is front matter such // as a title page and table of contents. br.getPageNum = function(index) { - return index+1; + return index+1; } // Total number of leafs @@ -174,7 +177,7 @@ br.bookUrl = '%s'; br.imagesBaseURL = '/BookReader/images/'; br.getEmbedCode = function(frameWidth, frameHeight, viewParams) { - return "Embed code not supported in bookreader demo."; + return "Embed code not supported in bookreader demo."; } // Let's go! @@ -207,29 +210,33 @@ $('#btnSrch').hide(); PAGE sub should_handle { - my($self, $file) = @_; - return -d $file || -f $file; + my($self, $file) = @_; + return -d $file || -f $file; } sub return_dir_redirect { - my ($self, $env) = @_; - my $uri = Plack::Request->new($env)->uri; - return [ 301, - [ - 'Location' => $uri . '/', - 'Content-Type' => 'text/plain', - 'Content-Length' => 8, - ], - [ 'Redirect' ], - ]; + my ($self, $env) = @_; + my $uri = Plack::Request->new($env)->uri; + return [ 301, + [ + 'Location' => $uri . '/', + 'Content-Type' => 'text/plain', + 'Content-Length' => 8, + ], + [ 'Redirect' ], + ]; } -sub convert { - warn "# convert ",dump(@_); +sub convert { gm('convert',@_) } +sub montage { gm('montage',@_) } + +sub gm { + my $command = shift; + warn "# $command ",dump(@_); my $t = time(); - system 'gm', 'convert', @_; + system 'gm', $command, @_; $t = time() - $t; - warn sprintf("## created %d bytes in %.2f s %s\n", -s $_[-1], $t, $_[-1]); + warn sprintf("## $command %d bytes in %.2f s %s\n", -s $_[-1], $t, $_[-1]); } sub longest_common_prefix { @@ -252,27 +259,78 @@ sub sort_pages { } @_; } -sub convert_pdf { - my ($page, $cache_dir) = @_; +sub convert_pdf_page { + my ($pdf, $page, $path) = @_; + my $t = time(); + + make_path $path; + + warn "# pdfimages $page $pdf -> $path/\n"; + system 'pdfimages', '-f', $page, '-l', $page, '-q', '-j', '-p', $pdf, "$path/p"; + + my @parts = (); + # glob split on spaces! + opendir(my $dh, $path); + while (readdir($dh)) { + my $full = "$path/$_"; + warn "## readdir $full\n"; + next unless -f $full; # skip . .. + push @parts, $_; + } + closedir $dh; + + die "can't find images for $pdf in $path" unless $#parts >= 0; + + @parts = sort_pages @parts; + my $image = "$path.jpg"; + + if ( $#parts == 0 ) { # single image + my $part = "$path/$parts[0]"; + convert( $part => $image ); + } else { + my @full = map { "$path/$_" } @parts; + montage( @full, '-tile', '1x'.scalar(@full), '-geometry', '+1+1' => $image ); + } + + die "$image: $!" unless -r $image; + + remove_tree $path; + + $t = time() - $t; + warn sprintf("## page: %d in %.2f s for %s\n", $page, $t, $image); + return $image; +} + +sub render_pdf_page { + my ( $pdf, $page, $path ) = @_; my $t = time(); + + warn "# pdftocairo $pdf\n"; + system('pdftocairo', '-jpeg', '-f', $page, '-l', $page, $pdf, $path); + + my $image = sprintf( '%s-%03d.jpg', $path, $page ); + + die "can't find $image: $!" unless -r $image; + $t = time() - $t; - warn sprintf("## %.2f s %s\n", $t); + warn sprintf("## page: %d in %.2f s for %s\n", $page, $t, $image); + return $image; } sub serve_path { - my($self, $env, $path, $fullpath) = @_; + my($self, $env, $path, $fullpath) = @_; my $req = Plack::Request->new($env); - my $dir_url = $env->{SCRIPT_NAME} . $env->{PATH_INFO}; + my $dir_url = $env->{SCRIPT_NAME} . $env->{PATH_INFO}; my @files = (); my @page_files; if ( -f $path && $path =~ s{/([^/]+\.pdf)$}{} ) { push @page_files, $1; warn "# single pdf: $path / $1\n"; - } elsif (-f $path ) { + } elsif ( -f $path ) { if ( my $reduce = $req->param('reduce') ) { $reduce = int($reduce); # BookReader javascript somethimes returns float @@ -286,12 +344,12 @@ sub serve_path { convert( '-scale', ( 100 / $reduce ) .'%', $path => $cache_path ); } - return $self->SUPER::serve_path($env, $cache_path, $fullpath); + return $self->SUPER::serve_path($env, $cache_path, $fullpath); } - return $self->SUPER::serve_path($env, $path, $fullpath); - } elsif ( -d $path ) { + return $self->SUPER::serve_path($env, $path, $fullpath); + } elsif ( -d $path ) { if ($dir_url !~ m{/$}) { return $self->return_dir_redirect($env); @@ -333,64 +391,64 @@ sub serve_path { warn "# page_files = ",dump( @page_files ); } - my $dir = Plack::Util::encode_html( $env->{PATH_INFO} ); + my $dir = Plack::Util::encode_html( $env->{PATH_INFO} ); my $page = 'empty'; if ( $req->param('bookreader') ) { - my $pages; # [] - my $pages_path = "cache/$dir_url/bookreader.json"; + my $pages; # []: + my $pages_path = "meta/$dir_url/bookreader.json"; if ( -e $pages_path ) { $pages = decode_json read_file $pages_path; } else { foreach my $page ( @page_files ) { my $image = Graphics::Magick->new; if ( $page =~ m/\.pdf$/ ) { - die "$path/$page: $!" unless -e "$path/$page"; - my $cache_dir = "cache/$dir_url/$page/"; - make_path $cache_dir; - warn "# pdfimages $path/$page -> $cache_dir"; - system 'pdfimages', '-q', '-j', '-p', "$path/$page", $cache_dir; - - my @pdf_pages = (); - - # glob split on spaces! - opendir(my $dh, $cache_dir); - while (readdir($dh)) { - warn "## readdir = [$_]\n"; - my $page = "$cache_dir/$_"; - next unless -f $page; # skip . .. - push @pdf_pages, $page; - } - closedir $dh; - - foreach $page ( sort_pages @pdf_pages ) { - if ( $page !~ m/\.jpg$/ ) { - convert( $page => $page . '.jpg' ); - unlink $page; - $page .= '.jpg'; - } - - warn "## ping $page\n"; - die "$page: $!" unless -r $page; - my ( $w, $h, $size, $format ) = $image->ping($page); - warn "## image size $w*$h $size $format $page\n"; - push @$pages, [ "/$page", $w, $h ] if $w && $h; + die "$path/$page: $!" unless -r "$path/$page"; + + my $info = `pdfinfo "$path/$page"`; + warn "# pdfinfo $path/$page\n$info\n"; + my $pdf_pages = $1 if ( $info =~ m/Pages:\s*(\d+)/s ); + die "can't find number of pages for $path/$page in:\n$pdf_pages\n" unless $pdf_pages; + + my $cache_path = "cache/$dir_url/$page"; + my $txt = "$cache_path.txt"; + make_basedir $txt; + system('pdftotext', "$path/$page", $txt); + warn "# pdftotext $txt ", -s $txt, " bytes\n"; + + my $is_bitmap = -s $txt == $pdf_pages; + + $pdf_pages = $ENV{MAX_PAGES} if defined $ENV{MAX_PAGES} && $pdf_pages > $ENV{MAX_PAGES}; # FIXME + + warn "DIAG: bitmap:$is_bitmap pdf_pages:$pdf_pages\n"; + + foreach my $nr ( 1 .. $pdf_pages ) { + my $page_url = $is_bitmap + ? convert_pdf_page( "$path/$page", $nr, "$cache_path.$nr" ) + : render_pdf_page( "$path/$page", $nr, "$cache_path" ) + ; + warn "## ping $page_url\n"; + my ( $w, $h, $size, $format ) = $image->ping($page_url); + warn "## image size $w*$h $size $format $page_url\n"; + my $url = decode('utf-8',"/$page_url"); + push @$pages, [ $url, $w, $h ] if $w && $h; } } else { die "$path/$page: $!" unless -r "$path/$page"; my ( $w, $h, $size, $format ) = $image->ping("$path/$page"); warn "# image size $w*$h $size $format $path/$page\n"; - push @$pages, [ "$dir_url/$page", $w, $h ] if $w && $h; + my $url = decode('utf-8',"$dir_url/$page"); + push @$pages, [ $url, $w, $h ] if $w && $h; } } make_basedir $pages_path; - write_file $pages_path => encode_json( $pages ); + write_file $pages_path, => encode_json( $pages ); warn "# created $pages_path ", -s $pages_path, " bytes\n"; } warn "# pages = ",dump($pages); - $page = sprintf $reader_page, $dir, encode_json( $pages ), $dir, '..'; + $page = sprintf $reader_page, $dir, encode_json( $pages ), $dir, $dir =~ m/\/$/ ? '..' : '.'; } else { @@ -404,7 +462,7 @@ sub serve_path { } - return [ 200, ['Content-Type' => 'text/html; charset=utf-8'], [ $page ] ]; + return [ 200, ['Content-Type' => 'text/html; charset=utf-8'], [ $page ] ]; } 1;