use Graphics::Magick;
use File::Slurp;
use JSON;
-use Time::Piece ();
-use Time::Seconds 'ONE_YEAR';
use autodie;
+use Time::HiRes qw(time);
+use Encode;
# make_basedir($path): make sure the directory that will hold $path exists.
# Returns immediately when $path itself already exists.  Otherwise the
# trailing "/name" component is stripped from $path (dies when $path has no
# such component) and what is left is created with File::Path::make_path,
# unless that directory is already there.
sub make_basedir {
my $path = shift;
return if -e $path;
# from here on $path is the parent directory, not the file
$path =~ s{/[^/]+$}{} || die "no dir/file in $path";
- File::Path::make_path $path;
+ warn "# make_basedir $path\n";
+ -e $path ? 0 : File::Path::make_path $path;
}
# Stolen from rack/directory.rb
<title>%s</title>
<link rel="stylesheet" type="text/css" href="/BookReader/BookReader.css"/>
- <script type="text/javascript" src="http://www.archive.org/includes/jquery-1.4.2.min.js"></script>
- <script type="text/javascript" src="http://www.archive.org/bookreader/jquery-ui-1.8.5.custom.min.js"></script>
+ <script type="text/javascript" src="http://archive.org/includes/jquery-1.4.2.min.js"></script>
+ <script type="text/javascript" src="http://archive.org/bookreader/jquery-ui-1.8.5.custom.min.js"></script>
- <script type="text/javascript" src="http://www.archive.org/bookreader/dragscrollable.js"></script>
- <script type="text/javascript" src="http://www.archive.org/bookreader/jquery.colorbox-min.js"></script>
- <script type="text/javascript" src="http://www.archive.org/bookreader/jquery.ui.ipad.js"></script>
- <script type="text/javascript" src="http://www.archive.org/bookreader/jquery.bt.min.js"></script>
+ <script type="text/javascript" src="http://archive.org/bookreader/dragscrollable.js"></script>
+ <script type="text/javascript" src="http://archive.org/bookreader/jquery.colorbox-min.js"></script>
+ <script type="text/javascript" src="http://archive.org/bookreader/jquery.ui.ipad.js"></script>
+ <script type="text/javascript" src="http://archive.org/bookreader/jquery.bt.min.js"></script>
<script type="text/javascript" src="/BookReader/BookReader.js"></script>
// Return the width of a given page. Here we assume all images are 800 pixels wide
br.getPageWidth = function(index) {
if ( ! pages[index] ) return;
- return parseInt( pages[index][1] );
+ return parseInt( pages[index][1] );
}
// Return the height of a given page. Here we assume all images are 1200 pixels high
br.getPageHeight = function(index) {
if ( ! pages[index] ) return;
- return parseInt( pages[index][2] );
+ return parseInt( pages[index][2] );
}
// We load the images from archive.org -- you can modify this function to retrieve images
// using a different URL structure
br.getPageURI = function(index, reduce, rotate) {
if ( ! pages[index] ) return;
- // reduce and rotate are ignored in this simple implementation, but we
- // could e.g. look at reduce and load images from a different directory
- // or pass the information to an image server
- var url = pages[index][0] + '?reduce='+reduce;
- console.debug('getPageURI', index, reduce, rotate, url);
- return url;
+ // reduce and rotate are ignored in this simple implementation, but we
+ // could e.g. look at reduce and load images from a different directory
+ // or pass the information to an image server
+ var r = 1 << ( Math.ceil(reduce).toString(2).length - 1 ); // reduce to nearest higher pow 2
+ var url = pages[index][0] + '?reduce='+r;
+ console.debug('getPageURI', index, reduce, r, rotate, url);
+ return url;
}
// Return which side, left or right, that a given page should be displayed on
br.getPageSide = function(index) {
- if (0 == (index & 0x1)) {
- return 'R';
- } else {
- return 'L';
- }
+ if (0 == (index & 0x1)) {
+ return 'R';
+ } else {
+ return 'L';
+ }
}
// This function returns the left and right indices for the user-visible
// spread that contains the given index. The return values may be
// null if there is no facing page or the index is invalid.
br.getSpreadIndices = function(pindex) {
- var spreadIndices = [null, null];
- if ('rl' == this.pageProgression) {
- // Right to Left
- if (this.getPageSide(pindex) == 'R') {
- spreadIndices[1] = pindex;
- spreadIndices[0] = pindex + 1;
- } else {
- // Given index was LHS
- spreadIndices[0] = pindex;
- spreadIndices[1] = pindex - 1;
- }
- } else {
- // Left to right
- if (this.getPageSide(pindex) == 'L') {
- spreadIndices[0] = pindex;
- spreadIndices[1] = pindex + 1;
- } else {
- // Given index was RHS
- spreadIndices[1] = pindex;
- spreadIndices[0] = pindex - 1;
- }
- }
-
- return spreadIndices;
+ var spreadIndices = [null, null];
+ if ('rl' == this.pageProgression) {
+ // Right to Left
+ if (this.getPageSide(pindex) == 'R') {
+ spreadIndices[1] = pindex;
+ spreadIndices[0] = pindex + 1;
+ } else {
+ // Given index was LHS
+ spreadIndices[0] = pindex;
+ spreadIndices[1] = pindex - 1;
+ }
+ } else {
+ // Left to right
+ if (this.getPageSide(pindex) == 'L') {
+ spreadIndices[0] = pindex;
+ spreadIndices[1] = pindex + 1;
+ } else {
+ // Given index was RHS
+ spreadIndices[1] = pindex;
+ spreadIndices[0] = pindex - 1;
+ }
+ }
+
+ return spreadIndices;
}
// For a given "accessible page index" return the page number in the book.
// For example, index 5 might correspond to "Page 1" if there is front matter such
// as a title page and table of contents.
br.getPageNum = function(index) {
- return index+1;
+ return index+1;
}
// Total number of leafs
br.imagesBaseURL = '/BookReader/images/';
br.getEmbedCode = function(frameWidth, frameHeight, viewParams) {
- return "Embed code not supported in bookreader demo.";
+ return "Embed code not supported in bookreader demo.";
}
// Let's go!
PAGE
# should_handle($self, $file): true when $file is an existing directory or
# an existing plain file, false otherwise.
sub should_handle {
- my($self, $file) = @_;
- return -d $file || -f $file;
+ my($self, $file) = @_;
+ return -d $file || -f $file;
}
# return_dir_redirect($self, $env): build a PSGI response triple that
# redirects (301) to the request URI with a "/" appended.
# The body is the 8-byte string 'Redirect', which is what the hard-coded
# Content-Length of 8 refers to.
sub return_dir_redirect {
- my ($self, $env) = @_;
- my $uri = Plack::Request->new($env)->uri;
- return [ 301,
- [
- 'Location' => $uri . '/',
- 'Content-Type' => 'text/plain',
- 'Content-Length' => 8,
- ],
- [ 'Redirect' ],
- ];
+ my ($self, $env) = @_;
+ my $uri = Plack::Request->new($env)->uri;
+ return [ 301,
+ [
+ 'Location' => $uri . '/',
+ 'Content-Type' => 'text/plain',
+ 'Content-Length' => 8,
+ ],
+ [ 'Redirect' ],
+ ];
+}
+
+# convert(@args): run the GraphicsMagick "convert" sub-command; every
+# argument is handed unchanged to gm().
+sub convert {
+    return gm( 'convert', @_ );
+}
+# montage(@args): run the GraphicsMagick "montage" sub-command; every
+# argument is handed unchanged to gm().
+sub montage {
+    return gm( 'montage', @_ );
+}
+
+# gm($command, @args): run `gm $command @args` in list form (no shell) and
+# log the call and its wall-clock duration to STDERR.
+# The last element of @args is taken to be the output file; its size is
+# included in the log line.  A non-zero exit status is reported with a
+# warning instead of being silently dropped, and a missing output file is
+# logged as 0 bytes rather than triggering an "uninitialized value" warning.
+# Callers stay responsible for checking that the output really exists.
+sub gm {
+    my $command = shift;
+    warn "# $command ",dump(@_);
+    my $t = time();
+    my $status = system 'gm', $command, @_;
+    $t = time() - $t;
+    if ( $status != 0 ) {
+        # -1 means gm could not be started at all; otherwise $? holds the wait status
+        my $why = $status == -1 ? "could not run: $!" : 'exit status ' . ( $? >> 8 );
+        warn "## gm $command failed ($why)\n";
+    }
+    my $output = @_ ? $_[-1] : '';
+    my $size   = -s $output;
+    $size = 0 unless defined $size;    # -s yields undef when the file is missing
+    warn sprintf("## $command %d bytes in %.2f s %s\n", $size, $t, $output);
+}
+
+# longest_common_prefix(@names): return the longest leading string shared by
+# all @names, compared case-insensitively.  The result keeps the spelling
+# of the first name and is '' when nothing is shared.  The prefix found is
+# also logged to STDERR.
+sub longest_common_prefix {
+    my ( $prefix, @others ) = @_;
+    foreach my $name (@others) {
+        # shorten the candidate one character at a time until $name starts with it
+        until ( $name =~ /^\Q$prefix\E/i ) {
+            chop $prefix;
+        }
+    }
+    warn "# longest_common_prefix [$prefix]\n";
+    return $prefix;
+}
+
+# sort_pages(@names): return @names ordered by the number embedded in each.
+# The sort key is what remains of a name after removing the prefix common to
+# all names and then every non-digit character; keys are compared
+# numerically.  Digits that are part of the common prefix therefore do not
+# take part in the comparison.  Each comparison is logged to STDERR.
+sub sort_pages {
+    my $prefix = longest_common_prefix(@_);
+
+    # numeric key of one name: prefix stripped, then digits only
+    my $number_of = sub {
+        my ($name) = @_;
+        $name =~ s/^\Q$prefix\E//i;
+        $name =~ s/\D+//g;
+        return $name;
+    };
+
+    return sort {
+        my $left  = $number_of->($a);
+        my $right = $number_of->($b);
+        warn "## sort [$a] $left <=> $right [$b]\n";
+        $left <=> $right;
+    } @_;
+}
+
+# convert_pdf_page($pdf, $page, $path): turn the images embedded in one page
+# of $pdf into a single JPEG and return its file name ("$path.jpg").
+#   $pdf  - PDF file to read
+#   $page - page number handed to pdfimages as both -f and -l
+#   $path - scratch directory for the extracted parts; created here and
+#           removed again once the JPEG has been written
+# A page with one image is converted directly; several images are stacked
+# in one column with montage().  Dies when pdfimages produced no files or
+# when the resulting JPEG is not readable.
+sub convert_pdf_page {
+    my ($pdf, $page, $path) = @_;
+    my $t = time();
+
+    make_path $path;
+
+    warn "# pdfimages $page $pdf -> $path/\n";
+    my $status = system 'pdfimages', '-f', $page, '-l', $page, '-q', '-j', '-p', $pdf, "$path/p";
+    warn "## pdfimages exited with status $?\n" if $status != 0;
+
+    my @parts = ();
+    # glob split on spaces!
+    opendir(my $dh, $path);
+    # Explicit loop variable: a bare while(readdir) only sets $_ on perl 5.12+
+    # and would overwrite the caller's global $_.
+    while ( defined( my $entry = readdir $dh ) ) {
+        my $full = "$path/$entry";
+        warn "## readdir $full\n";
+        next unless -f $full; # skip . ..
+        push @parts, $entry;
+    }
+    closedir $dh;
+
+    die "can't find images for $pdf in $path" unless @parts;
+
+    @parts = sort_pages @parts;
+
+    my $image = "$path.jpg";
+
+    if ( @parts == 1 ) { # single image
+        my $part = "$path/$parts[0]";
+        convert( $part => $image );
+    } else {
+        my @full = map { "$path/$_" } @parts;
+        # one column, as many rows as there are parts
+        montage( @full, '-tile', '1x'.scalar(@full), '-geometry', '+1+1' => $image );
+    }
+
+    die "$image: $!" unless -r $image;
+
+    remove_tree $path;
+
+    $t = time() - $t;
+    warn sprintf("## page: %d in %.2f s for %s\n", $page, $t, $image);
+    return $image;
+}
+
+# render_pdf_page($pdf, $page, $path): rasterise one page of $pdf to JPEG
+# with pdftocairo and return the image file name, "$path-NNN.jpg" with the
+# page number zero-padded to three digits.
+# pdftocairo normally chooses the width of the page-number suffix itself
+# (it depends on the document's page count), so a fixed "%03d" guess only
+# matches for some documents.  Passing -singlefile together with an explicit
+# root makes pdftocairo write exactly "<root>.jpg", which keeps the returned
+# name stable.  Dies when the image is not readable afterwards.
+sub render_pdf_page {
+    my ( $pdf, $page, $path ) = @_;
+    my $t = time();
+
+    my $root  = sprintf( '%s-%03d', $path, $page );
+    my $image = "$root.jpg";
+
+    warn "# pdftocairo $pdf\n";
+    my $status = system('pdftocairo', '-jpeg', '-singlefile', '-f', $page, '-l', $page, $pdf, $root);
+    warn "## pdftocairo exited with status $?\n" if $status != 0;
+
+    die "can't find $image: $!" unless -r $image;
+
+    $t = time() - $t;
+    warn sprintf("## page: %d in %.2f s for %s\n", $page, $t, $image);
+    return $image;
}
# serve_path($self, $env, $path, $fullpath): handle one request.
#  * plain file whose name ends in ".pdf": the file name is cut off $path and
#    becomes the only entry of @page_files;
#  * any other plain file: handed to SUPER::serve_path, through a scaled copy
#    at $cache_path when a "reduce" query parameter is given;
#  * directory: redirects when the URL lacks a trailing "/", otherwise lists
#    the children into @files and collects page-like names in @page_files;
#  * anything else: dies with "Unsupported format".
# With a "bookreader" query parameter the page list (url, width, height) is
# read from, or built and written to, bookreader.json and rendered through
# the $reader_page template.  Falls through to a 200 text/html response
# whose body is $page.
sub serve_path {
- my($self, $env, $path, $fullpath) = @_;
+ my($self, $env, $path, $fullpath) = @_;
my $req = Plack::Request->new($env);
- my $dir_url = $env->{SCRIPT_NAME} . $env->{PATH_INFO};
+ my $dir_url = $env->{SCRIPT_NAME} . $env->{PATH_INFO};
+ my @files = ();
+ my @page_files;
- if (-f $path) {
# the substitution below modifies $path: afterwards it names the directory
# that contains the pdf
+ if ( -f $path && $path =~ s{/([^/]+\.pdf)$}{} ) {
+ push @page_files, $1;
+ warn "# single pdf: $path / $1\n";
+ } elsif ( -f $path ) {
if ( my $reduce = $req->param('reduce') ) {
$reduce = int($reduce); # BookReader javascript sometimes returns a float
# NOTE(review): $cache_path is not declared in the lines visible here --
# presumably it is set in an elided part of this branch; TODO confirm
if ( $reduce <= 1 && $path =~ m/\.jpe?g$/ ) {
$cache_path = $path;
} elsif ( ! -e $cache_path ) {
- my $image = Graphics::Magick->new;
- warn "## Read $path ", -s $path, " bytes\n";
- $image->Read($path);
- my ( $w, $h ) = $image->Get('width','height');
- $image->Resize(
- width => $w / $reduce,
- height => $h / $reduce
- );
make_basedir $cache_path;
- $image->Write( filename => $cache_path );
- warn "# created $cache_path ", -s $cache_path, " bytes\n";
+ convert( '-scale', ( 100 / $reduce ) .'%', $path => $cache_path );
}
- return $self->SUPER::serve_path($env, $cache_path, $fullpath);
+ return $self->SUPER::serve_path($env, $cache_path, $fullpath);
}
- return $self->SUPER::serve_path($env, $path, $fullpath);
- }
-
- if ($dir_url !~ m{/$}) {
- return $self->return_dir_redirect($env);
- }
+ return $self->SUPER::serve_path($env, $path, $fullpath);
+ } elsif ( -d $path ) {
- my @files = ();
+ if ($dir_url !~ m{/$}) {
+ return $self->return_dir_redirect($env);
+ }
- my $dh = DirHandle->new($path);
- my @children;
- while (defined(my $ent = $dh->read)) {
- next if $ent eq '.';
- push @children, $ent;
- }
+ my $dh = DirHandle->new($path);
+ my @children;
+ while (defined(my $ent = $dh->read)) {
+ next if $ent eq '.';
+ push @children, $ent;
+ }
- my @page_files;
# a page file is a jpg/gif/pdf whose name ends in digits, optionally followed
# by one non-digit character, before the extension
+ for my $basename (sort { $a cmp $b } @children) {
+ push @page_files, $basename if $basename =~ m/\d+\D?\.(jpg|gif|pdf)$/;
+ my $file = "$path/$basename";
+ my $url = $dir_url . $basename;
- for my $basename (sort { $a cmp $b } @children) {
- push @page_files, $basename if $basename =~ m/\d+\.(jpg|gif|pdf)$/;
- my $file = "$path/$basename";
- my $url = $dir_url . $basename;
# "stat _" reuses the stat data gathered by the preceding -d test
+ my $is_dir = -d $file;
+ my @stat = stat _;
- my $is_dir = -d $file;
- my @stat = stat _;
+ $url = join '/', map {uri_escape($_)} split m{/}, $url;
- $url = join '/', map {uri_escape($_)} split m{/}, $url;
+ if ($is_dir) {
+ $basename .= "/";
+ $url .= "/";
+ }
- if ($is_dir) {
- $basename .= "/";
- $url .= "/";
- }
# one listing row: url, display name, size ($stat[7]), mime type, mtime ($stat[9])
+ my $mime_type = $is_dir ? 'directory' : ( Plack::MIME->mime_type($file) || 'text/plain' );
+ push @files, [ $url, $basename, $stat[7], $mime_type, HTTP::Date::time2str($stat[9]) ];
+ }
- my $mime_type = $is_dir ? 'directory' : ( Plack::MIME->mime_type($file) || 'text/plain' );
- push @files, [ $url, $basename, $stat[7], $mime_type, HTTP::Date::time2str($stat[9]) ];
- }
+ } else {
+ die "Unsupported format: $path";
+ }
- warn "# page_files = ",dump( @page_files );
+ if ( @page_files ) {
+ @page_files = sort_pages @page_files;
+ warn "# page_files = ",dump( @page_files );
+ }
- my $dir = Plack::Util::encode_html( $env->{PATH_INFO} );
+ my $dir = Plack::Util::encode_html( $env->{PATH_INFO} );
my $page = 'empty';
if ( $req->param('bookreader') ) {
- my $pages; # []
- my $pages_path = "cache/$dir_url/bookreader.json";
- if ( 0 && -e $pages_path ) {
+ my $pages; # arrayref of [ url, width, height ]
+ my $pages_path = "meta/$dir_url/bookreader.json";
+ if ( -e $pages_path ) {
$pages = decode_json read_file $pages_path;
} else {
- foreach my $page ( sort { $a <=> $b } @page_files ) {
+ foreach my $page ( @page_files ) {
my $image = Graphics::Magick->new;
if ( $page =~ m/\.pdf$/ ) {
- my $cache_dir = "cache/$dir_url/$page/";
- make_path $cache_dir;
- warn "# pdfimages $path/$page -> $cache_dir";
- system 'pdfimages', '-q', '-j', '-p', "$path/$page", $cache_dir;
-
- # glob split on spaces!
- opendir(my $dh, $cache_dir);
- while (readdir($dh)) {
- warn "## readdir = [$_]\n";
- my $page = "$cache_dir/$_";
- next unless -f $page; # skip . ..
-
- if ( $page !~ m/\.jpg$/ ) {
- warn "# convert to jpg";
- system 'gm', 'convert', $page, $page . '.jpg';
- unlink $page;
- $page .= '.jpg';
- }
-
- warn "## ping $page\n";
- die "$page: $!" unless -r $page;
- my ( $w, $h, $size, $format ) = $image->ping($page);
- warn "## image size $w*$h $size $format $page\n";
- push @$pages, [ "/$page", $w, $h ] if $w && $h;
+ die "$path/$page: $!" unless -r "$path/$page";
+
# NOTE(review): backticks run this through a shell with "$path/$page"
# interpolated, so a file name containing quotes, backticks or $(...) is
# interpreted by the shell -- consider a list-form pipe open instead
+ my $info = `pdfinfo "$path/$page"`;
+ warn "# pdfinfo $path/$page\n$info\n";
# NOTE(review): "my ... if COND" is documented in perlsyn as having undefined
# behaviour; also the die below interpolates $pdf_pages, which is empty
# whenever that die fires -- $info looks like what was meant
+ my $pdf_pages = $1 if ( $info =~ m/Pages:\s*(\d+)/s );
+ die "can't find number of pages for $path/$page in:\n$pdf_pages\n" unless $pdf_pages;
+
+ my $cache_path = "cache/$dir_url/$page";
+ my $txt = "$cache_path.txt";
+ make_basedir $txt;
+ system('pdftotext', "$path/$page", $txt);
+ warn "# pdftotext $txt ", -s $txt, " bytes\n";
+
# the text dump being exactly as many bytes as the pdf has pages is taken to
# mean "no text layer" -- presumably one page separator per page; TODO confirm
+ my $is_bitmap = -s $txt == $pdf_pages;
+
# optional cap on the number of pages processed, from the environment
+ $pdf_pages = $ENV{MAX_PAGES} if defined $ENV{MAX_PAGES} && $pdf_pages > $ENV{MAX_PAGES}; # FIXME
+
+ warn "DIAG: bitmap:$is_bitmap pdf_pages:$pdf_pages\n";
+
# image-only pdfs get their embedded images extracted, others are rendered
+ foreach my $nr ( 1 .. $pdf_pages ) {
+ my $page_url = $is_bitmap
+ ? convert_pdf_page( "$path/$page", $nr, "$cache_path.$nr" )
+ : render_pdf_page( "$path/$page", $nr, "$cache_path" )
+ ;
+ warn "## ping $page_url\n";
+ my ( $w, $h, $size, $format ) = $image->ping($page_url);
+ warn "## image size $w*$h $size $format $page_url\n";
+ my $url = decode('utf-8',"/$page_url");
+ push @$pages, [ $url, $w, $h ] if $w && $h;
}
- closedir $dh;
} else {
die "$path/$page: $!" unless -r "$path/$page";
my ( $w, $h, $size, $format ) = $image->ping("$path/$page");
warn "# image size $w*$h $size $format $path/$page\n";
- push @$pages, [ "$dir_url/$page", $w, $h ] if $w && $h;
+ my $url = decode('utf-8',"$dir_url/$page");
+ push @$pages, [ $url, $w, $h ] if $w && $h;
}
}
make_basedir $pages_path;
- write_file $pages_path => encode_json( $pages );
# NOTE(review): the ", =>" below is a doubled separator -- looks like a typo
+ write_file $pages_path, => encode_json( $pages );
warn "# created $pages_path ", -s $pages_path, " bytes\n";
}
warn "# pages = ",dump($pages);
- $page = sprintf $reader_page, $dir, encode_json( $pages ), $dir, '..';
# last template argument is '..' when $dir ends in "/" and '.' otherwise
+ $page = sprintf $reader_page, $dir, encode_json( $pages ), $dir, $dir =~ m/\/$/ ? '..' : '.';
} else {
}
- return [ 200, ['Content-Type' => 'text/html; charset=utf-8'], [ $page ] ];
+ return [ 200, ['Content-Type' => 'text/html; charset=utf-8'], [ $page ] ];
}
1;