1 package Plack::App::BookReader;
2 use parent qw(Plack::App::File);
11 use Data::Dump qw(dump);
12 use File::Path qw(make_path remove_tree);
17 use Time::HiRes qw(time);
23 $path =~ s{/[^/]+$}{} || die "no dir/file in $path";
24 warn "# make_basedir $path\n";
25 -e $path ? 0 : File::Path::make_path $path;
28 # Stolen from rack/directory.rb
29 my $dir_file = "<tr><td class='name'><a href='%s'>%s</a></td><td class='size'>%s</td><td class='type'>%s</td><td class='mtime'>%s</td></tr>";
30 my $dir_page = <<PAGE;
33 <meta http-equiv="content-type" content="text/html; charset=utf-8" />
34 <style type='text/css'>
35 table { width:100%%; }
36 .name { text-align:left; }
37 .size, .mtime { text-align:right; }
39 .mtime { width:15em; }
46 <th class='name'>Name</th>
47 <th class='size'>Size</th>
48 <th class='type'>Type</th>
49 <th class='mtime'>Last Modified</th>
58 my $reader_page = <<'PAGE';
59 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
64 <link rel="stylesheet" type="text/css" href="/BookReader/BookReader.css"/>
65 <script type="text/javascript" src="http://archive.org/includes/jquery-1.4.2.min.js"></script>
66 <script type="text/javascript" src="http://archive.org/bookreader/jquery-ui-1.8.5.custom.min.js"></script>
68 <script type="text/javascript" src="http://archive.org/bookreader/dragscrollable.js"></script>
69 <script type="text/javascript" src="http://archive.org/bookreader/jquery.colorbox-min.js"></script>
70 <script type="text/javascript" src="http://archive.org/bookreader/jquery.ui.ipad.js"></script>
71 <script type="text/javascript" src="http://archive.org/bookreader/jquery.bt.min.js"></script>
73 <script type="text/javascript" src="/BookReader/BookReader.js"></script>
75 <style type="text/css">
77 /* Hide print and embed functionality */
78 #BRtoolbar .embed, .print {
84 <script type="text/javascript">
85 $(document).ready( function() {
88 // This file shows the minimum you need to provide to BookReader to display a book
90 // Copyright(c)2008-2009 Internet Archive. Software license AGPL version 3.
92 // Create the BookReader object
93 var br = new BookReader();
97 // Return the width of a given page. Here we assume all images are 800 pixels wide
98 br.getPageWidth = function(index) {
99 if ( ! pages[index] ) return;
100 return parseInt( pages[index][1] );
103 // Return the height of a given page. Here we assume all images are 1200 pixels high
104 br.getPageHeight = function(index) {
105 if ( ! pages[index] ) return;
106 return parseInt( pages[index][2] );
109 // We load the images from archive.org -- you can modify this function to retrieve images
110 // using a different URL structure
111 br.getPageURI = function(index, reduce, rotate) {
112 if ( ! pages[index] ) return;
113 // reduce and rotate are ignored in this simple implementation, but we
114 // could e.g. look at reduce and load images from a different directory
115 // or pass the information to an image server
116 var r = 1 << ( Math.ceil(reduce).toString(2).length - 1 ); // reduce to nearest higher pow 2
117 var url = pages[index][0] + '?reduce='+r;
118 console.debug('getPageURI', index, reduce, r, rotate, url);
122 // Return which side, left or right, that a given page should be displayed on
123 br.getPageSide = function(index) {
124 if (0 == (index & 0x1)) {
131 // This function returns the left and right indices for the user-visible
132 // spread that contains the given index. The return values may be
133 // null if there is no facing page or the index is invalid.
134 br.getSpreadIndices = function(pindex) {
135 var spreadIndices = [null, null];
136 if ('rl' == this.pageProgression) {
138 if (this.getPageSide(pindex) == 'R') {
139 spreadIndices[1] = pindex;
140 spreadIndices[0] = pindex + 1;
142 // Given index was LHS
143 spreadIndices[0] = pindex;
144 spreadIndices[1] = pindex - 1;
148 if (this.getPageSide(pindex) == 'L') {
149 spreadIndices[0] = pindex;
150 spreadIndices[1] = pindex + 1;
152 // Given index was RHS
153 spreadIndices[1] = pindex;
154 spreadIndices[0] = pindex - 1;
158 return spreadIndices;
161 // For a given "accessible page index" return the page number in the book.
163 // For example, index 5 might correspond to "Page 1" if there is front matter such
164 // as a title page and table of contents.
165 br.getPageNum = function(index) {
169 // Total number of leafs
170 br.numLeafs = pages.length;
172 // Book title and the URL used for the book title link
176 // Override the path used to find UI images
177 br.imagesBaseURL = '/BookReader/images/';
179 br.getEmbedCode = function(frameWidth, frameHeight, viewParams) {
180 return "Embed code not supported in bookreader demo.";
186 // read-aloud and search need backend compenents and are not supported in the demo
187 $('#BRtoolbar').find('.read').hide();
188 $('#textSrch').hide();
189 $('#btnSrch').hide();
195 <body style="background-color: ##939598;">
197 <div id="BookReader">
198 Internet Archive BookReader<br/>
202 The BookReader requires JavaScript to be enabled. Please check that your browser supports JavaScript and that it is enabled in the browser settings.
213 my($self, $file) = @_;
214 return -d $file || -f $file;
217 sub return_dir_redirect {
218 my ($self, $env) = @_;
219 my $uri = Plack::Request->new($env)->uri;
222 'Location' => $uri . '/',
223 'Content-Type' => 'text/plain',
224 'Content-Length' => 8,
# Run the "convert" sub-command through gm(); every argument is passed on
# unchanged and whatever gm() returns is handed back to the caller.
sub convert {
    return gm( 'convert', @_ );
}
# Run the "montage" sub-command through gm(); every argument is passed on
# unchanged and whatever gm() returns is handed back to the caller.
sub montage {
    return gm( 'montage', @_ );
}
235 warn "# $command ",dump(@_);
237 system 'gm', $command, @_;
239 warn sprintf("## $command %d bytes in %.2f s %s\n", -s $_[-1], $t, $_[-1]);
242 sub longest_common_prefix {
245 chop $prefix while (! /^\Q$prefix\E/i);
247 warn "# longest_common_prefix [$prefix]\n";
252 my $prefix = longest_common_prefix @_;
254 my ( $an,$bn ) = ( $a,$b );
255 $an =~ s/^\Q$prefix\E//i; $an =~ s/\D+//g;
256 $bn =~ s/^\Q$prefix\E//i; $bn =~ s/\D+//g;
257 warn "## sort [$a] $an <=> $bn [$b]\n";
262 sub convert_pdf_page {
263 my ($pdf, $page, $path) = @_;
268 warn "# pdfimages $page $pdf -> $path/\n";
269 system 'pdfimages', '-f', $page, '-l', $page, '-q', '-j', '-p', $pdf, "$path/p";
272 # glob split on spaces!
273 opendir(my $dh, $path);
274 while (readdir($dh)) {
275 my $full = "$path/$_";
276 warn "## readdir $full\n";
277 next unless -f $full; # skip . ..
282 die "can't find images for $pdf in $path" unless $#parts >= 0;
284 @parts = sort_pages @parts;
286 my $image = "$path.jpg";
288 if ( $#parts == 0 ) { # single image
289 my $part = "$path/$parts[0]";
290 convert( $part => $image );
292 my @full = map { "$path/$_" } @parts;
293 montage( @full, '-tile', '1x'.scalar(@full), '-geometry', '+1+1' => $image );
296 die "$image: $!" unless -r $image;
301 warn sprintf("## page: %d in %.2f s for %s\n", $page, $t, $image);
305 sub render_pdf_page {
306 my ( $pdf, $page, $path ) = @_;
309 warn "# pdftocairo $pdf\n";
310 system('pdftocairo', '-jpeg', '-f', $page, '-l', $page, $pdf, $path);
312 my $image = sprintf( '%s-%03d.jpg', $path, $page );
314 die "can't find $image: $!" unless -r $image;
317 warn sprintf("## page: %d in %.2f s for %s\n", $page, $t, $image);
322 my($self, $env, $path, $fullpath) = @_;
324 my $req = Plack::Request->new($env);
326 my $dir_url = $env->{SCRIPT_NAME} . $env->{PATH_INFO};
330 if ( -f $path && $path =~ s{/([^/]+\.pdf)$}{} ) {
331 push @page_files, $1;
332 warn "# single pdf: $path / $1\n";
333 } elsif (-f $path ) {
335 if ( my $reduce = $req->param('reduce') ) {
336 $reduce = int($reduce); # BookReader javascript somethimes returns float
337 warn "# reduce $reduce $path\n";
339 my $cache_path = "cache/$dir_url.reduce.$reduce.jpg";
340 if ( $reduce <= 1 && $path =~ m/\.jpe?g$/ ) {
342 } elsif ( ! -e $cache_path ) {
343 make_basedir $cache_path;
344 convert( '-scale', ( 100 / $reduce ) .'%', $path => $cache_path );
347 return $self->SUPER::serve_path($env, $cache_path, $fullpath);
351 return $self->SUPER::serve_path($env, $path, $fullpath);
352 } elsif ( -d $path ) {
354 if ($dir_url !~ m{/$}) {
355 return $self->return_dir_redirect($env);
358 my $dh = DirHandle->new($path);
360 while (defined(my $ent = $dh->read)) {
362 push @children, $ent;
365 for my $basename (sort { $a cmp $b } @children) {
366 push @page_files, $basename if $basename =~ m/\d+\D?\.(jpg|gif|pdf)$/;
367 my $file = "$path/$basename";
368 my $url = $dir_url . $basename;
370 my $is_dir = -d $file;
374 $url = join '/', map {uri_escape($_)} split m{/}, $url;
381 my $mime_type = $is_dir ? 'directory' : ( Plack::MIME->mime_type($file) || 'text/plain' );
382 push @files, [ $url, $basename, $stat[7], $mime_type, HTTP::Date::time2str($stat[9]) ];
386 die "Unsupported format: $path";
390 @page_files = sort_pages @page_files;
391 warn "# page_files = ",dump( @page_files );
394 my $dir = Plack::Util::encode_html( $env->{PATH_INFO} );
397 if ( $req->param('bookreader') ) {
400 my $pages_path = "meta/$dir_url/bookreader.json";
401 if ( -e $pages_path ) {
402 $pages = decode_json read_file $pages_path;
404 foreach my $page ( @page_files ) {
405 my $image = Graphics::Magick->new;
406 if ( $page =~ m/\.pdf$/ ) {
407 die "$path/$page: $!" unless -r "$path/$page";
# Ask pdfinfo for the document metadata. A list-form pipe open is used instead
# of backticks so the file name never passes through a shell (file names with
# quotes, backticks or $(...) could otherwise inject commands); this matches
# the list-form system() calls used for the other external tools in this file.
open( my $pdfinfo_fh, '-|', 'pdfinfo', "$path/$page" ) or die "can't run pdfinfo $path/$page: $!";
my $info = do { local $/; <$pdfinfo_fh> };
close($pdfinfo_fh);
$info = '' unless defined $info;
warn "# pdfinfo $path/$page\n$info\n";
# Capture in list context rather than "my ... if ...": a conditional "my" has
# undefined behaviour and can keep a stale page count from the previous
# iteration of the enclosing loop, silently bypassing the check below.
my ($pdf_pages) = $info =~ m/Pages:\s*(\d+)/s;
# Report pdfinfo's output (not the empty page count) so the failure can be diagnosed.
die "can't find number of pages for $path/$page in:\n$info\n" unless $pdf_pages;
414 my $cache_path = "cache/$dir_url/$page";
415 my $txt = "$cache_path.txt";
417 system('pdftotext', "$path/$page", $txt);
418 warn "# pdftotext $txt ", -s $txt, " bytes\n";
420 my $is_bitmap = -s $txt == $pdf_pages;
422 $pdf_pages = $ENV{MAX_PAGES} if defined $ENV{MAX_PAGES} && $pdf_pages > $ENV{MAX_PAGES}; # FIXME
424 warn "DIAG: bitmap:$is_bitmap pdf_pages:$pdf_pages\n";
426 foreach my $nr ( 1 .. $pdf_pages ) {
427 my $page_url = $is_bitmap
428 ? convert_pdf_page( "$path/$page", $nr, "$cache_path.$nr" )
429 : render_pdf_page( "$path/$page", $nr, "$cache_path" )
431 warn "## ping $page_url\n";
432 my ( $w, $h, $size, $format ) = $image->ping($page_url);
433 warn "## image size $w*$h $size $format $page_url\n";
434 my $url = decode('utf-8',"/$page_url");
435 push @$pages, [ $url, $w, $h ] if $w && $h;
439 die "$path/$page: $!" unless -r "$path/$page";
440 my ( $w, $h, $size, $format ) = $image->ping("$path/$page");
441 warn "# image size $w*$h $size $format $path/$page\n";
442 my $url = decode('utf-8',"$dir_url/$page");
443 push @$pages, [ $url, $w, $h ] if $w && $h;
446 make_basedir $pages_path;
447 write_file $pages_path, => encode_json( $pages );
448 warn "# created $pages_path ", -s $pages_path, " bytes\n";
450 warn "# pages = ",dump($pages);
451 $page = sprintf $reader_page, $dir, encode_json( $pages ), $dir, $dir =~ m/\/$/ ? '..' : '.';
455 my $files = join "\n", map {
457 sprintf $dir_file, map Plack::Util::encode_html($_), @$f;
# Render the directory listing. When page files were detected, append the
# "Read" form followed by a dump of the detected page files.
# The dump contains raw file names, so it is HTML-encoded just like $dir and
# the table cells; otherwise a file name containing markup would be injected
# into the page verbatim.
$page = sprintf $dir_page, $dir, $dir, $files,
    @page_files ? '<form><input type=submit name=bookreader value="Read"></form>' . Plack::Util::encode_html( dump( [ @page_files ] ) ) : '';
465 return [ 200, ['Content-Type' => 'text/html; charset=utf-8'], [ $page ] ];
474 Plack::App::BookReader - Internet Archive Book Reader with directory index
479 use Plack::App::BookReader;
480 my $app = Plack::App::BookReader->new({ root => "/path/to/htdocs" })->to_app;
484 This is a static file server PSGI application with directory index a la Apache's mod_autoindex.
492 Document root directory. Defaults to the current directory.
499 Tatsuhiko Miyagawa (based on L<Plack::App::Directory>)