plack/lib/Plack/App/BookReader.pm

   1 package Plack::App::BookReader;
   2 use parent qw(Plack::App::File);
   3 use strict;
   4 use warnings;
   5 use Plack::Util;
   6 use HTTP::Date;
   7 use Plack::MIME;
   8 use DirHandle;
   9 use URI::Escape;
  10 use Plack::Request;
  11 use Data::Dump qw(dump);
  12 use File::Path qw(make_path remove_tree);
  13 use Graphics::Magick;
  14 use File::Slurp;
  15 use JSON;
  16 use autodie;
  17 use Time::HiRes qw(time);
  18
  19 sub make_basedir {
  20         my $path = shift;
  21         return if -e $path;
  22         $path =~ s{/[^/]+$}{} || die "no dir/file in $path";
  23         File::Path::make_path $path;
  24 }
  25
  26 # Stolen from rack/directory.rb
  27 my $dir_file = "<tr><td class='name'><a href='%s'>%s</a></td><td class='size'>%s</td><td class='type'>%s</td><td class='mtime'>%s</td></tr>";
  28 my $dir_page = <<PAGE;
  29 <html><head>
  30   <title>%s</title>
  31   <meta http-equiv="content-type" content="text/html; charset=utf-8" />
  32   <style type='text/css'>
  33 table { width:100%%; }
  34 .name { text-align:left; }
  35 .size, .mtime { text-align:right; }
  36 .type { width:11em; }
  37 .mtime { width:15em; }
  38   </style>
  39 </head><body>
  40 <h1>%s</h1>
  41 <hr />
  42 <table>
  43   <tr>
  44     <th class='name'>Name</th>
  45     <th class='size'>Size</th>
  46     <th class='type'>Type</th>
  47     <th class='mtime'>Last Modified</th>
  48   </tr>
  49 %s
  50 </table>
  51 <hr />
  52 <code>%s</code>
  53 </body></html>
  54 PAGE
  55
  56 my $reader_page = <<'PAGE';
  57 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
  58 <html>
  59 <head>
  60     <title>%s</title>
  61
  62     <link rel="stylesheet" type="text/css" href="/BookReader/BookReader.css"/>
  63     <script type="text/javascript" src="http://www.archive.org/includes/jquery-1.4.2.min.js"></script>
  64     <script type="text/javascript" src="http://www.archive.org/bookreader/jquery-ui-1.8.5.custom.min.js"></script>
  65
  66     <script type="text/javascript" src="http://www.archive.org/bookreader/dragscrollable.js"></script>
  67     <script type="text/javascript" src="http://www.archive.org/bookreader/jquery.colorbox-min.js"></script>
  68     <script type="text/javascript" src="http://www.archive.org/bookreader/jquery.ui.ipad.js"></script>
  69     <script type="text/javascript" src="http://www.archive.org/bookreader/jquery.bt.min.js"></script>
  70
  71     <script type="text/javascript" src="/BookReader/BookReader.js"></script>
  72
  73 <style type="text/css">
  74
  75 /* Hide print and embed functionality */
  76 #BRtoolbar .embed, .print {
  77     display: none;
  78 }
  79
  80 </style>
  81
  82 <script type="text/javascript">
  83 $(document).ready( function() {
  84
  85 //
  86 // This file shows the minimum you need to provide to BookReader to display a book
  87 //
  88 // Copyright(c)2008-2009 Internet Archive. Software license AGPL version 3.
  89
  90 // Create the BookReader object
  91 var br = new BookReader();
  92
  93 var pages = %s;
  94
  95 // Return the width of a given page.  Here we assume all images are 800 pixels wide
  96 br.getPageWidth = function(index) {
  97         if ( ! pages[index] ) return;
  98     return parseInt( pages[index][1] );
  99 }
 100
 101 // Return the height of a given page.  Here we assume all images are 1200 pixels high
 102 br.getPageHeight = function(index) {
 103         if ( ! pages[index] ) return;
 104     return parseInt( pages[index][2] );
 105 }
 106
 107 // We load the images from archive.org -- you can modify this function to retrieve images
 108 // using a different URL structure
 109 br.getPageURI = function(index, reduce, rotate) {
 110         if ( ! pages[index] ) return;
 111     // reduce and rotate are ignored in this simple implementation, but we
 112     // could e.g. look at reduce and load images from a different directory
 113     // or pass the information to an image server
 114         var url = pages[index][0] + '?reduce='+reduce;
 115         console.debug('getPageURI', index, reduce, rotate, url);
 116     return url;
 117 }
 118
 119 // Return which side, left or right, that a given page should be displayed on
 120 br.getPageSide = function(index) {
 121     if (0 == (index & 0x1)) {
 122         return 'R';
 123     } else {
 124         return 'L';
 125     }
 126 }
 127
 128 // This function returns the left and right indices for the user-visible
 129 // spread that contains the given index.  The return values may be
 130 // null if there is no facing page or the index is invalid.
 131 br.getSpreadIndices = function(pindex) {
 132     var spreadIndices = [null, null];
 133     if ('rl' == this.pageProgression) {
 134         // Right to Left
 135         if (this.getPageSide(pindex) == 'R') {
 136             spreadIndices[1] = pindex;
 137             spreadIndices[0] = pindex + 1;
 138         } else {
 139             // Given index was LHS
 140             spreadIndices[0] = pindex;
 141             spreadIndices[1] = pindex - 1;
 142         }
 143     } else {
 144         // Left to right
 145         if (this.getPageSide(pindex) == 'L') {
 146             spreadIndices[0] = pindex;
 147             spreadIndices[1] = pindex + 1;
 148         } else {
 149             // Given index was RHS
 150             spreadIndices[1] = pindex;
 151             spreadIndices[0] = pindex - 1;
 152         }
 153     }
 154
 155     return spreadIndices;
 156 }
 157
 158 // For a given "accessible page index" return the page number in the book.
 159 //
 160 // For example, index 5 might correspond to "Page 1" if there is front matter such
 161 // as a title page and table of contents.
 162 br.getPageNum = function(index) {
 163     return index+1;
 164 }
 165
 166 // Total number of leafs
 167 br.numLeafs = pages.length;
 168
 169 // Book title and the URL used for the book title link
 170 br.bookTitle= '%s';
 171 br.bookUrl  = '%s';
 172
 173 // Override the path used to find UI images
 174 br.imagesBaseURL = '/BookReader/images/';
 175
 176 br.getEmbedCode = function(frameWidth, frameHeight, viewParams) {
 177     return "Embed code not supported in bookreader demo.";
 178 }
 179
 180 // Let's go!
 181 br.init();
 182
 183 // read-aloud and search need backend compenents and are not supported in the demo
 184 $('#BRtoolbar').find('.read').hide();
 185 $('#textSrch').hide();
 186 $('#btnSrch').hide();
 187
 188 } );
 189 </script>
 190
 191 </head>
 192 <body style="background-color: ##939598;">
 193
 194 <div id="BookReader">
 195     Internet Archive BookReader<br/>
 196
 197     <noscript>
 198     <p>
 199         The BookReader requires JavaScript to be enabled. Please check that your browser supports JavaScript and that it is enabled in the browser settings.
 200     </p>
 201     </noscript>
 202 </div>
 203
 204
 205 </body>
 206 </html>
 207 PAGE
 208
 209 sub should_handle {
 210     my($self, $file) = @_;
 211     return -d $file || -f $file;
 212 }
 213
 214 sub return_dir_redirect {
 215     my ($self, $env) = @_;
 216     my $uri = Plack::Request->new($env)->uri;
 217     return [ 301,
 218         [
 219             'Location' => $uri . '/',
 220             'Content-Type' => 'text/plain',
 221             'Content-Length' => 8,
 222         ],
 223         [ 'Redirect' ],
 224     ];
 225 }
 226
 227 sub convert {
 228         warn "# convert ",dump(@_);
 229         my $t = time();
 230         system 'gm', 'convert', @_;
 231         $t = time() - $t;
 232         warn sprintf("## created %d bytes in %.2f s %s\n", -s $_[-1], $t, $_[-1]);
 233 }
 234
 235 sub serve_path {
 236     my($self, $env, $path, $fullpath) = @_;
 237
 238         my $req = Plack::Request->new($env);
 239
 240     my $dir_url = $env->{SCRIPT_NAME} . $env->{PATH_INFO};
 241
 242     if (-f $path) {
 243
 244                 if ( my $reduce = $req->param('reduce') ) {
 245                         $reduce = int($reduce); # BookReader javascript somethimes returns float
 246                         warn "# reduce $reduce $path\n";
 247
 248                         my $cache_path = "cache/$dir_url.reduce.$reduce.jpg";
 249                         if ( $reduce <= 1 && $path =~ m/\.jpe?g$/ ) {
 250                                 $cache_path = $path;
 251                         } elsif ( ! -e $cache_path ) {
 252                                 make_basedir $cache_path;
 253                                 convert( '-scale', ( 100 / $reduce ) .'%', $path => $cache_path );
 254                         }
 255
 256                 return $self->SUPER::serve_path($env, $cache_path, $fullpath);
 257
 258                 }
 259
 260         return $self->SUPER::serve_path($env, $path, $fullpath);
 261     }
 262
 263     if ($dir_url !~ m{/$}) {
 264         return $self->return_dir_redirect($env);
 265     }
 266
 267     my @files = ();
 268
 269     my $dh = DirHandle->new($path);
 270     my @children;
 271     while (defined(my $ent = $dh->read)) {
 272         next if $ent eq '.';
 273         push @children, $ent;
 274     }
 275
 276         my @page_files;
 277
 278     for my $basename (sort { $a cmp $b } @children) {
 279                 push @page_files, $basename if $basename =~ m/\d+\.(jpg|gif|pdf)$/;
 280         my $file = "$path/$basename";
 281         my $url = $dir_url . $basename;
 282
 283         my $is_dir = -d $file;
 284         my @stat = stat _;
 285
 286
 287         $url = join '/', map {uri_escape($_)} split m{/}, $url;
 288
 289         if ($is_dir) {
 290             $basename .= "/";
 291             $url      .= "/";
 292         }
 293
 294         my $mime_type = $is_dir ? 'directory' : ( Plack::MIME->mime_type($file) || 'text/plain' );
 295         push @files, [ $url, $basename, $stat[7], $mime_type, HTTP::Date::time2str($stat[9]) ];
 296     }
 297
 298         warn "# page_files = ",dump( @page_files );
 299
 300     my $dir  = Plack::Util::encode_html( $env->{PATH_INFO} );
 301         my $page = 'empty';
 302
 303         if ( $req->param('bookreader') ) {
 304
 305                 my $pages; # []
 306                 my $pages_path = "cache/$dir_url/bookreader.json";
 307                 if ( -e $pages_path ) {
 308                         $pages = decode_json read_file $pages_path;
 309                 } else {
 310                         foreach my $page ( sort { $a <=> $b } @page_files ) {
 311                                 my $image = Graphics::Magick->new;
 312                                 if ( $page =~ m/\.pdf$/ ) {
 313                                         my $cache_dir = "cache/$dir_url/$page/";
 314                                         make_path $cache_dir;
 315                                         warn "# pdfimages $path/$page -> $cache_dir";
 316                                         system 'pdfimages', '-q', '-j', '-p', "$path/$page", $cache_dir;
 317
 318                                         # glob split on spaces!
 319                                         opendir(my $dh, $cache_dir);
 320                                         while (readdir($dh)) {
 321                                                 warn "## readdir = [$_]\n";
 322                                                 my $page = "$cache_dir/$_";
 323                                                 next unless -f $page; # skip . ..
 324
 325                                                 if ( $page !~ m/\.jpg$/ ) {
 326                                                         convert( $page => $page . '.jpg' );
 327                                                         unlink $page;
 328                                                         $page .= '.jpg';
 329                                                 }
 330
 331                                                 warn "## ping $page\n";
 332                                                 die "$page: $!" unless -r $page;
 333                                                 my ( $w, $h, $size, $format ) = $image->ping($page);
 334                                                 warn "## image size $w*$h $size $format $page\n";
 335                                                 push @$pages, [ "/$page", $w, $h ] if $w && $h;
 336                                         }
 337                                         closedir $dh;
 338
 339                                 } else {
 340                                         die "$path/$page: $!" unless -r "$path/$page";
 341                                         my ( $w, $h, $size, $format ) = $image->ping("$path/$page");
 342                                         warn "# image size $w*$h $size $format $path/$page\n";
 343                                         push @$pages, [ "$dir_url/$page", $w, $h ] if $w && $h;
 344                                 }
 345                         }
 346                         make_basedir $pages_path;
 347                         write_file $pages_path => encode_json( $pages );
 348                         warn "# created $pages_path ", -s $pages_path, " bytes\n";
 349                 }
 350                 warn "# pages = ",dump($pages);
 351                 $page = sprintf $reader_page, $dir, encode_json( $pages ), $dir, '..';
 352
 353         } else {
 354
 355                 my $files = join "\n", map {
 356                         my $f = $_;
 357                         sprintf $dir_file, map Plack::Util::encode_html($_), @$f;
 358                 } @files;
 359
 360                 $page = sprintf $dir_page, $dir, $dir, $files,
 361                         @page_files ? '<form><input type=submit name=bookreader value="Read"></form>' . dump( [ @page_files ] ) : '';
 362
 363         }
 364
 365     return [ 200, ['Content-Type' => 'text/html; charset=utf-8'], [ $page ] ];
 366 }
 367
 368 1;
 369
 370 __END__
 371
 372 =head1 NAME
 373
 374 Plack::App::BookReader - Internet Archive Book Reader with directory index
 375
 376 =head1 SYNOPSIS
 377
 378   # app.psgi
 379   use Plack::App::BookReader;
 380   my $app = Plack::App::BookReader->new({ root => "/path/to/htdocs" })->to_app;
 381
 382 =head1 DESCRIPTION
 383
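 384 This is a static file server PSGI application with a directory index a la Apache's mod_autoindex. Directories that contain numbered page images (jpg, gif or pdf) also get a "Read" button that opens them in the Internet Archive BookReader.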
 385
 386 =head1 CONFIGURATION
 387
 388 =over 4
 389
 390 =item root
 391
 392 Document root directory. Defaults to the current directory.
 393
 394 =back
 395
 396 =head1 AUTHOR
 397
 398 Dobrica Pavlinusic
 399 Tatsuhiko Miyagawa (based on L<Plack::App::Directory>)
 400
 401 =head1 SEE ALSO
 402
 403 L<Plack::App::File>
 404
 405 =cut
 406