plack/lib/Plack/App/BookReader.pm

   1 package Plack::App::BookReader;
   2 use parent qw(Plack::App::File);
   3 use strict;
   4 use warnings;
   5 use Plack::Util;
   6 use HTTP::Date;
   7 use Plack::MIME;
   8 use DirHandle;
   9 use URI::Escape;
  10 use Plack::Request;
  11 use Data::Dump qw(dump);
  12 use File::Path qw(make_path remove_tree);
  13 use Graphics::Magick;
  14 use File::Slurp;
  15 use JSON;
  16 use Time::Piece ();
  17 use Time::Seconds 'ONE_YEAR';
  18 use autodie;
  19
  20 sub make_basedir {
  21         my $path = shift;
  22         return if -e $path;
  23         $path =~ s{/[^/]+$}{} || die "no dir/file in $path";
  24         File::Path::make_path $path;
  25 }
  26
  27 # Stolen from rack/directory.rb
  28 my $dir_file = "<tr><td class='name'><a href='%s'>%s</a></td><td class='size'>%s</td><td class='type'>%s</td><td class='mtime'>%s</td></tr>";
  29 my $dir_page = <<PAGE;
  30 <html><head>
  31   <title>%s</title>
  32   <meta http-equiv="content-type" content="text/html; charset=utf-8" />
  33   <style type='text/css'>
  34 table { width:100%%; }
  35 .name { text-align:left; }
  36 .size, .mtime { text-align:right; }
  37 .type { width:11em; }
  38 .mtime { width:15em; }
  39   </style>
  40 </head><body>
  41 <h1>%s</h1>
  42 <hr />
  43 <table>
  44   <tr>
  45     <th class='name'>Name</th>
  46     <th class='size'>Size</th>
  47     <th class='type'>Type</th>
  48     <th class='mtime'>Last Modified</th>
  49   </tr>
  50 %s
  51 </table>
  52 <hr />
  53 <code>%s</code>
  54 </body></html>
  55 PAGE
  56
  57 my $reader_page = <<'PAGE';
  58 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
  59 <html>
  60 <head>
  61     <title>%s</title>
  62
  63     <link rel="stylesheet" type="text/css" href="/BookReader/BookReader.css"/>
  64     <script type="text/javascript" src="http://www.archive.org/includes/jquery-1.4.2.min.js"></script>
  65     <script type="text/javascript" src="http://www.archive.org/bookreader/jquery-ui-1.8.5.custom.min.js"></script>
  66
  67     <script type="text/javascript" src="http://www.archive.org/bookreader/dragscrollable.js"></script>
  68     <script type="text/javascript" src="http://www.archive.org/bookreader/jquery.colorbox-min.js"></script>
  69     <script type="text/javascript" src="http://www.archive.org/bookreader/jquery.ui.ipad.js"></script>
  70     <script type="text/javascript" src="http://www.archive.org/bookreader/jquery.bt.min.js"></script>
  71
  72     <script type="text/javascript" src="/BookReader/BookReader.js"></script>
  73
  74 <style type="text/css">
  75
  76 /* Hide print and embed functionality */
  77 #BRtoolbar .embed, .print {
  78     display: none;
  79 }
  80
  81 </style>
  82
  83 <script type="text/javascript">
  84 $(document).ready( function() {
  85
  86 //
  87 // This file shows the minimum you need to provide to BookReader to display a book
  88 //
  89 // Copyright(c)2008-2009 Internet Archive. Software license AGPL version 3.
  90
  91 // Create the BookReader object
  92 var br = new BookReader();
  93
  94 var pages = %s;
  95
  96 // Return the width of a given page.  Here we assume all images are 800 pixels wide
  97 br.getPageWidth = function(index) {
  98         if ( ! pages[index] ) return;
  99     return parseInt( pages[index][1] );
 100 }
 101
 102 // Return the height of a given page.  Here we assume all images are 1200 pixels high
 103 br.getPageHeight = function(index) {
 104         if ( ! pages[index] ) return;
 105     return parseInt( pages[index][2] );
 106 }
 107
 108 // We load the images from archive.org -- you can modify this function to retrieve images
 109 // using a different URL structure
 110 br.getPageURI = function(index, reduce, rotate) {
 111         if ( ! pages[index] ) return;
 112     // reduce and rotate are ignored in this simple implementation, but we
 113     // could e.g. look at reduce and load images from a different directory
 114     // or pass the information to an image server
 115         var url = pages[index][0] + '?reduce='+reduce;
 116         console.debug('getPageURI', index, reduce, rotate, url);
 117     return url;
 118 }
 119
 120 // Return which side, left or right, that a given page should be displayed on
 121 br.getPageSide = function(index) {
 122     if (0 == (index & 0x1)) {
 123         return 'R';
 124     } else {
 125         return 'L';
 126     }
 127 }
 128
 129 // This function returns the left and right indices for the user-visible
 130 // spread that contains the given index.  The return values may be
 131 // null if there is no facing page or the index is invalid.
 132 br.getSpreadIndices = function(pindex) {
 133     var spreadIndices = [null, null];
 134     if ('rl' == this.pageProgression) {
 135         // Right to Left
 136         if (this.getPageSide(pindex) == 'R') {
 137             spreadIndices[1] = pindex;
 138             spreadIndices[0] = pindex + 1;
 139         } else {
 140             // Given index was LHS
 141             spreadIndices[0] = pindex;
 142             spreadIndices[1] = pindex - 1;
 143         }
 144     } else {
 145         // Left to right
 146         if (this.getPageSide(pindex) == 'L') {
 147             spreadIndices[0] = pindex;
 148             spreadIndices[1] = pindex + 1;
 149         } else {
 150             // Given index was RHS
 151             spreadIndices[1] = pindex;
 152             spreadIndices[0] = pindex - 1;
 153         }
 154     }
 155
 156     return spreadIndices;
 157 }
 158
 159 // For a given "accessible page index" return the page number in the book.
 160 //
 161 // For example, index 5 might correspond to "Page 1" if there is front matter such
 162 // as a title page and table of contents.
 163 br.getPageNum = function(index) {
 164     return index+1;
 165 }
 166
 167 // Total number of leafs
 168 br.numLeafs = pages.length;
 169
 170 // Book title and the URL used for the book title link
 171 br.bookTitle= '%s';
 172 br.bookUrl  = '%s';
 173
 174 // Override the path used to find UI images
 175 br.imagesBaseURL = '/BookReader/images/';
 176
 177 br.getEmbedCode = function(frameWidth, frameHeight, viewParams) {
 178     return "Embed code not supported in bookreader demo.";
 179 }
 180
 181 // Let's go!
 182 br.init();
 183
 184 // read-aloud and search need backend compenents and are not supported in the demo
 185 $('#BRtoolbar').find('.read').hide();
 186 $('#textSrch').hide();
 187 $('#btnSrch').hide();
 188
 189 } );
 190 </script>
 191
 192 </head>
 193 <body style="background-color: ##939598;">
 194
 195 <div id="BookReader">
 196     Internet Archive BookReader<br/>
 197
 198     <noscript>
 199     <p>
 200         The BookReader requires JavaScript to be enabled. Please check that your browser supports JavaScript and that it is enabled in the browser settings.
 201     </p>
 202     </noscript>
 203 </div>
 204
 205
 206 </body>
 207 </html>
 208 PAGE
 209
 210 sub should_handle {
 211     my($self, $file) = @_;
 212     return -d $file || -f $file;
 213 }
 214
 215 sub return_dir_redirect {
 216     my ($self, $env) = @_;
 217     my $uri = Plack::Request->new($env)->uri;
 218     return [ 301,
 219         [
 220             'Location' => $uri . '/',
 221             'Content-Type' => 'text/plain',
 222             'Content-Length' => 8,
 223         ],
 224         [ 'Redirect' ],
 225     ];
 226 }
 227
 228 sub serve_path {
 229     my($self, $env, $path, $fullpath) = @_;
 230
 231         my $req = Plack::Request->new($env);
 232
 233     my $dir_url = $env->{SCRIPT_NAME} . $env->{PATH_INFO};
 234
 235     if (-f $path) {
 236
 237                 if ( my $reduce = $req->param('reduce') ) {
 238                         $reduce = int($reduce); # BookReader javascript somethimes returns float
 239                         warn "# reduce $reduce $path\n";
 240
 241                         my $cache_path = "cache/$dir_url.reduce.$reduce.jpg";
 242                         if ( $reduce <= 1 && $path =~ m/\.jpe?g$/ ) {
 243                                 $cache_path = $path;
 244                         } elsif ( ! -e $cache_path ) {
 245                                 my $image = Graphics::Magick->new;
 246                                 warn "## Read $path ", -s $path, " bytes\n";
 247                                 $image->Read($path);
 248                                 my ( $w, $h ) = $image->Get('width','height');
 249                                 $image->Resize(
 250                                         width  => $w / $reduce,
 251                                         height => $h / $reduce
 252                                 );
 253                                 make_basedir $cache_path;
 254                                 $image->Write( filename => $cache_path );
 255                                 warn "# created $cache_path ", -s $cache_path, " bytes\n";
 256                         }
 257
 258                 return $self->SUPER::serve_path($env, $cache_path, $fullpath);
 259
 260                 }
 261
 262         return $self->SUPER::serve_path($env, $path, $fullpath);
 263     }
 264
 265     if ($dir_url !~ m{/$}) {
 266         return $self->return_dir_redirect($env);
 267     }
 268
 269     my @files = ();
 270
 271     my $dh = DirHandle->new($path);
 272     my @children;
 273     while (defined(my $ent = $dh->read)) {
 274         next if $ent eq '.';
 275         push @children, $ent;
 276     }
 277
 278         my @page_files;
 279
 280     for my $basename (sort { $a cmp $b } @children) {
 281                 push @page_files, $basename if $basename =~ m/\d+\.(jpg|gif|pdf)$/;
 282         my $file = "$path/$basename";
 283         my $url = $dir_url . $basename;
 284
 285         my $is_dir = -d $file;
 286         my @stat = stat _;
 287
 288
 289         $url = join '/', map {uri_escape($_)} split m{/}, $url;
 290
 291         if ($is_dir) {
 292             $basename .= "/";
 293             $url      .= "/";
 294         }
 295
 296         my $mime_type = $is_dir ? 'directory' : ( Plack::MIME->mime_type($file) || 'text/plain' );
 297         push @files, [ $url, $basename, $stat[7], $mime_type, HTTP::Date::time2str($stat[9]) ];
 298     }
 299
 300         warn "# page_files = ",dump( @page_files );
 301
 302     my $dir  = Plack::Util::encode_html( $env->{PATH_INFO} );
 303         my $page = 'empty';
 304
 305         if ( $req->param('bookreader') ) {
 306
 307                 my $pages; # []
 308                 my $pages_path = "cache/$dir_url/bookreader.json";
 309                 if ( 0 && -e $pages_path ) {
 310                         $pages = decode_json read_file $pages_path;
 311                 } else {
 312                         foreach my $page ( sort { $a <=> $b } @page_files ) {
 313                                 my $image = Graphics::Magick->new;
 314                                 if ( $page =~ m/\.pdf$/ ) {
 315                                         my $cache_dir = "cache/$dir_url/$page/";
 316                                         make_path $cache_dir;
 317                                         warn "# pdfimages $path/$page -> $cache_dir";
 318                                         system 'pdfimages', '-q', '-j', '-p', "$path/$page", $cache_dir;
 319
 320                                         # glob split on spaces!
 321                                         opendir(my $dh, $cache_dir);
 322                                         while (readdir($dh)) {
 323                                                 warn "## readdir = [$_]\n";
 324                                                 my $page = "$cache_dir/$_";
 325                                                 next unless -f $page; # skip . ..
 326
 327                                                 if ( $page !~ m/\.jpg$/ ) {
 328                                                         warn "# convert to jpg";
 329                                                         system 'gm', 'convert', $page, $page . '.jpg';
 330                                                         unlink $page;
 331                                                         $page .= '.jpg';
 332                                                 }
 333
 334                                                 warn "## ping $page\n";
 335                                                 die "$page: $!" unless -r $page;
 336                                                 my ( $w, $h, $size, $format ) = $image->ping($page);
 337                                                 warn "## image size $w*$h $size $format $page\n";
 338                                                 push @$pages, [ "/$page", $w, $h ] if $w && $h;
 339                                         }
 340                                         closedir $dh;
 341
 342                                 } else {
 343                                         die "$path/$page: $!" unless -r "$path/$page";
 344                                         my ( $w, $h, $size, $format ) = $image->ping("$path/$page");
 345                                         warn "# image size $w*$h $size $format $path/$page\n";
 346                                         push @$pages, [ "$dir_url/$page", $w, $h ] if $w && $h;
 347                                 }
 348                         }
 349                         make_basedir $pages_path;
 350                         write_file $pages_path => encode_json( $pages );
 351                         warn "# created $pages_path ", -s $pages_path, " bytes\n";
 352                 }
 353                 warn "# pages = ",dump($pages);
 354                 $page = sprintf $reader_page, $dir, encode_json( $pages ), $dir, '..';
 355
 356         } else {
 357
 358                 my $files = join "\n", map {
 359                         my $f = $_;
 360                         sprintf $dir_file, map Plack::Util::encode_html($_), @$f;
 361                 } @files;
 362
 363                 $page = sprintf $dir_page, $dir, $dir, $files,
 364                         @page_files ? '<form><input type=submit name=bookreader value="Read"></form>' . dump( [ @page_files ] ) : '';
 365
 366         }
 367
 368     return [ 200, ['Content-Type' => 'text/html; charset=utf-8'], [ $page ] ];
 369 }
 370
 371 1;
 372
 373 __END__
 374
 375 =head1 NAME
 376
 377 Plack::App::BookReader - Internet Archive Book Reader with directory index
 378
 379 =head1 SYNOPSIS
 380
 381   # app.psgi
 382   use Plack::App::BookReader;
 383   my $app = Plack::App::BookReader->new({ root => "/path/to/htdocs" })->to_app;
 384
 385 =head1 DESCRIPTION
 386
 387 This is a static file server PSGI application with directory index a la Apache's mod_autoindex.
 388
 389 =head1 CONFIGURATION
 390
 391 =over 4
 392
 393 =item root
 394
 395 Document root directory. Defaults to the current directory.
 396
 397 =back
 398
 399 =head1 AUTHOR
 400
 401 Dobrica Pavlinusic
  402 Tatsuhiko Miyagawa (based on L<Plack::App::Directory>)
 403
 404 =head1 SEE ALSO
 405
 406 L<Plack::App::File>
 407
 408 =cut
 409