MARC import: part 5 of large file support
author Galen Charlton <galen.charlton@liblime.com>
Mon, 26 Nov 2007 21:40:57 +0000 (15:40 -0600)
committer Joshua Ferraro <jmf@liblime.com>
Tue, 27 Nov 2007 02:05:34 +0000 (20:05 -0600)
Staging job now gets moved to background so that
it does not get killed if it takes longer
than the Apache timeout.

Added AJAX monitoring of job status.

Signed-off-by: Joshua Ferraro <jmf@liblime.com>
C4/BackgroundJob.pm
koha-tmpl/intranet-tmpl/prog/en/includes/background-job.inc [new file with mode: 0644]
koha-tmpl/intranet-tmpl/prog/en/modules/tools/stage-marc-import.tmpl
tools/background-job-progress.pl [new file with mode: 0755]
tools/stage-marc-import.pl
tools/upload-file-progress.pl

index 9f3acff..22baaae 100644 (file)
@@ -105,19 +105,7 @@ sub _serialize {
 
     my $prefix = "job_" . $self->{'jobID'};
     my $session = get_session($self->{'sessionID'});
-    $session->param("$prefix.name", $self->{'name'});
-    $session->param("$prefix.invoker", $self->{'invoker'});
-    $session->param("$prefix.size", $self->{'size'});
-    $session->param("$prefix.progress", $self->{'size'});
-    $session->param("$prefix.status", $self->{'size'});
-    if (exists $self->{'results'}) {
-        my @keys = ();
-        foreach my $key (keys %{ $self->{'results'} }) {
-            $session->param("$prefix.results.$key", $self->{'results'}->{$key});
-            push @keys, $key;
-        }
-        $session->param("$prefix.results_keys", join("\t", @keys));
-    }
+    $session->param($prefix, $self);
     $session->flush();
 }
 
@@ -135,7 +123,7 @@ Read-only accessor for job ID.
 
 sub id {
     my $self = shift;
-    return $self->{'id'};
+    return $self->{'jobID'};
 }
 
 =head2 name
@@ -264,8 +252,8 @@ the results of the job.
 sub finish {
     my $self = shift;
     my $results_hashref = shift;
-    my $self->{'status'} = 'completed';
-    my $self->{'results'} = $results_hashref;
+    $self->{'status'} = 'completed';
+    $self->{'results'} = $results_hashref;
     $self->_serialize();
 }
 
@@ -309,24 +297,10 @@ sub fetch {
 
     my $session = get_session($sessionID);
     my $prefix = "job_$jobID";
-    unless (defined $session->param("$prefix.name")) {
+    unless (defined $session->param($prefix)) {
         return undef;
     }
-    my $self = {};
-    
-    $self->{'name'} = $session->param("$prefix.name");
-    $self->{'status'} = $session->param("$prefix.status");
-    $self->{'invoker'} = $session->param("$prefix.invoker");
-    $self->{'size'} = $session->param("$prefix.size");
-    $self->{'progress'} = $session->param("$prefix.progress");
-    if (defined(my $keys = $session->param("$prefix.results_keys"))) {
-        my @keys = split /\t/, $keys;
-        $self->{'results'} = {};
-        foreach my $key (@keys) {
-            $self->{'results'}->{$key} = $session->param("$prefix.results.$key");
-        }
-    }
-
+    my $self = $session->param($prefix);
     bless $self, $class;
     return $self;
 }
diff --git a/koha-tmpl/intranet-tmpl/prog/en/includes/background-job.inc b/koha-tmpl/intranet-tmpl/prog/en/includes/background-job.inc
new file mode 100644 (file)
index 0000000..fe626ba
--- /dev/null
@@ -0,0 +1,82 @@
+<!-- Background job progress indicator -->
+<script type="text/javascript">
+    //<![CDATA[
+    var jobID = '';
+    var savedForm;
+    // Poll the server for the progress of the job identified by the
+    // global jobID and refresh the #jobprogress indicator.  Re-arms itself
+    // every 200 ms until the computed percentage reaches 100 (a job whose
+    // status is 'completed' always counts as 100), then calls completeJob().
+    function updateJobProgress() {
+        var progressUrl = "/cgi-bin/koha/tools/background-job-progress.pl?jobID=" + jobID;
+        $.getJSON(progressUrl, function(json) {
+            var percentage = (json.job_status == 'completed')
+                ? 100
+                : Math.floor(100 * json.progress / json.job_size);
+            $("#jobprogress").text(percentage + '%');
+            if (percentage < 100) {
+                setTimeout("updateJobProgress()",200);
+                return;
+            }
+            completeJob();
+        });
+    }
+
+    // Record the finished job's ID in the saved form's completedJobID
+    // field and submit that form.
+    function completeJob() {
+        var form = savedForm;
+        form.completedJobID.value = jobID;
+        form.submit();
+    }
+
+    // Submit form f as a background job and activate the progress
+    // indicator.
+    //
+    // If the form has a "runinbackground" field, its inputs are serialized
+    // and sent to f.action via AJAX; the JSON reply supplies the job ID
+    // that updateJobProgress() polls.  Otherwise the form is submitted
+    // normally.
+    //
+    // Always returns false.
+    function submitBackgroundJob(f) {
+        if (!f.runinbackground) {
+            // background job support not enabled,
+            // so just do a normal form submission
+            f.submit();
+            return false;
+        }
+
+        // remember the form for completeJob(), disable the submit button,
+        // and set the hidden field read by the CGI script
+        savedForm = f;
+        f.mainformsubmit.disabled = true;
+        f.runinbackground.value = 'true';
+
+        // gather up form submission
+        var inputs = [];
+        $(':input', f).each(function() {
+            if (this.type == 'button') {
+                return; // buttons carry no data
+            }
+            if ((this.type == 'radio' || this.type == 'checkbox') && !this.checked) {
+                return; // unchecked toggles are not part of a form submission
+            }
+            // encodeURIComponent rather than escape: escape() leaves '+'
+            // unencoded and does not produce UTF-8 percent-escapes
+            inputs.push(encodeURIComponent(this.name) + '=' + encodeURIComponent(this.value));
+        });
+
+        // show the indicator and submit the request; the first poll fires
+        // after 2 seconds (presumably so the request below can return a
+        // job ID first)
+        $("#jobstatus").show();
+        setTimeout("updateJobProgress()", 2000);
+        $.ajax({
+            data: inputs.join('&'),
+            url: f.action,
+            dataType: 'json',
+            success: function(json) {
+                jobID = json.jobID;
+            },
+            error: function(xml, textStatus) {
+                alert('Failed to submit form: ' + textStatus);
+            }
+        });
+
+        return false;
+    }
+    //]]>
+</script>
index aae7aa3..f284f33 100644 (file)
@@ -2,6 +2,7 @@
 <title>Koha &rsaquo; Tools &rsaquo; Stage MARC Records For Import</title>
 <!-- TMPL_INCLUDE NAME="doc-head-close.inc" -->
 <!-- TMPL_INCLUDE NAME="file-upload.inc" -->
+<!-- TMPL_INCLUDE NAME="background-job.inc" -->
 <script type="text/javascript">
 //<![CDATA[
 
@@ -9,8 +10,9 @@ function CheckForm(f) {
     if (f.uploadedfileid.value == '') {
         alert('Please upload a file first.');
     } else {
-        f.submit();
+        return submitBackgroundJob(f);
     }
+    return false;
 }
 
 //]]!>
@@ -68,8 +70,12 @@ function CheckForm(f) {
         <div id="fileuploadfailed" style="display:none"></div>
         </div>
        </li>
+</ol></fieldset>
     <form method="post" action="<!-- TMPL_VAR name="SCRIPT_NAME" -->" enctype="multipart/form-data">
+<fieldset class="rows">
         <input type="hidden" name="uploadedfileid" id="uploadedfileid" value="" />
+        <input type="hidden" name="runinbackground" id="runinbackground" value="" />
+        <input type="hidden" name="completedJobID" id="completedJobID" value="" />
        <li>
                <label for="comments">Notes about this file: </label>
                <input type="text" id="comments" name="comments" />
@@ -106,7 +112,9 @@ function CheckForm(f) {
       </li>
     </ol>
   </fieldset>
-  <fieldset class="action"><input type="button" id="mainformsubmit" onclick="CheckForm(this.form);" value="Stage for import" /></fieldset>
+  <fieldset class="action"><input type="button" id="mainformsubmit" onclick="return CheckForm(this.form);" value="Stage for import" /></fieldset>
+  <div id="jobstatus" style="display:none">Job progress: <span id="jobprogress">0%</span></div>
+  <div id="jobfailed" style="display:none"></div>
 </form>
 <!-- /TMPL_IF -->
 
diff --git a/tools/background-job-progress.pl b/tools/background-job-progress.pl
new file mode 100755 (executable)
index 0000000..9219e4e
--- /dev/null
@@ -0,0 +1,56 @@
+#!/usr/bin/perl
+
+# Copyright (C) 2007 LibLime
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA  02111-1307 USA
+
+use strict;
+
+# standard or CPAN modules used
+use IO::File;
+use CGI;
+use CGI::Session;
+use C4::Context;
+use C4::Auth qw/check_cookie_auth/;
+use C4::BackgroundJob;
+use CGI::Cookie; # need to check cookies before
+                 # having CGI parse the POST request
+
+# Report the progress of a background job to the AJAX progress
+# indicator.  Requires a session cookie that passes check_cookie_auth
+# with the "tools" flag; otherwise a reply of "{ progress: 0 }" is sent.
+my $input = CGI->new;
+my %cookies = CGI::Cookie->fetch;
+
+# a request without a session cookie is treated as unauthenticated
+# instead of dying on a method call on an undefined cookie
+my $session_cookie = $cookies{'CGISESSID'};
+my ($auth_status, $sessionID) = defined $session_cookie
+    ? check_cookie_auth($session_cookie->value, { tools => 1 })
+    : ('failed', undef);
+if ($auth_status ne "ok") {
+    my $reply = CGI->new("");
+    print $reply->header(-type => 'text/html');
+    print "{ progress: 0 }";
+    exit 0;
+}
+
+my $jobID = $input->param('jobID');
+my $job = C4::BackgroundJob->fetch($sessionID, $jobID);
+
+# defaults reported when the job cannot be fetched
+my $reported_progress = 0;
+my $job_size = 100;
+my $job_status = 'running';
+if (defined $job) {
+    my $progress = $job->progress();
+    my $size     = $job->size();
+    my $status   = $job->status();
+
+    # keep the defaults for undefined values so the reply stays well-formed
+    $reported_progress = $progress if defined $progress;
+    $job_size          = $size     if defined $size;
+    $job_status        = $status   if defined $status;
+}
+
+my $reply = CGI->new("");
+print $reply->header(-type => 'text/html');
+# response will be sent back as JSON
+print "{ progress: $reported_progress, job_size: $job_size, job_status: '$job_status' }";
index 10ec37a..aa21254 100755 (executable)
@@ -13,7 +13,7 @@
 #
 # Koha is free software; you can redistribute it and/or modify it under the
 # terms of the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any later
+# Foundation; either version 2 of the License, or (at your option) any later
 # version.
 #
 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
@@ -46,6 +46,8 @@ my $input = new CGI;
 my $dbh = C4::Context->dbh;
 
 my $fileID=$input->param('uploadedfileid');
+my $runinbackground = $input->param('runinbackground');
+my $completedJobID = $input->param('completedJobID');
 my $matcher_id = $input->param('matcher');
 my $parse_items = $input->param('parse_items');
 my $comments = $input->param('comments');
@@ -62,35 +64,81 @@ my ($template, $loggedinuser, $cookie)
 $template->param(SCRIPT_NAME => $ENV{'SCRIPT_NAME'},
                                                uploadmarc => $fileID);
 
-if ($fileID) {
-    my %cookies = parse CGI::Cookie($cookie);
-    my $uploaded_file = C4::UploadedFile->fetch($cookies{'CGISESSID'}->value, $fileID);
+my %cookies = parse CGI::Cookie($cookie);
+my $sessionID = $cookies{'CGISESSID'}->value;
+if ($completedJobID) {
+    my $job = C4::BackgroundJob->fetch($sessionID, $completedJobID);
+    my $results = $job->results();
+    $template->param(map { $_ => $results->{$_} } keys %{ $results });
+} elsif ($fileID) {
+    my $uploaded_file = C4::UploadedFile->fetch($sessionID, $fileID);
     my $fh = $uploaded_file->fh();
        my $marcrecord='';
        while (<$fh>) {
                $marcrecord.=$_;
        }
 
-    my $job_size = scalar($marcrecord =~ /\035/g);
-    # if we're matching, job size is doubled
-    $job_size *= 2 if ($matcher_id ne "");
+    my $filename = $uploaded_file->name();
+    my $job = undef;
+    my $staging_callback = sub { };
+    my $matching_callback = sub { };
+    warn "$matcher_id is the matcher";
+    if ($runinbackground) {
+        my $job_size = () = $marcrecord =~ /\035/g;
+        # if we're matching, job size is doubled
+        $job_size *= 2 if ($matcher_id ne "");
+        $job = C4::BackgroundJob->new($sessionID, $filename, $ENV{'SCRIPT_NAME'}, $job_size);
+        my $jobID = $job->id();
+
+        # fork off
+        if (my $pid = fork) {
+            # parent
+            # return job ID as JSON
+            
+            # prevent parent exiting from
+            # destroying the kid's database handle
+            # FIXME: according to DBI doc, this may not work for Oracle
+            $dbh->{InactiveDestroy}  = 1;
+
+            my $reply = CGI->new("");
+            print $reply->header(-type => 'text/html');
+            print "{ jobID: '$jobID' }";
+            exit 0;
+        } elsif (defined $pid) {
+            # child
+            # close STDOUT to signal to Apache that
+            # we're now running in the background
+            close STDOUT;
+            close STDERR;
+        } else {
+            # fork failed, so exit immediately
+            warn "fork failed while attempting to run $ENV{'SCRIPT_NAME'} as a background job";
+            exit 0;
+        }
+
+        # if we get here, we're a child that has detached
+        # itself from Apache
+        $staging_callback = staging_progress_callback($job);
+        $matching_callback = matching_progress_callback($job);
+
+    }
 
     # FIXME branch code
-    my $filename = $uploaded_file->name();
-    my $job = C4::BackgroundJob->new($cookies{'CGISESSID'}->value, $filename, $ENV{'SCRIPT_NAME'}, $job_size);
     my ($batch_id, $num_valid, $num_items, @import_errors) = BatchStageMarcRecords($syntax, $marcrecord, $filename, 
                                                                                    $comments, '', $parse_items, 0,
-                                                                                   100, staging_progress_callback($job));
+                                                                                   50, staging_progress_callback($job));
     my $num_with_matches = 0;
     my $checked_matches = 0;
     my $matcher_failed = 0;
     my $matcher_code = "";
     if ($matcher_id ne "") {
+        warn "we must match $matcher_id";
         my $matcher = C4::Matcher->fetch($matcher_id);
         if (defined $matcher) {
+            warn "failed to retrieve";
             $checked_matches = 1;
             $matcher_code = $matcher->code();
-            $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher, 10, 100, matching_progress_callback($job));
+            $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher, 10, 50, matching_progress_callback($job));
             SetImportBatchMatcher($batch_id, $matcher_id);
         } else {
             $matcher_failed = 1;
@@ -108,18 +156,20 @@ if ($fileID) {
         matcher_code => $matcher_code,
         import_batch_id => $batch_id
     };
-    $job->finish($results);
-
-       $template->param(staged => $num_valid,
-                        matched => $num_with_matches,
-                     num_items => $num_items,
-                     import_errors => scalar(@import_errors),
-                     total => $num_valid + scalar(@import_errors),
-                     checked_matches => $checked_matches,
-                     matcher_failed => $matcher_failed,
-                     matcher_code => $matcher_code,
-                     import_batch_id => $batch_id
-                    );
+    if ($runinbackground) {
+        $job->finish($results);
+    } else {
+           $template->param(staged => $num_valid,
+                            matched => $num_with_matches,
+                         num_items => $num_items,
+                         import_errors => scalar(@import_errors),
+                         total => $num_valid + scalar(@import_errors),
+                         checked_matches => $checked_matches,
+                         matcher_failed => $matcher_failed,
+                         matcher_code => $matcher_code,
+                         import_batch_id => $batch_id
+                        );
+    }
 
 } else {
     # initial form
@@ -135,14 +185,15 @@ sub staging_progress_callback {
     my $job = shift;
     return sub {
         my $progress = shift;
-        $job->progress($job->progress() + $progress);
+        $job->progress($progress);
     }
 }
 
 sub matching_progress_callback {
     my $job = shift;
+    my $start_progress = $job->progress();
     return sub {
         my $progress = shift;
-        $job->progress($job->progress() + $progress);
+        $job->progress($start_progress + $progress);
     }
 }
index 3f740f7..a89a62f 100755 (executable)
@@ -29,7 +29,6 @@ use C4::UploadedFile;
 use CGI::Cookie; # need to check cookies before
                  # having CGI parse the POST request
 
-my %cookies = fetch CGI::Cookie;
 my %cookies = fetch CGI::Cookie;
 my ($auth_status, $sessionID) = check_cookie_auth($cookies{'CGISESSID'}->value, { tools => 1 });
 if ($auth_status ne "ok") {