use warnings;
use strict;
use autodie;
use Digest::MD5 qw(md5_hex);
use Digest::SHA1 qw(sha1_hex);
use Data::Dump qw(dump);

# Merge several ISI export files into a single file, dropping records whose
# content digest has already been seen.
#
# Usage:  script [FILE ...]
#   FILE ...  input files; defaults to /tmp/isi.*-*.txt when none are given.
#   SHA1      environment variable; a false value (e.g. SHA1=0) selects MD5
#             digests, anything else (or unset) selects SHA-1.
#
# The merged output is written to /tmp/isi.full.txt.  Progress goes to STDERR:
# the file name, then '.' for each record written and 'd' for each duplicate.

# Only fall back to SHA-1 when the variable is absent; a plain `|| 1` would
# make it impossible to ever choose MD5.
my $use_sha1 = defined $ENV{SHA1} ? $ENV{SHA1} : 1;

my @files = @ARGV;
@files = glob '/tmp/isi.*-*.txt' unless @files;
my $path = '/tmp/isi.full.txt';

warn "# ", scalar @files, " files to $path sha:$use_sha1\n";

open(my $out_fh, '>', $path);
print {$out_fh} "FN ISI Export Format\nVR 1.0\n";

my %seen;      # digest => number of times a record with that digest was read
my $report;    # per-file and total statistics, dumped at the end

# Return the first number of the "<from>-<to>" range found in a file name,
# or 0 when the name contains no such range.
sub _range_start {
    my ($name) = @_;
    return $name =~ m{(\d+)-\d+} ? $1 : 0;
}

# Write $record (followed by $separator) to the output unless a record with
# the same digest was already seen; update progress output and statistics.
sub _emit_record {
    my ( $file, $record, $separator ) = @_;

    my $digest = $use_sha1 ? sha1_hex($record) : md5_hex($record);

    if ( $seen{$digest}++ ) {
        print STDERR 'd';
        $report->{file}->{$file}->{duplicates}++;
    }
    else {
        print {$out_fh} $record . $separator;
        $report->{file}->{$file}->{records}++;
        $report->{total_records}++;
        print STDERR '.';
    }
    return;
}

# Order inputs numerically by the start of their range; the sort key is
# computed once per file, and ties fall back to the file name so the order
# is deterministic.
my @ordered =
    map  { $_->[1] }
    sort { $a->[0] <=> $b->[0] or $a->[1] cmp $b->[1] }
    map  { [ _range_start($_), $_ ] } @files;

foreach my $file (@ordered) {
    print STDERR $file;

    push @{ $report->{files} }, $file;

    open(my $fh, '<', $file);

    # The record buffer is per file, so an unterminated record can never be
    # glued onto the first record of the next file.
    my $rec = '';

    while ( my $line = <$fh> ) {

        # Per-file header/trailer lines are dropped; the merged file gets
        # exactly one header and one trailer of its own.
        next if $line =~ m/^(FN|VR|EF)/;

        # Any line that is not purely line terminators belongs to the
        # current record.
        if ( $line !~ m/^[\r\n]+$/s ) {
            $rec .= $line;
            next;
        }

        # A blank line ends the current record.  Leading or repeated blank
        # lines carry no record and are skipped.
        next if $rec eq '';

        _emit_record( $file, $rec, $line );
        $rec = '';
    }
    close $fh;

    # Flush a final record that was not followed by a blank line.
    if ( $rec ne '' ) {
        $rec .= "\n" unless $rec =~ m/\n\z/;
        _emit_record( $file, $rec, "\n" );
    }

    warn "\n";
}

print {$out_fh} "EF\n";
close $out_fh;

# Fully qualified call so it can never be mistaken for Perl's built-in dump.
warn "# $path ", ( -s $path ), " ", Data::Dump::dump($report);