--- /dev/null
+package Net::FTP::RetrHandle;
+our $VERSION = '0.2';
+
+use warnings;
+use strict;
+
+use constant DEFAULT_MAX_SKIPSIZE => 1024 * 1024 * 2;
+use constant DEFAULT_BLOCKSIZE => 10240; # Net::FTP's default
+
+use base 'IO::Seekable';
+# We don't use base 'IO::Handle'; it currently confuses Archive::Zip.
+
+use Carp;
+use Scalar::Util;
+
+
+=head1 NAME
+
+Net::FTP::RetrHandle - Tied or IO::Handle-compatible interface to a file retrieved by FTP
+
+=head1 SYNOPSIS
+
+Provides a file reading interface for reading all or parts of files
+located on a remote FTP server, including emulation of C<seek> and
+support for downloading only the parts of the file requested.
+
+=head1 DESCRIPTION
+
+Support for skipping the beginning of the file is implemented with the
+FTP C<REST> command, which starts a retrieval at any point in the
+file. Support for skipping the end of the file is implemented with
+the FTP C<ABOR> command, which stops the transfer. With these two
+commands and some careful tracking of the current file position, we're
+able to reliably emulate a C<seek/read> pair, and get only the parts
+of the file that are actually read.
+
+This was originally designed for use with
+L<Archive::Zip|Archive::Zip>; it's reliable enough that the table of
+contents and individual files can be extracted from a remote ZIP
+archive without downloading the whole thing. See L<EXAMPLES> below.
+
+An interface compatible with L<IO::Handle|IO::Handle> is provided,
+along with a C<tie>-based interface.
+
+Remember that an FTP server can only do one thing at a time, so make
+sure to C<close> your connection before asking the FTP server to do
+nything else.
+
+=head1 CONSTRUCTOR
+
+=head2 new ( $ftp, $filename, options... )
+
+Creates a new L<IO::Handle|IO::Handle>-compatible object to fetch all
+or parts of C<$filename> using the FTP connection C<$ftp>.
+
+Available options:
+
+=over 4
+
+=item MaxSkipSize => $size
+
+If we need to move forward in a file or close the connection,
+sometimes it's faster to just read the bytes we don't need than to
+abort the connection and restart. This setting tells how many
+unnecessary bytes we're willing to read rather than abort. An
+appropriate setting depends on the speed of transferring files and the
+speed of reconnecting to the server.
+
+=item BlockSize => $size
+
+When doing buffered reads, how many bytes to read at once. The
+default is the same as the default for L<Net::FTP|Net::FTP>, so it's
+generally best to leave it alone.
+
+=item AlreadyBinary => $bool
+
+If set to a true value, we assume the server is already in binary
+mode, and don't try to set it.
+
+=back
+
+=cut
+use constant USAGE => "Usage: Net::FTP::RetrHandle\->new(ftp => \$ftp_obj, filename => \$filename)\n";
+sub new
+{
+ my $class = shift;
+ my $ftp = shift
+ or croak USAGE;
+ my $filename = shift
+ or croak USAGE;
+ my $self = { MaxSkipSize => DEFAULT_MAX_SKIPSIZE,
+ BlockSize => DEFAULT_BLOCKSIZE,
+ @_,
+ ftp => $ftp, filename => $filename,
+ pos => 0, nextpos => 0};
+ $self->{size} = $self->{ftp}->size($self->{filename})
+ or return undef;
+ $self->{ftp}->binary()
+ unless ($self->{AlreadyBinary});
+
+ bless $self,$class;
+}
+
+=head1 METHODS
+
+Most of the methods implemented behave exactly like those from
+L<IO::Handle|IO::Handle>.
+
+These methods are implemented: C<binmode>, C<clearerr>, C<close>, C<eof>,
+C<error>, C<getc>, C<getline>, C<getlines>, C<getpos>, C<read>,
+C<seek>, C<setpos>, C<sysseek>, C<tell>, C<ungetc>, C<opened>.
+
+=cut ;
+
+sub opened { 1; }
+
+sub seek
+{
+ my $self = shift;
+ my $pos = shift || 0;
+ my $whence = shift || 0;
+ warn " SEEK: self=$self, pos=$pos, whence=$whence\n"
+ if ($ENV{DEBUG});
+ my $curpos = $self->tell();
+ my $newpos = _newpos($self->tell(),$self->{size},$pos,$whence);
+ my $ret;
+ if ($newpos == $curpos)
+ {
+ return $curpos;
+ }
+ elsif (defined($self->{_buf}) and ($newpos > $curpos) and ($newpos < ($curpos + length($self->{_buf}))))
+ {
+ # Just seeking within the buffer (or not at all)
+ substr($self->{_buf},0,$newpos - $curpos,'');
+ $ret = $newpos;
+ }
+ else
+ {
+ $ret = $self->sysseek($newpos,0);
+ $self->{_buf} = '';
+ }
+ return $ret;
+}
+
+sub _newpos
+{
+
+ my($curpos,$size,$pos,$whence)=@_;
+ if ($whence == 0) # seek_set
+ {
+ return $pos;
+ }
+ elsif ($whence == 1) # seek_cur
+ {
+ return $curpos + $pos;
+ }
+ elsif ($whence == 2) # seek_end
+ {
+ return $size + $pos;
+ }
+ else
+ {
+ die "Invalid value $whence for whence!";
+ }
+}
+
+sub sysseek
+{
+ my $self = shift;
+ my $pos = shift || 0;
+ my $whence = shift || 0;
+ warn "SYSSEEK: self=$self, pos=$pos, whence=$whence\n"
+ if ($ENV{DEBUG});
+ my $newpos = _newpos($self->{nextpos},$self->{size},$pos,$whence);
+
+ $self->{eof}=undef;
+ return $self->{nextpos}=$newpos;
+}
+
+sub tell
+{
+ my $self = shift;
+ return $self->{nextpos} - (defined($self->{_buf}) ? length($self->{_buf}) : 0);
+}
+
+# WARNING: ASCII mode probably breaks seek.
+sub binmode
+{
+ my $self = shift;
+ my $mode = shift || ':raw';
+ return if (defined($self->{curmode}) && ($self->{curmode} eq $mode));
+ if (defined($mode) and $mode eq ':crlf')
+ {
+ $self->_finish_connection();
+ $self->{ftp}->ascii()
+ or return $self->seterr();
+ }
+ else
+ {
+ $self->_finish_connection();
+ $self->{ftp}->binary()
+ or return $self->seterr();
+ }
+ $self->{curmode} = $mode;
+}
+
+sub _min
+{
+ return $_[0] < $_[1] ? $_[0] : $_[1];
+}
+
+sub _max
+{
+ return $_[0] > $_[1] ? $_[0] : $_[1];
+}
+
+sub read
+{
+ my $self = shift;
+# return $self->sysread(@_);
+
+ my(undef,$len,$offset)=@_;
+ $offset ||= 0;
+ warn "READ(buf,$len,$offset)\n"
+ if ($ENV{DEBUG});
+
+ if (!defined($self->{_buf}) || length($self->{_buf}) <= 0)
+ {
+ $self->sysread($self->{_buf},_max($len,$self->{BlockSize}))
+ or return 0;
+ }
+ elsif (length($self->{_buf}) < $len)
+ {
+ $self->sysread($self->{_buf},_max($len-length($self->{_buf}),$self->{BlockSize}),length($self->{_buf}));
+ }
+ my $ret = _min($len,length($self->{_buf}));
+ if (!defined($_[0])) { $_[0] = '' }
+ substr($_[0],$offset) = substr($self->{_buf},0,$len,'');
+ $self->{read_count}++;
+
+ return $ret;
+}
+
+sub sysread
+{
+ my $self = shift;
+ if ($self->{eof})
+ {
+ return 0;
+ }
+
+ my(undef,$len,$offset) = @_;
+ $offset ||= 0;
+
+ warn "SYSREAD(buf,$len,$offset)\n"
+ if ($ENV{DEBUG});
+ if ($self->{nextpos} >= $self->{size})
+ {
+ $self->{eof} = 1;
+ $self->{pos} = $self->{nextpos};
+ return 0;
+ }
+
+ if ($self->{pos} != $self->{nextpos})
+ {
+ # They seeked.
+ if ($self->{ftp_running})
+ {
+ warn "Seek detected, nextpos=$self->{nextpos}, pos=$self->{pos}, MaxSkipSize=$self->{MaxSkipSize}\n"
+ if ($ENV{DEBUG});
+ if ($self->{nextpos} > $self->{pos} and ($self->{nextpos} - $self->{pos}) < $self->{MaxSkipSize})
+ {
+ my $br = $self->{nextpos}-$self->{pos};
+ warn "Reading $br bytes to skip ahead\n"
+ if ($ENV{DEBUG});
+ my $junkbuff;
+ while ($br > 0)
+ {
+ warn "Trying to read $br more bytes\n"
+ if ($ENV{DEBUG});
+ my $b = $self->{ftp_data}->read($junkbuff,$br);
+ if ($b == 0)
+ {
+ $self->_at_eof();
+ return 0;
+ }
+ elsif (!defined($b) || $b < 0)
+ {
+ return $self->seterr();
+ }
+ else
+ {
+ $br -= $b;
+ }
+ }
+ $self->{pos}=$self->{nextpos};
+ }
+ else
+ {
+ warn "Aborting connection to move to new position\n"
+ if ($ENV{DEBUG});
+ $self->_finish_connection();
+ }
+ }
+ }
+
+ if (!$self->{ftp_running})
+ {
+ $self->{ftp}->restart($self->{nextpos});
+ $self->{ftp_data} = $self->{ftp}->retr($self->{filename})
+ or return $self->seterr();
+ $self->{ftp_running} = 1;
+ $self->{pos}=$self->{nextpos};
+ }
+
+ my $tmpbuf;
+ my $rb = $self->{ftp_data}->read($tmpbuf,$len);
+ if ($rb == 0)
+ {
+ $self->_at_eof();
+ return 0;
+ }
+ elsif (!defined($rb) || $rb < 0)
+ {
+ return $self->seterr();
+ }
+
+ if (!defined($_[0])) { $_[0] = '' }
+ substr($_[0],$offset) = $tmpbuf;
+ $self->{pos} += $rb;
+ $self->{nextpos} += $rb;
+
+ $self->{sysread_count}++;
+ $rb;
+}
+
+sub _at_eof
+{
+ my $self = shift;
+ $self->{eof}=1;
+ $self->_finish_connection();
+# $self->{ftp_data}->_close();
+ $self->{ftp_running} = $self->{ftp_data} = undef;
+}
+
+sub _finish_connection
+{
+ my $self = shift;
+ warn "_finish_connection\n"
+ if ($ENV{DEBUG});
+ return unless ($self->{ftp_running});
+
+ if ($self->{size} - $self->{pos} < $self->{MaxSkipSize})
+ {
+ warn "Skipping " . ($self->{size}-$self->{pos}) . " bytes\n"
+ if ($ENV{DEBUG});
+ my $junkbuff;
+ my $br;
+ while(($br = $self->{ftp_data}->read($junkbuff,8192)))
+ {
+ # Read until EOF or error
+ }
+ defined($br)
+ or $self->seterr();
+ }
+ warn "Shutting down existing FTP DATA session...\n"
+ if ($ENV{DEBUG});
+
+ my $closeret;
+ {
+ eval {
+ $closeret = $self->{ftp_data}->close();
+ };
+ # Work around a timeout bug in Net::FTP
+ if ($@ && $@ =~ /^Timeout /)
+ {
+ warn "Timeout closing connection, retrying...\n"
+ if ($ENV{DEBUG});
+ select(undef,undef,undef,1);
+ redo;
+ }
+ }
+
+ $self->{ftp_running} = $self->{ftp_data} = undef;
+ return $closeret ? 1 : $self->seterr();
+}
+
+sub write
+{
+ die "Only reading currently supported";
+}
+
+sub close
+{
+ my $self = shift;
+ return $self->{ftp_data} ? $self->_finish_connection()
+ : 1;
+}
+
+sub eof
+{
+ my $self = shift;
+ if ($self->{eof})
+ {
+ return 1;
+ }
+
+ my $c = $self->getc;
+ if (!defined($c))
+ {
+ return 1;
+ }
+ $self->ungetc(ord($c));
+ return undef;
+}
+
+sub getc
+{
+ my $self = shift;
+ my $c;
+ my $rb = $self->read($c,1);
+ if ($rb < 1)
+ {
+ return undef;
+ }
+ return $c;
+}
+
+sub ungetc
+{
+ my $self = shift;
+ # Note that $c is the ordinal value of a character, not the
+ # character itself (for some reason)
+ my($c)=@_;
+ $self->{_buf} = chr($c) . $self->{_buf};
+}
+
+sub getline
+{
+ my $self = shift;
+ if (!defined($/))
+ {
+ my $buf;
+ while($self->read($buf,$self->{BlockSize},length($buf)) > 0)
+ {
+ # Keep going
+ }
+ return $buf;
+ }
+ elsif (ref($/) && looks_like_number ${$/} )
+ {
+ my $buf;
+ $self->read($buf,${$/})
+ or return undef;
+ return $buf;
+ }
+
+ my $rs;
+ if ($/ eq '')
+ {
+ $rs = "\n\n";
+ }
+ else
+ {
+ $rs = $/;
+ }
+ my $eol;
+ if (!defined($self->{_buf})) { $self->{_buf} = '' }
+ while (($eol=index($self->{_buf},$rs)) < $[)
+ {
+ if ($self->{eof})
+ {
+ # return what's left
+ if (length($self->{_buf}) == 0)
+ {
+ return undef;
+ }
+ else
+ {
+ return substr($self->{_buf},0,length($self->{_buf}),'');
+ }
+ }
+ else
+ {
+ $self->sysread($self->{_buf},$self->{BlockSize},length($self->{_buf}));
+ }
+ }
+ # OK, we should have a match.
+ my $tmpbuf = substr($self->{_buf},0,$eol+length($rs),'');
+ while ($/ eq '' and substr($self->{_buf},0,1) eq "\n")
+ {
+ substr($self->{_buf},0,1)='';
+ }
+ return $tmpbuf;
+}
+
+sub getlines
+{
+ my $self = shift;
+ my @lines;
+ my $line;
+ while (defined($line = $self->getline()))
+ {
+ push(@lines,$line);
+ }
+ @lines;
+}
+
+sub error
+{
+ return undef;
+}
+
+sub seterr
+{
+ my $self = shift;
+ $self->{_error} = 1;
+ return undef;
+}
+
+sub clearerr
+{
+ my $self = shift;
+ $self->{_error} = undef;
+ return 0;
+}
+
+sub getpos
+{
+ my $self = shift;
+ return $self->tell();
+}
+
+sub setpos
+{
+ my $self = shift;
+ return $self->seek(@_);
+}
+
+sub DESTROY
+{
+ my $self = shift;
+ if (UNIVERSAL::isa($self,'GLOB'))
+ {
+ $self = tied *$self
+ or die "$self not tied?...";
+ }
+ if ($self->{ftp_data})
+ {
+ $self->_finish_connection();
+ }
+ warn "sysread called ".$self->{sysread_count}." times.\n"
+ if ($ENV{DEBUG});
+}
+
+=head1 TIED INTERFACE
+
+Instead of a L<IO::Handle|IO::Handle>-compatible interface, you can
+use a C<tie>-based interface to use the standard Perl I/O operators.
+You can use it like this:
+
+ use Net::FTP::RetrHandle;
+ # Create FTP object in $ftp
+ # Store filename in $filename
+ tie *FH, 'Net::FTP::RetrHandle', $ftp, $filename
+ or die "Error in tie!\n";
+
+=cut
+ ;
+sub TIEHANDLE
+{
+ my $class = shift;
+ my $obj = $class->new(@_);
+ $obj;
+}
+
+sub READ
+{
+ my $self = shift;
+ $self->read(@_);
+}
+
+sub READLINE
+{
+ my $self = shift;
+ return wantarray ? $self->getlines(@_)
+ : $self->getline(@_);
+}
+
+sub GETC
+{
+ my $self = shift;
+ return $self->getc(@_);
+}
+
+sub SEEK
+{
+ my $self = shift;
+ return $self->seek(@_);
+}
+
+sub SYSSEEK
+{
+ my $self = shift;
+ return $self->sysseek(@_);
+}
+
+sub TELL
+{
+ my $self = shift;
+ return $self->tell();
+}
+
+sub CLOSE
+{
+ my $self = shift;
+ return $self->close(@_);
+}
+
+sub EOF
+{
+ my $self = shift;
+ return $self->eof(@_);
+
+}
+sub UNTIE
+{
+ tied($_[0])->close(@_);
+}
+
+=head1 EXAMPLE
+
+Here's an example of listing a Zip file without downloading the whole
+thing:
+
+ #!/usr/bin/perl
+
+ use warnings;
+ use strict;
+
+ use Net::FTP;
+ use Net::FTP::AutoReconnect;
+ use Net::FTP::RetrHandle;
+ use Archive::Zip;
+
+ my $ftp = Net::FTP::AutoReconnect->new("ftp.info-zip.com", Debug => $ENV{DEBUG})
+ or die "connect error\n";
+ $ftp->login('anonymous','example@example.com')
+ or die "login error\n";
+ $ftp->cwd('/pub/infozip/UNIX/LINUX')
+ or die "cwd error\n";
+ my $fh = Net::FTP::RetrHandle->new($ftp,'unz551x-glibc.zip')
+ or die "Couldn't get handle to remote file\n";
+ my $zip = Archive::Zip->new($fh)
+ or die "Couldn't create Zip object\n";
+ foreach my $fn ($zip->memberNames())
+ {
+ print "unz551-glibc.zip: $fn\n";
+ }
+
+
+=head1 AUTHOR
+
+Scott Gifford <sgifford@suspectclass.com>
+
+=head1 BUGS
+
+The distinction between tied filehandles and C<IO::Handle>-compatible
+filehandles should be blurrier. It seems like other file handle
+objects you can freely mix method calls and traditional Perl
+operations, but I can't figure out how to do it.
+
+Many FTP servers don't like frequent connection aborts. If that's the
+case, try L<Net::FTP::AutoReconnect>, which will hide much of that
+from you.
+
+If the filehandle is tied and created with C<gensym>, C<readline>
+doesn't work with older versions of Perl. No idea why.
+
+=head1 SEE ALSO
+
+L<Net::FTP>, L<Net::FTP::AutoReconnect>, L<IO::Handle>.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2006 Scott Gifford. All rights reserved. This program
+is free software; you can redistribute it and/or modify it under the
+same terms as Perl itself.
+
+=cut
+
+1;