1 package Net::FTP::RetrHandle;
7 use constant DEFAULT_MAX_SKIPSIZE => 1024 * 1024 * 2;
8 use constant DEFAULT_BLOCKSIZE => 10240; # Net::FTP's default
10 use base 'IO::Seekable';
11 # We don't use base 'IO::Handle'; it currently confuses Archive::Zip.
19 Net::FTP::RetrHandle - Tied or IO::Handle-compatible interface to a file retrieved by FTP
23 Provides a file reading interface for reading all or parts of files
24 located on a remote FTP server, including emulation of C<seek> and
25 support for downloading only the parts of the file requested.
29 Support for skipping the beginning of the file is implemented with the
30 FTP C<REST> command, which starts a retrieval at any point in the
31 file. Support for skipping the end of the file is implemented with
32 the FTP C<ABOR> command, which stops the transfer. With these two
33 commands and some careful tracking of the current file position, we're
34 able to reliably emulate a C<seek/read> pair, and get only the parts
35 of the file that are actually read.
37 This was originally designed for use with
38 L<Archive::Zip|Archive::Zip>; it's reliable enough that the table of
39 contents and individual files can be extracted from a remote ZIP
40 archive without downloading the whole thing. See L<EXAMPLES> below.
42 An interface compatible with L<IO::Handle|IO::Handle> is provided,
43 along with a C<tie>-based interface.
45 Remember that an FTP server can only do one thing at a time, so make
46 sure to C<close> your connection before asking the FTP server to do
51 =head2 new ( $ftp, $filename, options... )
53 Creates a new L<IO::Handle|IO::Handle>-compatible object to fetch all
54 or parts of C<$filename> using the FTP connection C<$ftp>.
60 =item MaxSkipSize => $size
62 If we need to move forward in a file or close the connection,
63 sometimes it's faster to just read the bytes we don't need than to
64 abort the connection and restart. This setting tells how many
65 unnecessary bytes we're willing to read rather than abort. An
66 appropriate setting depends on the speed of transferring files and the
67 speed of reconnecting to the server.
69 =item BlockSize => $size
71 When doing buffered reads, how many bytes to read at once. The
72 default is the same as the default for L<Net::FTP|Net::FTP>, so it's
73 generally best to leave it alone.
75 =item AlreadyBinary => $bool
77 If set to a true value, we assume the server is already in binary
78 mode, and don't try to set it.
83 use constant USAGE => "Usage: Net::FTP::RetrHandle\->new(ftp => \$ftp_obj, filename => \$filename)\n";
91 my $self = { MaxSkipSize => DEFAULT_MAX_SKIPSIZE,
92 BlockSize => DEFAULT_BLOCKSIZE,
94 ftp => $ftp, filename => $filename,
95 pos => 0, nextpos => 0};
96 $self->{size} = $self->{ftp}->size($self->{filename})
98 $self->{ftp}->binary()
99 unless ($self->{AlreadyBinary});
106 Most of the methods implemented behave exactly like those from
107 L<IO::Handle|IO::Handle>.
109 These methods are implemented: C<binmode>, C<clearerr>, C<close>, C<eof>,
110 C<error>, C<getc>, C<getline>, C<getlines>, C<getpos>, C<read>,
111 C<seek>, C<setpos>, C<sysseek>, C<tell>, C<ungetc>, C<opened>.
120 my $pos = shift || 0;
121 my $whence = shift || 0;
122 warn " SEEK: self=$self, pos=$pos, whence=$whence\n"
124 my $curpos = $self->tell();
125 my $newpos = _newpos($self->tell(),$self->{size},$pos,$whence);
127 if ($newpos == $curpos)
131 elsif (defined($self->{_buf}) and ($newpos > $curpos) and ($newpos < ($curpos + length($self->{_buf}))))
133 # Just seeking within the buffer (or not at all)
134 substr($self->{_buf},0,$newpos - $curpos,'');
139 $ret = $self->sysseek($newpos,0);
148 my($curpos,$size,$pos,$whence)=@_;
149 if ($whence == 0) # seek_set
153 elsif ($whence == 1) # seek_cur
155 return $curpos + $pos;
157 elsif ($whence == 2) # seek_end
163 die "Invalid value $whence for whence!";
170 my $pos = shift || 0;
171 my $whence = shift || 0;
172 warn "SYSSEEK: self=$self, pos=$pos, whence=$whence\n"
174 my $newpos = _newpos($self->{nextpos},$self->{size},$pos,$whence);
177 return $self->{nextpos}=$newpos;
183 return $self->{nextpos} - (defined($self->{_buf}) ? length($self->{_buf}) : 0);
186 # WARNING: ASCII mode probably breaks seek.
190 my $mode = shift || ':raw';
191 return if (defined($self->{curmode}) && ($self->{curmode} eq $mode));
192 if (defined($mode) and $mode eq ':crlf')
194 $self->_finish_connection();
195 $self->{ftp}->ascii()
196 or return $self->seterr();
200 $self->_finish_connection();
201 $self->{ftp}->binary()
202 or return $self->seterr();
204 $self->{curmode} = $mode;
209 return $_[0] < $_[1] ? $_[0] : $_[1];
214 return $_[0] > $_[1] ? $_[0] : $_[1];
220 # return $self->sysread(@_);
222 my(undef,$len,$offset)=@_;
224 warn "READ(buf,$len,$offset)\n"
227 if (!defined($self->{_buf}) || length($self->{_buf}) <= 0)
229 $self->sysread($self->{_buf},_max($len,$self->{BlockSize}))
232 elsif (length($self->{_buf}) < $len)
234 $self->sysread($self->{_buf},_max($len-length($self->{_buf}),$self->{BlockSize}),length($self->{_buf}));
236 my $ret = _min($len,length($self->{_buf}));
237 if (!defined($_[0])) { $_[0] = '' }
238 substr($_[0],$offset) = substr($self->{_buf},0,$len,'');
239 $self->{read_count}++;
252 my(undef,$len,$offset) = @_;
255 warn "SYSREAD(buf,$len,$offset)\n"
257 if ($self->{nextpos} >= $self->{size})
260 $self->{pos} = $self->{nextpos};
264 if ($self->{pos} != $self->{nextpos})
267 if ($self->{ftp_running})
269 warn "Seek detected, nextpos=$self->{nextpos}, pos=$self->{pos}, MaxSkipSize=$self->{MaxSkipSize}\n"
271 if ($self->{nextpos} > $self->{pos} and ($self->{nextpos} - $self->{pos}) < $self->{MaxSkipSize})
273 my $br = $self->{nextpos}-$self->{pos};
274 warn "Reading $br bytes to skip ahead\n"
279 warn "Trying to read $br more bytes\n"
281 my $b = $self->{ftp_data}->read($junkbuff,$br);
287 elsif (!defined($b) || $b < 0)
289 return $self->seterr();
296 $self->{pos}=$self->{nextpos};
300 warn "Aborting connection to move to new position\n"
302 $self->_finish_connection();
307 if (!$self->{ftp_running})
309 $self->{ftp}->restart($self->{nextpos});
310 $self->{ftp_data} = $self->{ftp}->retr($self->{filename})
311 or return $self->seterr();
312 $self->{ftp_running} = 1;
313 $self->{pos}=$self->{nextpos};
317 my $rb = $self->{ftp_data}->read($tmpbuf,$len);
323 elsif (!defined($rb) || $rb < 0)
325 return $self->seterr();
328 if (!defined($_[0])) { $_[0] = '' }
329 substr($_[0],$offset) = $tmpbuf;
331 $self->{nextpos} += $rb;
333 $self->{sysread_count}++;
341 $self->_finish_connection();
342 # $self->{ftp_data}->_close();
343 $self->{ftp_running} = $self->{ftp_data} = undef;
346 sub _finish_connection
349 warn "_finish_connection\n"
351 return unless ($self->{ftp_running});
353 if ($self->{size} - $self->{pos} < $self->{MaxSkipSize})
355 warn "Skipping " . ($self->{size}-$self->{pos}) . " bytes\n"
359 while(($br = $self->{ftp_data}->read($junkbuff,8192)))
361 # Read until EOF or error
366 warn "Shutting down existing FTP DATA session...\n"
372 $closeret = $self->{ftp_data}->close();
374 # Work around a timeout bug in Net::FTP
375 if ($@ && $@ =~ /^Timeout /)
377 warn "Timeout closing connection, retrying...\n"
379 select(undef,undef,undef,1);
384 $self->{ftp_running} = $self->{ftp_data} = undef;
385 return $closeret ? 1 : $self->seterr();
390 die "Only reading currently supported";
396 return $self->{ftp_data} ? $self->_finish_connection()
413 $self->ungetc(ord($c));
421 my $rb = $self->read($c,1);
432 # Note that $c is the ordinal value of a character, not the
433 # character itself (for some reason)
435 $self->{_buf} = chr($c) . $self->{_buf};
444 while($self->read($buf,$self->{BlockSize},length($buf)) > 0)
450 elsif (ref($/) && looks_like_number ${$/} )
453 $self->read($buf,${$/})
468 if (!defined($self->{_buf})) { $self->{_buf} = '' }
469 while (($eol=index($self->{_buf},$rs)) < $[)
474 if (length($self->{_buf}) == 0)
480 return substr($self->{_buf},0,length($self->{_buf}),'');
485 $self->sysread($self->{_buf},$self->{BlockSize},length($self->{_buf}));
488 # OK, we should have a match.
489 my $tmpbuf = substr($self->{_buf},0,$eol+length($rs),'');
490 while ($/ eq '' and substr($self->{_buf},0,1) eq "\n")
492 substr($self->{_buf},0,1)='';
502 while (defined($line = $self->getline()))
524 $self->{_error} = undef;
531 return $self->tell();
537 return $self->seek(@_);
543 if (UNIVERSAL::isa($self,'GLOB'))
546 or die "$self not tied?...";
548 if ($self->{ftp_data})
550 $self->_finish_connection();
552 warn "sysread called ".$self->{sysread_count}." times.\n"
556 =head1 TIED INTERFACE
558 Instead of a L<IO::Handle|IO::Handle>-compatible interface, you can
559 use a C<tie>-based interface to use the standard Perl I/O operators.
560 You can use it like this:
562 use Net::FTP::RetrHandle;
563 # Create FTP object in $ftp
564 # Store filename in $filename
565 tie *FH, 'Net::FTP::RetrHandle', $ftp, $filename
566 or die "Error in tie!\n";
573 my $obj = $class->new(@_);
586 return wantarray ? $self->getlines(@_)
587 : $self->getline(@_);
593 return $self->getc(@_);
599 return $self->seek(@_);
605 return $self->sysseek(@_);
611 return $self->tell();
617 return $self->close(@_);
623 return $self->eof(@_);
628 tied($_[0])->close(@_);
633 Here's an example of listing a Zip file without downloading the whole
642 use Net::FTP::AutoReconnect;
643 use Net::FTP::RetrHandle;
646 my $ftp = Net::FTP::AutoReconnect->new("ftp.info-zip.com", Debug => $ENV{DEBUG})
647 or die "connect error\n";
648 $ftp->login('anonymous','example@example.com')
649 or die "login error\n";
650 $ftp->cwd('/pub/infozip/UNIX/LINUX')
651 or die "cwd error\n";
652 my $fh = Net::FTP::RetrHandle->new($ftp,'unz551x-glibc.zip')
653 or die "Couldn't get handle to remote file\n";
654 my $zip = Archive::Zip->new($fh)
655 or die "Couldn't create Zip object\n";
656 foreach my $fn ($zip->memberNames())
658 print "unz551-glibc.zip: $fn\n";
664 Scott Gifford <sgifford@suspectclass.com>
668 The distinction between tied filehandles and C<IO::Handle>-compatible
669 filehandles should be blurrier. It seems like other file handle
670 objects you can freely mix method calls and traditional Perl
671 operations, but I can't figure out how to do it.
673 Many FTP servers don't like frequent connection aborts. If that's the
674 case, try L<Net::FTP::AutoReconnect>, which will hide much of that
677 If the filehandle is tied and created with C<gensym>, C<readline>
678 doesn't work with older versions of Perl. No idea why.
682 L<Net::FTP>, L<Net::FTP::AutoReconnect>, L<IO::Handle>.
686 Copyright (c) 2006 Scott Gifford. All rights reserved. This program
687 is free software; you can redistribute it and/or modify it under the
688 same terms as Perl itself.