3 # http proxy for archives
7 use HTTP::Proxy qw ( :log );
8 use HTTP::Proxy::BodyFilter::save;
9 use HTTP::Proxy::BodyFilter::simple;
10 use HTTP::Proxy::HeaderFilter::simple;
15 imageserver.ebscohost.com
18 use Data::Dump qw(dump);
24 use CGI::Util qw( unescape );
28 # get the command-line parameters
35 my $args = '(' . join( '|', keys %args ) . ')';
36 for ( my $i = 0 ; $i < @ARGV ; $i += 2 ) {
37 if ( $ARGV[$i] =~ /$args/o ) {
38 if ( ref $args{$1} ) {
39 push @{ $args{$1} }, $ARGV[ $i + 1 ];
42 $args{$1} = $ARGV[ $i + 1 ];
44 splice( @ARGV, $i, 2 );
50 # the headers we want to see
52 qw( Content-Type Set-Cookie Set-Cookie2 WWW-Authenticate Location ),
56 ( qw( Cookie Cookie2 Referer Referrer Authorization ), @{ $args{header} } );
58 # NOTE: Body request filters always receive the request body in one pass
59 my $post_filter = HTTP::Proxy::BodyFilter::simple->new(
60 begin => sub { $_[0]->{binary} = 0; },
62 my ( $self, $dataref, $message, $protocol, $buffer ) = @_;
63 print STDOUT "\n", $message->method, " ", $message->uri, "\n";
64 print_headers( $message, @clt_hdr );
66 if ( $self->{binary} || $$dataref =~ /\0/ ) {
68 print STDOUT " (not printing binary data)\n";
72 # this is from CGI.pm, method parse_params()
73 my (@pairs) = split( /[&;]/, $$dataref );
75 my ( $param, $value ) = split( '=', $_, 2 );
76 $param = unescape($param);
77 $value = unescape($value);
78 printf STDOUT " %-20s => %s\n", $param, $value;
83 my $get_filter = HTTP::Proxy::HeaderFilter::simple->new(
85 my ( $self, $headers, $message ) = @_;
86 my $req = $message->request;
87 if ( $req->method ne 'POST' ) {
88 print STDOUT "\n", $req->method, " ", $req->uri, "\n";
89 print_headers( $req, @clt_hdr );
91 print STDOUT $message->status_line, "\n";
92 print_headers( $message, @srv_hdr );
99 if ( $message->header($h) ) {
100 print STDOUT " $h: $_\n" for ( $message->header($h) );
105 # create and start the proxy
106 my $proxy = HTTP::Proxy->new(@ARGV);
108 # if we want to look at SOME sites
109 if (@{$args{peek}}) {
110 for (@{$args{peek}}) {
114 request => $post_filter
118 response => $get_filter,
123 # otherwise, peek at all sites
127 request => $post_filter
129 $proxy->push_filter( response => $get_filter, mime => $args{mime} );
138 # you should probably restrict this to certain hosts as well
140 mime => 'application/pdf',
142 response => HTTP::Proxy::BodyFilter::save->new(
146 # send a HTML message instead
147 response => HTTP::Proxy::BodyFilter::simple->new(
149 my ( $self, $message ) = @_; # for information, saorge
153 # my ( $self, $dataref, $message, $protocol, $buffer ) = @_;
154 # $$dataref = $saved++ ? ""
155 # : sprintf '<p>Saving PDF file. Go <a href="%s">back</a></p>',
156 # $message->request->header('referer');
159 # change the response Content-Type
160 response => HTTP::Proxy::HeaderFilter::simple->new(
162 my ( $self, $headers, $response ) = @_;
163 # $headers->content_type('text/html');
172 my $admin_filter = HTTP::Proxy::HeaderFilter::simple->new( sub {
173 my ( $self, $headers, $message ) = @_;
174 warn "XXX ", $headers->header('x-forwarded-for'), ' ', $message->uri, "\n";
176 my $host = $message->uri->host;
177 return unless $host eq $proxy->host;
179 if ( my $q = $message->uri->query ) {
180 $info->{debug} = not $info->{debug} if $q =~ m{debug};
182 warn "## ", dump( $headers, $message ) if $info->{debug};
184 my $host_port = $proxy->host . ':' . $proxy->port;
186 my $res = HTTP::Response->new( 200 );
188 if ( $message->uri->path =~ m/(proxy.pac|wpad.dat)/ ) {
189 $res->content_type('application/x-ns-proxy-autoconfig');
192 function FindProxyForURL(url, host) {
193 // if (shExpMatch(url, "*.example.com:*/*")) {return "DIRECT";}
195 if (shExpMatch(url, "*.js")) return "DIRECT";
196 if (shExpMatch(url, "*.css")) return "DIRECT";
197 if (shExpMatch(url, "*.gif")) return "DIRECT";
198 if (shExpMatch(url, "*.png")) return "DIRECT";
199 if (shExpMatch(url, "*.ico")) return "DIRECT";
200 if (shExpMatch(url, "*.jpg")) return "DIRECT";
202 // if (isInNet(host, "10.0.0.0", "255.255.248.0")) {
203 // return "PROXY fastproxy.example.com:8080";
206 // we don't want to see this traffic!
207 if (shExpMatch(url, "*.google.*")) return "DIRECT";
209 return "PROXY $host_port; DIRECT";
213 $self->proxy->response( $res );
217 $res->content_type('text/html');
220 <h1>HTTP Proxy Archive</h1>
222 <a href="http://$host_port/proxy.pac">proxy.pac</a>
225 . '<pre>' . dump($info) . '</pre>'
226 . qq|<a href="?debug">debug</a>|
229 $self->proxy->response( $res );
231 $proxy->push_filter( request => $admin_filter );
238 warn "listen on host ", $proxy->host, " port ", $proxy->port, "\n";