Author image Przemysław Iskra
and 1 contributors

WWW::CurlOO examples

WWW::CurlOO::examples -- sample modules and test code for WWW::CurlOO

Curl::Transport

Extracted from examples/01-curl-transport.pl

This module shows:

buildtime version check

Required features will be missing if libcurl was too old at WWW::CurlOO compilation.

basic inheritance

Use WWW::Curl::* as base for your modules.

exception handling

Most methods die() with a dualvar exception on error. You can compare them numerically, or display as part of a message.

Motivation

recv() and send() methods use non-blocking transfer, this may be very annoying in simple scripts. This wrapper implements blocking send() wrapper, and two recv() wrappers called read() and readline().

MODULE CODE

 package Curl::Transport;

 use strict;
 use warnings;
 use WWW::CurlOO::Easy qw(/^CURLE_/);
 use base qw(WWW::CurlOO::Easy);

 BEGIN {
     if ( WWW::CurlOO::LIBCURL_VERSION_NUM() < 0x071202 ) {
         my $ver = WWW::CurlOO::LIBCURL_VERSION();
         die "curl $ver does not support send() and recv()";
     }
     # alternatively you can write:
     if ( not WWW::CurlOO::Easy->can( "send" )
             or not WWW::CurlOO::Easy->can( "recv" ) ) {
         die "WWW::CurlOO is missing send() and recv()\n"
     }
 }

 use constant {
     B_URI => 0,
     B_SOCKET => 1,
     B_VEC => 2,
     B_READBUF => 3,
 };


 # new( URL ) -- get new object
 sub new
 {
     my $class = shift;
     my $uri = shift;

     # use an array as our object base
     my $base = [ $uri, undef, undef, '' ];

     my $self = $class->SUPER::new( $base );

     $self->setopt( WWW::CurlOO::Easy::CURLOPT_URL, $uri );
     $self->setopt( WWW::CurlOO::Easy::CURLOPT_CONNECT_ONLY, 1 );

     # will die if fails
     $self->perform();

     $self->[ B_SOCKET ] = $self->getinfo( WWW::CurlOO::Easy::CURLINFO_LASTSOCKET );

     # prepare select vector
     my $vec = '';
     vec( $vec, $self->[ B_SOCKET ], 1 ) = 1;
     $self->[ B_VEC ] = $vec;

     return $self;
 }

 # send( DATA ) -- send some data, wait for socket availability if it cannot
 # be sent all at once
 sub send($$)
 {
     my $self = shift;
     my $data = shift;

     while ( length $data ) {
         # copy, because select overwrites those values
         my $w = $self->[ B_VEC ];

         # wait for write
         select undef, $w, undef, 0;

         # make sure some write bit is set
         next unless vec( $w, $self->[ B_SOCKET ], 1 );

         # actually send the data
         my $sent = $self->SUPER::send( $data );

         # remove from buffer what we sent
         substr $data, 0, $sent, '';
     };
 }

 # read( SIZE ) -- read SIZE bytes, wait for more data if there wasn't enough
 sub read($$)
 {
     my $self = shift;
     my $size = shift;

     return '' unless $size > 0;

     while ( length $self->[ B_READBUF ] < $size ) {
         my $r = $self->[ B_VEC ];

         # wait for data
         select $r, undef, undef, 0;

         # make sure some read bit is set
         redo unless vec( $r, $self->[ B_SOCKET ], 1 );

         eval {
             my $l = $self->SUPER::recv( $self->[ B_READBUF ],
                 $size - length $self->[ B_READBUF ] );
         };
         if ( $@ ) {
             if ( $@ == CURLE_UNSUPPORTED_PROTOCOL ) {
                 my $uri = $self->[ B_URI ];
                 warn "Connection to $uri closed: $@\n";
                 last;
             } elsif ( $@ == CURLE_AGAIN ) {
                 warn "nothing to read, this should not happen";
             } else {
                 die $@;
             }
         }
     }

     return substr $self->[ B_READBUF ], 0, $size, '';
 }

 # readline() -- read until $/
 sub readline($)
 {
     my $self = shift;

     # we allow changing $/, but we don't support $/ = undef.
     local $/;
     $/ = "\n" unless defined $/;

     my $idx;
     until ( ( $idx = index $self->[ B_READBUF ], $/ ) >= 0 ) {
         my $r = $self->[ B_VEC ];

         # wait for data
         select $r, undef, undef, 0;

         # make sure some read bit is set
         next unless vec( $r, $self->[ B_SOCKET ], 1 );

         # read 256 bytes, should be enough in most cases
         eval {
             $self->SUPER::recv( $self->[ B_READBUF ], 256 );
         };
         if ( $@ ) {
             if ( $@ == CURLE_UNSUPPORTED_PROTOCOL ) {
                 my $uri = $self->[ B_URI ];
                 warn "Connection to $uri closed: $@\n";
                 last;
             } elsif ( $@ == CURLE_AGAIN ) {
                 warn "nothing to read, this should not happen";
             } else {
                 die $@;
             }
         }
     }

     return substr $self->[ B_READBUF ], 0, ($idx + length $/), '';
 }

 1;

TEST APPLICATION

Sample application using this module could look like this:

 #!perl
 use strict;
 use warnings;
 use Curl::Transport;

 my $host = shift @ARGV || "example.com";

 my $t = Curl::Transport->new( "http://$host" );
 $t->send( "GET / HTTP/1.0\r\n" );
 $t->send( "User-Agent: Curl::Transport test\r\n" );
 $t->send( "Accept: */*\r\n" );
 $t->send( "Host: $host\r\n" );
 $t->send( "Connection: Close\r\n" );
 $t->send( "\r\n" );

 my $length;
 {
     local $/ = "\r\n";
     local $_;
     do {
         $_ = $t->readline();
         $length = 0 | $1 if /Content-Length:\s*(\d+)/;
         chomp;
         print "HEADER: $_\n";
     } while ( length $_ );
 }

 if ( defined $length ) {
     print "Reading $length bytes of data:\n";
     print $t->read( $length );

     print "\nTrying to read one more byte, should fail:\n";
     print $t->read( 1 );
     print "\n";
 } else {
     print "Don't know how much to read\n";
     while ( $_ = $t->readline() ) {
         print;
     }
 }

 printf "Last error: %s\n", $t->error();

Multi::Simple

Extracted from examples/02-multi-simple.pl

This module shows how to use WWW::CurlOO::Multi interface correctly in its simpliest form. Uses perl builtin select(). A more advanced code would use callbacks and some event library instead.

Motivation

Writing a proper multi wrapper code requires a rather good understainding of libcurl multi interface. This code provides a recipie for those who just need something that "simply works".

MODULE CODE

 package Multi::Simple;

 use strict;
 use warnings;
 use WWW::CurlOO::Multi;
 use base qw(WWW::CurlOO::Multi);

 # make new object, preset the data
 sub new
 {
     my $class = shift;
     my $active = 0;
     return $class->SUPER::new( \$active );
 }

 # add one handle and count it
 sub add_handle($$)
 {
     my $self = shift;
     my $easy = shift;

     $$self++;
     $self->SUPER::add_handle( $easy );
 }

 # perform until some handle finishes, does all the magic needed to make it
 # efficient (check as soon as there is some data) without overusing the cpu.
 sub get_one($)
 {
     my $self = shift;

     if ( my @result = $self->info_read() ) {
         $self->remove_handle( $result[ 1 ] );
         return @result;
     }

     while ( $$self ) {
         my $t = $self->timeout;
         if ( $t != 0 ) {
             $t = 10000 if $t < 0;
             my ( $r, $w, $e ) = $self->fdset;

             select $r, $w, $e, $t / 1000;
         }

         my $ret = $self->perform();
         if ( $$self != $ret ) {
             $$self = $ret;
             if ( my @result = $self->info_read() ) {
                 $self->remove_handle( $result[ 1 ] );
                 return @result;
             }
         }
     };

     return ();
 }

 1;

TEST APPLICATION

Sample application using this module looks like this:

 #!perl
 use strict;
 use warnings;
 use Multi::Simple;
 use WWW::CurlOO::Share qw(:constants);


 sub easy
 {
     my $uri = shift;
     my $share = shift;

     require WWW::CurlOO::Easy;

     my $easy = WWW::CurlOO::Easy->new( { uri => $uri, body => '' } );
     $easy->setopt( WWW::CurlOO::Easy::CURLOPT_VERBOSE(), 1 );
     $easy->setopt( WWW::CurlOO::Easy::CURLOPT_URL(), $uri );
     $easy->setopt( WWW::CurlOO::Easy::CURLOPT_WRITEHEADER(), \$easy->{headers} );
     $easy->setopt( WWW::CurlOO::Easy::CURLOPT_FILE(), \$easy->{body} );
     $easy->setopt( WWW::CurlOO::Easy::CURLOPT_SHARE(), $share );
     return $easy;
 }

 my $multi = Multi::Simple->new();

 my @uri = (
     "http://www.google.com/search?q=perl",
     "http://www.google.com/search?q=curl",
     "http://www.google.com/search?q=perl+curl",
 );

 {
     # share cookies between all handles
     my $share = WWW::CurlOO::Share->new();
     $share->setopt( CURLSHOPT_SHARE, CURL_LOCK_DATA_COOKIE );
     $multi->add_handle( easy( shift ( @uri ), $share ) );
 }

 my $ret = 0;
 while ( my ( $msg, $easy, $result ) = $multi->get_one() ) {
     print "\nFinished downloading $easy->{uri}: $result:\n";
     printf "Body is %d bytes long\n", length $easy->{body};
     print "=" x 80 . "\n";

     $ret = 1 if $result;

     $multi->add_handle( easy( shift ( @uri ), $easy->share ) ) if @uri;
 }

 exit $ret;

Multi::Event

Extracted from examples/03-multi-event.pl

This module shows how to use WWW::CurlOO::Multi interface with an event library, AnyEvent in this case.

Motivation

This is the most efficient method for using WWW::CurlOO::Multi interface, but it requires a really good understanding of it. This code tries to show the quirks found when using event-based programming.

MODULE CODE

 package Multi::Event;

 use strict;
 use warnings;
 use AnyEvent;
 use WWW::CurlOO::Multi qw(/^CURL_POLL_/ /^CURL_CSELECT_/);
 use base qw(WWW::CurlOO::Multi);

 BEGIN {
     if ( not WWW::CurlOO::Multi->can( 'CURLMOPT_TIMERFUNCTION' ) ) {
         die "WWW::CurlOO::Multi is missing timer callback,\n" .
             "rebuild WWW::CurlOO with libcurl 7.16.0 or newer\n";
     }
 }

 sub new
 {
     my $class = shift;

     # no base object this time
     # we'll use the default hash

     my $multi = $class->SUPER::new();

     $multi->setopt( WWW::CurlOO::Multi::CURLMOPT_SOCKETFUNCTION,
         \&_cb_socket );
     $multi->setopt( WWW::CurlOO::Multi::CURLMOPT_TIMERFUNCTION,
         \&_cb_timer );

     $multi->{active} = -1;

     return $multi;
 }


 # socket callback:
 # will be called by curl any time events on some socket must be updated
 sub _cb_socket
 {
     my ( $multi, $easy, $socket, $poll ) = @_;
     #warn "on_socket( $socket => $poll )\n";

     # Right now $socket belongs to that $easy, but it can be
     # shared with another easy handle if server supports persistent
     # connections.
     # This is why we register socket events inside multi object
     # and not $easy.

     # deregister old io events
     delete $multi->{ "r$socket" };
     delete $multi->{ "w$socket" };

     # AnyEvent does not support registering a socket for both reading and
     # writing. This is rarely used so there is no harm in separating
     # the events.

     # register read event
     if ( $poll == CURL_POLL_IN or $poll == CURL_POLL_INOUT ) {
         $multi->{ "r$socket" } = AE::io $socket, 0, sub {
             $multi->socket_action( $socket, CURL_CSELECT_IN );
         };
     }

     # register write event
     if ( $poll == CURL_POLL_OUT or $poll == CURL_POLL_INOUT ) {
         $multi->{ "w$socket" } = AE::io $socket, 1, sub {
             $multi->socket_action( $socket, CURL_CSELECT_OUT );
         };
     }

     return 1;
 }


 # timer callback:
 # It triggers timeout update. Timeout value tells us how soon socket_action
 # must be called if there were no actions on sockets. This will allow
 # curl to trigger timeout events.
 sub _cb_timer
 {
     my ( $multi, $timeout_ms ) = @_;
     #warn "on_timer( $timeout_ms )\n";

     # deregister old timer
     delete $multi->{timer};

     my $cb = sub {
         $multi->socket_action( WWW::CurlOO::Multi::CURL_SOCKET_TIMEOUT );
     };

     if ( $timeout_ms < 0 ) {
         # Negative timeout means there is no timeout at all. Normally happens
         # if there are no handles anymore.
         #
         # However, curl_multi_timeout(3) says:
         #
         # Note: if libcurl returns a -1 timeout here, it just means that
         # libcurl currently has no stored timeout value. You must not wait
         # too long (more than a few seconds perhaps) before you call
         # curl_multi_perform() again.

         # XXX: this is missing yet
         #if ( $multi->handles ) {
             $multi->{timer} = AE::timer 10, 10, $cb;
         #}
     } else {
         # This will trigger timeouts if there are any.
         $multi->{timer} = AE::timer $timeout_ms / 1000, 0, $cb;
     }

     return 1;
 }

 # add one handle and kickstart download
 sub add_handle($$)
 {
     my $multi = shift;
     my $easy = shift;

     die "easy cannot finish()\n"
         unless $easy->can( 'finish' );

     # Calling socket_action with default arguments will trigger socket callback
     # and register IO events.
     #
     # It _must_ be called _after_ add_handle(); AE will take care of that.
     #
     # We are delaying the call because in some cases socket_action may finish
     # inmediatelly (i.e. there was some error or we used persistent connections
     # and server returned data right away) and it could confuse our
     # application -- it would appear to have finished before it started.
     AE::timer 0, 0, sub {
         $multi->socket_action();
     };

     $multi->SUPER::add_handle( $easy );
 }

 # perform and call any callbacks that have finished
 sub socket_action
 {
     my $multi = shift;

     my $active = $multi->SUPER::socket_action( @_ );
     return if $multi->{active} == $active;

     $multi->{active} = $active;

     while ( my ( $msg, $easy, $result ) = $multi->info_read() ) {
         if ( $msg == WWW::CurlOO::Multi::CURLMSG_DONE ) {
             $multi->remove_handle( $easy );
             $easy->finish( $result );
         } else {
             die "I don't know what to do with message $msg.\n";
         }
     }
 }

 1;

TEST Easy package

Multi::Event requires Easy object to provide finish() method.

 package Easy::Event;
 use strict;
 use warnings;
 use WWW::CurlOO::Easy qw(/^CURLOPT_/);
 use base qw(WWW::CurlOO::Easy);

 sub new
 {
     my $class = shift;
     my $uri = shift;
     my $cb = shift;

     my $easy = $class->SUPER::new( { uri => $uri, body => '', cb => $cb } );
     $easy->setopt( CURLOPT_URL, $uri );
     $easy->setopt( CURLOPT_WRITEHEADER, \$easy->{headers} );
     $easy->setopt( CURLOPT_FILE, \$easy->{body} );

     return $easy;
 }

 sub finish
 {
     my ( $easy, $result ) = @_;

     printf "\nFinished downloading %s: %s: %d bytes\n",
         $easy->{uri}, $result, length $easy->{body};

     $easy->{cb}->( $easy->{body} );
 }

 1;

TEST APPLICATION

 #!perl
 use strict;
 use warnings;
 use Easy::Event;
 use Multi::Event;
 use AnyEvent;

 my $multi = Multi::Event->new();
 my $cv = AE::cv;


 my @uris = (
     "http://www.google.com/search?q=perl",
     "http://www.google.com/search?q=curl",
     "http://www.google.com/search?q=perl+curl",
 );


 my $i = scalar @uris;
 sub done
 {
     my $body = shift;

     # process...

     unless ( --$i ) {
         $cv->send;
     }
 }

 my $timer;
 $timer = AE::timer 0, 0.1, sub {
     my $uri = shift @uris;
     $multi->add_handle( Easy::Event->new( $uri, \&done ) );

     unless ( @uris ) {
         undef $timer;
     }
 };

 $cv->recv;

 exit 0;