[ Index ]

WordPress Source Cross Reference

title

Body

[close]

/wp-includes/ -> class-snoopy.php (source)

   1  <?php
   2  
   3  /*************************************************
   4  
   5  Snoopy - the PHP net client
   6  Author: Monte Ohrt <monte@ispi.net>
   7  Copyright (c): 1999-2000 ispi, all rights reserved
   8  Version: 1.01
   9  
  10   * This library is free software; you can redistribute it and/or
  11   * modify it under the terms of the GNU Lesser General Public
  12   * License as published by the Free Software Foundation; either
  13   * version 2.1 of the License, or (at your option) any later version.
  14   *
  15   * This library is distributed in the hope that it will be useful,
  16   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18   * Lesser General Public License for more details.
  19   *
  20   * You should have received a copy of the GNU Lesser General Public
  21   * License along with this library; if not, write to the Free Software
  22   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  23  
  24  You may contact the author of Snoopy by e-mail at:
  25  monte@ispi.net
  26  
  27  Or, write to:
  28  Monte Ohrt
  29  CTO, ispi
  30  237 S. 70th suite 220
  31  Lincoln, NE 68510
  32  
  33  The latest version of Snoopy can be obtained from:
  34  http://snoopy.sourceforge.net/
  35  
  36  *************************************************/
  37  
  38  if ( !in_array('Snoopy', get_declared_classes() ) ) :
  39  class Snoopy
  40  {
  41      /**** Public variables ****/
  42  
  43      /* user definable vars */
  44  
  45      var $host            =    "www.php.net";        // host name we are connecting to
  46      var $port            =    80;                    // port we are connecting to
  47      var $proxy_host        =    "";                    // proxy host to use
  48      var $proxy_port        =    "";                    // proxy port to use
  49      var $proxy_user        =    "";                    // proxy user to use
  50      var $proxy_pass        =    "";                    // proxy password to use
  51  
  52      var $agent            =    "Snoopy v1.2.3";    // agent we masquerade as
  53      var    $referer        =    "";                    // referer info to pass
  54      var $cookies        =    array();            // array of cookies to pass
  55                                                  // $cookies["username"]="joe";
  56      var    $rawheaders        =    array();            // array of raw headers to send
  57                                                  // $rawheaders["Content-type"]="text/html";
  58  
  59      var $maxredirs        =    5;                    // http redirection depth maximum. 0 = disallow
  60      var $lastredirectaddr    =    "";                // contains address of last redirected address
  61      var    $offsiteok        =    true;                // allows redirection off-site
  62      var $maxframes        =    0;                    // frame content depth maximum. 0 = disallow
  63      var $expandlinks    =    true;                // expand links to fully qualified URLs.
  64                                                  // this only applies to fetchlinks()
  65                                                  // submitlinks(), and submittext()
  66      var $passcookies    =    true;                // pass set cookies back through redirects
  67                                                  // NOTE: this currently does not respect
  68                                                  // dates, domains or paths.
  69  
  70      var    $user            =    "";                    // user for http authentication
  71      var    $pass            =    "";                    // password for http authentication
  72  
  73      // http accept types
  74      var $accept            =    "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  75  
  76      var $results        =    "";                    // where the content is put
  77  
  78      var $error            =    "";                    // error messages sent here
  79      var    $response_code    =    "";                    // response code returned from server
  80      var    $headers        =    array();            // headers returned from server sent here
  81      var    $maxlength        =    500000;                // max return data length (body)
  82      var $read_timeout    =    0;                    // timeout on read operations, in seconds
  83                                                  // supported only since PHP 4 Beta 4
  84                                                  // set to 0 to disallow timeouts
  85      var $timed_out        =    false;                // if a read operation timed out
  86      var    $status            =    0;                    // http request status
  87  
  88      var $temp_dir        =    "/tmp";                // temporary directory that the webserver
  89                                                  // has permission to write to.
  90                                                  // under Windows, this should be C:\temp
  91  
  92      var    $curl_path        =    "/usr/local/bin/curl";
  93                                                  // Snoopy will use cURL for fetching
  94                                                  // SSL content if a full system path to
  95                                                  // the cURL binary is supplied here.
  96                                                  // set to false if you do not have
  97                                                  // cURL installed. See http://curl.haxx.se
  98                                                  // for details on installing cURL.
  99                                                  // Snoopy does *not* use the cURL
 100                                                  // library functions built into php,
 101                                                  // as these functions are not stable
 102                                                  // as of this Snoopy release.
 103  
 104      /**** Private variables ****/
 105  
 106      var    $_maxlinelen    =    4096;                // max line length (headers)
 107  
 108      var $_httpmethod    =    "GET";                // default http request method
 109      var $_httpversion    =    "HTTP/1.0";            // default http request version
 110      var $_submit_method    =    "POST";                // default submit method
 111      var $_submit_type    =    "application/x-www-form-urlencoded";    // default submit type
 112      var $_mime_boundary    =   "";                    // MIME boundary for multipart/form-data submit type
 113      var $_redirectaddr    =    false;                // will be set if page fetched is a redirect
 114      var $_redirectdepth    =    0;                    // increments on an http redirect
 115      var $_frameurls        =     array();            // frame src urls
 116      var $_framedepth    =    0;                    // increments on frame depth
 117  
 118      var $_isproxy        =    false;                // set if using a proxy server
 119      var $_fp_timeout    =    30;                    // timeout for socket connection
 120  
 121  /*======================================================================*\
 122      Function:    fetch
 123      Purpose:    fetch the contents of a web page
 124                  (and possibly other protocols in the
 125                  future like ftp, nntp, gopher, etc.)
 126      Input:        $URI    the location of the page to fetch
 127      Output:        $this->results    the output text from the fetch
 128  \*======================================================================*/
 129  
 130  	function fetch($URI)
 131      {
 132  
 133          //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
 134          $URI_PARTS = parse_url($URI);
 135          if (!empty($URI_PARTS["user"]))
 136              $this->user = $URI_PARTS["user"];
 137          if (!empty($URI_PARTS["pass"]))
 138              $this->pass = $URI_PARTS["pass"];
 139          if (empty($URI_PARTS["query"]))
 140              $URI_PARTS["query"] = '';
 141          if (empty($URI_PARTS["path"]))
 142              $URI_PARTS["path"] = '';
 143  
 144          switch(strtolower($URI_PARTS["scheme"]))
 145          {
 146              case "http":
 147                  $this->host = $URI_PARTS["host"];
 148                  if(!empty($URI_PARTS["port"]))
 149                      $this->port = $URI_PARTS["port"];
 150                  if($this->_connect($fp))
 151                  {
 152                      if($this->_isproxy)
 153                      {
 154                          // using proxy, send entire URI
 155                          $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
 156                      }
 157                      else
 158                      {
 159                          $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 160                          // no proxy, send only the path
 161                          $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
 162                      }
 163  
 164                      $this->_disconnect($fp);
 165  
 166                      if($this->_redirectaddr)
 167                      {
 168                          /* url was redirected, check if we've hit the max depth */
 169                          if($this->maxredirs > $this->_redirectdepth)
 170                          {
 171                              // only follow redirect if it's on this site, or offsiteok is true
 172                              if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 173                              {
 174                                  /* follow the redirect */
 175                                  $this->_redirectdepth++;
 176                                  $this->lastredirectaddr=$this->_redirectaddr;
 177                                  $this->fetch($this->_redirectaddr);
 178                              }
 179                          }
 180                      }
 181  
 182                      if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 183                      {
 184                          $frameurls = $this->_frameurls;
 185                          $this->_frameurls = array();
 186  
 187                          while(list(,$frameurl) = each($frameurls))
 188                          {
 189                              if($this->_framedepth < $this->maxframes)
 190                              {
 191                                  $this->fetch($frameurl);
 192                                  $this->_framedepth++;
 193                              }
 194                              else
 195                                  break;
 196                          }
 197                      }
 198                  }
 199                  else
 200                  {
 201                      return false;
 202                  }
 203                  return true;
 204                  break;
 205              case "https":
 206                  if(!$this->curl_path)
 207                      return false;
 208                  if(function_exists("is_executable"))
 209                      if (!is_executable($this->curl_path))
 210                          return false;
 211                  $this->host = $URI_PARTS["host"];
 212                  if(!empty($URI_PARTS["port"]))
 213                      $this->port = $URI_PARTS["port"];
 214                  if($this->_isproxy)
 215                  {
 216                      // using proxy, send entire URI
 217                      $this->_httpsrequest($URI,$URI,$this->_httpmethod);
 218                  }
 219                  else
 220                  {
 221                      $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 222                      // no proxy, send only the path
 223                      $this->_httpsrequest($path, $URI, $this->_httpmethod);
 224                  }
 225  
 226                  if($this->_redirectaddr)
 227                  {
 228                      /* url was redirected, check if we've hit the max depth */
 229                      if($this->maxredirs > $this->_redirectdepth)
 230                      {
 231                          // only follow redirect if it's on this site, or offsiteok is true
 232                          if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 233                          {
 234                              /* follow the redirect */
 235                              $this->_redirectdepth++;
 236                              $this->lastredirectaddr=$this->_redirectaddr;
 237                              $this->fetch($this->_redirectaddr);
 238                          }
 239                      }
 240                  }
 241  
 242                  if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 243                  {
 244                      $frameurls = $this->_frameurls;
 245                      $this->_frameurls = array();
 246  
 247                      while(list(,$frameurl) = each($frameurls))
 248                      {
 249                          if($this->_framedepth < $this->maxframes)
 250                          {
 251                              $this->fetch($frameurl);
 252                              $this->_framedepth++;
 253                          }
 254                          else
 255                              break;
 256                      }
 257                  }
 258                  return true;
 259                  break;
 260              default:
 261                  // not a valid protocol
 262                  $this->error    =    'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
 263                  return false;
 264                  break;
 265          }
 266          return true;
 267      }
 268  
 269  /*======================================================================*\
 270      Function:    submit
 271      Purpose:    submit an http form
 272      Input:        $URI    the location to post the data
 273                  $formvars    the formvars to use.
 274                      format: $formvars["var"] = "val";
 275                  $formfiles  an array of files to submit
 276                      format: $formfiles["var"] = "/dir/filename.ext";
 277      Output:        $this->results    the text output from the post
 278  \*======================================================================*/
 279  
 280  	function submit($URI, $formvars="", $formfiles="")
 281      {
 282          unset($postdata);
 283  
 284          $postdata = $this->_prepare_post_body($formvars, $formfiles);
 285  
 286          $URI_PARTS = parse_url($URI);
 287          if (!empty($URI_PARTS["user"]))
 288              $this->user = $URI_PARTS["user"];
 289          if (!empty($URI_PARTS["pass"]))
 290              $this->pass = $URI_PARTS["pass"];
 291          if (empty($URI_PARTS["query"]))
 292              $URI_PARTS["query"] = '';
 293          if (empty($URI_PARTS["path"]))
 294              $URI_PARTS["path"] = '';
 295  
 296          switch(strtolower($URI_PARTS["scheme"]))
 297          {
 298              case "http":
 299                  $this->host = $URI_PARTS["host"];
 300                  if(!empty($URI_PARTS["port"]))
 301                      $this->port = $URI_PARTS["port"];
 302                  if($this->_connect($fp))
 303                  {
 304                      if($this->_isproxy)
 305                      {
 306                          // using proxy, send entire URI
 307                          $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
 308                      }
 309                      else
 310                      {
 311                          $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 312                          // no proxy, send only the path
 313                          $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 314                      }
 315  
 316                      $this->_disconnect($fp);
 317  
 318                      if($this->_redirectaddr)
 319                      {
 320                          /* url was redirected, check if we've hit the max depth */
 321                          if($this->maxredirs > $this->_redirectdepth)
 322                          {
 323                              if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
 324                                  $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
 325  
 326                              // only follow redirect if it's on this site, or offsiteok is true
 327                              if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 328                              {
 329                                  /* follow the redirect */
 330                                  $this->_redirectdepth++;
 331                                  $this->lastredirectaddr=$this->_redirectaddr;
 332                                  if( strpos( $this->_redirectaddr, "?" ) > 0 )
 333                                      $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
 334                                  else
 335                                      $this->submit($this->_redirectaddr,$formvars, $formfiles);
 336                              }
 337                          }
 338                      }
 339  
 340                      if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 341                      {
 342                          $frameurls = $this->_frameurls;
 343                          $this->_frameurls = array();
 344  
 345                          while(list(,$frameurl) = each($frameurls))
 346                          {
 347                              if($this->_framedepth < $this->maxframes)
 348                              {
 349                                  $this->fetch($frameurl);
 350                                  $this->_framedepth++;
 351                              }
 352                              else
 353                                  break;
 354                          }
 355                      }
 356  
 357                  }
 358                  else
 359                  {
 360                      return false;
 361                  }
 362                  return true;
 363                  break;
 364              case "https":
 365                  if(!$this->curl_path)
 366                      return false;
 367                  if(function_exists("is_executable"))
 368                      if (!is_executable($this->curl_path))
 369                          return false;
 370                  $this->host = $URI_PARTS["host"];
 371                  if(!empty($URI_PARTS["port"]))
 372                      $this->port = $URI_PARTS["port"];
 373                  if($this->_isproxy)
 374                  {
 375                      // using proxy, send entire URI
 376                      $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 377                  }
 378                  else
 379                  {
 380                      $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 381                      // no proxy, send only the path
 382                      $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 383                  }
 384  
 385                  if($this->_redirectaddr)
 386                  {
 387                      /* url was redirected, check if we've hit the max depth */
 388                      if($this->maxredirs > $this->_redirectdepth)
 389                      {
 390                          if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
 391                              $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
 392  
 393                          // only follow redirect if it's on this site, or offsiteok is true
 394                          if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 395                          {
 396                              /* follow the redirect */
 397                              $this->_redirectdepth++;
 398                              $this->lastredirectaddr=$this->_redirectaddr;
 399                              if( strpos( $this->_redirectaddr, "?" ) > 0 )
 400                                  $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
 401                              else
 402                                  $this->submit($this->_redirectaddr,$formvars, $formfiles);
 403                          }
 404                      }
 405                  }
 406  
 407                  if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 408                  {
 409                      $frameurls = $this->_frameurls;
 410                      $this->_frameurls = array();
 411  
 412                      while(list(,$frameurl) = each($frameurls))
 413                      {
 414                          if($this->_framedepth < $this->maxframes)
 415                          {
 416                              $this->fetch($frameurl);
 417                              $this->_framedepth++;
 418                          }
 419                          else
 420                              break;
 421                      }
 422                  }
 423                  return true;
 424                  break;
 425  
 426              default:
 427                  // not a valid protocol
 428                  $this->error    =    'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
 429                  return false;
 430                  break;
 431          }
 432          return true;
 433      }
 434  
 435  /*======================================================================*\
 436      Function:    fetchlinks
 437      Purpose:    fetch the links from a web page
 438      Input:        $URI    where you are fetching from
 439      Output:        $this->results    an array of the URLs
 440  \*======================================================================*/
 441  
 442  	function fetchlinks($URI)
 443      {
 444          if ($this->fetch($URI))
 445          {
 446              if($this->lastredirectaddr)
 447                  $URI = $this->lastredirectaddr;
 448              if(is_array($this->results))
 449              {
 450                  for($x=0;$x<count($this->results);$x++)
 451                      $this->results[$x] = $this->_striplinks($this->results[$x]);
 452              }
 453              else
 454                  $this->results = $this->_striplinks($this->results);
 455  
 456              if($this->expandlinks)
 457                  $this->results = $this->_expandlinks($this->results, $URI);
 458              return true;
 459          }
 460          else
 461              return false;
 462      }
 463  
 464  /*======================================================================*\
 465      Function:    fetchform
 466      Purpose:    fetch the form elements from a web page
 467      Input:        $URI    where you are fetching from
 468      Output:        $this->results    the resulting html form
 469  \*======================================================================*/
 470  
 471  	function fetchform($URI)
 472      {
 473  
 474          if ($this->fetch($URI))
 475          {
 476  
 477              if(is_array($this->results))
 478              {
 479                  for($x=0;$x<count($this->results);$x++)
 480                      $this->results[$x] = $this->_stripform($this->results[$x]);
 481              }
 482              else
 483                  $this->results = $this->