| [ Index ] |
WordPress Source Cross Reference |
[Summary view] [Print] [Text view]
1 <?php 2 3 /************************************************* 4 5 Snoopy - the PHP net client 6 Author: Monte Ohrt <monte@ispi.net> 7 Copyright (c): 1999-2000 ispi, all rights reserved 8 Version: 1.01 9 10 * This library is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * This library is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with this library; if not, write to the Free Software 22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 24 You may contact the author of Snoopy by e-mail at: 25 monte@ispi.net 26 27 Or, write to: 28 Monte Ohrt 29 CTO, ispi 30 237 S. 
70th suite 220 31 Lincoln, NE 68510 32 33 The latest version of Snoopy can be obtained from: 34 http://snoopy.sourceforge.net/ 35 36 *************************************************/ 37 38 if ( !in_array('Snoopy', get_declared_classes() ) ) : 39 class Snoopy 40 { 41 /**** Public variables ****/ 42 43 /* user definable vars */ 44 45 var $host = "www.php.net"; // host name we are connecting to 46 var $port = 80; // port we are connecting to 47 var $proxy_host = ""; // proxy host to use 48 var $proxy_port = ""; // proxy port to use 49 var $proxy_user = ""; // proxy user to use 50 var $proxy_pass = ""; // proxy password to use 51 52 var $agent = "Snoopy v1.2.3"; // agent we masquerade as 53 var $referer = ""; // referer info to pass 54 var $cookies = array(); // array of cookies to pass 55 // $cookies["username"]="joe"; 56 var $rawheaders = array(); // array of raw headers to send 57 // $rawheaders["Content-type"]="text/html"; 58 59 var $maxredirs = 5; // http redirection depth maximum. 0 = disallow 60 var $lastredirectaddr = ""; // contains address of last redirected address 61 var $offsiteok = true; // allows redirection off-site 62 var $maxframes = 0; // frame content depth maximum. 0 = disallow 63 var $expandlinks = true; // expand links to fully qualified URLs. 64 // this only applies to fetchlinks() 65 // submitlinks(), and submittext() 66 var $passcookies = true; // pass set cookies back through redirects 67 // NOTE: this currently does not respect 68 // dates, domains or paths. 
69 70 var $user = ""; // user for http authentication 71 var $pass = ""; // password for http authentication 72 73 // http accept types 74 var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*"; 75 76 var $results = ""; // where the content is put 77 78 var $error = ""; // error messages sent here 79 var $response_code = ""; // response code returned from server 80 var $headers = array(); // headers returned from server sent here 81 var $maxlength = 500000; // max return data length (body) 82 var $read_timeout = 0; // timeout on read operations, in seconds 83 // supported only since PHP 4 Beta 4 84 // set to 0 to disallow timeouts 85 var $timed_out = false; // if a read operation timed out 86 var $status = 0; // http request status 87 88 var $temp_dir = "/tmp"; // temporary directory that the webserver 89 // has permission to write to. 90 // under Windows, this should be C:\temp 91 92 var $curl_path = "/usr/local/bin/curl"; 93 // Snoopy will use cURL for fetching 94 // SSL content if a full system path to 95 // the cURL binary is supplied here. 96 // set to false if you do not have 97 // cURL installed. See http://curl.haxx.se 98 // for details on installing cURL. 99 // Snoopy does *not* use the cURL 100 // library functions built into php, 101 // as these functions are not stable 102 // as of this Snoopy release. 
103 104 /**** Private variables ****/ 105 106 var $_maxlinelen = 4096; // max line length (headers) 107 108 var $_httpmethod = "GET"; // default http request method 109 var $_httpversion = "HTTP/1.0"; // default http request version 110 var $_submit_method = "POST"; // default submit method 111 var $_submit_type = "application/x-www-form-urlencoded"; // default submit type 112 var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type 113 var $_redirectaddr = false; // will be set if page fetched is a redirect 114 var $_redirectdepth = 0; // increments on an http redirect 115 var $_frameurls = array(); // frame src urls 116 var $_framedepth = 0; // increments on frame depth 117 118 var $_isproxy = false; // set if using a proxy server 119 var $_fp_timeout = 30; // timeout for socket connection 120 121 /*======================================================================*\ 122 Function: fetch 123 Purpose: fetch the contents of a web page 124 (and possibly other protocols in the 125 future like ftp, nntp, gopher, etc.) 
Input:		$URI	the location of the page to fetch
Output:		$this->results	the output text from the fetch
Returns:	true once a request for $URI has been issued (the result
			of any redirect or frame fetch that follows is not
			reflected in the return value);
			false when the connection cannot be opened, when an https
			URI is requested without a usable cURL binary, or when the
			scheme is neither http nor https ($this->error is set in
			that last case)
\*======================================================================*/

	function fetch($URI)
	{
		// parse_url() yields false for seriously malformed input and leaves
		// out components that are absent, so normalise every key read below.
		$URI_PARTS = parse_url($URI);
		if (!is_array($URI_PARTS))
			$URI_PARTS = array();
		if (!isset($URI_PARTS["scheme"]))
			$URI_PARTS["scheme"] = '';
		if (!isset($URI_PARTS["host"]))
			$URI_PARTS["host"] = '';
		if (!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if (!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];
		if (empty($URI_PARTS["query"]))
			$URI_PARTS["query"] = '';
		if (empty($URI_PARTS["path"]))
			$URI_PARTS["path"] = '';

		// request target used when talking to the origin server directly
		$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

		switch (strtolower($URI_PARTS["scheme"]))
		{
			case "http":
				$this->host = $URI_PARTS["host"];
				if (!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];
				if (!$this->_connect($fp))
					return false;

				// $this->_isproxy is deliberately read only after _connect(),
				// exactly as before (_connect() is defined elsewhere and may
				// update it). A proxy gets the entire URI, otherwise the path.
				$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
				$this->_disconnect($fp);
				break;

			case "https":
				if (!$this->curl_path)
					return false;
				if (function_exists("is_executable") && !is_executable($this->curl_path))
					return false;
				$this->host = $URI_PARTS["host"];
				if (!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];

				// using proxy, send entire URI; no proxy, send only the path
				$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
				break;

			default:
				// not a valid protocol
				// ("\n" must be double-quoted to be a newline; the single-quoted
				// form appended a literal backslash and "n" to the message)
				$this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
				return false;
		}

		/* url was redirected, check if we've hit the max depth */
		if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
		{
			// "On this site" means an http(s) address whose host equals the
			// host just contacted. Comparing the parsed host (instead of the
			// old "^http://host" prefix test) recognises https addresses and
			// no longer accepts hosts that merely start with our host name.
			$redirect_parts = parse_url($this->_redirectaddr);
			$same_site = is_array($redirect_parts)
				&& isset($redirect_parts["scheme"], $redirect_parts["host"])
				&& in_array(strtolower($redirect_parts["scheme"]), array("http", "https"), true)
				&& strcasecmp($redirect_parts["host"], $this->host) == 0;

			// only follow redirect if it's on this site, or offsiteok is true
			if ($same_site || $this->offsiteok)
			{
				$this->_redirectdepth++;
				$this->lastredirectaddr = $this->_redirectaddr;
				$this->fetch($this->_redirectaddr);
			}
		}

		if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
		{
			// work on a copy: the nested fetch() calls collect frame urls of
			// their own into $this->_frameurls
			$frameurls = $this->_frameurls;
			$this->_frameurls = array();

			// foreach replaces while(list() = each()), which is gone in PHP 8
			foreach ($frameurls as $frameurl)
			{
				if ($this->_framedepth >= $this->maxframes)
					break;
				$this->fetch($frameurl);
				$this->_framedepth++;
			}
		}

		return true;
	}

/*======================================================================*\
	Function:	submit
	Purpose:	submit an http form
	Input:		$URI	the location to post the data
				$formvars	the formvars to use.
format: $formvars["var"] = "val";
				$formfiles	an array of files to submit
					format: $formfiles["var"] = "/dir/filename.ext";
	Output:		$this->results	the text output from the post
	Returns:	true once the request for $URI has been issued (the result
				of any redirect or frame fetch that follows is not
				reflected in the return value);
				false when the connection cannot be opened, when an https
				URI is requested without a usable cURL binary, or when the
				scheme is neither http nor https ($this->error is set in
				that last case)
\*======================================================================*/

	function submit($URI, $formvars="", $formfiles="")
	{
		$postdata = $this->_prepare_post_body($formvars, $formfiles);

		// parse_url() yields false for seriously malformed input and leaves
		// out components that are absent, so normalise every key read below.
		$URI_PARTS = parse_url($URI);
		if (!is_array($URI_PARTS))
			$URI_PARTS = array();
		if (!isset($URI_PARTS["scheme"]))
			$URI_PARTS["scheme"] = '';
		if (!isset($URI_PARTS["host"]))
			$URI_PARTS["host"] = '';
		if (!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if (!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];
		if (empty($URI_PARTS["query"]))
			$URI_PARTS["query"] = '';
		if (empty($URI_PARTS["path"]))
			$URI_PARTS["path"] = '';

		// request target used when talking to the origin server directly
		$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

		switch (strtolower($URI_PARTS["scheme"]))
		{
			case "http":
				$this->host = $URI_PARTS["host"];
				if (!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];
				if (!$this->_connect($fp))
					return false;

				// $this->_isproxy is deliberately read only after _connect(),
				// exactly as before (_connect() is defined elsewhere and may
				// update it). A proxy gets the entire URI, otherwise the path.
				$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
				$this->_disconnect($fp);
				break;

			case "https":
				if (!$this->curl_path)
					return false;
				if (function_exists("is_executable") && !is_executable($this->curl_path))
					return false;
				$this->host = $URI_PARTS["host"];
				if (!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];

				// using proxy, send entire URI; no proxy, send only the path
				$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
				break;

			default:
				// not a valid protocol
				// ("\n" must be double-quoted to be a newline; the single-quoted
				// form appended a literal backslash and "n" to the message)
				$this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
				return false;
		}

		/* url was redirected, check if we've hit the max depth */
		if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
		{
			// Expand only addresses that carry no scheme of their own. The
			// previous test looked for the *request's* scheme (unquoted and
			// case-sensitive), so an absolute redirect to a different scheme,
			// e.g. http -> https, was handed to _expandlinks() as if relative.
			if (!preg_match('|^[a-z][a-z0-9+.-]*://|i', $this->_redirectaddr))
				$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

			// "On this site" means an http(s) address whose host equals the
			// host just contacted. Comparing the parsed host (instead of the
			// old "^http://host" prefix test) recognises https addresses and
			// no longer accepts hosts that merely start with our host name.
			$redirect_parts = parse_url($this->_redirectaddr);
			$same_site = is_array($redirect_parts)
				&& isset($redirect_parts["scheme"], $redirect_parts["host"])
				&& in_array(strtolower($redirect_parts["scheme"]), array("http", "https"), true)
				&& strcasecmp($redirect_parts["host"], $this->host) == 0;

			// only follow redirect if it's on this site, or offsiteok is true
			if ($same_site || $this->offsiteok)
			{
				$this->_redirectdepth++;
				$this->lastredirectaddr = $this->_redirectaddr;
				if (strpos($this->_redirectaddr, "?") > 0)
					$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
				else
					$this->submit($this->_redirectaddr, $formvars, $formfiles);
			}
		}

		if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
		{
			// work on a copy: the nested fetch() calls collect frame urls of
			// their own into $this->_frameurls
			$frameurls = $this->_frameurls;
			$this->_frameurls = array();

			// foreach replaces while(list() = each()), which is gone in PHP 8
			foreach ($frameurls as $frameurl)
			{
				if ($this->_framedepth >= $this->maxframes)
					break;
				$this->fetch($frameurl);
				$this->_framedepth++;
			}
		}

		return true;
	}

/*======================================================================*\
	Function:	fetchlinks
	Purpose:	fetch the links from a web page
	Input:		$URI	where you are fetching from
	Output:		$this->results	an array of the URLs
\*======================================================================*/

	function
fetchlinks($URI)
	{
		// Nothing to work on unless the page itself could be fetched.
		if (!$this->fetch($URI))
			return false;

		// If a redirect address was recorded, it replaces the requested URI
		// as the base handed to _expandlinks() below.
		$base_uri = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

		if (is_array($this->results))
		{
			// Pass every entry through _striplinks(), slot by slot.
			$idx = 0;
			while ($idx < count($this->results))
			{
				$this->results[$idx] = $this->_striplinks($this->results[$idx]);
				$idx++;
			}
		}
		else
		{
			$this->results = $this->_striplinks($this->results);
		}

		// Expansion is optional and controlled by the public $expandlinks flag.
		if ($this->expandlinks)
			$this->results = $this->_expandlinks($this->results, $base_uri);

		return true;
	}

/*======================================================================*\
	Function:	fetchform
	Purpose:	fetch the form elements from a web page
	Input:		$URI	where you are fetching from
	Output:		$this->results	the resulting html form
\*======================================================================*/

	function fetchform($URI)
	{

		if ($this->fetch($URI))
		{

			if(is_array($this->results))
			{
				for($x=0;$x<count($this->results);$x++)
					$this->results[$x] = $this->_stripform($this->results[$x]);
			}
			else
				$this->results = $this->