| [ Index ] |
WordPress Source Cross Reference |
[Summary view] [Print] [Text view]
<?php
/*
 * Project:     MagpieRSS: a simple RSS integration tool
 * File:        A compiled file for RSS syndication
 * Author:      Kellan Elliott-McCrea <kellan@protest.net>
 * Version:     0.51
 * License:     GPL
 */

define('RSS', 'RSS');
define('ATOM', 'Atom');
define('MAGPIE_USER_AGENT', 'WordPress/' . $wp_version);

/**
 * Callback-driven feed parser.
 *
 * The constructor runs the supplied XML through PHP's XML parser; the feed_*
 * methods are registered as the element and character-data handlers. The root
 * element decides the feed type: <rdf> is treated as RSS 1.0, <rss> as RSS
 * (version taken from its "version" attribute) and <feed> as Atom.
 * Parsed data ends up in $channel, $items, $image and $textinput.
 */
class MagpieRSS {
	var $parser;
	var $current_item	= array();	// item currently being parsed
	var $items			= array();	// collection of parsed items
	var $channel		= array();	// hash of channel fields
	var $textinput		= array();
	var $image			= array();
	var $feed_type;
	var $feed_version;

	// parser variables
	var $stack				= array(); // parser stack
	var $inchannel			= false;
	var $initem 			= false;
	var $incontent			= false; // if in Atom <content mode="xml"> field
	var $intextinput		= false;
	var $inimage 			= false;
	var $current_field		= '';
	var $current_namespace	= false;

	//var $ERROR = "";

	// Atom elements whose children are flattened into a single text value
	var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright');

	/**
	 * Parse a feed document and populate this object.
	 *
	 * @param string $source Raw XML of the feed.
	 */
	function __construct( $source ) {

		# if PHP xml isn't compiled in, report it and stop:
		# none of the calls below can succeed without the extension
		#
		if ( !function_exists('xml_parser_create') ) {
			trigger_error( "Failed to load PHP's XML Extension. http://www.php.net/manual/en/ref.xml.php" );
			return;
		}

		$parser = @xml_parser_create();

		# xml_parser_create() returns a resource before PHP 8 and an
		# XMLParser object from PHP 8 on, so accept either
		#
		if ( !is_resource($parser) and !is_object($parser) ) {
			trigger_error( "Failed to create an instance of PHP's XML parser. http://www.php.net/manual/en/ref.xml.php");
			return;
		}

		$this->parser = $parser;

		# pass in parser, and a reference to this object
		# setup handlers
		#
		xml_set_object( $this->parser, $this );
		xml_set_element_handler($this->parser,
				'feed_start_element', 'feed_end_element' );

		xml_set_character_data_handler( $this->parser, 'feed_cdata' );

		$status = xml_parse( $this->parser, $source );

		if (! $status ) {
			$errorcode = xml_get_error_code( $this->parser );
			if ( $errorcode != XML_ERROR_NONE ) {
				$xml_error = xml_error_string( $errorcode );
				$error_line = xml_get_current_line_number($this->parser);
				$error_col = xml_get_current_column_number($this->parser);
				$errormsg = "$xml_error at line $error_line, column $error_col";

				$this->error( $errormsg );
			}
		}

		xml_parser_free( $this->parser );

		$this->normalize();
	}

	/**
	 * PHP 4 style constructor, kept so that explicit calls and old runtimes
	 * keep working; it only delegates to __construct().
	 *
	 * @param string $source Raw XML of the feed.
	 */
	function MagpieRSS( $source ) {
		$this->__construct( $source );
	}

	/**
	 * Start-tag handler.
	 *
	 * @param mixed  $p       Parser handle (unused).
	 * @param string $element Element name as reported by the parser.
	 * @param array  $attrs   Element attributes; keys are lower-cased in place.
	 */
	function feed_start_element($p, $element, &$attrs) {
		$el = $element = strtolower($element);
		$attrs = array_change_key_case($attrs, CASE_LOWER);

		// check for a namespace, and split if found
		// (a ':' at position 0 is deliberately not treated as a prefix)
		$ns	= false;
		if ( strpos( $element, ':' ) ) {
			list($ns, $el) = explode( ':', $element, 2);
		}
		if ( $ns and $ns != 'rdf' ) {
			$this->current_namespace = $ns;
		}

		# if feed type isn't set, then this is first element of feed
		# identify feed from root element
		#
		if (!isset($this->feed_type) ) {
			// the version attribute is optional; stay null when it is absent
			$version = isset($attrs['version']) ? $attrs['version'] : null;

			if ( $el == 'rdf' ) {
				$this->feed_type = RSS;
				$this->feed_version = '1.0';
			}
			elseif ( $el == 'rss' ) {
				$this->feed_type = RSS;
				$this->feed_version = $version;
			}
			elseif ( $el == 'feed' ) {
				$this->feed_type = ATOM;
				$this->feed_version = $version;
				$this->inchannel = true;
			}
			return;
		}

		if ( $el == 'channel' )
		{
			$this->inchannel = true;
		}
		elseif ($el == 'item' or $el == 'entry' )
		{
			$this->initem = true;
			if ( isset($attrs['rdf:about']) ) {
				$this->current_item['about'] = $attrs['rdf:about'];
			}
		}

		// if we're in the default namespace of an RSS feed,
		//  record textinput or image fields
		elseif (
			$this->feed_type == RSS and
			$this->current_namespace == '' and
			$el == 'textinput' )
		{
			$this->intextinput = true;
		}

		elseif (
			$this->feed_type == RSS and
			$this->current_namespace == '' and
			$el == 'image' )
		{
			$this->inimage = true;
		}

		# handle atom content constructs
		elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
		{
			// avoid clashing w/ RSS mod_content
			if ($el == 'content' ) {
				$el = 'atom_content';
			}

			$this->incontent = $el;
		}

		// if inside an Atom content construct (e.g. content or summary) field treat tags as text
		elseif ($this->feed_type == ATOM and $this->incontent )
		{
			// if tags are inlined, then flatten
			// (map_attrs is a method of this class, so it needs an object callback)
			$attrs_str = join(' ',
					array_map(array($this, 'map_attrs'),
					array_keys($attrs),
					array_values($attrs) ) );

			$this->append_content( "<$element $attrs_str>"  );

			array_unshift( $this->stack, $el );
		}

		// Atom support many links per containing element.
		// Magpie treats link elements of type rel='alternate'
		// as being equivalent to RSS's simple link element.
		//
		elseif ($this->feed_type == ATOM and $el == 'link' )
		{
			if ( isset($attrs['rel']) and $attrs['rel'] == 'alternate' )
			{
				$link_el = 'link';
			}
			else {
				// a missing rel yields the key 'link_', as before, minus the notice
				$link_el = 'link_' . ( isset($attrs['rel']) ? $attrs['rel'] : '' );
			}

			$this->append($link_el, isset($attrs['href']) ? $attrs['href'] : '');
		}
		// set stack[0] to current element
		else {
			array_unshift($this->stack, $el);
		}
	}

	/**
	 * Character-data handler: routes text either into the open Atom content
	 * construct or into the field named after the current element stack.
	 *
	 * @param mixed  $p    Parser handle (unused).
	 * @param string $text Text chunk delivered by the parser.
	 */
	function feed_cdata ($p, $text) {

		if ($this->feed_type == ATOM and $this->incontent)
		{
			$this->append_content( $text );
		}
		else {
			// stack[0] is the innermost element, so reverse to get outer_inner order
			$current_el = join('_', array_reverse($this->stack));
			$this->append($current_el, $text);
		}
	}

	/**
	 * End-tag handler: closes items and sections, and unwinds the element stack.
	 *
	 * @param mixed  $p  Parser handle (unused).
	 * @param string $el Element name as reported by the parser.
	 */
	function feed_end_element ($p, $el) {
		$el = strtolower($el);

		if ( $el == 'item' or $el == 'entry' )
		{
			$this->items[] = $this->current_item;
			$this->current_item = array();
			$this->initem = false;
		}
		elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' )
		{
			$this->intextinput = false;
		}
		elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' )
		{
			$this->inimage = false;
		}
		elseif ($this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
		{
			$this->incontent = false;
		}
		elseif ($el == 'channel' or $el == 'feed' )
		{
			$this->inchannel = false;
		}
		elseif ($this->feed_type == ATOM and $this->incontent  ) {
			// balance tags properly
			// note:  i don't think this is actually necessary
			if ( isset($this->stack[0]) and $this->stack[0] == $el )
			{
				$this->append_content("</$el>");
			}
			else {
				$this->append_content("<$el />");
			}

			array_shift( $this->stack );
		}
		else {
			array_shift( $this->stack );
		}

		$this->current_namespace = false;
	}

	/**
	 * Append $str2 to $str1, creating $str1 as an empty string when unset.
	 *
	 * @param string $str1 Target, passed by reference.
	 * @param string $str2 Text to append.
	 */
	function concat (&$str1, $str2="") {
		if (!isset($str1) ) {
			$str1="";
		}
		$str1 .= $str2;
	}

	/**
	 * Append text to the open Atom content construct of the current item or,
	 * outside an item, of the channel. The field key is $this->incontent.
	 *
	 * @param string $text Text or flattened markup to append.
	 */
	function append_content($text) {
		if ( $this->initem ) {
			$this->concat( $this->current_item[ $this->incontent ], $text );
		}
		elseif ( $this->inchannel ) {
			$this->concat( $this->channel[ $this->incontent ], $text );
		}
	}

	/**
	 * Smart append - field and namespace aware.
	 *
	 * Stores $text under $el in whichever container is currently open,
	 * nested under the namespace prefix when one is active.
	 *
	 * @param string $el   Field name; empty names are ignored.
	 * @param string $text Text to append.
	 */
	function append($el, $text) {
		if (!$el) {
			return;
		}
		if ( $this->current_namespace )
		{
			if ( $this->initem ) {
				$this->concat(
					$this->current_item[ $this->current_namespace ][ $el ], $text);
			}
			elseif ($this->inchannel) {
				$this->concat(
					$this->channel[ $this->current_namespace][ $el ], $text );
			}
			elseif ($this->intextinput) {
				$this->concat(
					$this->textinput[ $this->current_namespace][ $el ], $text );
			}
			elseif ($this->inimage) {
				$this->concat(
					$this->image[ $this->current_namespace ][ $el ], $text );
			}
		}
		else {
			if ( $this->initem ) {
				$this->concat(
					$this->current_item[ $el ], $text);
			}
			elseif ($this->intextinput) {
				$this->concat(
					$this->textinput[ $el ], $text );
			}
			elseif ($this->inimage) {
				$this->concat(
					$this->image[ $el ], $text );
			}
			elseif ($this->inchannel) {
				$this->concat(
					$this->channel[ $el ], $text );
			}

		}
	}

	/**
	 * Mirror equivalent fields between the Atom and RSS vocabularies so that
	 * callers can read either spelling regardless of the feed type.
	 */
	function normalize () {
		// if atom populate rss fields
		if ( $this->is_atom() ) {
			if ( isset($this->channel['tagline']) ) {
				$this->channel['description'] = $this->channel['tagline'];
				// misspelled key written by earlier versions; kept for existing callers
				$this->channel['descripton'] = $this->channel['tagline'];
			}
			foreach ( array_keys($this->items) as $i ) {
				$item = $this->items[$i];
				if ( isset($item['summary']) )
					$item['description'] = $item['summary'];
				if ( isset($item['atom_content']))
					$item['content']['encoded'] = $item['atom_content'];

				$this->items[$i] = $item;
			}
		}
		elseif ( $this->is_rss() ) {
			if ( isset($this->channel['description']) ) {
				$this->channel['tagline'] = $this->channel['description'];
			}
			foreach ( array_keys($this->items) as $i ) {
				$item = $this->items[$i];
				if ( isset($item['description']))
					$item['summary'] = $item['description'];
				if ( isset($item['content']['encoded'] ) )
					$item['atom_content'] = $item['content']['encoded'];

				$this->items[$i] = $item;
			}
		}
	}

	/**
	 * @return mixed The feed version when the feed is RSS, false otherwise.
	 *               Note that the version itself may be empty.
	 */
	function is_rss () {
		if ( $this->feed_type == RSS ) {
			return $this->feed_version;
		}
		else {
			return false;
		}
	}

	/**
	 * @return mixed The feed version when the feed is Atom, false otherwise.
	 *               Note that the version itself may be empty.
	 */
	function is_atom() {
		if ( $this->feed_type == ATOM ) {
			return $this->feed_version;
		}
		else {
			return false;
		}
	}

	/**
	 * Render one attribute as name="value" (the value is not escaped).
	 *
	 * @param string $k Attribute name.
	 * @param string $v Attribute value.
	 * @return string
	 */
	function map_attrs($k, $v) {
		return "$k=\"$v\"";
	}

	/**
	 * Report a parser error: raised via trigger_error() when MAGPIE_DEBUG is
	 * defined and truthy, written with error_log() otherwise.
	 *
	 * @param string $errormsg Message to report.
	 * @param int    $lvl      Error level used with trigger_error().
	 */
	function error( $errormsg, $lvl = E_USER_WARNING ) {
		// append PHP's error message if one is present in this scope
		if ( isset($php_errormsg) ) {
			$errormsg .= " ($php_errormsg)";
		}
		// the constant may not be defined yet when the class is used directly
		if ( defined('MAGPIE_DEBUG') and MAGPIE_DEBUG ) {
			trigger_error( $errormsg, $lvl);
		} else {
			error_log( $errormsg, 0);
		}
	}

}
require_once( dirname(__FILE__) . '/class-snoopy.php');

function fetch_rss ($url) {
	// initialize constants
	init();

	if ( !isset($url) ) {
		// error("fetch_rss called without a url");
		return false;
	}

	// if cache is disabled
	if ( !MAGPIE_CACHE_ON ) {
		// fetch file, and parse it
		$resp = _fetch_remote_file( $url );
		if ( is_success( $resp->status ) ) {
			return _response_to_rss( $resp );
		}
		else {
			// error("Failed to fetch $url and cache is off");
			return false;
		}
	}
	// else cache is ON
	else {
		// Flow
		// 1. check cache
		// 2. if there is a hit, make sure its fresh
		// 3. if cached obj fails freshness check, fetch remote
		// 4.
if remote fails, return stale object, or error 406 407 $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE ); 408 409 if (MAGPIE_DEBUG and $cache->ERROR) { 410 debug($cache->ERROR, E_USER_WARNING); 411 } 412 413 414 $cache_status = 0; // response of check_cache 415 $request_headers = array(); // HTTP headers to send with fetch 416 $rss = 0; // parsed RSS object 417 $errormsg = 0; // errors, if any 418 419 if (!$cache->ERROR) { 420 // return cache HIT, MISS, or STALE 421 $cache_status = $cache->check_cache( $url ); 422 } 423 424 // if object cached, and cache is fresh, return cached obj 425 if ( $cache_status == 'HIT' ) { 426 $rss = $cache->get( $url ); 427 if ( isset($rss) and $rss ) { 428 $rss->from_cache = 1; 429 if ( MAGPIE_DEBUG > 1) { 430 debug("MagpieRSS: Cache HIT", E_USER_NOTICE); 431 } 432 return $rss; 433 } 434 } 435 436 // else attempt a conditional get 437 438 // setup headers 439 if ( $cache_status == 'STALE' ) { 440 $rss = $cache->get( $url ); 441 if ( $rss->etag and $rss->last_modified ) { 442 $request_headers['If-None-Match'] = $rss->etag; 443 $request_headers['If-Last-Modified'] = $rss->last_modified; 444 } 445 } 446 447 $resp = _fetch_remote_file( $url, $request_headers ); 448 449 if (isset($resp) and $resp) { 450 if ($resp->status == '304' ) { 451 // we have the most current copy 452 if ( MAGPIE_DEBUG > 1) { 453 debug("Got 304 for $url"); 454 } 455 // reset cache on 304 (at minutillo insistent prodding) 456 $cache->set($url, $rss); 457 return $rss; 458 } 459 elseif ( is_success( $resp->status ) ) { 460 $rss = _response_to_rss( $resp ); 461 if ( $rss ) { 462 if (MAGPIE_DEBUG > 1) { 463 debug("Fetch successful"); 464 } 465 // add object to cache 466 $cache->set( $url, $rss ); 467 return $rss; 468 } 469 } 470 else { 471 $errormsg = "Failed to fetch $url. 
"; 472 if ( $resp->error ) { 473 # compensate for Snoopy's annoying habbit to tacking 474 # on '\n' 475 $http_error = substr($resp->error, 0, -2); 476 $errormsg .= "(HTTP Error: $http_error)"; 477 } 478 else { 479 $errormsg .= "(HTTP Response: " . $resp->response_code .')'; 480 } 481 } 482 } 483 else { 484 $errormsg = "Unable to retrieve RSS file for unknown reasons."; 485 } 486 487 // else fetch failed 488 489 // attempt to return cached object 490 if ($rss) { 491 if ( MAGPIE_DEBUG ) { 492 debug("Returning STALE object for $url"); 493 } 494 return $rss; 495 }