[ Index ]

WordPress Source Cross Reference

title

Body

[close]

/wp-includes/ -> rss.php (source)

   1  <?php
   2  /*
   3   * Project:     MagpieRSS: a simple RSS integration tool
   4   * File:        A compiled file for RSS syndication
   5   * Author:      Kellan Elliott-McCrea <kellan@protest.net>
   6   * Version:        0.51
   7   * License:        GPL
   8   */
   9  
  10  define('RSS', 'RSS');
  11  define('ATOM', 'Atom');
  12  define('MAGPIE_USER_AGENT', 'WordPress/' . $wp_version);
  13  
  14  class MagpieRSS { 
  15      var $parser;
  16      var $current_item    = array();    // item currently being parsed
  17      var $items            = array();    // collection of parsed items
  18      var $channel        = array();    // hash of channel fields
  19      var $textinput        = array();
  20      var $image            = array();
  21      var $feed_type;
  22      var $feed_version;
  23  
  24      // parser variables
  25      var $stack                = array(); // parser stack
  26      var $inchannel            = false;
  27      var $initem             = false;
  28      var $incontent            = false; // if in Atom <content mode="xml"> field 
  29      var $intextinput        = false;
  30      var $inimage             = false;
  31      var $current_field        = '';
  32      var $current_namespace    = false;
  33  
  34      //var $ERROR = "";
  35  
  36      var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright');
  37  
  38  	function MagpieRSS ($source) {
  39  
  40          # if PHP xml isn't compiled in, die
  41          #
  42          if ( !function_exists('xml_parser_create') )
  43              trigger_error( "Failed to load PHP's XML Extension. http://www.php.net/manual/en/ref.xml.php" );
  44  
  45          $parser = @xml_parser_create();
  46  
  47          if ( !is_resource($parser) )
  48              trigger_error( "Failed to create an instance of PHP's XML parser. http://www.php.net/manual/en/ref.xml.php");
  49  
  50  
  51          $this->parser = $parser;
  52  
  53          # pass in parser, and a reference to this object
  54          # setup handlers
  55          #
  56          xml_set_object( $this->parser, $this );
  57          xml_set_element_handler($this->parser, 
  58                  'feed_start_element', 'feed_end_element' );
  59  
  60          xml_set_character_data_handler( $this->parser, 'feed_cdata' ); 
  61  
  62          $status = xml_parse( $this->parser, $source );
  63  
  64          if (! $status ) {
  65              $errorcode = xml_get_error_code( $this->parser );
  66              if ( $errorcode != XML_ERROR_NONE ) {
  67                  $xml_error = xml_error_string( $errorcode );
  68                  $error_line = xml_get_current_line_number($this->parser);
  69                  $error_col = xml_get_current_column_number($this->parser);
  70                  $errormsg = "$xml_error at line $error_line, column $error_col";
  71  
  72                  $this->error( $errormsg );
  73              }
  74          }
  75  
  76          xml_parser_free( $this->parser );
  77  
  78          $this->normalize();
  79      }
  80  
  81  	function feed_start_element($p, $element, &$attrs) {
  82          $el = $element = strtolower($element);
  83          $attrs = array_change_key_case($attrs, CASE_LOWER);
  84  
  85          // check for a namespace, and split if found
  86          $ns    = false;
  87          if ( strpos( $element, ':' ) ) {
  88              list($ns, $el) = split( ':', $element, 2); 
  89          }
  90          if ( $ns and $ns != 'rdf' ) {
  91              $this->current_namespace = $ns;
  92          }
  93  
  94          # if feed type isn't set, then this is first element of feed
  95          # identify feed from root element
  96          #
  97          if (!isset($this->feed_type) ) {
  98              if ( $el == 'rdf' ) {
  99                  $this->feed_type = RSS;
 100                  $this->feed_version = '1.0';
 101              }
 102              elseif ( $el == 'rss' ) {
 103                  $this->feed_type = RSS;
 104                  $this->feed_version = $attrs['version'];
 105              }
 106              elseif ( $el == 'feed' ) {
 107                  $this->feed_type = ATOM;
 108                  $this->feed_version = $attrs['version'];
 109                  $this->inchannel = true;
 110              }
 111              return;
 112          }
 113  
 114          if ( $el == 'channel' ) 
 115          {
 116              $this->inchannel = true;
 117          }
 118          elseif ($el == 'item' or $el == 'entry' ) 
 119          {
 120              $this->initem = true;
 121              if ( isset($attrs['rdf:about']) ) {
 122                  $this->current_item['about'] = $attrs['rdf:about'];
 123              }
 124          }
 125  
 126          // if we're in the default namespace of an RSS feed,
 127          //  record textinput or image fields
 128          elseif ( 
 129              $this->feed_type == RSS and 
 130              $this->current_namespace == '' and 
 131              $el == 'textinput' ) 
 132          {
 133              $this->intextinput = true;
 134          }
 135  
 136          elseif (
 137              $this->feed_type == RSS and 
 138              $this->current_namespace == '' and 
 139              $el == 'image' ) 
 140          {
 141              $this->inimage = true;
 142          }
 143  
 144          # handle atom content constructs
 145          elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
 146          {
 147              // avoid clashing w/ RSS mod_content
 148              if ($el == 'content' ) {
 149                  $el = 'atom_content';
 150              }
 151  
 152              $this->incontent = $el;
 153  
 154  
 155          }
 156  
 157          // if inside an Atom content construct (e.g. content or summary) field treat tags as text
 158          elseif ($this->feed_type == ATOM and $this->incontent ) 
 159          {
 160              // if tags are inlined, then flatten
 161              $attrs_str = join(' ', 
 162                      array_map('map_attrs', 
 163                      array_keys($attrs), 
 164                      array_values($attrs) ) );
 165  
 166              $this->append_content( "<$element $attrs_str>"  );
 167  
 168              array_unshift( $this->stack, $el );
 169          }
 170  
 171          // Atom support many links per containging element.
 172          // Magpie treats link elements of type rel='alternate'
 173          // as being equivalent to RSS's simple link element.
 174          //
 175          elseif ($this->feed_type == ATOM and $el == 'link' ) 
 176          {
 177              if ( isset($attrs['rel']) and $attrs['rel'] == 'alternate' ) 
 178              {
 179                  $link_el = 'link';
 180              }
 181              else {
 182                  $link_el = 'link_' . $attrs['rel'];
 183              }
 184  
 185              $this->append($link_el, $attrs['href']);
 186          }
 187          // set stack[0] to current element
 188          else {
 189              array_unshift($this->stack, $el);
 190          }
 191      }
 192  
 193  
 194  
 195  	function feed_cdata ($p, $text) {
 196  
 197          if ($this->feed_type == ATOM and $this->incontent) 
 198          {
 199              $this->append_content( $text );
 200          }
 201          else {
 202              $current_el = join('_', array_reverse($this->stack));
 203              $this->append($current_el, $text);
 204          }
 205      }
 206  
 207  	function feed_end_element ($p, $el) {
 208          $el = strtolower($el);
 209  
 210          if ( $el == 'item' or $el == 'entry' ) 
 211          {
 212              $this->items[] = $this->current_item;
 213              $this->current_item = array();
 214              $this->initem = false;
 215          }
 216          elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' ) 
 217          {
 218              $this->intextinput = false;
 219          }
 220          elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' ) 
 221          {
 222              $this->inimage = false;
 223          }
 224          elseif ($this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
 225          {
 226              $this->incontent = false;
 227          }
 228          elseif ($el == 'channel' or $el == 'feed' ) 
 229          {
 230              $this->inchannel = false;
 231          }
 232          elseif ($this->feed_type == ATOM and $this->incontent  ) {
 233              // balance tags properly
 234              // note:  i don't think this is actually neccessary
 235              if ( $this->stack[0] == $el ) 
 236              {
 237                  $this->append_content("</$el>");
 238              }
 239              else {
 240                  $this->append_content("<$el />");
 241              }
 242  
 243              array_shift( $this->stack );
 244          }
 245          else {
 246              array_shift( $this->stack );
 247          }
 248  
 249          $this->current_namespace = false;
 250      }
 251  
 252  	function concat (&$str1, $str2="") {
 253          if (!isset($str1) ) {
 254              $str1="";
 255          }
 256          $str1 .= $str2;
 257      }
 258  
 259  	function append_content($text) {
 260          if ( $this->initem ) {
 261              $this->concat( $this->current_item[ $this->incontent ], $text );
 262          }
 263          elseif ( $this->inchannel ) {
 264              $this->concat( $this->channel[ $this->incontent ], $text );
 265          }
 266      }
 267  
 268      // smart append - field and namespace aware
 269  	function append($el, $text) {
 270          if (!$el) {
 271              return;
 272          }
 273          if ( $this->current_namespace ) 
 274          {
 275              if ( $this->initem ) {
 276                  $this->concat(
 277                      $this->current_item[ $this->current_namespace ][ $el ], $text);
 278              }
 279              elseif ($this->inchannel) {
 280                  $this->concat(
 281                      $this->channel[ $this->current_namespace][ $el ], $text );
 282              }
 283              elseif ($this->intextinput) {
 284                  $this->concat(
 285                      $this->textinput[ $this->current_namespace][ $el ], $text );
 286              }
 287              elseif ($this->inimage) {
 288                  $this->concat(
 289                      $this->image[ $this->current_namespace ][ $el ], $text );
 290              }
 291          }
 292          else {
 293              if ( $this->initem ) {
 294                  $this->concat(
 295                      $this->current_item[ $el ], $text);
 296              }
 297              elseif ($this->intextinput) {
 298                  $this->concat(
 299                      $this->textinput[ $el ], $text );
 300              }
 301              elseif ($this->inimage) {
 302                  $this->concat(
 303                      $this->image[ $el ], $text );
 304              }
 305              elseif ($this->inchannel) {
 306                  $this->concat(
 307                      $this->channel[ $el ], $text );
 308              }
 309  
 310          }
 311      }
 312  
 313  	function normalize () {
 314          // if atom populate rss fields
 315          if ( $this->is_atom() ) {
 316              $this->channel['descripton'] = $this->channel['tagline'];
 317              for ( $i = 0; $i < count($this->items); $i++) {
 318                  $item = $this->items[$i];
 319                  if ( isset($item['summary']) )
 320                      $item['description'] = $item['summary'];
 321                  if ( isset($item['atom_content']))
 322                      $item['content']['encoded'] = $item['atom_content'];
 323  
 324                  $this->items[$i] = $item;
 325              }
 326          }
 327          elseif ( $this->is_rss() ) {
 328              $this->channel['tagline'] = $this->channel['description'];
 329              for ( $i = 0; $i < count($this->items); $i++) {
 330                  $item = $this->items[$i];
 331                  if ( isset($item['description']))
 332                      $item['summary'] = $item['description'];
 333                  if ( isset($item['content']['encoded'] ) )
 334                      $item['atom_content'] = $item['content']['encoded'];
 335  
 336                  $this->items[$i] = $item;
 337              }
 338          }
 339      }
 340  
 341  	function is_rss () {
 342          if ( $this->feed_type == RSS ) {
 343              return $this->feed_version;
 344          }
 345          else {
 346              return false;
 347          }
 348      }
 349  
 350  	function is_atom() {
 351          if ( $this->feed_type == ATOM ) {
 352              return $this->feed_version;
 353          }
 354          else {
 355              return false;
 356          }
 357      }
 358  
 359  	function map_attrs($k, $v) {
 360          return "$k=\"$v\"";
 361      }
 362  
 363  	function error( $errormsg, $lvl = E_USER_WARNING ) {
 364          // append PHP's error message if track_errors enabled
 365          if ( isset($php_errormsg) ) {
 366              $errormsg .= " ($php_errormsg)";
 367          }
 368          if ( MAGPIE_DEBUG ) {
 369              trigger_error( $errormsg, $lvl);
 370          } else {
 371              error_log( $errormsg, 0);
 372          }
 373      }
 374  
 375  }
 376  require_once( dirname(__FILE__) . '/class-snoopy.php');
 377  
 378  function fetch_rss ($url) {
 379      // initialize constants
 380      init();
 381  
 382      if ( !isset($url) ) {
 383          // error("fetch_rss called without a url");
 384          return false;
 385      }
 386  
 387      // if cache is disabled
 388      if ( !MAGPIE_CACHE_ON ) {
 389          // fetch file, and parse it
 390          $resp = _fetch_remote_file( $url );
 391          if ( is_success( $resp->status ) ) {
 392              return _response_to_rss( $resp );
 393          }
 394          else {
 395              // error("Failed to fetch $url and cache is off");
 396              return false;
 397          }
 398      } 
 399      // else cache is ON
 400      else {
 401          // Flow
 402          // 1. check cache
 403          // 2. if there is a hit, make sure its fresh
 404          // 3. if cached obj fails freshness check, fetch remote
 405          // 4. if remote fails, return stale object, or error
 406  
 407          $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );
 408  
 409          if (MAGPIE_DEBUG and $cache->ERROR) {
 410              debug($cache->ERROR, E_USER_WARNING);
 411          }
 412  
 413  
 414          $cache_status      = 0;        // response of check_cache
 415          $request_headers = array(); // HTTP headers to send with fetch
 416          $rss              = 0;        // parsed RSS object
 417          $errormsg         = 0;        // errors, if any
 418  
 419          if (!$cache->ERROR) {
 420              // return cache HIT, MISS, or STALE
 421              $cache_status = $cache->check_cache( $url );
 422          }
 423  
 424          // if object cached, and cache is fresh, return cached obj
 425          if ( $cache_status == 'HIT' ) {
 426              $rss = $cache->get( $url );
 427              if ( isset($rss) and $rss ) {
 428                  $rss->from_cache = 1;
 429                  if ( MAGPIE_DEBUG > 1) {
 430                  debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
 431              }
 432                  return $rss;
 433              }
 434          }
 435  
 436          // else attempt a conditional get
 437  
 438          // setup headers
 439          if ( $cache_status == 'STALE' ) {
 440              $rss = $cache->get( $url );
 441              if ( $rss->etag and $rss->last_modified ) {
 442                  $request_headers['If-None-Match'] = $rss->etag;
 443                  $request_headers['If-Last-Modified'] = $rss->last_modified;
 444              }
 445          }
 446  
 447          $resp = _fetch_remote_file( $url, $request_headers );
 448  
 449          if (isset($resp) and $resp) {
 450              if ($resp->status == '304' ) {
 451                  // we have the most current copy
 452                  if ( MAGPIE_DEBUG > 1) {
 453                      debug("Got 304 for $url");
 454                  }
 455                  // reset cache on 304 (at minutillo insistent prodding)
 456                  $cache->set($url, $rss);
 457                  return $rss;
 458              }
 459              elseif ( is_success( $resp->status ) ) {
 460                  $rss = _response_to_rss( $resp );
 461                  if ( $rss ) {
 462                      if (MAGPIE_DEBUG > 1) {
 463                          debug("Fetch successful");
 464                      }
 465                      // add object to cache
 466                      $cache->set( $url, $rss );
 467                      return $rss;
 468                  }
 469              }
 470              else {
 471                  $errormsg = "Failed to fetch $url. ";
 472                  if ( $resp->error ) {
 473                      # compensate for Snoopy's annoying habbit to tacking
 474                      # on '\n'
 475                      $http_error = substr($resp->error, 0, -2); 
 476                      $errormsg .= "(HTTP Error: $http_error)";
 477                  }
 478                  else {
 479                      $errormsg .=  "(HTTP Response: " . $resp->response_code .')';
 480                  }
 481              }
 482          }
 483          else {
 484              $errormsg = "Unable to retrieve RSS file for unknown reasons.";
 485          }
 486  
 487          // else fetch failed
 488  
 489          // attempt to return cached object
 490          if ($rss) {
 491              if ( MAGPIE_DEBUG ) {
 492                  debug("Returning STALE object for $url");
 493              }
 494              return $rss;
 495          }