[ Index ]

WordPress Source Cross Reference

title

Body

[close]

/wp-includes/ -> formatting.php (source)

   1  <?php
   2  
   3  function wptexturize($text) {
   4      $output = '';
   5      // Capture tags and everything inside them
   6      $textarr = preg_split("/(<.*>)/Us", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
   7      $stop = count($textarr); $next = true; // loop stuff
   8      for ($i = 0; $i < $stop; $i++) {
   9          $curl = $textarr[$i];
  10  
  11          if (isset($curl{0}) && '<' != $curl{0} && $next) { // If it's not a tag
  12              $curl = str_replace('---', '&#8212;', $curl);
  13              $curl = str_replace(' -- ', ' &#8212; ', $curl);
  14              $curl = str_replace('--', '&#8211;', $curl);
  15              $curl = str_replace('xn&#8211;', 'xn--', $curl);
  16              $curl = str_replace('...', '&#8230;', $curl);
  17              $curl = str_replace('``', '&#8220;', $curl);
  18  
  19              // This is a hack, look at this more later. It works pretty well though.
  20              $cockney = array("'tain't","'twere","'twas","'tis","'twill","'til","'bout","'nuff","'round","'cause");
  21              $cockneyreplace = array("&#8217;tain&#8217;t","&#8217;twere","&#8217;twas","&#8217;tis","&#8217;twill","&#8217;til","&#8217;bout","&#8217;nuff","&#8217;round","&#8217;cause");
  22              $curl = str_replace($cockney, $cockneyreplace, $curl);
  23  
  24              $curl = preg_replace("/'s/", '&#8217;s', $curl);
  25              $curl = preg_replace("/'(\d\d(?:&#8217;|')?s)/", "&#8217;$1", $curl);
  26              $curl = preg_replace('/(\s|\A|")\'/', '$1&#8216;', $curl);
  27              $curl = preg_replace('/(\d+)"/', '$1&#8243;', $curl);
  28              $curl = preg_replace("/(\d+)'/", '$1&#8242;', $curl);
  29              $curl = preg_replace("/(\S)'([^'\s])/", "$1&#8217;$2", $curl);
  30              $curl = preg_replace('/(\s|\A)"(?!\s)/', '$1&#8220;$2', $curl);
  31              $curl = preg_replace('/"(\s|\S|\Z)/', '&#8221;$1', $curl);
  32              $curl = preg_replace("/'([\s.]|\Z)/", '&#8217;$1', $curl);
  33              $curl = preg_replace("/ \(tm\)/i", ' &#8482;', $curl);
  34              $curl = str_replace("''", '&#8221;', $curl);
  35  
  36              $curl = preg_replace('/(\d+)x(\d+)/', "$1&#215;$2", $curl);
  37  
  38          } elseif (strstr($curl, '<code') || strstr($curl, '<pre') || strstr($curl, '<kbd' || strstr($curl, '<style') || strstr($curl, '<script'))) {
  39              // strstr is fast
  40              $next = false;
  41          } else {
  42              $next = true;
  43          }
  44          $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $curl);
  45          $output .= $curl;
  46      }
  47      return $output;
  48  }
  49  
  50  function clean_pre($text) {
  51      $text = str_replace('<br />', '', $text);
  52      $text = str_replace('<p>', "\n", $text);
  53      $text = str_replace('</p>', '', $text);
  54      return $text;
  55  }
  56  
  57  function wpautop($pee, $br = 1) {
  58      $pee = $pee . "\n"; // just to make things a little easier, pad the end
  59      $pee = preg_replace('|<br />\s*<br />|', "\n\n", $pee);
  60      // Space things out a little
  61      $pee = preg_replace('!(<(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|math|p|h[1-6])[^>]*>)!', "\n$1", $pee); 
  62      $pee = preg_replace('!(</(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|math|p|h[1-6])>)!', "$1\n\n", $pee);
  63      $pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines 
  64      $pee = preg_replace("/\n\n+/", "\n\n", $pee); // take care of duplicates
  65      $pee = preg_replace('/\n?(.+?)(?:\n\s*\n|\z)/s', "<p>$1</p>\n", $pee); // make paragraphs, including one at the end 
  66      $pee = preg_replace('|<p>\s*?</p>|', '', $pee); // under certain strange conditions it could create a P of entirely whitespace 
  67      $pee = preg_replace('!<p>\s*(</?(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|hr|pre|select|form|blockquote|address|math|p|h[1-6])[^>]*>)\s*</p>!', "$1", $pee); // don't pee all over a tag
  68      $pee = preg_replace("|<p>(<li.+?)</p>|", "$1", $pee); // problem with nested lists
  69      $pee = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $pee);
  70      $pee = str_replace('</blockquote></p>', '</p></blockquote>', $pee);
  71      $pee = preg_replace('!<p>\s*(</?(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|hr|pre|select|form|blockquote|address|math|p|h[1-6])[^>]*>)!', "$1", $pee);
  72      $pee = preg_replace('!(</?(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|math|p|h[1-6])[^>]*>)\s*</p>!', "$1", $pee); 
  73      if ($br) {
  74          $pee = preg_replace('/<(script|style).*?<\/\\1>/se', 'str_replace("\n", "<WPPreserveNewline />", "\\0")', $pee);
  75          $pee = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $pee); // optionally make line breaks
  76          $pee = str_replace('<WPPreserveNewline />', "\n", $pee);
  77      }
  78      $pee = preg_replace('!(</?(?:table|thead|tfoot|caption|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|math|p|h[1-6])[^>]*>)\s*<br />!', "$1", $pee);
  79      $pee = preg_replace('!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)>)!', '$1', $pee);
  80      $pee = preg_replace('!(<pre.*?>)(.*?)</pre>!ise', " stripslashes('$1') .  stripslashes(clean_pre('$2'))  . '</pre>' ", $pee);
  81  
  82      return $pee; 
  83  }
  84  
  85  
  86  function seems_utf8($Str) { # by bmorel at ssi dot fr
  87      for ($i=0; $i<strlen($Str); $i++) {
  88          if (ord($Str[$i]) < 0x80) continue; # 0bbbbbbb
  89          elseif ((ord($Str[$i]) & 0xE0) == 0xC0) $n=1; # 110bbbbb
  90          elseif ((ord($Str[$i]) & 0xF0) == 0xE0) $n=2; # 1110bbbb
  91          elseif ((ord($Str[$i]) & 0xF8) == 0xF0) $n=3; # 11110bbb
  92          elseif ((ord($Str[$i]) & 0xFC) == 0xF8) $n=4; # 111110bb
  93          elseif ((ord($Str[$i]) & 0xFE) == 0xFC) $n=5; # 1111110b
  94          else return false; # Does not match any model
  95          for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
  96              if ((++$i == strlen($Str)) || ((ord($Str[$i]) & 0xC0) != 0x80))
  97              return false;
  98          }
  99      }
 100      return true;
 101  }
 102  
 103  function wp_specialchars( $text, $quotes = 0 ) {
 104      // Like htmlspecialchars except don't double-encode HTML entities
 105      $text = preg_replace('/&([^#])(?![a-z1-4]{1,8};)/', '&#038;$1', $text);
 106      $text = str_replace('<', '&lt;', $text);
 107      $text = str_replace('>', '&gt;', $text);
 108      if ( 'double' === $quotes ) {
 109          $text = str_replace('"', '&quot;', $text);
 110      } elseif ( 'single' === $quotes ) {
 111          $text = str_replace("'", '&#039;', $text);
 112      } elseif ( $quotes ) {
 113          $text = str_replace('"', '&quot;', $text);
 114          $text = str_replace("'", '&#039;', $text);
 115      }
 116      return $text;
 117  }
 118  
 119  function utf8_uri_encode( $utf8_string ) {
 120    $unicode = '';        
 121    $values = array();
 122    $num_octets = 1;
 123          
 124    for ($i = 0; $i < strlen( $utf8_string ); $i++ ) {
 125  
 126      $value = ord( $utf8_string[ $i ] );
 127              
 128      if ( $value < 128 ) {
 129        $unicode .= chr($value);
 130      } else {
 131        if ( count( $values ) == 0 ) $num_octets = ( $value < 224 ) ? 2 : 3;
 132                  
 133        $values[] = $value;
 134        
 135        if ( count( $values ) == $num_octets ) {
 136      if ($num_octets == 3) {
 137        $unicode .= '%' . dechex($values[0]) . '%' . dechex($values[1]) . '%' . dechex($values[2]);
 138      } else {
 139        $unicode .= '%' . dechex($values[0]) . '%' . dechex($values[1]);
 140      }
 141  
 142      $values = array();
 143      $num_octets = 1;
 144        }
 145      }
 146    }
 147  
 148    return $unicode;    
 149  }
 150  
 151  function remove_accents($string) {
 152      if (seems_utf8($string)) {
 153          $chars = array(
 154          // Decompositions for Latin-1 Supplement
 155          chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
 156          chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
 157          chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
 158          chr(195).chr(135) => 'C', chr(195).chr(136) => 'E',
 159          chr(195).chr(137) => 'E', chr(195).chr(138) => 'E',
 160          chr(195).chr(139) => 'E', chr(195).chr(140) => 'I',
 161          chr(195).chr(141) => 'I', chr(195).chr(142) => 'I',
 162          chr(195).chr(143) => 'I', chr(195).chr(145) => 'N',
 163          chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
 164          chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
 165          chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
 166          chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
 167          chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
 168          chr(195).chr(159) => 's', chr(195).chr(160) => 'a',
 169          chr(195).chr(161) => 'a', chr(195).chr(162) => 'a',
 170          chr(195).chr(163) => 'a', chr(195).chr(164) => 'a',
 171          chr(195).chr(165) => 'a', chr(195).chr(167) => 'c',
 172          chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
 173          chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
 174          chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
 175          chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
 176          chr(195).chr(177) => 'n', chr(195).chr(178) => 'o',
 177          chr(195).chr(179) => 'o', chr(195).chr(180) => 'o',
 178          chr(195).chr(181) => 'o', chr(195).chr(182) => 'o',
 179          chr(195).chr(182) => 'o', chr(195).chr(185) => 'u',
 180          chr(195).chr(186) => 'u', chr(195).chr(187) => 'u',
 181          chr(195).chr(188) => 'u', chr(195).chr(189) => 'y',
 182          chr(195).chr(191) => 'y',
 183          // Decompositions for Latin Extended-A
 184          chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
 185          chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
 186          chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
 187          chr(196).chr(134) => 'C', chr(196).chr(135) => 'c',
 188          chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
 189          chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
 190          chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
 191          chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
 192          chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
 193          chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
 194          chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
 195          chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
 196          chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
 197          chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
 198          chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
 199          chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
 200          chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
 201          chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
 202          chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
 203          chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
 204          chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
 205          chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
 206          chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
 207          chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
 208          chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
 209          chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij',
 210          chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
 211          chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
 212          chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
 213          chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
 214          chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
 215          chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
 216          chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
 217          chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
 218          chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
 219          chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
 220          chr(197).chr(136) => 'n', chr(197).chr(137) => 'N',
 221          chr(197).chr(138) => 'n', chr(197).chr(139) => 'N',
 222          chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
 223          chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
 224          chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
 225          chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe',
 226          chr(197).chr(148) => 'R',chr(197).chr(149) => 'r',
 227          chr(197).chr(150) => 'R',chr(197).chr(151) => 'r',
 228          chr(197).chr(152) => 'R',chr(197).chr(153) => 'r',
 229          chr(197).chr(154) => 'S',chr(197).chr(155) => 's',
 230          chr(197).chr(156) => 'S',chr(197).chr(157) => 's',
 231          chr(197).chr(158) => 'S',chr(197).chr(159) => 's',
 232          chr(197).chr(160) => 'S', chr(197).chr(161) => 's',
 233          chr(197).chr(162) => 'T', chr(197).chr(163) => 't',
 234          chr(197).chr(164) => 'T', chr(197).chr(165) => 't',
 235          chr(197).chr(166) => 'T', chr(197).chr(167) => 't',
 236          chr(197).chr(168) => 'U', chr(197).chr(169) => 'u',
 237          chr(197).chr(170) => 'U', chr(197).chr(171) => 'u',
 238          chr(197).chr(172) => 'U', chr(197).chr(173) => 'u',
 239          chr(197).chr(174) => 'U', chr(197).chr(175) => 'u',
 240          chr(197).chr(176) => 'U', chr(197).chr(177) => 'u',
 241          chr(197).chr(178) => 'U', chr(197).chr(179) => 'u',
 242          chr(197).chr(180) => 'W', chr(197).chr(181) => 'w',
 243          chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y',
 244          chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z',
 245          chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
 246          chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
 247          chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
 248          // Euro Sign
 249          chr(226).chr(130).chr(172) => 'E');
 250  
 251          $string = strtr($string, $chars);
 252      } else {
 253          // Assume ISO-8859-1 if not UTF-8
 254          $chars['in'] = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158)
 255              .chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194)
 256              .chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202)
 257              .chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210)
 258              .chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218)
 259              .chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227)
 260              .chr(228).chr(229).