我可以在 PHP 或 javascript 中使用某些函数将所有 HTML 实体转换为它们的十进制等价物吗?
Is there some function I can use in PHP or javascript to convert ALL HTML entities to their decimal equivalents?
问题如标题所述。我正在使用 jQuery 的 XML 解析功能来处理 XML,其中经常包含形如 &Acirc;
的 HTML 命名实体,这常常会导致我的应用程序出错,因为 jQuery 不会将其识别为有效的 XML。
为了避免这种情况,目前我只是在将 XML 传递给客户端之前使用 PHP 对其进行处理 - 这是我的代码片段:
// htmlentities() turns non-ASCII characters into named HTML entities; each named
// entity is then swapped by hand for its decimal numeric reference.
// The search string must be the named entity and the replacement the numeric form,
// otherwise every str_replace() call is a no-op.
$fixedmessage = str_replace('&Acirc;', '&#194;', htmlentities($MessageText[$j], ENT_COMPAT, "UTF-8" ));
$fixedmessage = str_replace('&pound;', '&#163;', $fixedmessage);
$fixedmessage = str_replace('&Atilde;', '&#195;', $fixedmessage);
$fixedmessage = str_replace('&iexcl;', '&#161;', $fixedmessage);
$fixedmessage = str_replace('&aacute;', '&#225;', $fixedmessage);
$fixedmessage = str_replace('&iacute;', '&#237;', $fixedmessage);
...
由于 PHP 的 htmlentities
函数除了最基本的情况之外似乎几乎派不上用场,我只能在某个特殊字符出问题时逐个手动替换,但这种做法既不优雅,我想效率也不高。有没有更好的方法?
如何先解码它们,然后在 XML 模式下重新编码:
// Decode the entities in $str first, then encode the result again with the ENT_XML1 flag.
htmlentities(html_entity_decode($str), ENT_XML1);
这是一个简单的解决方案:
/**
 * Replace named character references (e.g. "&aacute;") with hexadecimal
 * numeric references (e.g. "&#x00E1;").
 *
 * Only names present in the lookup table are converted; any other
 * "&name;" sequence is returned unchanged.
 *
 * @param string $str Text that may contain named entities.
 * @return string|null The converted text, or null if the PCRE call fails.
 */
function decode_named_entities($str) {
    // Entity name => four-digit hexadecimal code point.
    static $entities = array( "Aacute"=>"00C1", "aacute"=>"00E1", "Acirc"=>"00C2", "acirc"=>"00E2",
        "acute"=>"00B4", "AElig"=>"00C6", "aelig"=>"00E6", "Agrave"=>"00C0", "agrave"=>"00E0",
        "alefsym"=>"2135", "Alpha"=>"0391", "alpha"=>"03B1", "amp"=>"0026", "and"=>"2227", "ang"=>"2220",
        "apos"=>"0027", "Aring"=>"00C5", "aring"=>"00E5", "asymp"=>"2248", "Atilde"=>"00C3",
        "atilde"=>"00E3", "Auml"=>"00C4", "auml"=>"00E4", "bdquo"=>"201E", "Beta"=>"0392", "beta"=>"03B2",
        "brvbar"=>"00A6", "bull"=>"2022", "cap"=>"2229", "Ccedil"=>"00C7", "ccedil"=>"00E7",
        "cedil"=>"00B8", "cent"=>"00A2", "Chi"=>"03A7", "chi"=>"03C7", "circ"=>"02C6", "clubs"=>"2663",
        "cong"=>"2245", "copy"=>"00A9", "crarr"=>"21B5", "cup"=>"222A", "curren"=>"00A4", "dagger"=>"2020",
        "Dagger"=>"2021", "darr"=>"2193", "dArr"=>"21D3", "deg"=>"00B0", "Delta"=>"0394", "delta"=>"03B4",
        "diams"=>"2666", "divide"=>"00F7", "Eacute"=>"00C9", "eacute"=>"00E9", "Ecirc"=>"00CA",
        "ecirc"=>"00EA", "Egrave"=>"00C8", "egrave"=>"00E8", "empty"=>"2205", "emsp"=>"2003",
        "ensp"=>"2002", "Epsilon"=>"0395", "epsilon"=>"03B5", "equiv"=>"2261", "Eta"=>"0397",
        "eta"=>"03B7", "ETH"=>"00D0", "eth"=>"00F0", "Euml"=>"00CB", "euml"=>"00EB", "euro"=>"20AC",
        "exist"=>"2203", "fnof"=>"0192", "forall"=>"2200", "frac12"=>"00BD", "frac14"=>"00BC",
        "frac34"=>"00BE", "frasl"=>"2044", "Gamma"=>"0393", "gamma"=>"03B3", "ge"=>"2265", "gt"=>"003E",
        "harr"=>"2194", "hArr"=>"21D4", "hearts"=>"2665", "hellip"=>"2026", "Iacute"=>"00CD",
        "iacute"=>"00ED", "Icirc"=>"00CE", "icirc"=>"00EE", "iexcl"=>"00A1", "Igrave"=>"00CC",
        "igrave"=>"00EC", "image"=>"2111", "infin"=>"221E", "int"=>"222B", "Iota"=>"0399", "iota"=>"03B9",
        "iquest"=>"00BF", "isin"=>"2208", "Iuml"=>"00CF", "iuml"=>"00EF", "Kappa"=>"039A", "kappa"=>"03BA",
        "Lambda"=>"039B", "lambda"=>"03BB", "lang"=>"2329", "laquo"=>"00AB", "larr"=>"2190",
        "lArr"=>"21D0", "lceil"=>"2308", "ldquo"=>"201C", "le"=>"2264", "lfloor"=>"230A", "lowast"=>"2217",
        "loz"=>"25CA", "lrm"=>"200E", "lsaquo"=>"2039", "lsquo"=>"2018", "lt"=>"003C", "macr"=>"00AF",
        "mdash"=>"2014", "micro"=>"00B5", "middot"=>"00B7", "minus"=>"2212", "Mu"=>"039C", "mu"=>"03BC",
        "nabla"=>"2207", "nbsp"=>"00A0", "ndash"=>"2013", "ne"=>"2260", "ni"=>"220B", "not"=>"00AC",
        "notin"=>"2209", "nsub"=>"2284", "Ntilde"=>"00D1", "ntilde"=>"00F1", "Nu"=>"039D", "nu"=>"03BD",
        "Oacute"=>"00D3", "oacute"=>"00F3", "Ocirc"=>"00D4", "ocirc"=>"00F4", "OElig"=>"0152",
        "oelig"=>"0153", "Ograve"=>"00D2", "ograve"=>"00F2", "oline"=>"203E", "Omega"=>"03A9",
        "omega"=>"03C9", "Omicron"=>"039F", "omicron"=>"03BF", "oplus"=>"2295", "or"=>"2228",
        "ordf"=>"00AA", "ordm"=>"00BA", "Oslash"=>"00D8", "oslash"=>"00F8", "Otilde"=>"00D5",
        "otilde"=>"00F5", "otimes"=>"2297", "Ouml"=>"00D6", "ouml"=>"00F6", "para"=>"00B6", "part"=>"2202",
        "permil"=>"2030", "perp"=>"22A5", "Phi"=>"03A6", "phi"=>"03C6", "Pi"=>"03A0", "pi"=>"03C0",
        "piv"=>"03D6", "plusmn"=>"00B1", "pound"=>"00A3", "prime"=>"2032", "Prime"=>"2033", "prod"=>"220F",
        "prop"=>"221D", "Psi"=>"03A8", "psi"=>"03C8", "quot"=>"0022", "radic"=>"221A", "rang"=>"232A",
        "raquo"=>"00BB", "rarr"=>"2192", "rArr"=>"21D2", "rceil"=>"2309", "rdquo"=>"201D", "real"=>"211C",
        "reg"=>"00AE", "rfloor"=>"230B", "Rho"=>"03A1", "rho"=>"03C1", "rlm"=>"200F", "rsaquo"=>"203A",
        "rsquo"=>"2019", "sbquo"=>"201A", "Scaron"=>"0160", "scaron"=>"0161", "sdot"=>"22C5",
        "sect"=>"00A7", "shy"=>"00AD", "Sigma"=>"03A3", "sigma"=>"03C3", "sigmaf"=>"03C2", "sim"=>"223C",
        "spades"=>"2660", "sub"=>"2282", "sube"=>"2286", "sum"=>"2211", "sup"=>"2283", "sup1"=>"00B9",
        "sup2"=>"00B2", "sup3"=>"00B3", "supe"=>"2287", "szlig"=>"00DF", "Tau"=>"03A4", "tau"=>"03C4",
        "there4"=>"2234", "Theta"=>"0398", "theta"=>"03B8", "thetasym"=>"03D1", "thinsp"=>"2009",
        "THORN"=>"00DE", "thorn"=>"00FE", "tilde"=>"02DC", "times"=>"00D7", "trade"=>"2122",
        "Uacute"=>"00DA", "uacute"=>"00FA", "uarr"=>"2191", "uArr"=>"21D1", "Ucirc"=>"00DB",
        "ucirc"=>"00FB", "Ugrave"=>"00D9", "ugrave"=>"00F9", "uml"=>"00A8", "upsih"=>"03D2",
        "Upsilon"=>"03A5", "upsilon"=>"03C5", "Uuml"=>"00DC", "uuml"=>"00FC", "weierp"=>"2118",
        "Xi"=>"039E", "xi"=>"03BE", "Yacute"=>"00DD", "yacute"=>"00FD", "yen"=>"00A5", "yuml"=>"00FF",
        "Yuml"=>"0178", "Zeta"=>"0396", "zeta"=>"03B6", "zwj"=>"200D", "zwnj"=>"200C");

    // Several names in the table contain digits after the first letter
    // (frac12, sup2, there4, ...), so the pattern has to accept digits as well;
    // a letters-only class would never match them.
    return preg_replace_callback('~&([A-Za-z][A-Za-z0-9]*);~',
        function($m) use($entities) {
            $e = $m[1];
            // Unknown names are passed through exactly as they were matched.
            return isset($entities[$e]) ? "&#x{$entities[$e]};" : $m[0];
        },
        $str
    );
}
更快的方法是从上面生成两个数组:
$search = ["&Aacute;", "&aacute;" etc
$replac = ["&#x00C1;", "&#x00E1;" etc
并应用 str_replace
.
<?php
// Build two parallel lists: "&name;" search strings and "&#code;" replacements.
$entities = getEntities();
$from = array();
$to = array();
foreach ($entities as $ent => $code) {
    $from[] = '&' . $ent . ';';
    $to[] = '&#' . $code . ';';
}
// Sample input; it must contain named entities for the replacement to have any effect.
$str = '&xi; and &tau;';
// you can pass arrays to str_replace
// see http://php.net/manual/en/function.str-replace.php
echo str_replace($from, $to, $str); // prints "&#958; and &#964;"
/**
 * Return the lookup table of named entities.
 *
 * @return array<string, int> Entity name (without "&" and ";") => decimal code point.
 */
function getEntities() {
    // http://www.mit.edu/afs.new/sipb/project/php/include/entities.h
    $map = [
        'AElig' => 198, 'Aacute' => 193, 'Acirc' => 194, 'Agrave' => 192,
        'Alpha' => 913, 'Aring' => 197, 'Atilde' => 195, 'Auml' => 196,
        'Beta' => 914, 'Ccedil' => 199, 'Chi' => 935, 'Dagger' => 8225,
        'Delta' => 916, 'ETH' => 208, 'Eacute' => 201, 'Ecirc' => 202,
        'Egrave' => 200, 'Epsilon' => 917, 'Eta' => 919, 'Euml' => 203,
        'Gamma' => 915, 'Iacute' => 205, 'Icirc' => 206, 'Igrave' => 204,
        'Iota' => 921, 'Iuml' => 207, 'Kappa' => 922, 'Lambda' => 923,
        'Mu' => 924, 'Ntilde' => 209, 'Nu' => 925, 'OElig' => 338,
        'Oacute' => 211, 'Ocirc' => 212, 'Ograve' => 210, 'Omega' => 937,
        'Omicron' => 927, 'Oslash' => 216, 'Otilde' => 213, 'Ouml' => 214,
        'Phi' => 934, 'Pi' => 928, 'Prime' => 8243, 'Psi' => 936,
        'Rho' => 929, 'Scaron' => 352, 'Sigma' => 931, 'THORN' => 222,
        'Tau' => 932, 'Theta' => 920, 'Uacute' => 218, 'Ucirc' => 219,
        'Ugrave' => 217, 'Upsilon' => 933, 'Uuml' => 220, 'Xi' => 926,
        'Yacute' => 221, 'Yuml' => 376, 'Zeta' => 918, 'aacute' => 225,
        'acirc' => 226, 'acute' => 180, 'aelig' => 230, 'agrave' => 224,
        'alefsym' => 8501, 'alpha' => 945, 'amp' => 38, 'and' => 8743,
        'ang' => 8736, 'aring' => 229, 'asymp' => 8776, 'atilde' => 227,
        'auml' => 228, 'bdquo' => 8222, 'beta' => 946, 'brvbar' => 166,
        'bull' => 8226, 'cap' => 8745, 'ccedil' => 231, 'cedil' => 184,
        'cent' => 162, 'chi' => 967, 'circ' => 710, 'clubs' => 9827,
        'cong' => 8773, 'copy' => 169, 'crarr' => 8629, 'cup' => 8746,
        'curren' => 164, 'dArr' => 8659, 'dagger' => 8224, 'darr' => 8595,
        'deg' => 176, 'delta' => 948, 'diams' => 9830, 'divide' => 247,
        'eacute' => 233, 'ecirc' => 234, 'egrave' => 232, 'empty' => 8709,
        'emsp' => 8195, 'ensp' => 8194, 'epsilon' => 949, 'equiv' => 8801,
        'eta' => 951, 'eth' => 240, 'euml' => 235, 'euro' => 8364,
        'exist' => 8707, 'fnof' => 402, 'forall' => 8704, 'frac12' => 189,
        'frac14' => 188, 'frac34' => 190, 'frasl' => 8260, 'gamma' => 947,
        'ge' => 8805, 'gt' => 62, 'hArr' => 8660, 'harr' => 8596,
        'hearts' => 9829, 'hellip' => 8230, 'iacute' => 237, 'icirc' => 238,
        'iexcl' => 161, 'igrave' => 236, 'image' => 8465, 'infin' => 8734,
        'int' => 8747, 'iota' => 953, 'iquest' => 191, 'isin' => 8712,
        'iuml' => 239, 'kappa' => 954, 'lArr' => 8656, 'lambda' => 955,
        'lang' => 9001, 'laquo' => 171, 'larr' => 8592, 'lceil' => 8968,
        'ldquo' => 8220, 'le' => 8804, 'lfloor' => 8970, 'lowast' => 8727,
        'loz' => 9674, 'lrm' => 8206, 'lsaquo' => 8249, 'lsquo' => 8216,
        'lt' => 60, 'macr' => 175, 'mdash' => 8212, 'micro' => 181,
        'middot' => 183, 'minus' => 8722, 'mu' => 956, 'nabla' => 8711,
        'nbsp' => 160, 'ndash' => 8211, 'ne' => 8800, 'ni' => 8715,
        'not' => 172, 'notin' => 8713, 'nsub' => 8836, 'ntilde' => 241,
        'nu' => 957, 'oacute' => 243, 'ocirc' => 244, 'oelig' => 339,
        'ograve' => 242, 'oline' => 8254, 'omega' => 969, 'omicron' => 959,
        'oplus' => 8853, 'or' => 8744, 'ordf' => 170, 'ordm' => 186,
        'oslash' => 248, 'otilde' => 245, 'otimes' => 8855, 'ouml' => 246,
        'para' => 182, 'part' => 8706, 'permil' => 8240, 'perp' => 8869,
        'phi' => 966, 'pi' => 960, 'piv' => 982, 'plusmn' => 177,
        'pound' => 163, 'prime' => 8242, 'prod' => 8719, 'prop' => 8733,
        'psi' => 968, 'quot' => 34, 'rArr' => 8658, 'radic' => 8730,
        'rang' => 9002, 'raquo' => 187, 'rarr' => 8594, 'rceil' => 8969,
        'rdquo' => 8221, 'real' => 8476, 'reg' => 174, 'rfloor' => 8971,
        'rho' => 961, 'rlm' => 8207, 'rsaquo' => 8250, 'rsquo' => 8217,
        'sbquo' => 8218, 'scaron' => 353, 'sdot' => 8901, 'sect' => 167,
        'shy' => 173, 'sigma' => 963, 'sigmaf' => 962, 'sim' => 8764,
        'spades' => 9824, 'sub' => 8834, 'sube' => 8838, 'sum' => 8721,
        'sup' => 8835, 'sup1' => 185, 'sup2' => 178, 'sup3' => 179,
        'supe' => 8839, 'szlig' => 223, 'tau' => 964, 'there4' => 8756,
        'theta' => 952, 'thetasym' => 977, 'thinsp' => 8201, 'thorn' => 254,
        'tilde' => 732, 'times' => 215, 'trade' => 8482, 'uArr' => 8657,
        'uacute' => 250, 'uarr' => 8593, 'ucirc' => 251, 'ugrave' => 249,
        'uml' => 168, 'upsih' => 978, 'upsilon' => 965, 'uuml' => 252,
        'weierp' => 8472, 'xi' => 958, 'yacute' => 253, 'yen' => 165,
        'yuml' => 255, 'zeta' => 950, 'zwj' => 8205, 'zwnj' => 8204,
    ];

    return $map;
}
?>
问题如标题所述。我正在使用 jQuery 的 XML 解析功能来处理 XML,其中经常包含形如 &Acirc;
的 HTML 命名实体,这常常会导致我的应用程序出错,因为 jQuery 不会将其识别为有效的 XML。
为了避免这种情况,目前我只是在将 XML 传递给客户端之前使用 PHP 对其进行处理 - 这是我的代码片段:
// htmlentities() turns non-ASCII characters into named HTML entities; each named
// entity is then swapped by hand for its decimal numeric reference.
// The search string must be the named entity and the replacement the numeric form,
// otherwise every str_replace() call is a no-op.
$fixedmessage = str_replace('&Acirc;', '&#194;', htmlentities($MessageText[$j], ENT_COMPAT, "UTF-8" ));
$fixedmessage = str_replace('&pound;', '&#163;', $fixedmessage);
$fixedmessage = str_replace('&Atilde;', '&#195;', $fixedmessage);
$fixedmessage = str_replace('&iexcl;', '&#161;', $fixedmessage);
$fixedmessage = str_replace('&aacute;', '&#225;', $fixedmessage);
$fixedmessage = str_replace('&iacute;', '&#237;', $fixedmessage);
...
由于 PHP 的 htmlentities
函数除了最基本的情况之外似乎几乎派不上用场,我只能在某个特殊字符出问题时逐个手动替换,但这种做法既不优雅,我想效率也不高。有没有更好的方法?
如何先解码它们,然后在 XML 模式下重新编码:
// Decode the entities in $str first, then encode the result again with the ENT_XML1 flag.
htmlentities(html_entity_decode($str), ENT_XML1);
这是一个简单的解决方案:
/**
 * Replace named character references (e.g. "&aacute;") with hexadecimal
 * numeric references (e.g. "&#x00E1;").
 *
 * Only names present in the lookup table are converted; any other
 * "&name;" sequence is returned unchanged.
 *
 * @param string $str Text that may contain named entities.
 * @return string|null The converted text, or null if the PCRE call fails.
 */
function decode_named_entities($str) {
    // Entity name => four-digit hexadecimal code point.
    static $entities = array( "Aacute"=>"00C1", "aacute"=>"00E1", "Acirc"=>"00C2", "acirc"=>"00E2",
        "acute"=>"00B4", "AElig"=>"00C6", "aelig"=>"00E6", "Agrave"=>"00C0", "agrave"=>"00E0",
        "alefsym"=>"2135", "Alpha"=>"0391", "alpha"=>"03B1", "amp"=>"0026", "and"=>"2227", "ang"=>"2220",
        "apos"=>"0027", "Aring"=>"00C5", "aring"=>"00E5", "asymp"=>"2248", "Atilde"=>"00C3",
        "atilde"=>"00E3", "Auml"=>"00C4", "auml"=>"00E4", "bdquo"=>"201E", "Beta"=>"0392", "beta"=>"03B2",
        "brvbar"=>"00A6", "bull"=>"2022", "cap"=>"2229", "Ccedil"=>"00C7", "ccedil"=>"00E7",
        "cedil"=>"00B8", "cent"=>"00A2", "Chi"=>"03A7", "chi"=>"03C7", "circ"=>"02C6", "clubs"=>"2663",
        "cong"=>"2245", "copy"=>"00A9", "crarr"=>"21B5", "cup"=>"222A", "curren"=>"00A4", "dagger"=>"2020",
        "Dagger"=>"2021", "darr"=>"2193", "dArr"=>"21D3", "deg"=>"00B0", "Delta"=>"0394", "delta"=>"03B4",
        "diams"=>"2666", "divide"=>"00F7", "Eacute"=>"00C9", "eacute"=>"00E9", "Ecirc"=>"00CA",
        "ecirc"=>"00EA", "Egrave"=>"00C8", "egrave"=>"00E8", "empty"=>"2205", "emsp"=>"2003",
        "ensp"=>"2002", "Epsilon"=>"0395", "epsilon"=>"03B5", "equiv"=>"2261", "Eta"=>"0397",
        "eta"=>"03B7", "ETH"=>"00D0", "eth"=>"00F0", "Euml"=>"00CB", "euml"=>"00EB", "euro"=>"20AC",
        "exist"=>"2203", "fnof"=>"0192", "forall"=>"2200", "frac12"=>"00BD", "frac14"=>"00BC",
        "frac34"=>"00BE", "frasl"=>"2044", "Gamma"=>"0393", "gamma"=>"03B3", "ge"=>"2265", "gt"=>"003E",
        "harr"=>"2194", "hArr"=>"21D4", "hearts"=>"2665", "hellip"=>"2026", "Iacute"=>"00CD",
        "iacute"=>"00ED", "Icirc"=>"00CE", "icirc"=>"00EE", "iexcl"=>"00A1", "Igrave"=>"00CC",
        "igrave"=>"00EC", "image"=>"2111", "infin"=>"221E", "int"=>"222B", "Iota"=>"0399", "iota"=>"03B9",
        "iquest"=>"00BF", "isin"=>"2208", "Iuml"=>"00CF", "iuml"=>"00EF", "Kappa"=>"039A", "kappa"=>"03BA",
        "Lambda"=>"039B", "lambda"=>"03BB", "lang"=>"2329", "laquo"=>"00AB", "larr"=>"2190",
        "lArr"=>"21D0", "lceil"=>"2308", "ldquo"=>"201C", "le"=>"2264", "lfloor"=>"230A", "lowast"=>"2217",
        "loz"=>"25CA", "lrm"=>"200E", "lsaquo"=>"2039", "lsquo"=>"2018", "lt"=>"003C", "macr"=>"00AF",
        "mdash"=>"2014", "micro"=>"00B5", "middot"=>"00B7", "minus"=>"2212", "Mu"=>"039C", "mu"=>"03BC",
        "nabla"=>"2207", "nbsp"=>"00A0", "ndash"=>"2013", "ne"=>"2260", "ni"=>"220B", "not"=>"00AC",
        "notin"=>"2209", "nsub"=>"2284", "Ntilde"=>"00D1", "ntilde"=>"00F1", "Nu"=>"039D", "nu"=>"03BD",
        "Oacute"=>"00D3", "oacute"=>"00F3", "Ocirc"=>"00D4", "ocirc"=>"00F4", "OElig"=>"0152",
        "oelig"=>"0153", "Ograve"=>"00D2", "ograve"=>"00F2", "oline"=>"203E", "Omega"=>"03A9",
        "omega"=>"03C9", "Omicron"=>"039F", "omicron"=>"03BF", "oplus"=>"2295", "or"=>"2228",
        "ordf"=>"00AA", "ordm"=>"00BA", "Oslash"=>"00D8", "oslash"=>"00F8", "Otilde"=>"00D5",
        "otilde"=>"00F5", "otimes"=>"2297", "Ouml"=>"00D6", "ouml"=>"00F6", "para"=>"00B6", "part"=>"2202",
        "permil"=>"2030", "perp"=>"22A5", "Phi"=>"03A6", "phi"=>"03C6", "Pi"=>"03A0", "pi"=>"03C0",
        "piv"=>"03D6", "plusmn"=>"00B1", "pound"=>"00A3", "prime"=>"2032", "Prime"=>"2033", "prod"=>"220F",
        "prop"=>"221D", "Psi"=>"03A8", "psi"=>"03C8", "quot"=>"0022", "radic"=>"221A", "rang"=>"232A",
        "raquo"=>"00BB", "rarr"=>"2192", "rArr"=>"21D2", "rceil"=>"2309", "rdquo"=>"201D", "real"=>"211C",
        "reg"=>"00AE", "rfloor"=>"230B", "Rho"=>"03A1", "rho"=>"03C1", "rlm"=>"200F", "rsaquo"=>"203A",
        "rsquo"=>"2019", "sbquo"=>"201A", "Scaron"=>"0160", "scaron"=>"0161", "sdot"=>"22C5",
        "sect"=>"00A7", "shy"=>"00AD", "Sigma"=>"03A3", "sigma"=>"03C3", "sigmaf"=>"03C2", "sim"=>"223C",
        "spades"=>"2660", "sub"=>"2282", "sube"=>"2286", "sum"=>"2211", "sup"=>"2283", "sup1"=>"00B9",
        "sup2"=>"00B2", "sup3"=>"00B3", "supe"=>"2287", "szlig"=>"00DF", "Tau"=>"03A4", "tau"=>"03C4",
        "there4"=>"2234", "Theta"=>"0398", "theta"=>"03B8", "thetasym"=>"03D1", "thinsp"=>"2009",
        "THORN"=>"00DE", "thorn"=>"00FE", "tilde"=>"02DC", "times"=>"00D7", "trade"=>"2122",
        "Uacute"=>"00DA", "uacute"=>"00FA", "uarr"=>"2191", "uArr"=>"21D1", "Ucirc"=>"00DB",
        "ucirc"=>"00FB", "Ugrave"=>"00D9", "ugrave"=>"00F9", "uml"=>"00A8", "upsih"=>"03D2",
        "Upsilon"=>"03A5", "upsilon"=>"03C5", "Uuml"=>"00DC", "uuml"=>"00FC", "weierp"=>"2118",
        "Xi"=>"039E", "xi"=>"03BE", "Yacute"=>"00DD", "yacute"=>"00FD", "yen"=>"00A5", "yuml"=>"00FF",
        "Yuml"=>"0178", "Zeta"=>"0396", "zeta"=>"03B6", "zwj"=>"200D", "zwnj"=>"200C");

    // Several names in the table contain digits after the first letter
    // (frac12, sup2, there4, ...), so the pattern has to accept digits as well;
    // a letters-only class would never match them.
    return preg_replace_callback('~&([A-Za-z][A-Za-z0-9]*);~',
        function($m) use($entities) {
            $e = $m[1];
            // Unknown names are passed through exactly as they were matched.
            return isset($entities[$e]) ? "&#x{$entities[$e]};" : $m[0];
        },
        $str
    );
}
更快的方法是从上面生成两个数组:
$search = ["&Aacute;", "&aacute;" etc
$replac = ["&#x00C1;", "&#x00E1;" etc
并应用 str_replace
.
<?php
// Build two parallel lists: "&name;" search strings and "&#code;" replacements.
$entities = getEntities();
$from = array();
$to = array();
foreach ($entities as $ent => $code) {
    $from[] = '&' . $ent . ';';
    $to[] = '&#' . $code . ';';
}
// Sample input; it must contain named entities for the replacement to have any effect.
$str = '&xi; and &tau;';
// you can pass arrays to str_replace
// see http://php.net/manual/en/function.str-replace.php
echo str_replace($from, $to, $str); // prints "&#958; and &#964;"
/**
 * Return the lookup table of named entities.
 *
 * @return array<string, int> Entity name (without "&" and ";") => decimal code point.
 */
function getEntities() {
    // http://www.mit.edu/afs.new/sipb/project/php/include/entities.h
    $map = [
        'AElig' => 198, 'Aacute' => 193, 'Acirc' => 194, 'Agrave' => 192,
        'Alpha' => 913, 'Aring' => 197, 'Atilde' => 195, 'Auml' => 196,
        'Beta' => 914, 'Ccedil' => 199, 'Chi' => 935, 'Dagger' => 8225,
        'Delta' => 916, 'ETH' => 208, 'Eacute' => 201, 'Ecirc' => 202,
        'Egrave' => 200, 'Epsilon' => 917, 'Eta' => 919, 'Euml' => 203,
        'Gamma' => 915, 'Iacute' => 205, 'Icirc' => 206, 'Igrave' => 204,
        'Iota' => 921, 'Iuml' => 207, 'Kappa' => 922, 'Lambda' => 923,
        'Mu' => 924, 'Ntilde' => 209, 'Nu' => 925, 'OElig' => 338,
        'Oacute' => 211, 'Ocirc' => 212, 'Ograve' => 210, 'Omega' => 937,
        'Omicron' => 927, 'Oslash' => 216, 'Otilde' => 213, 'Ouml' => 214,
        'Phi' => 934, 'Pi' => 928, 'Prime' => 8243, 'Psi' => 936,
        'Rho' => 929, 'Scaron' => 352, 'Sigma' => 931, 'THORN' => 222,
        'Tau' => 932, 'Theta' => 920, 'Uacute' => 218, 'Ucirc' => 219,
        'Ugrave' => 217, 'Upsilon' => 933, 'Uuml' => 220, 'Xi' => 926,
        'Yacute' => 221, 'Yuml' => 376, 'Zeta' => 918, 'aacute' => 225,
        'acirc' => 226, 'acute' => 180, 'aelig' => 230, 'agrave' => 224,
        'alefsym' => 8501, 'alpha' => 945, 'amp' => 38, 'and' => 8743,
        'ang' => 8736, 'aring' => 229, 'asymp' => 8776, 'atilde' => 227,
        'auml' => 228, 'bdquo' => 8222, 'beta' => 946, 'brvbar' => 166,
        'bull' => 8226, 'cap' => 8745, 'ccedil' => 231, 'cedil' => 184,
        'cent' => 162, 'chi' => 967, 'circ' => 710, 'clubs' => 9827,
        'cong' => 8773, 'copy' => 169, 'crarr' => 8629, 'cup' => 8746,
        'curren' => 164, 'dArr' => 8659, 'dagger' => 8224, 'darr' => 8595,
        'deg' => 176, 'delta' => 948, 'diams' => 9830, 'divide' => 247,
        'eacute' => 233, 'ecirc' => 234, 'egrave' => 232, 'empty' => 8709,
        'emsp' => 8195, 'ensp' => 8194, 'epsilon' => 949, 'equiv' => 8801,
        'eta' => 951, 'eth' => 240, 'euml' => 235, 'euro' => 8364,
        'exist' => 8707, 'fnof' => 402, 'forall' => 8704, 'frac12' => 189,
        'frac14' => 188, 'frac34' => 190, 'frasl' => 8260, 'gamma' => 947,
        'ge' => 8805, 'gt' => 62, 'hArr' => 8660, 'harr' => 8596,
        'hearts' => 9829, 'hellip' => 8230, 'iacute' => 237, 'icirc' => 238,
        'iexcl' => 161, 'igrave' => 236, 'image' => 8465, 'infin' => 8734,
        'int' => 8747, 'iota' => 953, 'iquest' => 191, 'isin' => 8712,
        'iuml' => 239, 'kappa' => 954, 'lArr' => 8656, 'lambda' => 955,
        'lang' => 9001, 'laquo' => 171, 'larr' => 8592, 'lceil' => 8968,
        'ldquo' => 8220, 'le' => 8804, 'lfloor' => 8970, 'lowast' => 8727,
        'loz' => 9674, 'lrm' => 8206, 'lsaquo' => 8249, 'lsquo' => 8216,
        'lt' => 60, 'macr' => 175, 'mdash' => 8212, 'micro' => 181,
        'middot' => 183, 'minus' => 8722, 'mu' => 956, 'nabla' => 8711,
        'nbsp' => 160, 'ndash' => 8211, 'ne' => 8800, 'ni' => 8715,
        'not' => 172, 'notin' => 8713, 'nsub' => 8836, 'ntilde' => 241,
        'nu' => 957, 'oacute' => 243, 'ocirc' => 244, 'oelig' => 339,
        'ograve' => 242, 'oline' => 8254, 'omega' => 969, 'omicron' => 959,
        'oplus' => 8853, 'or' => 8744, 'ordf' => 170, 'ordm' => 186,
        'oslash' => 248, 'otilde' => 245, 'otimes' => 8855, 'ouml' => 246,
        'para' => 182, 'part' => 8706, 'permil' => 8240, 'perp' => 8869,
        'phi' => 966, 'pi' => 960, 'piv' => 982, 'plusmn' => 177,
        'pound' => 163, 'prime' => 8242, 'prod' => 8719, 'prop' => 8733,
        'psi' => 968, 'quot' => 34, 'rArr' => 8658, 'radic' => 8730,
        'rang' => 9002, 'raquo' => 187, 'rarr' => 8594, 'rceil' => 8969,
        'rdquo' => 8221, 'real' => 8476, 'reg' => 174, 'rfloor' => 8971,
        'rho' => 961, 'rlm' => 8207, 'rsaquo' => 8250, 'rsquo' => 8217,
        'sbquo' => 8218, 'scaron' => 353, 'sdot' => 8901, 'sect' => 167,
        'shy' => 173, 'sigma' => 963, 'sigmaf' => 962, 'sim' => 8764,
        'spades' => 9824, 'sub' => 8834, 'sube' => 8838, 'sum' => 8721,
        'sup' => 8835, 'sup1' => 185, 'sup2' => 178, 'sup3' => 179,
        'supe' => 8839, 'szlig' => 223, 'tau' => 964, 'there4' => 8756,
        'theta' => 952, 'thetasym' => 977, 'thinsp' => 8201, 'thorn' => 254,
        'tilde' => 732, 'times' => 215, 'trade' => 8482, 'uArr' => 8657,
        'uacute' => 250, 'uarr' => 8593, 'ucirc' => 251, 'ugrave' => 249,
        'uml' => 168, 'upsih' => 978, 'upsilon' => 965, 'uuml' => 252,
        'weierp' => 8472, 'xi' => 958, 'yacute' => 253, 'yen' => 165,
        'yuml' => 255, 'zeta' => 950, 'zwj' => 8205, 'zwnj' => 8204,
    ];

    return $map;
}
?>