package { /** * @author Rob Ruchte rob@thirdpartylabs.com * * A static class for encoding and decoding XML and HTML character entities. * Based on an AS2 class from Jim Cheng jim.cheng@effectiveui.com * */ public class CharacterEntity { private static var instance : CharacterEntity; private static var allowInstantiation : Boolean = false; private static var initialized : Boolean = false; private static var entityMap : Object = new Object(); private static var xmlEntities : Object = new Object(); private static var xhtmlEntities : Object = new Object(); private static var entities : Object = new Object(); private static var _temp = initialize(); /** * Constructor * Only valid if called from getInstance() */ public function CharacterEntity() : void { if (!allowInstantiation) { throw new Error("Error: Instantiation failed: Use CharacterEntity.getInstance() instead of new."); } } /** * Returns an instance of this class, creating it if necessary, * provides Singleton behavior */ public static function getInstance() : CharacterEntity { if (instance == null) { allowInstantiation = true; instance = new CharacterEntity(); allowInstantiation = false; } return instance; } /** * Decodes character entities in a given string to UTF-8 Unicode equivalents. * * @param The character entities encoded text to decode. * @param Whether to only use the XML character entity set or * the full 252 character HTML entity set. * * @usage decode('Mý string', true); */ public static function decode(text : String, xmlOnly : Boolean = false) : String { if (xmlOnly) { return decodeEntityWithTable(text, xmlEntities) } else { return decodeEntityWithTable(text, entities) } } /** * Encodes a Unicode string, transforming characters with character entity * equivalents from their Unicode bytecode to the character entity representation. * * @param The Unicode text (e.g. Actionscript string) to encode. * @param Whether to only use the XML character entity set or * the full 252 character HTML entity set. * * @usage encode(' Unicode string', true); */ public static function encode(text : String, xmlOnly : Boolean = false) : String { if (xmlOnly) { return encodeEntityWithTable(text, xmlEntities) } else { return encodeEntityWithTable(text, entities) } } public static function decodeXHTML(text : String, decodeAmpersands:Boolean=false) : String { if (typeof text != 'string') { return text; } //undo flash's ampersand RE-encoding? if (decodeAmpersands) { text = text.replace(new RegExp("&", "\g"), "&"); } return decodeEntityWithTable(text, xhtmlEntities) } public static function encodeXHTML(text : String) : String { return encodeEntityWithTable(text, xhtmlEntities) } public static function decodeEntityWithTable(text:String, translationTable:Object):String { if (typeof text != 'string') { return text; } var i : String; if (text.indexOf('&') == -1) { return text; } for (i in translationTable) { text = text.replace(new RegExp(i, "\g"), translationTable[i]); } text = decodeNumericReferences(text); return text; } public static function encodeEntityWithTable(text:String, translationTable:Object):String { if (typeof text != 'string') { return text; } var i : String; if (text.indexOf('&') == -1) { return text; } var i : String; for (i in translationTable) { text = text.replace(new RegExp(translationTable[i], "\g"), i); } return text; } /** * Decodes numeric entity references in a given string to UTF-8 Unicode equivalents. * * @param The string to decode. * * @usage decodeNumericReferences('Ý'); */ public static function decodeNumericReferences(text : String) : String { if (typeof text != 'string') { return text; } var output : String = ''; var lastPosition : Number = 0; var cursor : Number = 0; var semi : Number; var code : String; cursor = text.indexOf('&#', 0); while (cursor != -1) { semi = text.indexOf(';', cursor); code = text.substring(cursor + 2, semi); output += text.substring(lastPosition, cursor); if (code.charAt(0) == 'x') { output += String.fromCharCode(parseInt('0' + code, 16)); } else { output += String.fromCharCode(parseInt(code, 10)); } lastPosition = semi + 1; cursor = text.indexOf('&#', lastPosition); } output += text.substr(lastPosition); return output; } /** * Initialization method -- do not call, it is invoked on class initialzation. */ private static function initialize() { if (initialized) return; var myInstance : CharacterEntity = getInstance(); // Specified by XML 1.0 entityMap['quot'] = 0x0022; // " quotation mark entityMap['amp'] = 0x0026; // & ampersand entityMap['apos'] = 0x0027; // ' apostrophe entityMap['lt'] = 0x003C; // < less-than sign entityMap['gt'] = 0x003E; // > greater-than sign // Specified by HTML and XHTML DTDs entityMap['nbsp'] = 0x00A0; // no-break space entityMap['iexcl'] = 0x00A1; // ¡ inverted exclamation mark entityMap['cent'] = 0x00A2; // ¢ cent sign entityMap['pound'] = 0x00A3; // £ pound sign entityMap['curren'] = 0x00A4; // ¤ currency sign entityMap['yen'] = 0x00A5; // ¥ yen sign entityMap['brvbar'] = 0x00A6; // ¦ broken bar entityMap['sect'] = 0x00A7; // § section sign entityMap['uml'] = 0x00A8; // ¨ diaeresis entityMap['copy'] = 0x00A9; // © copyright sign entityMap['ordf'] = 0x00AA; // ª feminine ordinal indicator entityMap['laquo'] = 0x00AB; // « left-pointing double angle quotation mark entityMap['not'] = 0x00AC; // ¬ not sign entityMap['shy'] = 0x00AD; // ­ soft hyphen entityMap['reg'] = 0x00AE; // ® registered sign entityMap['macr'] = 0x00AF; // ¯ macron entityMap['deg'] = 0x00B0; // ° degree sign entityMap['plusmn'] = 0x00B1; // ± plus-minus sign entityMap['sup2'] = 0x00B2; // ² superscript two entityMap['sup3'] = 0x00B3; // ³ superscript three entityMap['acute'] = 0x00B4; // ´ acute accent entityMap['micro'] = 0x00B5; // µ micro sign entityMap['para'] = 0x00B6; // ¶ pilcrow sign entityMap['middot'] = 0x00B7; // · middle dot entityMap['cedil'] = 0x00B8; // ¸ cedilla entityMap['sup1'] = 0x00B9; // ¹ superscript one entityMap['ordm'] = 0x00BA; // º masculine ordinal indicator entityMap['raquo'] = 0x00BB; // » right-pointing double angle quotation mark entityMap['frac14'] = 0x00BC; // ¼ vulgar fraction one quarter entityMap['frac12'] = 0x00BD; // ½ vulgar fraction one half entityMap['frac34'] = 0x00BE; // ¾ vulgar fraction three quarters entityMap['iquest'] = 0x00BF; // ¿ inverted question mark entityMap['Agrave'] = 0x00C0; // À latin capital letter a with grave entityMap['Aacute'] = 0x00C1; // Á latin capital letter a with acute entityMap['Acirc'] = 0x00C2; //  latin capital letter a with circumflex entityMap['Atilde'] = 0x00C3; // à latin capital letter a with tilde entityMap['Auml'] = 0x00C4; // Ä latin capital letter a with diaeresis entityMap['Aring'] = 0x00C5; // Å latin capital letter a with ring above entityMap['AElig'] = 0x00C6; // Æ latin capital letter ae entityMap['Ccedil'] = 0x00C7; // Ç latin capital letter c with cedilla entityMap['Egrave'] = 0x00C8; // È latin capital letter e with grave entityMap['Eacute'] = 0x00C9; // É latin capital letter e with acute entityMap['Ecirc'] = 0x00CA; // Ê latin capital letter e with circumflex entityMap['Euml'] = 0x00CB; // Ë latin capital letter e with diaeresis entityMap['Igrave'] = 0x00CC; // Ì latin capital letter i with grave entityMap['Iacute'] = 0x00CD; // Í latin capital letter i with acute entityMap['Icirc'] = 0x00CE; // Î latin capital letter i with circumflex entityMap['Iuml'] = 0x00CF; // Ï latin capital letter i with diaeresis entityMap['ETH'] = 0x00D0; // Ð latin capital letter eth entityMap['Ntilde'] = 0x00D1; // Ñ latin capital letter n with tilde entityMap['Ograve'] = 0x00D2; // Ò latin capital letter o with grave entityMap['Oacute'] = 0x00D3; // Ó latin capital letter o with acute entityMap['Ocirc'] = 0x00D4; // Ô latin capital letter o with circumflex entityMap['Otilde'] = 0x00D5; // Õ latin capital letter o with tilde entityMap['Ouml'] = 0x00D6; // Ö latin capital letter o with diaeresis entityMap['times'] = 0x00D7; // × multiplication sign entityMap['Oslash'] = 0x00D8; // Ø latin capital letter o with stroke entityMap['Ugrave'] = 0x00D9; // Ù latin capital letter u with grave entityMap['Uacute'] = 0x00DA; // Ú latin capital letter u with acute entityMap['Ucirc'] = 0x00DB; // Û latin capital letter u with circumflex entityMap['Uuml'] = 0x00DC; // Ü latin capital letter u with diaeresis entityMap['Yacute'] = 0x00DD; // Ý latin capital letter y with acute entityMap['THORN'] = 0x00DE; // Þ latin capital letter thorn entityMap['szlig'] = 0x00DF; // ß latin capital letter sharp s entityMap['agrave'] = 0x00E0; // à latin small letter a with grave entityMap['aacute'] = 0x00E1; // á latin small letter a with acute entityMap['acirc'] = 0x00E2; // â latin small letter a with circumflex entityMap['atilde'] = 0x00E3; // ã latin small letter a with tilde entityMap['auml'] = 0x00E4; // ä latin small letter a with diaeresis entityMap['aring'] = 0x00E5; // å latin small letter a with ring entityMap['aelig'] = 0x00E6; // æ latin small letter ae entityMap['ccedil'] = 0x00E7; // ç latin small letter c with cedilla entityMap['egrave'] = 0x00E8; // è latin small letter e with grave entityMap['eacute'] = 0x00E9; // é latin small letter e with acute entityMap['ecirc'] = 0x00EA; // ê latin small letter e with circumflex entityMap['euml'] = 0x00EB; // ë latin small letter e with diaeresis entityMap['igrave'] = 0x00EC; // ì latin small letter i with grave entityMap['iacute'] = 0x00ED; // í latin small letter i with acute entityMap['icirc'] = 0x00EE; // î latin small letter i with circumflex entityMap['iuml'] = 0x00EF; // ï latin small letter i with diaeresis entityMap['eth'] = 0x00F0; // ð latin small letter eth entityMap['ntilde'] = 0x00F1; // ñ latin small letter n with tilde entityMap['ograve'] = 0x00F2; // ò latin small letter o with grave entityMap['oacute'] = 0x00F3; // ó latin small letter o with acute entityMap['ocirc'] = 0x00F4; // ô latin small letter o with circumflex entityMap['otilde'] = 0x00F5; // õ latin small letter o with tilde entityMap['ouml'] = 0x00F6; // ö latin small letter o with diaeresis entityMap['divide'] = 0x00F7; // ÷ division sign entityMap['oslash'] = 0x00F8; // ø latin small letter o with stroke entityMap['ugrave'] = 0x00F9; // ù latin small letter u with grave entityMap['uacute'] = 0x00FA; // ú latin small letter u with acute entityMap['ucirc'] = 0x00FB; // û latin small letter u with circumflex entityMap['uuml'] = 0x00FC; // ü latin small letter u with diaeresis entityMap['yacute'] = 0x00FD; // ý latin small letter y with acute entityMap['thorn'] = 0x00FE; // þ latin small letter thorn entityMap['yuml'] = 0x00FF; // ÿ latin small letter y with diaeresis entityMap['OElig'] = 0x0152; // Œ latin capital ligature oe entityMap['oelig'] = 0x0153; // œ latin small ligature oe entityMap['Scaron'] = 0x0160; // Š latin capital letter s with caron entityMap['scaron'] = 0x0161; // š latin small letter s with caron entityMap['Yuml'] = 0x0178; // Ÿ latin cpital letter y with diaeresis entityMap['fnof'] = 0x0192; // ƒ latin small letter f with hook entityMap['circ'] = 0x02C6; // ˆ modifier letter circumflex accent entityMap['tilde'] = 0x02DC; // ˜ small tilde entityMap['Alpha'] = 0x0391; // Α greek capital letter alpha entityMap['Beta'] = 0x0392; // Β greek capital letter beta entityMap['Gamma'] = 0x0393; // Γ greek capital letter gamma entityMap['Delta'] = 0x0394; // Δ greek capital letter delta entityMap['Epsilon'] = 0x0395; // Ε greek capital letter epsilon entityMap['Zeta'] = 0x0396; // Ζ greek capital letter zeta entityMap['Eta'] = 0x0397; // Η greek capital letter eta entityMap['Theta'] = 0x0398; // Θ greek capital letter theta entityMap['Iota'] = 0x0399; // Ι greek capital letter iota entityMap['Kappa'] = 0x039A; // Κ greek capital letter kappa entityMap['Lambda'] = 0x039B; // Λ greek capital letter lambda entityMap['Mu'] = 0x039C; // Μ greek capital letter mu entityMap['Nu'] = 0x039D; // Ν greek capital letter nu entityMap['Xi'] = 0x039E; // Ξ greek capital letter xi entityMap['Omicron'] = 0x039F; // Ο greek capital letter omicron entityMap['Pi'] = 0x03A0; // Π greek capital letter pi entityMap['Rho'] = 0x03A1; // Ρ greek capital letter rho entityMap['Sigma'] = 0x03A3; // Σ greek capital letter sigma entityMap['Tau'] = 0x03A4; // Τ greek capital letter tau entityMap['Upsilon'] = 0x03A5; // Υ greek capital letter upsilon entityMap['Phi'] = 0x03A6; // Φ greek capital letter phi entityMap['Chi'] = 0x03A7; // Χ greek capital letter chi entityMap['Psi'] = 0x03A8; // Ψ greek capital letter psi entityMap['Omega'] = 0x03A9; // Ω greek capital letter omega entityMap['alpha'] = 0x03B1; // α greek small letter alpha entityMap['beta'] = 0x03B2; // β greek small letter beta entityMap['gamma'] = 0x03B3; // γ greek small letter gamma entityMap['delta'] = 0x03B4; // δ greek small letter delta entityMap['epsilon'] = 0x03B5; // ε greek small letter epsilon entityMap['zeta'] = 0x03B6; // ζ greek small letter zeta entityMap['eta'] = 0x03B7; // η greek small letter eta entityMap['theta'] = 0x03B8; // θ greek small letter theta entityMap['iota'] = 0x03B9; // ι greek small letter iota entityMap['kappa'] = 0x03BA; // κ greek small letter kappa entityMap['lambda'] = 0x03BB; // λ greek small letter lambda entityMap['mu'] = 0x03BC; // μ greek small letter mu entityMap['nu'] = 0x03BD; // ν greek small letter nu entityMap['xi'] = 0x03BE; // ξ greek small letter xi entityMap['omicron'] = 0x03BF; // ο greek small letter omicron entityMap['pi'] = 0x03C0; // π greek small letter pi entityMap['rho'] = 0x03C1; // ρ greek small letter rho entityMap['sigmaf'] = 0x03C2; // ς greek small letter final sigma entityMap['sigma'] = 0x03C3; // σ greek small letter sigma entityMap['tau'] = 0x03C4; // τ greek small letter tau entityMap['upsilon'] = 0x03C5; // υ greek small letter upsilon entityMap['phi'] = 0x03C6; // φ greek small letter phi entityMap['chi'] = 0x03C7; // χ greek small letter chi entityMap['psi'] = 0x03C8; // ψ greek small letter psi entityMap['omega'] = 0x03C9; // ω greek small symbol omega entityMap['thetasym'] = 0x03D1; // ϑ greek theta symbol entityMap['upsih'] = 0x03D2; // ϒ greek upsilon with hook symbol entityMap['piv'] = 0x03D6; // ϖ greek pi symbol entityMap['ensp'] = 0x2002; //   en space entityMap['emsp'] = 0x2003; //   em space entityMap['thinsp'] = 0x2009; //   thin space entityMap['zwnj'] = 0x200C; // zero width non-joiner entityMap['zwj'] = 0x200D; // zero width joiner entityMap['lrm'] = 0x200E; // left-to-right mark entityMap['rlm'] = 0x200F; // right-to-left mark entityMap['ndash'] = 0x2013; // – en dash entityMap['mdash'] = 0x2014; // — em dash entityMap['lsquo'] = 0x2018; // ‘ left single quotation mark entityMap['rsquo'] = 0x2019; // ’ right single quotation mark entityMap['sbquo'] = 0x201A; // ‚ single low-9 quotation mark entityMap['ldquo'] = 0x201C; // “ left double quotation mark entityMap['rdquo'] = 0x201D; // ” right double quotation mark entityMap['bdquo'] = 0x201E; // „ double low-9 quotation mark entityMap['dagger'] = 0x2020; // † dagger entityMap['Dagger'] = 0x2021; // ‡ double dagger entityMap['bull'] = 0x2022; // • bullet entityMap['hellip'] = 0x2026; // … horizontal ellipsis entityMap['permil'] = 0x2030; // ‰ per mille sign entityMap['prime'] = 0x2032; // ′ prime entityMap['Prime'] = 0x2033; // ″ double prime entityMap['lsaquo'] = 0x2039; // ‹ single left-pointing angle quotation mark entityMap['rsaquo'] = 0x203A; // › single right-pointing angle quotation mark entityMap['oline'] = 0x203E; // ‾ overline entityMap['frasl'] = 0x2044; // ⁄ fraction slash entityMap['euro'] = 0x20AC; // € euro sign entityMap['image'] = 0x2111; // ℑ black-letter capital i entityMap['weierp'] = 0x2118; // ℘ script capital p entityMap['real'] = 0x211C; // ℜ black-letter capital r entityMap['trade'] = 0x2122; // ™ trade mark sign entityMap['alefsym'] = 0x2135; // ℵ alef symbol entityMap['larr'] = 0x2190; // ← leftwards arrow entityMap['uarr'] = 0x2191; // ↑ upwards arrow entityMap['rarr'] = 0x2192; // → rightwards arrow entityMap['darr'] = 0x2193; // ↓ downwards arrow entityMap['harr'] = 0x2194; // ↔ left right arrow entityMap['crarr'] = 0x21B5; // ↵ downwards arrow with corner leftwards entityMap['lArr'] = 0x21D0; // ⇐ leftwards double arrow entityMap['uArr'] = 0x21D1; // ⇑ upwards double arrow entityMap['rArr'] = 0x21D2; // ⇒ rightwards double arrow entityMap['dArr'] = 0x21D3; // ⇓ downwards double arrow entityMap['hArr'] = 0x21D4; // ⇔⇔ left right double arrow entityMap['forall'] = 0x2200; // ∀ for all entityMap['part'] = 0x2202; // ∂ partial differential entityMap['exist'] = 0x2203; // ∃ there exists entityMap['empty'] = 0x2205; // ∅ empty set entityMap['nabla'] = 0x2207; // ∇ nabla entityMap['isin'] = 0x2208; // ∈ element of entityMap['notin'] = 0x2209; // ∉ not an element of entityMap['ni'] = 0x220B; // ∋ contains as member entityMap['prod'] = 0x220F; // ∏ n-ary product entityMap['sum'] = 0x2211; // ∑ n-ary summation entityMap['minus'] = 0x2212; // − minus sign entityMap['lowast'] = 0x2217; // ∗ asterisk operator entityMap['radic'] = 0x221A; // √ square root entityMap['prop'] = 0x221D; // ∝ proportional to entityMap['infin'] = 0x221E; // ∞ infinity entityMap['ang'] = 0x2220; // ∠ angle entityMap['and'] = 0x2227; // ∧ logical and entityMap['or'] = 0x2228; // ∨ logical or entityMap['cap'] = 0x2229; // ∩ intersection entityMap['cup'] = 0x222A; // ∪ union entityMap['int'] = 0x222B; // ∫ integral entityMap['there4'] = 0x2234; // ∴ therefore entityMap['sim'] = 0x223C; // ∼ tilde operator entityMap['cong'] = 0x2245; // ≅ congruent to entityMap['asymp'] = 0x2248; // ≈ almost equal to entityMap['ne'] = 0x2260; // ≠ not equal to entityMap['equiv'] = 0x2261; // ≡ identical to entityMap['le'] = 0x2264; // ≤ less-than or equal to entityMap['ge'] = 0x2265; // ≥ greater-than or equal to entityMap['sub'] = 0x2282; // ⊂ subset of entityMap['sup'] = 0x2283; // ⊃ superset of entityMap['nsub'] = 0x2284; // ⊄ not a subset of entityMap['sube'] = 0x2286; // ⊆ subset of or equal to entityMap['supe'] = 0x2287; // ⊇ superset of or equal to entityMap['oplus'] = 0x2295; // ⊕ circled plus entityMap['otimes'] = 0x2297; // ⊗ circled times entityMap['perp'] = 0x22A5; // ⊥ up tack entityMap['sdot'] = 0x22C5; // ⋅ dot operator entityMap['lceil'] = 0x2308; // ⌈ left ceiling entityMap['rceil'] = 0x2309; // ⌉ right ceiling entityMap['lfloor'] = 0x230A; // ⌊ left floor entityMap['rfloor'] = 0x230B; // ⌋ right floor entityMap['lang'] = 0x2329; // ⌋ left-pointing angle bracket entityMap['rang'] = 0x232A; // 〉⌋ right-pointing angle bracket entityMap['loz'] = 0x25CA; // ◊ lozenge entityMap['spades'] = 0x2660; // ♠ black spade suit entityMap['clubs'] = 0x2663; // ♣ black club suit entityMap['hearts'] = 0x2665; // ♥ black heart suit entityMap['diams'] = 0x2666; // ♦ black diamond suit entityMap['not'] = 0x00AC; // ¬ not sign entityMap['and'] = 0x2227; // ∧ logical and entityMap['or'] = 0x2228; // ∨ logical or entityMap['ne'] = 0x2260; // ≠ not equal to entityMap['le'] = 0x2264; // ≤ less-than or equal to entityMap['ge'] = 0x2265; // ≥ greater-than or equal to var i : String; var str : String; xmlEntities = new Object(); xmlEntities['"'] = String.fromCharCode(entityMap['quot']); xmlEntities['&'] = String.fromCharCode(entityMap['amp']); xmlEntities['''] = String.fromCharCode(entityMap['apos']); xmlEntities['<'] = String.fromCharCode(entityMap['lt']); xmlEntities['>'] = String.fromCharCode(entityMap['gt']); for (i in entityMap) { str = '&' + i + ';'; entities[str] = String.fromCharCode(entityMap[i]); if (!xmlEntities[str]) { xhtmlEntities[str] = String.fromCharCode(entityMap[i]); } } initialized = true; } } }