String.entityEncode, String.entityDecode and String.regexEncode

So as I do all this nifty client-side content generation, I found the need for a clean entity encode/decode process. I was pretty shocked I couldn’t find anything to suit my needs, so I built one. It builds its reverse lookup on load to minimize the total size, then generates the regular expressions used to encode/decode. To this end, it also includes String.regexEncode, which escapes special regex characters in a string so the string can be used to generate a selector.

// Installs String#regexEncode, which prefixes every regex-special character
// in a string with a backslash. Skipped when String.regexEncode already exists.
// NOTE(review): relies on String.implement (presumably MooTools) being loaded.
if (!String.regexEncode) {
    (function () {
        // Characters that receive a backslash prefix.
        String.regexChars = [
            '\\', '&', '^', '$', '*', '+', '?', '.',
            '(', ')', '|', '{', '}', '[', ']'
        ];

        // Each character is backslash-escaped and the results are joined with
        // '|' inside a single capturing character class.
        var escapedChars = String.regexChars.map(function (specialChar) {
            return '\\' + specialChar;
        });
        String.regexEncodeRegexObject = new RegExp('([' + escapedChars.join('|') + '])', 'g');

        String.implement({
            /**
             * @returns {string} this string with each special character
             *     preceded by a backslash.
             */
            regexEncode: function () {
                // Looked up at call time so a replaced regex object is honoured.
                var specialCharPattern = String.regexEncodeRegexObject;
                return this.replace(specialCharPattern, '\\$1');
            }
        });
    }());
}
// Installs String#entityEncode and String#entityDecode together with the
// String.entities lookup tables, unless both methods already exist.
// NOTE(review): relies on String.implement and Object.each (presumably
// MooTools) and on String#regexEncode being defined before this runs.
if ((!String.entityEncode) && (!String.entityDecode)) {
    String.entities = {};

    // Character code -> HTML entity reference emitted by entityEncode.
    // Code points without a confirmed named entity use numeric references.
    String.entities.byCode = {
        38: '&amp;', 60: '&lt;', 62: '&gt;',
        160: '&nbsp;', 161: '&iexcl;', 162: '&cent;', 163: '&pound;', 164: '&curren;', 165: '&yen;',
        166: '&brvbar;', 167: '&sect;', 168: '&uml;', 169: '&copy;', 170: '&ordf;', 171: '&laquo;',
        172: '&not;', 173: '&shy;', 174: '&reg;', 175: '&macr;', 176: '&deg;', 177: '&plusmn;',
        178: '&sup2;', 179: '&sup3;', 180: '&acute;', 181: '&micro;', 182: '&para;', 183: '&middot;',
        184: '&cedil;', 185: '&sup1;', 186: '&ordm;', 187: '&raquo;', 188: '&frac14;', 189: '&frac12;',
        190: '&frac34;', 191: '&iquest;',
        192: '&Agrave;', 193: '&Aacute;', 194: '&Acirc;', 195: '&Atilde;', 196: '&Auml;', 197: '&Aring;',
        198: '&AElig;', 199: '&Ccedil;', 200: '&Egrave;', 201: '&Eacute;', 202: '&Ecirc;', 203: '&Euml;',
        204: '&Igrave;', 205: '&Iacute;', 206: '&Icirc;', 207: '&Iuml;', 208: '&ETH;', 209: '&Ntilde;',
        210: '&Ograve;', 211: '&Oacute;', 212: '&Ocirc;', 213: '&Otilde;', 214: '&Ouml;', 215: '&times;',
        216: '&Oslash;', 217: '&Ugrave;', 218: '&Uacute;', 219: '&Ucirc;', 220: '&Uuml;', 221: '&Yacute;',
        222: '&THORN;', 223: '&szlig;',
        224: '&agrave;', 225: '&aacute;', 226: '&acirc;', 227: '&atilde;', 228: '&auml;', 229: '&aring;',
        230: '&aelig;', 231: '&ccedil;', 232: '&egrave;', 233: '&eacute;', 234: '&ecirc;', 235: '&euml;',
        236: '&igrave;', 237: '&iacute;', 238: '&icirc;', 239: '&iuml;', 240: '&eth;', 241: '&ntilde;',
        242: '&ograve;', 243: '&oacute;', 244: '&ocirc;', 245: '&otilde;', 246: '&ouml;', 247: '&divide;',
        248: '&oslash;', 249: '&ugrave;', 250: '&uacute;', 251: '&ucirc;', 252: '&uuml;', 253: '&yacute;',
        254: '&thorn;', 255: '&yuml;',
        264: '&Ccirc;', 265: '&ccirc;', 338: '&OElig;', 339: '&oelig;', 352: '&Scaron;', 353: '&scaron;',
        372: '&Wcirc;', 373: '&wcirc;', 374: '&Ycirc;', 375: '&ycirc;', 376: '&Yuml;', 402: '&fnof;',
        710: '&circ;', 732: '&tilde;',
        913: '&Alpha;', 914: '&Beta;', 915: '&Gamma;', 916: '&Delta;', 917: '&Epsilon;', 918: '&Zeta;',
        919: '&Eta;', 920: '&Theta;', 921: '&Iota;', 922: '&Kappa;', 923: '&Lambda;', 924: '&Mu;',
        925: '&Nu;', 926: '&Xi;', 927: '&Omicron;', 928: '&Pi;', 929: '&Rho;', 931: '&Sigma;',
        932: '&Tau;', 933: '&Upsilon;', 934: '&Phi;', 935: '&Chi;', 936: '&Psi;', 937: '&Omega;',
        945: '&alpha;', 946: '&beta;', 947: '&gamma;', 948: '&delta;', 949: '&epsilon;', 950: '&zeta;',
        951: '&eta;', 952: '&theta;', 953: '&iota;', 954: '&kappa;', 955: '&lambda;', 956: '&mu;',
        957: '&nu;', 958: '&xi;', 959: '&omicron;', 960: '&pi;', 961: '&rho;', 962: '&sigmaf;',
        963: '&sigma;', 964: '&tau;', 965: '&upsilon;', 966: '&phi;', 967: '&chi;', 968: '&psi;',
        969: '&omega;', 977: '&thetasym;', 978: '&upsih;', 982: '&piv;',
        8194: '&ensp;', 8195: '&emsp;', 8201: '&thinsp;', 8204: '&zwnj;', 8205: '&zwj;', 8206: '&lrm;',
        8207: '&rlm;', 8211: '&ndash;', 8212: '&mdash;', 8216: '&lsquo;', 8217: '&rsquo;', 8218: '&sbquo;',
        8220: '&ldquo;', 8221: '&rdquo;', 8222: '&bdquo;', 8224: '&dagger;', 8225: '&Dagger;', 8226: '&bull;',
        8230: '&hellip;', 8240: '&permil;', 8242: '&prime;', 8243: '&Prime;', 8249: '&lsaquo;', 8250: '&rsaquo;',
        8254: '&oline;', 8260: '&frasl;', 8364: '&euro;', 8472: '&weierp;', 8465: '&image;', 8476: '&real;',
        8482: '&trade;', 8501: '&alefsym;',
        8592: '&larr;', 8593: '&uarr;', 8594: '&rarr;', 8595: '&darr;', 8596: '&harr;', 8629: '&crarr;',
        8656: '&lArr;', 8657: '&uArr;', 8658: '&rArr;', 8659: '&dArr;', 8660: '&hArr;',
        8704: '&forall;', 8706: '&part;', 8707: '&exist;', 8709: '&empty;', 8711: '&nabla;', 8712: '&isin;',
        8713: '&notin;', 8715: '&ni;', 8719: '&prod;', 8721: '&sum;', 8722: '&minus;', 8727: '&lowast;',
        8729: '&#8729;', 8730: '&radic;', 8733: '&prop;', 8734: '&infin;', 8736: '&ang;', 8743: '&and;',
        8744: '&or;', 8745: '&cap;', 8746: '&cup;', 8747: '&int;', 8756: '&there4;', 8764: '&sim;',
        8773: '&cong;', 8776: '&asymp;', 8800: '&ne;', 8801: '&equiv;', 8804: '&le;', 8805: '&ge;',
        8834: '&sub;', 8835: '&sup;', 8836: '&nsub;', 8838: '&sube;', 8839: '&supe;', 8853: '&oplus;',
        8855: '&otimes;', 8869: '&perp;', 8901: '&sdot;', 8968: '&lceil;', 8969: '&rceil;', 8970: '&lfloor;',
        8971: '&rfloor;', 9001: '&lang;', 9002: '&rang;', 9642: '&#9642;', 9643: '&#9643;', 9674: '&loz;',
        9702: '&#9702;', 9824: '&spades;', 9827: '&clubs;', 9829: '&hearts;', 9830: '&diams;'
    };

    // Reverse lookup (entity reference -> character code), derived at load
    // time so the table only has to be written out once.
    String.entities.byName = {};
    Object.each(String.entities.byCode, function (entity, code) {
        String.entities.byName[entity] = code;
    });

    // Alternation of every encodable character, regex-escaped.
    var charSelectorString = '(' + Object.keys(String.entities.byCode).map(function (code) {
        return String.fromCharCode(code).regexEncode();
    }).join('|') + ')';
    String.entities.charSelector = new RegExp(charSelectorString, 'g');

    // Alternation of every known entity reference. Matching is case-sensitive
    // because entity names are (e.g. '&Agrave;' and '&agrave;' are different
    // characters) and the byName lookup is exact.
    var entitySelectorString = '(' + Object.keys(String.entities.byName).map(function (entity) {
        return entity.regexEncode();
    }).join('|') + ')';
    String.entities.entitySelector = new RegExp(entitySelectorString, 'g');

    String.implement({
        /**
         * Replaces every character listed in String.entities.byCode with its
         * entity reference.
         * @returns {string} the encoded string.
         */
        entityEncode: function () {
            return this.replace(
                String.entities.charSelector,
                function (str, chr) {
                    var entity = String.entities.byCode[chr.charCodeAt(0)];
                    // Pass the text through if the table was altered after load.
                    return entity === undefined ? str : entity;
                }
            );
        },

        /**
         * Replaces every entity reference listed in String.entities.byName
         * with the character it stands for.
         * @returns {string} the decoded string.
         */
        entityDecode: function () {
            return this.replace(
                String.entities.entitySelector,
                function (str, entity) {
                    var code = String.entities.byName[entity];
                    // Never emit String.fromCharCode(undefined), which is a NUL character.
                    return code === undefined ? str : String.fromCharCode(code);
                }
            );
        }
    });
}

Enjoy,
-abbey

Leave a Reply