2 * @license AngularJS v1.0.0
3 * (c) 2010-2012 Google, Inc. http://angularjs.org
6 (function(window, angular, undefined) {
16 * HTML Parser By Misko Hevery (misko@hevery.com)
17 * based on: HTML Parser By John Resig (ejohn.org)
18 * Original code by Erik Arvidsson, Mozilla Public License
19 * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
22 * htmlParser(htmlString, {
23 * start: function(tag, attrs, unary) {},
24 * end: function(tag) {},
25 * chars: function(text) {},
26 * comment: function(text) {}
34 * @name ngSanitize.$sanitize
38 * The input is sanitized by parsing the html into tokens. All safe tokens (from a whitelist) are
39 * then serialized back to properly escaped html string. This means that no unsafe input can make
40 * it into the returned string, however, since our parser is more strict than a typical browser
41 * parser, it's possible that some obscure input, which would be recognized as valid HTML by a
42 * browser, won't make it through the sanitizer.
44 * @param {string} html Html input.
45 * @returns {string} Sanitized html.
48 <doc:example module="ngSanitize">
51 function Ctrl($scope) {
53 '<p style="color:blue">an html\n' +
54 '<em onmouseover="this.textContent=\'PWN3D!\'">click here</em>\n' +
58 <div ng-controller="Ctrl">
59 Snippet: <textarea ng-model="snippet" cols="60" rows="3"></textarea>
69 <pre><div ng-bind-html="snippet"><br/></div></pre>
72 <div ng-bind-html="snippet"></div>
75 <tr id="escaped-html">
77 <td><pre><div ng-bind="snippet"><br/></div></pre></td>
78 <td><div ng-bind="snippet"></div></td>
80 <tr id="html-unsafe-filter">
81 <td>unsafe html filter</td>
82 <td><pre><div ng-bind-html-unsafe="snippet"><br/></div></pre></td>
83 <td><div ng-bind-html-unsafe="snippet"></div></td>
89 it('should sanitize the html snippet ', function() {
90 expect(using('#html-filter').element('div').html()).
91 toBe('<p>an html\n<em>click here</em>\nsnippet</p>');
94 it('should escape snippet without any filter', function() {
95 expect(using('#escaped-html').element('div').html()).
96 toBe("&lt;p style=\"color:blue\"&gt;an html\n" +
97 "&lt;em onmouseover=\"this.textContent='PWN3D!'\"&gt;click here&lt;/em&gt;\n" +
101 it('should inline raw snippet if filtered as unsafe', function() {
102 expect(using('#html-unsafe-filter').element("div").html()).
103 toBe("<p style=\"color:blue\">an html\n" +
104 "<em onmouseover=\"this.textContent='PWN3D!'\">click here</em>\n" +
108 it('should update', function() {
109 input('snippet').enter('new <b>text</b>');
110 expect(using('#html-filter').binding('snippet')).toBe('new <b>text</b>');
111 expect(using('#escaped-html').element('div').html()).toBe("new &lt;b&gt;text&lt;/b&gt;");
112 expect(using('#html-unsafe-filter').binding("snippet")).toBe('new <b>text</b>');
117 var $sanitize = function(html) {
119 htmlParser(html, htmlSanitizeWriter(buf));
124 // Regular Expressions for parsing tags and attributes
// Opening tag anchored at the start of the input: "<", a tag name made of word
// characters, ":" or "-", then zero or more attributes (value optional; double-quoted,
// single-quoted or unquoted), an optional "/" and ">".
// Captures: 1 = tag name, 2 = raw attribute text, 3 = "/" when present.
125 var START_TAG_REGEXP = /^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/,
// Closing tag anchored at the start of the input; capture 1 = tag name. Anything between
// the name and ">" is tolerated.
126 END_TAG_REGEXP = /^<\s*\/\s*([\w:-]+)[^>]*>/,
// One attribute per match (global). Captures: 1 = name, 2 = double-quoted value,
// 3 = single-quoted value, 4 = unquoted value; all value groups are absent for a bare name.
127 ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g,
// Cheap test: the input starts with "<".
128 BEGIN_TAG_REGEXP = /^</,
// Cheap test: the input starts with "<", optional whitespace, then "/".
// NOTE(review): the identifier is misspelled (presumably BEGIN_END_TAG_REGEXP); the parser
// refers to it by this exact name, so a rename has to change both places together.
129 BEGING_END_TAGE_REGEXP = /^<\s*\//,
// Comment and CDATA wrappers (global); capture 1 is the inner text. "." does not match
// line terminators, so a section that spans several lines is not matched.
130 COMMENT_REGEXP = /<!--(.*?)-->/g,
131 CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
// Accepted URI prefixes: ftp://, http://, https://, mailto: or "#". Case-sensitive.
132 URI_REGEXP = /^((ftp|https?):\/\/|mailto:|#)/,
// Negated class: matches any single character that is NOT in the range "#".."~" and is not
// "|", space or "!". The double quote, control characters and non-ASCII characters match.
133 NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Match everything outside of normal chars and " (quote character)
136 // Good source of info about elements and attributes
137 // http://dev.w3.org/html5/spec/Overview.html#semantics
138 // http://simon.html5.org/html-elements
140 // Safe Void Elements - HTML5
141 // http://dev.w3.org/html5/spec/Overview.html#void-elements
// Tags in this map are always treated as unary by the parser, whether or not the
// source wrote a trailing "/".
142 var voidElements = makeMap("area,br,col,hr,img,wbr");
144 // Elements that you can, intentionally, leave open (and which close themselves)
145 // http://dev.w3.org/html5/spec/Overview.html#optional-tags
// When the parser meets one of these while the same tag is already the innermost open
// element, it closes the open one first.
146 var optionalEndTagBlockElements = makeMap("colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr"),
147 optionalEndTagInlineElements = makeMap("rp,rt"),
148 optionalEndTagElements = angular.extend({}, optionalEndTagInlineElements, optionalEndTagBlockElements);
150 // Safe Block Elements - HTML5
// Opening a block element makes the parser close any inline elements still open on top
// of its stack. "hr" is also listed as void; "del", "ins" and "map" are also listed as
// inline; "script" is also listed as a special element.
151 var blockElements = angular.extend({}, optionalEndTagBlockElements, makeMap("address,article,aside," +
152 "blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5,h6," +
153 "header,hgroup,hr,ins,map,menu,nav,ol,pre,script,section,table,ul"));
155 // Inline Elements - HTML5
// "br" and "img" are also listed as void elements.
156 var inlineElements = angular.extend({}, optionalEndTagInlineElements, makeMap("a,abbr,acronym,b,bdi,bdo," +
157 "big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s,samp,small," +
158 "span,strike,strong,sub,sup,time,tt,u,var"));
161 // Special Elements (can contain anything)
// While one of these is the innermost open element the parser does not tokenize its
// content as tags; it consumes everything up to the matching end tag as text.
162 var specialElements = makeMap("script,style");
// Union of every map above except specialElements; the writer only emits a tag when
// validElements[tag] == true.
164 var validElements = angular.extend({}, voidElements, blockElements, inlineElements, optionalEndTagElements);
166 // Attributes whose values are URIs: the writer emits them only when the value matches URI_REGEXP
167 var uriAttrs = makeMap("background,cite,href,longdesc,src,usemap");
// Whitelist of attribute names the writer may emit (the URI attributes are merged in).
// Names are lowercased before lookup; any attribute not listed here is dropped.
168 var validAttrs = angular.extend({}, uriAttrs, makeMap(
169 'abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,'+
170 'color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,'+
171 'ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,'+
172 'scope,scrolling,shape,span,start,summary,target,title,type,'+
173 'valign,value,vspace,width'));
/**
 * Builds a lookup table from a comma-separated list of keys.
 *
 * @param {string} str Comma-separated keys, e.g. "a,b,c". Keys are used verbatim
 *     (no trimming), so an empty string yields a single "" key.
 * @returns {Object} Plain object whose own properties are the keys, each set to `true`.
 */
function makeMap(str) {
  var obj = {}, items = str.split(','), i;
  for (i = 0; i < items.length; i++) obj[items[i]] = true;
  // Hand the populated map back; every whitelist in this file is built from this value.
  return obj;
}
184 * htmlParser(htmlString, {
185 * start: function(tag, attrs, unary) {},
186 * end: function(tag) {},
187 * chars: function(text) {},
188 * comment: function(text) {}
191 * @param {string} html string
192 * @param {object} handler
194 function htmlParser( html, handler ) {
195 var index, chars, match, stack = [], last = html;
196 stack.last = function() { return stack[ stack.length - 1 ]; };
201 // Make sure we're not in a script or style element
202 if ( !stack.last() || !specialElements[ stack.last() ] ) {
205 if ( html.indexOf("<!--") === 0 ) {
206 index = html.indexOf("-->");
209 if (handler.comment) handler.comment( html.substring( 4, index ) );
210 html = html.substring( index + 3 );
215 } else if ( BEGING_END_TAGE_REGEXP.test(html) ) {
216 match = html.match( END_TAG_REGEXP );
219 html = html.substring( match[0].length );
220 match[0].replace( END_TAG_REGEXP, parseEndTag );
225 } else if ( BEGIN_TAG_REGEXP.test(html) ) {
226 match = html.match( START_TAG_REGEXP );
229 html = html.substring( match[0].length );
230 match[0].replace( START_TAG_REGEXP, parseStartTag );
236 index = html.indexOf("<");
238 var text = index < 0 ? html : html.substring( 0, index );
239 html = index < 0 ? "" : html.substring( index );
241 if (handler.chars) handler.chars( decodeEntities(text) );
245 html = html.replace(new RegExp("(.*)<\\s*\\/\\s*" + stack.last() + "[^>]*>", 'i'), function(all, text){
247 replace(COMMENT_REGEXP, "$1").
248 replace(CDATA_REGEXP, "$1");
250 if (handler.chars) handler.chars( decodeEntities(text) );
255 parseEndTag( "", stack.last() );
258 if ( html == last ) {
259 throw "Parse Error: " + html;
264 // Clean up any remaining tags
267 function parseStartTag( tag, tagName, rest, unary ) {
268 tagName = angular.lowercase(tagName);
269 if ( blockElements[ tagName ] ) {
270 while ( stack.last() && inlineElements[ stack.last() ] ) {
271 parseEndTag( "", stack.last() );
275 if ( optionalEndTagElements[ tagName ] && stack.last() == tagName ) {
276 parseEndTag( "", tagName );
279 unary = voidElements[ tagName ] || !!unary;
282 stack.push( tagName );
286 rest.replace(ATTR_REGEXP, function(match, name, doubleQuotedValue, singleQoutedValue, unqoutedValue) {
287 var value = doubleQuotedValue
292 attrs[name] = decodeEntities(value);
294 if (handler.start) handler.start( tagName, attrs, unary );
297 function parseEndTag( tag, tagName ) {
299 tagName = angular.lowercase(tagName);
301 // Find the closest opened tag of the same type
302 for ( pos = stack.length - 1; pos >= 0; pos-- )
303 if ( stack[ pos ] == tagName )
307 // Close all the open elements, up the stack
308 for ( i = stack.length - 1; i >= pos; i-- )
309 if (handler.end) handler.end( stack[ i ] );
311 // Remove the open elements from the stack
318 * decodes all entities into regular string
320 * @returns {string} A string with decoded entities.
322 var hiddenPre=document.createElement("pre");
/**
 * Decodes all HTML entities in a string into regular characters.
 *
 * The text is assigned to the shared `hiddenPre` element's innerHTML and read back as
 * text. Every "<" is escaped to "&lt;" first so the input can never be parsed as markup
 * by that assignment (the parser passes the raw content of script/style elements here).
 *
 * @param {string} value Text that may contain entities.
 * @returns {string} A string with decoded entities ('' when nothing is read back).
 */
function decodeEntities(value) {
  hiddenPre.innerHTML = value.replace(/</g, "&lt;");
  // innerText first, textContent as the fallback for engines that lack it.
  return hiddenPre.innerText || hiddenPre.textContent || '';
}
329 * Escapes all potentially dangerous characters, so that the
330 * resulting string can be safely inserted into attribute or
333 * @returns escaped text
/**
 * Escapes all potentially dangerous characters, so that the
 * resulting string can be safely inserted into attribute or
 * element text.
 *
 * @param {string} value Raw text.
 * @returns {string} Escaped text.
 */
function encodeEntities(value) {
  return value.
    // "&" goes first so the entities produced by the later steps are not escaped again.
    replace(/&/g, '&amp;').
    // Characters outside the plain printable set (including the double quote) become
    // numeric character references.
    replace(NON_ALPHANUMERIC_REGEXP, function(ch){
      return '&#' + ch.charCodeAt(0) + ';';
    }).
    replace(/</g, '&lt;').
    replace(/>/g, '&gt;');
}
346 * create an HTML/XML writer which writes to buffer
347 * @param {Array} buf use buf.join('') to get out sanitized html string
348 * @returns {object} in the form of {
349 * start: function(tag, attrs, unary) {},
350 * end: function(tag) {},
351 * chars: function(text) {},
352 * comment: function(text) {}
355 function htmlSanitizeWriter(buf){
357 var out = angular.bind(buf, buf.push);
359 start: function(tag, attrs, unary){
360 tag = angular.lowercase(tag);
361 if (!ignore && specialElements[tag]) {
364 if (!ignore && validElements[tag] == true) {
367 angular.forEach(attrs, function(value, key){
368 var lkey=angular.lowercase(key);
369 if (validAttrs[lkey]==true && (uriAttrs[lkey]!==true || value.match(URI_REGEXP))) {
373 out(encodeEntities(value));
377 out(unary ? '/>' : '>');
381 tag = angular.lowercase(tag);
382 if (!ignore && validElements[tag] == true) {
391 chars: function(chars){
393 out(encodeEntities(chars));
400 // define ngSanitize module and register $sanitize service
401 angular.module('ngSanitize', []).value('$sanitize', $sanitize);
405 * @name ngSanitize.directive:ngBindHtml
408 * Creates a binding that will sanitize the result of evaluating the `expression` with the
409 * {@link ngSanitize.$sanitize $sanitize} service and innerHTML the result into the current element.
411 * See {@link ngSanitize.$sanitize $sanitize} docs for examples.
414 * @param {expression} ngBindHtml {@link guide/expression Expression} to evaluate.
416 angular.module('ngSanitize').directive('ngBindHtml', ['$sanitize', function($sanitize) {
417 return function(scope, element, attr) {
418 element.addClass('ng-binding').data('$binding', attr.ngBindHtml);
419 scope.$watch(attr.ngBindHtml, function(value) {
420 value = $sanitize(value);
421 element.html(value || '');
427 * @name ngSanitize.filter:linky
431 * Finds links in text input and turns them into html links. Supports http/https/ftp/mailto and
432 * plain email address links.
434 * @param {string} text Input text.
435 * @returns {string} Html-linkified text.
438 <doc:example module="ngSanitize">
441 function Ctrl($scope) {
443 'Pretty text with some links:\n'+
444 'http://angularjs.org/,\n'+
445 'mailto:us@somewhere.org,\n'+
446 'another@somewhere.org,\n'+
447 'and one more: ftp://127.0.0.1/.';
450 <div ng-controller="Ctrl">
451 Snippet: <textarea ng-model="snippet" cols="60" rows="3"></textarea>
458 <tr id="linky-filter">
459 <td>linky filter</td>
461 <pre><div ng-bind-html="snippet | linky"><br></div></pre>
464 <div ng-bind-html="snippet | linky"></div>
467 <tr id="escaped-html">
469 <td><pre><div ng-bind="snippet"><br></div></pre></td>
470 <td><div ng-bind="snippet"></div></td>
475 it('should linkify the snippet with urls', function() {
476 expect(using('#linky-filter').binding('snippet | linky')).
477 toBe('Pretty text with some links: ' +
478 '<a href="http://angularjs.org/">http://angularjs.org/</a>, ' +
479 '<a href="mailto:us@somewhere.org">us@somewhere.org</a>, ' +
480 '<a href="mailto:another@somewhere.org">another@somewhere.org</a>, ' +
481 'and one more: <a href="ftp://127.0.0.1/">ftp://127.0.0.1/</a>.');
484 it ('should not linkify snippet without the linky filter', function() {
485 expect(using('#escaped-html').binding('snippet')).
486 toBe("Pretty text with some links:\n" +
487 "http://angularjs.org/,\n" +
488 "mailto:us@somewhere.org,\n" +
489 "another@somewhere.org,\n" +
490 "and one more: ftp://127.0.0.1/.");
493 it('should update', function() {
494 input('snippet').enter('new http://link.');
495 expect(using('#linky-filter').binding('snippet | linky')).
496 toBe('new <a href="http://link">http://link</a>.');
497 expect(using('#escaped-html').binding('snippet')).toBe('new http://link.');
502 angular.module('ngSanitize').filter('linky', function() {
503 var LINKY_URL_REGEXP = /((ftp|https?):\/\/|(mailto:)?[A-Za-z0-9._%+-]+@)\S*[^\s\.\;\,\(\)\{\}\<\>]/,
504 MAILTO_REGEXP = /^mailto:/;
506 return function(text) {
507 if (!text) return text;
511 // TODO(vojta): use $sanitize instead
512 var writer = htmlSanitizeWriter(html);
515 while ((match = raw.match(LINKY_URL_REGEXP))) {
516 // We can not end in these as they are sometimes found at the end of the sentence
518 // if we did not match ftp/http/mailto then assume mailto
519 if (match[2] == match[3]) url = 'mailto:' + url;
521 writer.chars(raw.substr(0, i));
522 writer.start('a', {href:url});
523 writer.chars(match[0].replace(MAILTO_REGEXP, ''));
525 raw = raw.substring(i + match[0].length);
528 return html.join('');
532 })(window, window.angular);