blob: 1e5090c9e6856b3b4a2688345e33b244aee63e25 [file] [log] [blame]
aviau039001d2016-09-29 16:39:05 -04001/*
2 * Copyright (c) 2016 SoapBox Innovations Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21*/
22
23'use strict';
24
25;(function (window, linkify) {
26 var linkifyHtml = function (linkify) {
27 'use strict';
28
// Table of named character references handed to EntityParser by tokenize().
// It is empty in this build; parse() below never looks anything up in it.
var HTML5NamedCharRefs = {};

// The three accepted shapes of the text found between "&" and ";".
var HEXCHARCODE = /^#[xX]([A-Fa-f0-9]+)$/;
var CHARCODE = /^#([0-9]+)$/;
var NAMED = /^([A-Za-z0-9]+)$/;

/**
 * Recogniser for HTML character references.
 *
 * @param {Object} named - table of named references; stored on the instance
 *   as `named` but not consulted by `parse`.
 */
function EntityParser(named) {
  this.named = named;
}

/**
 * Validates the body of a character reference (the text between "&" and ";")
 * and, when it is well formed, returns the reference re-assembled as source
 * text. Nothing is decoded: "amp" yields "&amp;", "#38" yields "&#38;", and a
 * hexadecimal reference is re-emitted with a lower-case "x" prefix.
 *
 * @param {string} entity - reference body without the surrounding "&" and ";"
 * @returns {string|undefined} the reference text, or undefined when `entity`
 *   is empty or matches none of the accepted shapes
 */
EntityParser.prototype.parse = function (entity) {
  if (!entity) {
    return;
  }

  // Each entry pairs a pattern with the prefix used to rebuild the reference.
  var shapes = [
    [HEXCHARCODE, '&#x'],
    [CHARCODE, '&#'],
    [NAMED, '&']
  ];

  for (var i = 0; i < shapes.length; i++) {
    var found = entity.match(shapes[i][0]);
    if (found) {
      return shapes[i][1] + found[1] + ';';
    }
  }
};
56
// Character classes used by the tokenizer states.
var WSP = /[\t\n\f ]/;
var ALPHA = /[A-Za-z]/;
// A carriage return, optionally followed by a line feed.
var CRLF = /\r\n?/g;

/**
 * @param {string} char - text to inspect (the tokenizer passes one character)
 * @returns {boolean} true when `char` contains a tab, line feed, form feed
 *   or space
 */
function isSpace(char) {
  return WSP.exec(char) !== null;
}

/**
 * @param {string} char - text to inspect (the tokenizer passes one character)
 * @returns {boolean} true when `char` contains an ASCII letter
 */
function isAlpha(char) {
  return ALPHA.exec(char) !== null;
}

/**
 * Normalises line endings so the tokenizer only ever sees "\n": every "\r\n"
 * pair and every lone "\r" becomes a single line feed.
 *
 * @param {string} input - raw text
 * @returns {string} `input` with normalised line endings
 */
function preprocessInput(input) {
  return input.split(CRLF).join("\n");
}
72
/**
 * Character-driven HTML tokenizer. It walks the input one character at a time
 * through the state functions in `states` and reports what it finds by calling
 * methods on `delegate` (beginData, appendToTagName, finishTag, ...).
 *
 * @param {Object} delegate - receiver of the tokenizer events; its `reset()`
 *   is invoked whenever the tokenizer itself is reset
 * @param {Object} entityParser - object whose `parse(entity)` returns the text
 *   to emit for a character reference, or a falsy value to reject it
 */
function EventedTokenizer(delegate, entityParser) {
  this.delegate = delegate;
  this.entityParser = entityParser;

  this.state = null;
  this.input = null;

  this.index = -1;
  this.line = -1;
  this.column = -1;
  this.tagLine = -1;
  this.tagColumn = -1;

  this.reset();
}

EventedTokenizer.prototype = {
  /** Returns the tokenizer (and its delegate) to the start-of-input state. */
  reset: function reset() {
    this.state = 'beforeData';
    this.input = '';

    this.index = 0;
    this.line = 1;
    this.column = 0;

    this.tagLine = -1;
    this.tagColumn = -1;

    this.delegate.reset();
  },

  /** Tokenizes `input` as one complete document. */
  tokenize: function tokenize(input) {
    this.reset();
    this.tokenizePart(input);
    this.tokenizeEOF();
  },

  /**
   * Appends `input` (with line endings normalised) to the buffered text and
   * runs the state machine until the buffer is exhausted.
   */
  tokenizePart: function tokenizePart(input) {
    this.input += preprocessInput(input);

    while (this.index < this.input.length) {
      this.states[this.state].call(this);
    }
  },

  /** Signals end of input; closes a data run that is still open. */
  tokenizeEOF: function tokenizeEOF() {
    this.flushData();
  },

  flushData: function flushData() {
    if (this.state === 'data') {
      this.delegate.finishData();
      this.state = 'beforeData';
    }
  },

  /** Returns the current character without consuming it ('' at the end). */
  peek: function peek() {
    return this.input.charAt(this.index);
  },

  /**
   * Returns the current character and steps past it, keeping `line` and
   * `column` in sync with `index`.
   */
  consume: function consume() {
    var char = this.peek();

    this.index++;

    if (char === "\n") {
      this.line++;
      this.column = 0;
    } else {
      this.column++;
    }

    return char;
  },

  /**
   * Tries to read a character reference whose "&" has already been consumed.
   * The text up to the next ";" is offered to the entity parser.
   *
   * @returns {string|undefined} the parser's output when the reference is
   *   accepted (the reference and its ";" are then consumed), otherwise
   *   undefined with the position left untouched
   */
  consumeCharRef: function consumeCharRef() {
    var endIndex = this.input.indexOf(';', this.index);
    if (endIndex === -1) {
      return;
    }
    var entity = this.input.slice(this.index, endIndex);
    var chars = this.entityParser.parse(entity);
    if (chars) {
      // Step over the reference through consume() rather than jumping `index`
      // ahead, so that `line`/`column` (and the loc info built from them)
      // stay aligned with `index`.
      while (this.index <= endIndex) {
        this.consume();
      }
      return chars;
    }
  },

  /** Remembers the position at which the tag being opened starts. */
  markTagStart: function markTagStart() {
    this.tagLine = this.line;
    this.tagColumn = this.column;
  },

  // One function per tokenizer state, each invoked with the tokenizer as
  // `this`. Every call either consumes a character or moves to a state that
  // will, which is what lets the loop in tokenizePart terminate.
  states: {
    beforeData: function beforeData() {
      var char = this.peek();

      if (char === "<") {
        this.state = 'tagOpen';
        this.markTagStart();
        this.consume();
      } else {
        // Nothing is consumed here; the data state handles this character.
        this.state = 'data';
        this.delegate.beginData();
      }
    },

    data: function data() {
      var char = this.peek();

      if (char === "<") {
        this.delegate.finishData();
        this.state = 'tagOpen';
        this.markTagStart();
        this.consume();
      } else if (char === "&") {
        this.consume();
        // A rejected reference leaves the "&" as literal text.
        this.delegate.appendToData(this.consumeCharRef() || "&");
      } else {
        this.consume();
        this.delegate.appendToData(char);
      }
    },

    tagOpen: function tagOpen() {
      var char = this.consume();

      if (char === "!") {
        this.state = 'markupDeclaration';
      } else if (char === "/") {
        this.state = 'endTagOpen';
      } else if (isAlpha(char)) {
        this.state = 'tagName';
        this.delegate.beginStartTag();
        // Only this first character is lower-cased; the tagName state appends
        // the remaining characters unchanged.
        this.delegate.appendToTagName(char.toLowerCase());
      }
      // Any other character is consumed and dropped; the state is unchanged.
    },

    markupDeclaration: function markupDeclaration() {
      var char = this.consume();

      if (char === "-" && this.peek() === "-") {
        // Take the second dash through consume() so `column` advances too.
        this.consume();
        this.state = 'commentStart';
        this.delegate.beginComment();
      }
      // Anything that is not "--" is consumed and dropped.
    },

    commentStart: function commentStart() {
      var char = this.consume();

      if (char === "-") {
        this.state = 'commentStartDash';
      } else if (char === ">") {
        this.delegate.finishComment();
        this.state = 'beforeData';
      } else {
        this.delegate.appendToCommentData(char);
        this.state = 'comment';
      }
    },

    commentStartDash: function commentStartDash() {
      var char = this.consume();

      if (char === "-") {
        this.state = 'commentEnd';
      } else if (char === ">") {
        this.delegate.finishComment();
        this.state = 'beforeData';
      } else {
        this.delegate.appendToCommentData("-");
        this.state = 'comment';
      }
    },

    comment: function comment() {
      var char = this.consume();

      if (char === "-") {
        this.state = 'commentEndDash';
      } else {
        this.delegate.appendToCommentData(char);
      }
    },

    commentEndDash: function commentEndDash() {
      var char = this.consume();

      if (char === "-") {
        this.state = 'commentEnd';
      } else {
        // The single dash was ordinary comment text after all.
        this.delegate.appendToCommentData("-" + char);
        this.state = 'comment';
      }
    },

    commentEnd: function commentEnd() {
      var char = this.consume();

      if (char === ">") {
        this.delegate.finishComment();
        this.state = 'beforeData';
      } else {
        // "--" not followed by ">" is ordinary comment text.
        this.delegate.appendToCommentData("--" + char);
        this.state = 'comment';
      }
    },

    tagName: function tagName() {
      var char = this.consume();

      if (isSpace(char)) {
        this.state = 'beforeAttributeName';
      } else if (char === "/") {
        this.state = 'selfClosingStartTag';
      } else if (char === ">") {
        this.delegate.finishTag();
        this.state = 'beforeData';
      } else {
        this.delegate.appendToTagName(char);
      }
    },

    beforeAttributeName: function beforeAttributeName() {
      var char = this.consume();

      if (isSpace(char)) {
        return;
      } else if (char === "/") {
        this.state = 'selfClosingStartTag';
      } else if (char === ">") {
        this.delegate.finishTag();
        this.state = 'beforeData';
      } else {
        this.state = 'attributeName';
        this.delegate.beginAttribute();
        this.delegate.appendToAttributeName(char);
      }
    },

    attributeName: function attributeName() {
      var char = this.consume();

      if (isSpace(char)) {
        this.state = 'afterAttributeName';
      } else if (char === "/") {
        // Attribute without a value: report an empty, unquoted value.
        this.delegate.beginAttributeValue(false);
        this.delegate.finishAttributeValue();
        this.state = 'selfClosingStartTag';
      } else if (char === "=") {
        this.state = 'beforeAttributeValue';
      } else if (char === ">") {
        this.delegate.beginAttributeValue(false);
        this.delegate.finishAttributeValue();
        this.delegate.finishTag();
        this.state = 'beforeData';
      } else {
        this.delegate.appendToAttributeName(char);
      }
    },

    afterAttributeName: function afterAttributeName() {
      var char = this.consume();

      if (isSpace(char)) {
        return;
      } else if (char === "/") {
        this.delegate.beginAttributeValue(false);
        this.delegate.finishAttributeValue();
        this.state = 'selfClosingStartTag';
      } else if (char === "=") {
        this.state = 'beforeAttributeValue';
      } else if (char === ">") {
        this.delegate.beginAttributeValue(false);
        this.delegate.finishAttributeValue();
        this.delegate.finishTag();
        this.state = 'beforeData';
      } else {
        // The previous attribute had no value; this character starts the
        // next attribute's name.
        this.delegate.beginAttributeValue(false);
        this.delegate.finishAttributeValue();
        this.state = 'attributeName';
        this.delegate.beginAttribute();
        this.delegate.appendToAttributeName(char);
      }
    },

    beforeAttributeValue: function beforeAttributeValue() {
      var char = this.consume();

      if (isSpace(char)) {
        return;
      } else if (char === '"') {
        this.state = 'attributeValueDoubleQuoted';
        this.delegate.beginAttributeValue(true);
      } else if (char === "'") {
        this.state = 'attributeValueSingleQuoted';
        this.delegate.beginAttributeValue(true);
      } else if (char === ">") {
        this.delegate.beginAttributeValue(false);
        this.delegate.finishAttributeValue();
        this.delegate.finishTag();
        this.state = 'beforeData';
      } else {
        this.state = 'attributeValueUnquoted';
        this.delegate.beginAttributeValue(false);
        this.delegate.appendToAttributeValue(char);
      }
    },

    attributeValueDoubleQuoted: function attributeValueDoubleQuoted() {
      var char = this.consume();

      if (char === '"') {
        this.delegate.finishAttributeValue();
        this.state = 'afterAttributeValueQuoted';
      } else if (char === "&") {
        this.delegate.appendToAttributeValue(this.consumeCharRef() || "&");
      } else {
        this.delegate.appendToAttributeValue(char);
      }
    },

    attributeValueSingleQuoted: function attributeValueSingleQuoted() {
      var char = this.consume();

      if (char === "'") {
        this.delegate.finishAttributeValue();
        this.state = 'afterAttributeValueQuoted';
      } else if (char === "&") {
        this.delegate.appendToAttributeValue(this.consumeCharRef() || "&");
      } else {
        this.delegate.appendToAttributeValue(char);
      }
    },

    attributeValueUnquoted: function attributeValueUnquoted() {
      var char = this.consume();

      if (isSpace(char)) {
        this.delegate.finishAttributeValue();
        this.state = 'beforeAttributeName';
      } else if (char === "&") {
        this.delegate.appendToAttributeValue(this.consumeCharRef() || "&");
      } else if (char === ">") {
        this.delegate.finishAttributeValue();
        this.delegate.finishTag();
        this.state = 'beforeData';
      } else {
        this.delegate.appendToAttributeValue(char);
      }
    },

    afterAttributeValueQuoted: function afterAttributeValueQuoted() {
      var char = this.peek();

      if (isSpace(char)) {
        this.consume();
        this.state = 'beforeAttributeName';
      } else if (char === "/") {
        this.consume();
        this.state = 'selfClosingStartTag';
      } else if (char === ">") {
        this.consume();
        this.delegate.finishTag();
        this.state = 'beforeData';
      } else {
        // Leave the character for beforeAttributeName to consume.
        this.state = 'beforeAttributeName';
      }
    },

    selfClosingStartTag: function selfClosingStartTag() {
      var char = this.peek();

      if (char === ">") {
        this.consume();
        this.delegate.markTagAsSelfClosing();
        this.delegate.finishTag();
        this.state = 'beforeData';
      } else {
        // The "/" was not the end of the tag; carry on with the attributes.
        this.state = 'beforeAttributeName';
      }
    },

    endTagOpen: function endTagOpen() {
      var char = this.consume();

      if (isAlpha(char)) {
        this.state = 'tagName';
        this.delegate.beginEndTag();
        this.delegate.appendToTagName(char.toLowerCase());
      }
      // Any other character is consumed and dropped; the state is unchanged.
    }
  }
};
466
/**
 * Collects the events emitted by an EventedTokenizer into an array of token
 * objects (`Chars`, `Comment`, `StartTag`, `EndTag`).
 *
 * @param {Object} entityParser - passed through to the EventedTokenizer
 * @param {Object} [options] - when `options.loc` is truthy every finished
 *   token receives a `loc` object with `start`/`end` line and column
 */
function Tokenizer(entityParser, options) {
  this.token = null;
  this.startLine = 1;
  this.startColumn = 0;
  this.options = options || {};
  this.tokenizer = new EventedTokenizer(this, entityParser);
}

Tokenizer.prototype = {
  /** Tokenizes a complete document and returns its tokens. */
  tokenize: function tokenize(input) {
    this.tokens = [];
    this.tokenizer.tokenize(input);
    return this.tokens;
  },

  /** Feeds one chunk of input and returns the tokens started by it. */
  tokenizePart: function tokenizePart(input) {
    this.tokens = [];
    this.tokenizer.tokenizePart(input);
    return this.tokens;
  },

  /** Signals end of input; returns the first token produced, if any. */
  tokenizeEOF: function tokenizeEOF() {
    this.tokens = [];
    this.tokenizer.tokenizeEOF();
    return this.tokens[0];
  },

  /** Delegate hook: called by the EventedTokenizer whenever it resets. */
  reset: function reset() {
    this.token = null;
    this.startLine = 1;
    this.startColumn = 0;
  },

  /**
   * Stamps the current token with its source range (when `options.loc` is
   * set) and makes the tokenizer's current position the start of the next
   * token.
   */
  addLocInfo: function addLocInfo() {
    if (this.options.loc) {
      this.token.loc = {
        start: {
          line: this.startLine,
          column: this.startColumn
        },
        end: {
          line: this.tokenizer.line,
          column: this.tokenizer.column
        }
      };
    }
    this.startLine = this.tokenizer.line;
    this.startColumn = this.tokenizer.column;
  },

  // Data

  beginData: function beginData() {
    this.token = {
      type: 'Chars',
      chars: ''
    };
    this.tokens.push(this.token);
  },

  appendToData: function appendToData(char) {
    this.token.chars += char;
  },

  finishData: function finishData() {
    this.addLocInfo();
  },

  // Comment

  beginComment: function beginComment() {
    this.token = {
      type: 'Comment',
      chars: ''
    };
    this.tokens.push(this.token);
  },

  appendToCommentData: function appendToCommentData(char) {
    this.token.chars += char;
  },

  finishComment: function finishComment() {
    this.addLocInfo();
  },

  // Tags - basic

  beginStartTag: function beginStartTag() {
    this.token = {
      type: 'StartTag',
      tagName: '',
      attributes: [],
      selfClosing: false
    };
    this.tokens.push(this.token);
  },

  beginEndTag: function beginEndTag() {
    this.token = {
      type: 'EndTag',
      tagName: ''
    };
    this.tokens.push(this.token);
  },

  finishTag: function finishTag() {
    this.addLocInfo();
  },

  markTagAsSelfClosing: function markTagAsSelfClosing() {
    this.token.selfClosing = true;
  },

  // Tags - name

  appendToTagName: function appendToTagName(char) {
    this.token.tagName += char;
  },

  // Tags - attributes

  /**
   * Starts a new attribute, stored as [name, value, isQuoted].
   */
  beginAttribute: function beginAttribute() {
    this._currentAttribute = ["", "", null];
    // EndTag tokens have no `attributes` list, yet the state machine also
    // reports attributes written on an end tag (e.g. `</a b>`). Those are
    // still collected in `_currentAttribute` so the following append calls
    // work, but they are not attached to the token.
    if (this.token.attributes) {
      this.token.attributes.push(this._currentAttribute);
    }
  },

  appendToAttributeName: function appendToAttributeName(char) {
    this._currentAttribute[0] += char;
  },

  beginAttributeValue: function beginAttributeValue(isQuoted) {
    this._currentAttribute[2] = isQuoted;
  },

  appendToAttributeValue: function appendToAttributeValue(char) {
    this._currentAttribute[1] = this._currentAttribute[1] || "";
    this._currentAttribute[1] += char;
  },

  // Nothing to do: the attribute was attached when it was begun.
  finishAttributeValue: function finishAttributeValue() {}
};
609
/**
 * Convenience entry point: tokenizes `input` with a fresh Tokenizer whose
 * entity parser is built over HTML5NamedCharRefs.
 *
 * @param {string} input - HTML source
 * @param {Object} [options] - forwarded to the Tokenizer constructor
 * @returns {Array} the tokens returned by Tokenizer#tokenize
 */
function tokenize(input, options) {
  var entityParser = new EntityParser(HTML5NamedCharRefs);
  return new Tokenizer(entityParser, options).tokenize(input);
}

// Namespace object bundling the tokenizer pieces defined above.
var HTML5Tokenizer = {};
HTML5Tokenizer.HTML5NamedCharRefs = HTML5NamedCharRefs;
HTML5Tokenizer.EntityParser = EntityParser;
HTML5Tokenizer.EventedTokenizer = EventedTokenizer;
HTML5Tokenizer.Tokenizer = Tokenizer;
HTML5Tokenizer.tokenize = tokenize;
622
// Option helpers taken from the linkify object passed into this wrapper.
var options = linkify.options,
    Options = options.Options;

// Values of the `type` field on the tokens produced by the HTML tokenizer.
var StartTag = 'StartTag',
    EndTag = 'EndTag',
    Chars = 'Chars',
    Comment = 'Comment';
631
/**
 * Finds links in the text content of an HTML string and wraps them in tags,
 * leaving the existing markup in place.
 *
 * In this function "token" always means a token produced by the HTML
 * tokenizer (or an HTML-style token produced by `linkifyChars`).
 *
 * The contents of `<a>` elements, and of any element whose upper-cased tag
 * name is in `opts.ignoreTags`, are copied through without being linkified.
 *
 * NOTE(review): a start tag's `selfClosing` flag is not consulted when the
 * tokens are serialised, so `<br/>` in the input is written back as `<br>`.
 *
 * @param {string} str - HTML source
 * @param {Object} [opts] - linkify options (second argument, defaults to {})
 * @returns {string} the HTML with links inserted
 */
function linkifyHtml(str) {
  var opts = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};

  var htmlTokens = HTML5Tokenizer.tokenize(str);
  var outTokens = [];
  var pieces = [];
  var i;

  opts = new Options(opts);

  // Pass 1: replace text tokens by their linkified equivalents.
  for (i = 0; i < htmlTokens.length; i++) {
    var token = htmlTokens[i];

    if (token.type === Chars) {
      outTokens.push.apply(outTokens, linkifyChars(token.chars, opts));
      continue;
    }

    // Tags and comments are always kept as they are.
    outTokens.push(token);
    if (token.type !== StartTag) {
      continue;
    }

    var tagName = token.tagName.toUpperCase();
    if (tagName !== 'A' && !options.contains(opts.ignoreTags, tagName)) {
      continue;
    }

    // Ignored element: copy everything up to and including its closing tag,
    // then move `i` to the last token that was copied (the loop's own
    // increment steps past it).
    var lengthBeforeSkip = outTokens.length;
    skipTagTokens(tagName, htmlTokens, ++i, outTokens);
    i += outTokens.length - lengthBeforeSkip - 1;
  }

  // Pass 2: serialise the tokens back into HTML.
  for (i = 0; i < outTokens.length; i++) {
    var out = outTokens[i];

    if (out.type === StartTag) {
      var openTag = '<' + out.tagName;
      if (out.attributes.length > 0) {
        openTag += ' ' + attrsToStrings(out.attributes).join(' ');
      }
      pieces.push(openTag + '>');
    } else if (out.type === EndTag) {
      pieces.push('</' + out.tagName + '>');
    } else if (out.type === Chars) {
      pieces.push(escapeText(out.chars));
    } else if (out.type === Comment) {
      pieces.push('<!--' + escapeText(out.chars) + '-->');
    }
  }

  return pieces.join('');
}
702
/**
 * Converts a run of plain text into HTML-tokenizer-style tokens, turning
 * every accepted link into a start tag / text / end tag triple.
 *
 * In this function `tokens` and `token` refer to tokens returned by
 * `linkify.tokenize`; the returned array holds HTML-parser-style tokens.
 *
 * @param {string} str - text content to scan
 * @param {Object} opts - options object providing `nl2br`, `check(token)`
 *   and `resolve(token)`
 * @returns {Array<Object>} HTML-style tokens (`StartTag`, `Chars`, `EndTag`)
 */
function linkifyChars(str, opts) {
  var tokens = linkify.tokenize(str);
  var result = [];

  for (var i = 0; i < tokens.length; i++) {
    var token = tokens[i];

    if (token.type === 'nl' && opts.nl2br) {
      // Newline becomes a <br> when requested.
      result.push({
        type: StartTag,
        tagName: 'br',
        attributes: [],
        selfClosing: true
      });
      continue;
    } else if (!token.isLink || !opts.check(token)) {
      // Plain text, or a link the options reject: keep as text.
      result.push({ type: Chars, chars: token.toString() });
      continue;
    }

    var resolved = opts.resolve(token);

    var formatted = resolved.formatted;
    var formattedHref = resolved.formattedHref;
    var tagName = resolved.tagName;
    var className = resolved.className;
    var target = resolved.target;
    var attributes = resolved.attributes;

    // Build up attributes

    var attributeArray = [['href', formattedHref]];

    if (className) {
      attributeArray.push(['class', className]);
    }

    if (target) {
      attributeArray.push(['target', target]);
    }

    for (var attr in attributes) {
      // Only the object's own entries are attributes; enumerable properties
      // inherited through the prototype chain must not end up in the tag.
      if (Object.prototype.hasOwnProperty.call(attributes, attr)) {
        attributeArray.push([attr, attributes[attr]]);
      }
    }

    // Add the required tokens
    result.push({
      type: StartTag,
      tagName: tagName,
      attributes: attributeArray,
      selfClosing: false
    });
    result.push({ type: Chars, chars: formatted });
    result.push({ type: EndTag, tagName: tagName });
  }

  return result;
}
766
/**
 * Copies tokens into `skippedTokens` untouched until the element opened just
 * before index `i` has been closed.
 *
 * - `tagName` is the upper-cased name of the element being skipped
 * - `tokens` is the array of tokens generated by HTML5Tokenizer which is
 *   being walked
 * - `i` is the index immediately after the opening tag to skip
 * - `skippedTokens` is the array the skipped tokens are pushed into
 *
 * Caveats
 *
 * - Assumes that `i` is the first token after the given opening tag
 * - The matching closing tag is copied as well, but nothing after it
 * - Nested elements of the same name are tracked, so only the closing tag
 *   that balances the original opening tag ends the skip
 *
 * @returns {Array} `skippedTokens`, the same array that was passed in
 */
function skipTagTokens(tagName, tokens, i, skippedTokens) {
  // How many elements named `tagName` are currently open.
  var depth = 1;

  for (var pos = i; pos < tokens.length && depth > 0; pos++) {
    var current = tokens[pos];
    var isTag = current.type === StartTag || current.type === EndTag;

    if (isTag && current.tagName.toUpperCase() === tagName) {
      // A nested opening tag deepens the nesting, a closing tag unwinds it.
      depth += current.type === StartTag ? 1 : -1;
    }

    skippedTokens.push(current);
  }

  // If depth is still positive here the input ran out before the element
  // was closed, i.e. the HTML is probably invalid.
  return skippedTokens;
}
802
/**
 * Hook applied to text and comment content when it is written back out.
 *
 * @param {string} text - content of a Chars or Comment token
 * @returns {string} `text`, unchanged
 */
function escapeText(text) {
  // Deliberately a pass-through: escaping is not required here, the HTML
  // tokenizer ensures this occurs properly.
  return text;
}
807
/**
 * Makes an attribute value safe to place between double quotes by replacing
 * every `"` with `&quot;`.
 *
 * Values are not always strings: attributes supplied through the linkify
 * options may be numbers or booleans, so the value is converted to a string
 * first instead of assuming it has a `replace` method. `null` and `undefined`
 * produce an empty value.
 *
 * Ampersands are left alone on purpose: the tokenizer in this file passes
 * character references through verbatim, so escaping them here would
 * double-encode them.
 *
 * @param {*} attr - attribute value
 * @returns {string} the value ready to be wrapped in double quotes
 */
function escapeAttr(attr) {
  if (attr === null || attr === undefined) {
    return '';
  }
  return String(attr).replace(/"/g, '&quot;');
}
811
/**
 * Serialises attribute tuples into `name="value"` strings.
 *
 * @param {Array<Array>} attrs - attributes as [name, value, ...] tuples;
 *   only the first two entries of each tuple are used
 * @returns {Array<string>} one `name="value"` string per attribute, with the
 *   value passed through `escapeAttr`
 */
function attrsToStrings(attrs) {
  return attrs.map(function (pair) {
    return pair[0] + '="' + escapeAttr(pair[1]) + '"';
  });
}
823
824 return linkifyHtml;
825 }(linkify);
826 window.linkifyHtml = linkifyHtml;
827})(window, linkify);