blob: 8da2f895295fdc7222e93a614a51532457aab6e5 [file] [log] [blame]
Andreas Traczyk43c08232018-10-31 13:42:09 -04001/*
2 * Copyright (c) 2016 SoapBox Innovations Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21*/
22
23;(function () {
24'use strict';
25
/**
 * Symbol-aware stand-in for the `typeof` operator.
 *
 * The implementation is picked once, when this statement runs: if the runtime
 * reports `typeof Symbol.iterator` as "symbol" the native operator is used
 * as-is; otherwise values whose constructor is `Symbol` are reported as
 * "symbol" by hand.
 */
var hasNativeSymbolType = typeof Symbol === "function" && typeof Symbol.iterator === "symbol";
var _typeof = hasNativeSymbolType ? function (obj) {
  return typeof obj;
} : function (obj) {
  if (obj && typeof Symbol === "function" && obj.constructor === Symbol) {
    return "symbol";
  }
  return typeof obj;
};
27
28(function (exports) {
29 'use strict';
30
/**
 * Wire `child` up as a subclass of `parent`.
 *
 * A fresh object whose prototype is `parent.prototype` becomes
 * `child.prototype`; every enumerable property of the optional third argument
 * is copied onto it and its `constructor` is pointed back at `child`.
 *
 * @param {Function} parent Constructor to inherit from
 * @param {Function} child Constructor that receives the new prototype
 * @param {Object} [props] Extra prototype members (third positional argument)
 * @return {Function} `child`, for convenience
 */
function inherits(parent, child) {
  var extraProps = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : {};
  var proto = Object.create(parent.prototype);
  var key;

  for (key in extraProps) {
    proto[key] = extraProps[key];
  }

  proto.constructor = child;
  child.prototype = proto;
  return child;
}
42
// Fallback value for every supported option. `Options` reads from this table
// whenever the caller leaves an option out.
var defaults = {};
defaults.defaultProtocol = 'http';
defaults.events = null;
defaults.format = noop;
defaults.formatHref = noop;
defaults.nl2br = false;
defaults.tagName = 'a';
defaults.target = typeToTarget;
defaults.validate = true;
defaults.ignoreTags = [];
defaults.attributes = null;
defaults.className = 'linkified';
55
/**
 * Normalised option set: every supported option is copied from `opts`,
 * falling back to the matching entry in `defaults` when it is missing.
 *
 * @param {Object} [opts] User-supplied options; may be omitted
 */
function Options(opts) {
  opts = opts || {};

  this.defaultProtocol = opts.defaultProtocol || defaults.defaultProtocol;
  this.events = opts.events || defaults.events;
  this.format = opts.format || defaults.format;
  this.formatHref = opts.formatHref || defaults.formatHref;
  this.nl2br = opts.nl2br || defaults.nl2br;
  this.tagName = opts.tagName || defaults.tagName;
  this.target = opts.target || defaults.target;

  // The default for `validate` is `true`, so a plain `||` fallback would turn
  // an explicit `validate: false` back into `true`. Only fall back when the
  // option is really absent (undefined or null).
  this.validate = opts.validate != null ? opts.validate : defaults.validate;
  this.ignoreTags = [];

  // linkAttributes and linkClass are deprecated aliases
  this.attributes = opts.attributes || opts.linkAttributes || defaults.attributes;
  this.className = opts.className || opts.linkClass || defaults.className;

  // Store all ignored tag names in upper case
  var ignoredTags = opts.ignoreTags || defaults.ignoreTags;
  for (var i = 0; i < ignoredTags.length; i++) {
    this.ignoreTags.push(ignoredTags[i].toUpperCase());
  }
}
80
Options.prototype = {
  /**
   * Given the token, return all options for how it should be displayed.
   * @param {MultiToken} token
   * @return {Object} formatted, formattedHref, tagName, className, target,
   *   events and attributes resolved for this token
   */
  resolve: function resolve(token) {
    var href = token.toHref(this.defaultProtocol);
    var resolved = {};

    resolved.formatted = this.get('format', token.toString(), token);
    resolved.formattedHref = this.get('formatHref', href, token);
    resolved.tagName = this.get('tagName', href, token);
    resolved.className = this.get('className', href, token);
    resolved.target = this.get('target', href, token);
    resolved.events = this.getObject('events', href, token);
    resolved.attributes = this.getObject('attributes', href, token);

    return resolved;
  },

  /**
   * Decide whether a token should be displayed as a link by resolving the
   * `validate` option against the token's text.
   * @param {MultiToken} token
   */
  check: function check(token) {
    return this.get('validate', token.toString(), token);
  },

  // Private methods

  /**
   * Resolve an option's value based on the value of the option and the given
   * params.
   *
   * Falsy options are returned untouched. Function options are called with
   * `(operator, token.type)`. Object options are looked up by `token.type`,
   * falling back to `defaults[key]`; a function found that way is called the
   * same way. Anything else is returned as-is.
   *
   * @param {String} key Name of option to use
   * @param operator Passed to the option when the option is a function
   * @param {MultiToken} token The token from linkify.tokenize
   */
  get: function get(key, operator, token) {
    var option = this[key];

    if (!option) {
      return option;
    }

    var kind = _typeof(option);

    if (kind === 'function') {
      return option(operator, token.type);
    }

    if (kind === 'object') {
      var perType = option[token.type] || defaults[key];
      if (typeof perType === 'function') {
        return perType(operator, token.type);
      }
      return perType;
    }

    return option;
  },

  /**
   * Like `get`, but for options whose value is itself an object: only a
   * function option is invoked, everything else is returned unchanged.
   */
  getObject: function getObject(key, operator, token) {
    var option = this[key];
    if (typeof option === 'function') {
      return option(operator, token.type);
    }
    return option;
  }
};
139
/**
 * Minimal indexOf substitute used to check the ignoreTags option.
 * @param {Array} arr
 * @param value Compared against each element with `===`
 * @return {Boolean} true as soon as a strictly-equal element is found
 */
function contains(arr, value) {
  var idx = 0;
  while (idx < arr.length) {
    if (arr[idx] === value) {
      return true;
    }
    idx += 1;
  }
  return false;
}
151
/**
 * Identity function: hands back its argument untouched. Used as the default
 * `format` and `formatHref` option.
 */
function noop(val) {
  var unchanged = val;
  return unchanged;
}
155
/**
 * Default `target` option: only tokens of type 'url' get '_blank'; every
 * other type gets null. The href argument is not consulted.
 */
function typeToTarget(href, type) {
  if (type === 'url') {
    return '_blank';
  }
  return null;
}
159
// Frozen bundle of the option helpers defined above.
var options = (function () {
  var bundle = {};
  bundle.defaults = defaults;
  bundle.Options = Options;
  bundle.contains = contains;
  return Object.freeze(bundle);
}());
165
/**
 * Build a brand-new state constructor. Instances start with an empty jump
 * list `j` and a token class `T` (null when none, or a falsy one, is given).
 */
function createStateClass() {
  return function (tClass) {
    this.j = [];
    this.T = tClass ? tClass : null;
  };
}
172
/**
 A simple state machine that can emit token classes.

 The `j` property holds the state jumps: an array of pairs where

 * index [0] is a symbol or class of symbols to transition on
 * index [1] is the State instance to move to on a match

 What counts as a symbol depends on the concrete subclass. Linkify uses a
 two-stage scanner; each stage uses this state machine with a slightly
 different (polymorphic) `test` implementation.

 The `T` property refers to the token class.

 TODO: Can the `on` and `next` methods be combined?

 @class BaseState
 */
var BaseState = createStateClass();
BaseState.prototype = {
  // Returned by `next` when no jump matches
  defaultTransition: false,

  /**
   @method constructor
   @param {Class} tClass The kind of token to emit if there are no jumps
   after this state and the state is accepting.
   */

  /**
   Register a jump: on the given symbol(s), go to the given state.
   @method on
   @param {Array|Mixed} symbol One symbol, or an array of symbols that all
   lead to the same state
   @param {BaseState} state Should be the same kind of state as the current
   instance (i.e., don't pass in a different subclass)
   @return {BaseState} this instance, so calls can be chained
   */
  on: function on(symbol, state) {
    var symbols = symbol instanceof Array ? symbol : [symbol];
    for (var k = 0; k < symbols.length; k++) {
      this.j.push([symbols[k], state]);
    }
    return this;
  },

  /**
   Given the next item, return the state of the first registered jump whose
   symbol matches it.
   @method next
   @param {Mixed} item Should be an instance of the symbols handled by this
   particular machine.
   @return {State} The matching state, or `defaultTransition` when no jump
   matches
   */
  next: function next(item) {
    var idx = 0;
    while (idx < this.j.length) {
      var pair = this.j[idx];
      // pair[0] is the symbol to check for, pair[1] the state to jump to
      if (this.test(item, pair[0])) {
        return pair[1];
      }
      idx++;
    }

    // Nowhere left to jump!
    return this.defaultTransition;
  },

  /**
   Does this state accept? `true` only if `this.T` is truthy.
   @method accepts
   @return {Boolean}
   */
  accepts: function accepts() {
    return Boolean(this.T);
  },

  /**
   Determine whether a given item "symbolizes" the symbol, where symbol is a
   class of items handled by this state machine. The base version is a strict
   equality check; subclasses are expected to override it.
   @method test
   @param {Mixed} item Does this item match the given symbol?
   @param {Mixed} symbol
   @return {Boolean}
   */
  test: function test(item, symbol) {
    return symbol === item;
  },

  /**
   Emit the token for this State (just return it in this case). If this emits
   a token, this instance is an accepting state.
   @method emit
   @return {Class} T
   */
  emit: function emit() {
    return this.T;
  }
};
281
/**
 State machine for string-based input

 @class CharacterState
 @extends BaseState
 */
var CharacterState = inherits(BaseState, createStateClass(), {
  /**
   Does the given character match the given character or regular expression?
   @method test
   @param {String} character
   @param {String|RegExp} charOrRegExp
   @return {Boolean}
   */
  test: function test(character, charOrRegExp) {
    if (character === charOrRegExp) {
      return true;
    }
    // Not identical: only a regular expression can still match
    return charOrRegExp instanceof RegExp && charOrRegExp.test(character);
  }
});
301
/**
 State machine for input in the form of TextTokens

 @class TokenState
 @extends BaseState
 */
var State = inherits(BaseState, createStateClass(), {

  /**
   * Similar to `on`, but returns the state reached from this one on the given
   * token class, creating and registering that state when no jump exists yet.
   * If a jump already exists and a token class is supplied as the second
   * argument, the existing state's `T` is overwritten with it.
   * @method jump
   * @param {Class} token Token class to transition on
   * @param {Class} [tClass] Token class the target state should emit
   *   (second positional argument, null when omitted)
   * @return state
   */
  jump: function jump(token) {
    var tClass = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : null;

    // Probe with a throwaway instance, since `test` works on instances
    var target = this.next(new token(''));

    if (target !== this.defaultTransition) {
      if (tClass) {
        target.T = tClass;
      }
      return target;
    }

    // No jump yet: make a new state and register it
    target = new this.constructor(tClass);
    this.on(token, target);
    return target;
  },

  /**
   Is the given token an instance of the given token class?
   @method test
   @param {TextToken} token
   @param {Class} tokenClass
   @return {Boolean}
   */
  test: function test(token, tokenClass) {
    var matches = token instanceof tokenClass;
    return matches;
  }
});
344
/**
 Given a non-empty target string, generates states (if required) for each
 consecutive prefix of `str`, walking from `start`. Existing jumps are reused;
 only the missing tail gets new states. The state for the final character
 emits `endToken`, every newly created state before it emits `defaultToken`.

 This turns the state machine into a Trie-like data structure (rather than an
 intelligently-designed DFA).

 The original author notes this was not really tried with anything other than
 DOMAIN as the filler token.

 @param {String} str
 @param {CharacterState} start State to jump from on the first character
 @param {Class} endToken Token class to emit when the given string has been
 matched and no more jumps exist.
 @param {Class} defaultToken "Filler token", or which token type to emit when
 we don't have a full match
 @return {Array} list of newly-created states (empty when every character
 already had a jump)
 */
function stateify(str, start, endToken, defaultToken) {
  var len = str.length;
  var created = [];
  var current = start;
  var pos = 0;
  var following;

  // Follow the jumps that already exist for the leading characters
  for (; pos < len; pos++) {
    following = current.next(str[pos]);
    if (!following) {
      break;
    }
    current = following;
  }

  // The whole string is already represented: nothing new to add
  if (pos >= len) {
    return [];
  }

  // Filler states for every remaining character except the last
  for (; pos < len - 1; pos++) {
    following = new CharacterState(defaultToken);
    created.push(following);
    current.on(str[pos], following);
    current = following;
  }

  // The last character leads to the state carrying the end token
  following = new CharacterState(endToken);
  created.push(following);
  current.on(str[len - 1], following);

  return created;
}
396
/**
 * Build a brand-new token constructor. Instances only get their own `v`
 * property when a truthy value is passed in.
 */
function createTokenClass() {
  return function (value) {
    if (!value) {
      return;
    }
    this.v = value;
  };
}
404
405 /******************************************************************************
406 Text Tokens
407 Tokens composed of strings
408 ******************************************************************************/
409
/**
 Abstract class used for manufacturing text tokens.
 Pass in the value this token represents.

 @class TextToken
 @abstract
 */
var TextToken = createTokenClass();
TextToken.prototype = {
  // String form of the token: its `v` value coerced to a string
  toString: function toString() {
    var value = this.v;
    return value + '';
  }
};
423
/**
 * Create a TextToken subclass. When a truthy `value` is given it is placed on
 * the subclass prototype as `v`, so instances built without their own value
 * still report it.
 */
function inheritsToken(value) {
  var props = {};
  if (value) {
    props.v = value;
  }
  return inherits(TextToken, createTokenClass(), props);
}
428
/**
 A valid domain token
 @class DOMAIN
 @extends TextToken
 */
var DOMAIN = inheritsToken();

/**
 The at sign `@`
 @class AT
 @extends TextToken
 */
var AT = inheritsToken('@');

/**
 Represents a single colon `:` character
 @class COLON
 @extends TextToken
 */
var COLON = inheritsToken(':');

/**
 A single dot `.`
 @class DOT
 @extends TextToken
 */
var DOT = inheritsToken('.');

/**
 A character class that can surround the URL, but which the URL cannot begin
 or end with. Does not include certain English punctuation like parentheses.
 @class PUNCTUATION
 @extends TextToken
 */
var PUNCTUATION = inheritsToken();

/**
 The word localhost (by itself)
 @class LOCALHOST
 @extends TextToken
 */
var LOCALHOST = inheritsToken();

/**
 Newline token
 @class NL
 @extends TextToken
 */
var TNL = inheritsToken('\n');

/**
 @class NUM
 @extends TextToken
 */
var NUM = inheritsToken();

/**
 The plus sign `+`
 @class PLUS
 @extends TextToken
 */
var PLUS = inheritsToken('+');

/**
 The pound sign `#`
 @class POUND
 @extends TextToken
 */
var POUND = inheritsToken('#');

/**
 Represents a web URL protocol. Supported types include

 * `http:`
 * `https:`
 * `ftp:`
 * `ftps:`
 * one further, more unusual scheme

 @class PROTOCOL
 @extends TextToken
 */
var PROTOCOL = inheritsToken();

/**
 The question mark `?`
 @class QUERY
 @extends TextToken
 */
var QUERY = inheritsToken('?');

/**
 The forward slash `/`
 @class SLASH
 @extends TextToken
 */
var SLASH = inheritsToken('/');

/**
 The underscore `_`
 @class UNDERSCORE
 @extends TextToken
 */
var UNDERSCORE = inheritsToken('_');

/**
 One or more non-whitespace symbols.
 @class SYM
 @extends TextToken
 */
var SYM = inheritsToken();

/**
 @class TLD
 @extends TextToken
 */
var TLD = inheritsToken();

/**
 Represents a string of consecutive whitespace characters
 @class WS
 @extends TextToken
 */
var WS = inheritsToken();

/**
 Opening bracket classes
 */
var OPENBRACE = inheritsToken('{');
var OPENBRACKET = inheritsToken('[');
var OPENANGLEBRACKET = inheritsToken('<');
var OPENPAREN = inheritsToken('(');

/**
 Closing bracket classes
 */
var CLOSEBRACE = inheritsToken('}');
var CLOSEBRACKET = inheritsToken(']');
var CLOSEANGLEBRACKET = inheritsToken('>');
var CLOSEPAREN = inheritsToken(')');
562
// Frozen lookup of every text-token class, keyed by its public name.
// Note that the newline class is exposed as `NL`.
var TOKENS = (function () {
  var entries = [
    ['Base', TextToken],
    ['DOMAIN', DOMAIN],
    ['AT', AT],
    ['COLON', COLON],
    ['DOT', DOT],
    ['PUNCTUATION', PUNCTUATION],
    ['LOCALHOST', LOCALHOST],
    ['NL', TNL],
    ['NUM', NUM],
    ['PLUS', PLUS],
    ['POUND', POUND],
    ['QUERY', QUERY],
    ['PROTOCOL', PROTOCOL],
    ['SLASH', SLASH],
    ['UNDERSCORE', UNDERSCORE],
    ['SYM', SYM],
    ['TLD', TLD],
    ['WS', WS],
    ['OPENBRACE', OPENBRACE],
    ['OPENBRACKET', OPENBRACKET],
    ['OPENANGLEBRACKET', OPENANGLEBRACKET],
    ['OPENPAREN', OPENPAREN],
    ['CLOSEBRACE', CLOSEBRACE],
    ['CLOSEBRACKET', CLOSEBRACKET],
    ['CLOSEANGLEBRACKET', CLOSEANGLEBRACKET],
    ['CLOSEPAREN', CLOSEPAREN]
  ];
  var table = {};
  for (var k = 0; k < entries.length; k++) {
    table[entries[k][0]] = entries[k][1];
  }
  return Object.freeze(table);
}());
591
592 /**
593 The scanner provides an interface that takes a string of text as input, and
594 outputs an array of tokens instances that can be used for easy URL parsing.
595
596 @module linkify
597 @submodule scanner
598 @main scanner
599 */
600
// Known top-level domains, generated by a build macro (see gulpfile.js).
// The list is one pipe-separated string. It is written as adjacent pieces
// joined with `+` because a quoted string literal cannot contain a raw line
// break; the pieces are glued back together before splitting, so entries that
// straddle a piece boundary ("expert", "mtpc", "tunes") stay intact.
var tlds = ('aaa|aarp|abb|abbott|abogado|ac|academy|accenture|accountant|accountants|aco|active|actor|ad|adac|ads|adult|ae|aeg|aero|af|afl|ag|agency|ai|aig|airforce|airtel|al|alibaba|alipay|allfinanz|alsace|am|amica|amsterdam|an|analytics|android|ao|apartments|app|apple|aq|aquarelle|ar|aramco|archi|army|arpa|arte|as|asia|associates|at|attorney|au|auction|audi|audio|author|auto|autos|avianca|aw|ax|axa|az|azure|ba|baidu|band|bank|bar|barcelona|barclaycard|barclays|bargains|bauhaus|bayern|bb|bbc|bbva|bcg|bcn|bd|be|beats|beer|bentley|berlin|best|bet|bf|bg|bh|bharti|bi|bible|bid|bike|bing|bingo|bio|biz|bj|black|blackfriday|bloomberg|blue|bm|bms|bmw|bn|bnl|bnpparibas|bo|boats|boehringer|bom|bond|boo|book|boots|bosch|bostik|bot|boutique|br|bradesco|bridgestone|broadway|broker|brother|brussels|bs|bt|budapest|bugatti|build|builders|business|buy|buzz|bv|bw|by|bz|bzh|ca|cab|cafe|cal|call|camera|camp|cancerresearch|canon|capetown|capital|car|caravan|cards|care|career|careers|cars|cartier|casa|cash|casino|cat|catering|cba|cbn|cc|cd|ceb|center|ceo|cern|cf|cfa|cfd|cg|ch|chanel|channel|chase|chat|cheap|chloe|christmas|chrome|church|ci|cipriani|circle|cisco|citic|city|cityeats|ck|cl|claims|cleaning|click|clinic|clinique|clothing|cloud|club|clubmed|cm|cn|co|coach|codes|coffee|college|cologne|com|commbank|community|company|compare|computer|comsec|condos|construction|consulting|contact|contractors|cooking|cool|coop|corsica|country|coupon|coupons|courses|cr|credit|creditcard|creditunion|cricket|crown|crs|cruises|csc|cu|cuisinella|cv|cw|cx|cy|cymru|cyou|cz|dabur|dad|dance|date|dating|datsun|day|dclk|de|dealer|deals|degree|delivery|dell|deloitte|delta|democrat|dental|dentist|desi|design|dev|diamonds|diet|digital|direct|directory|discount|dj|dk|dm|dnp|do|docs|dog|doha|domains|download|drive|dubai|durban|dvag|dz|earth|eat|ec|edeka|edu|education|ee|eg|email|emerck|energy|engineer|engineering|enterprises|epson|equipment|er|erni|es|esq|estate|et|eu|eurovision|eus|events|everbank|exchange|ex' +
  'pert|exposed|express|fage|fail|fairwinds|faith|family|fan|fans|farm|fashion|fast|feedback|ferrero|fi|film|final|finance|financial|firestone|firmdale|fish|fishing|fit|fitness|fj|fk|flickr|flights|florist|flowers|flsmidth|fly|fm|fo|foo|football|ford|forex|forsale|forum|foundation|fox|fr|fresenius|frl|frogans|frontier|fund|furniture|futbol|fyi|ga|gal|gallery|gallup|game|garden|gb|gbiz|gd|gdn|ge|gea|gent|genting|gf|gg|ggee|gh|gi|gift|gifts|gives|giving|gl|glass|gle|global|globo|gm|gmail|gmbh|gmo|gmx|gn|gold|goldpoint|golf|goo|goog|google|gop|got|gov|gp|gq|gr|grainger|graphics|gratis|green|gripe|group|gs|gt|gu|gucci|guge|guide|guitars|guru|gw|gy|hamburg|hangout|haus|hdfcbank|health|healthcare|help|helsinki|here|hermes|hiphop|hitachi|hiv|hk|hm|hn|hockey|holdings|holiday|homedepot|homes|honda|horse|host|hosting|hoteles|hotmail|house|how|hr|hsbc|ht|hu|hyundai|ibm|icbc|ice|icu|id|ie|ifm|iinet|il|im|immo|immobilien|in|industries|infiniti|info|ing|ink|institute|insurance|insure|int|international|investments|io|ipiranga|iq|ir|irish|is|iselect|ist|istanbul|it|itau|iwc|jaguar|java|jcb|je|jetzt|jewelry|jlc|jll|jm|jmp|jo|jobs|joburg|jot|joy|jp|jpmorgan|jprs|juegos|kaufen|kddi|ke|kerryhotels|kerrylogistics|kerryproperties|kfh|kg|kh|ki|kia|kim|kinder|kitchen|kiwi|km|kn|koeln|komatsu|kp|kpn|kr|krd|kred|kuokgroup|kw|ky|kyoto|kz|la|lacaixa|lamborghini|lamer|lancaster|land|landrover|lanxess|lasalle|lat|latrobe|law|lawyer|lb|lc|lds|lease|leclerc|legal|lexus|lgbt|li|liaison|lidl|life|lifeinsurance|lifestyle|lighting|like|limited|limo|lincoln|linde|link|live|living|lixil|lk|loan|loans|local|locus|lol|london|lotte|lotto|love|lr|ls|lt|ltd|ltda|lu|lupin|luxe|luxury|lv|ly|ma|madrid|maif|maison|makeup|man|management|mango|market|marketing|markets|marriott|mba|mc|md|me|med|media|meet|melbourne|meme|memorial|men|menu|meo|mg|mh|miami|microsoft|mil|mini|mk|ml|mm|mma|mn|mo|mobi|mobily|moda|moe|moi|mom|monash|money|montblanc|mormon|mortgage|moscow|motorcycles|mov|movie|movistar|mp|mq|mr|ms|mt|mtn|mtpc' +
  '|mtr|mu|museum|mutuelle|mv|mw|mx|my|mz|na|nadex|nagoya|name|natura|navy|nc|ne|nec|net|netbank|network|neustar|new|news|nexus|nf|ng|ngo|nhk|ni|nico|nikon|ninja|nissan|nl|no|nokia|norton|nowruz|np|nr|nra|nrw|ntt|nu|nyc|nz|obi|office|okinawa|om|omega|one|ong|onl|online|ooo|oracle|orange|org|organic|origins|osaka|otsuka|ovh|pa|page|pamperedchef|panerai|paris|pars|partners|parts|party|passagens|pe|pet|pf|pg|ph|pharmacy|philips|photo|photography|photos|physio|piaget|pics|pictet|pictures|pid|pin|ping|pink|pizza|pk|pl|place|play|playstation|plumbing|plus|pm|pn|pohl|poker|porn|post|pr|praxi|press|pro|prod|productions|prof|promo|properties|property|protection|ps|pt|pub|pw|pwc|py|qa|qpon|quebec|quest|racing|re|read|realtor|realty|recipes|red|redstone|redumbrella|rehab|reise|reisen|reit|ren|rent|rentals|repair|report|republican|rest|restaurant|review|reviews|rexroth|rich|ricoh|rio|rip|ro|rocher|rocks|rodeo|room|rs|rsvp|ru|ruhr|run|rw|rwe|ryukyu|sa|saarland|safe|safety|sakura|sale|salon|samsung|sandvik|sandvikcoromant|sanofi|sap|sapo|sarl|sas|saxo|sb|sbs|sc|sca|scb|schaeffler|schmidt|scholarships|school|schule|schwarz|science|scor|scot|sd|se|seat|security|seek|select|sener|services|seven|sew|sex|sexy|sfr|sg|sh|sharp|shell|shia|shiksha|shoes|show|shriram|si|singles|site|sj|sk|ski|skin|sky|skype|sl|sm|smile|sn|sncf|so|soccer|social|softbank|software|sohu|solar|solutions|song|sony|soy|space|spiegel|spot|spreadbetting|sr|srl|st|stada|star|starhub|statefarm|statoil|stc|stcgroup|stockholm|storage|store|studio|study|style|su|sucks|supplies|supply|support|surf|surgery|suzuki|sv|swatch|swiss|sx|sy|sydney|symantec|systems|sz|tab|taipei|taobao|tatamotors|tatar|tattoo|tax|taxi|tc|tci|td|team|tech|technology|tel|telecity|telefonica|temasek|tennis|tf|tg|th|thd|theater|theatre|tickets|tienda|tiffany|tips|tires|tirol|tj|tk|tl|tm|tmall|tn|to|today|tokyo|tools|top|toray|toshiba|total|tours|town|toyota|toys|tp|tr|trade|trading|training|travel|travelers|travelersinsurance|trust|trv|tt|tube|tui|tune' +
  's|tushu|tv|tvs|tw|tz|ua|ubs|ug|uk|unicom|university|uno|uol|us|uy|uz|va|vacations|vana|vc|ve|vegas|ventures|verisign|versicherung|vet|vg|vi|viajes|video|viking|villas|vin|vip|virgin|vision|vista|vistaprint|viva|vlaanderen|vn|vodka|volkswagen|vote|voting|voto|voyage|vu|vuelos|wales|walter|wang|wanggou|watch|watches|weather|weatherchannel|webcam|weber|website|wed|wedding|weir|wf|whoswho|wien|wiki|williamhill|win|windows|wine|wme|wolterskluwer|work|works|world|ws|wtc|wtf|xbox|xerox|xin|xperia|xxx|xyz|yachts|yahoo|yamaxun|yandex|ye|yodobashi|yoga|yokohama|youtube|yt|za|zara|zero|zip|zm|zone|zuerich|zw').split('|'); // macro, see gulpfile.js
602
// Single-character symbol classes used when wiring up scanner jumps
var NUMBERS = '0123456789'.split('');
var ALPHANUM = '0123456789abcdefghijklmnopqrstuvwxyz'.split('');

// Non-newline whitespace characters. The last three entries are written as
// escapes (no-break space, Ogham space mark, Mongolian vowel separator) so
// they survive any re-encoding of this file; a literal '?' here would make
// the scanner swallow question marks that follow whitespace.
var WHITESPACE = [' ', '\f', '\r', '\t', '\v', '\u00a0', '\u1680', '\u180e']; // excluding line breaks
606
// Collects the states that must later receive a jump to DOMAIN on /[a-z0-9]/
var domainStates = [];

// Shorthand for creating a character-level state that emits `tokenClass`
var makeState = function makeState(tokenClass) {
  var created = new CharacterState(tokenClass);
  return created;
};
611
// Frequently used states
var S_START = makeState();
var S_NUM = makeState(NUM);
var S_DOMAIN = makeState(DOMAIN);
var S_DOMAIN_HYPHEN = makeState(); // domain followed by 1 or more hyphen characters
var S_WS = makeState(WS);

// States for special URL symbols. Jumps are tried in registration order, so
// the order of every `.on(...)` call in this section is significant.
S_START
  .on('@', makeState(AT))
  .on('.', makeState(DOT))
  .on('+', makeState(PLUS))
  .on('#', makeState(POUND))
  .on('?', makeState(QUERY))
  .on('/', makeState(SLASH))
  .on('_', makeState(UNDERSCORE))
  .on(':', makeState(COLON))
  .on('{', makeState(OPENBRACE))
  .on('[', makeState(OPENBRACKET))
  .on('<', makeState(OPENANGLEBRACKET))
  .on('(', makeState(OPENPAREN))
  .on('}', makeState(CLOSEBRACE))
  .on(']', makeState(CLOSEBRACKET))
  .on('>', makeState(CLOSEANGLEBRACKET))
  .on(')', makeState(CLOSEPAREN))
  .on([',', ';', '!', '"', '\''], makeState(PUNCTUATION));

// Whitespace jumps
// Tokens of only non-newline whitespace are arbitrarily long
S_START
  .on('\n', makeState(TNL))
  .on(WHITESPACE, S_WS);

// If any whitespace except newline, more whitespace!
S_WS.on(WHITESPACE, S_WS);

// Generates states for top-level domains
// Note that this is most accurate when tlds are in alphabetical order
for (var i = 0; i < tlds.length; i++) {
  var newStates = stateify(tlds[i], S_START, TLD, DOMAIN);
  Array.prototype.push.apply(domainStates, newStates);
}

// Collect the states generated by the different protocols
var partialProtocolFileStates = stateify('file', S_START, DOMAIN, DOMAIN);
var partialProtocolFtpStates = stateify('ftp', S_START, DOMAIN, DOMAIN);
var partialProtocolHttpStates = stateify('http', S_START, DOMAIN, DOMAIN);

// Add those states to the array of generic DOMAIN states
Array.prototype.push.apply(domainStates, partialProtocolFileStates);
Array.prototype.push.apply(domainStates, partialProtocolFtpStates);
Array.prototype.push.apply(domainStates, partialProtocolHttpStates);

// Protocol states: the last state created for each protocol word
var S_PROTOCOL_FILE = partialProtocolFileStates.pop();
var S_PROTOCOL_FTP = partialProtocolFtpStates.pop();
var S_PROTOCOL_HTTP = partialProtocolHttpStates.pop();
var S_PROTOCOL_SECURE = makeState(DOMAIN);
var S_FULL_PROTOCOL = makeState(PROTOCOL); // Full protocol ends with COLON

// Secure protocols (end with 's')
S_PROTOCOL_FTP
  .on('s', S_PROTOCOL_SECURE)
  .on(':', S_FULL_PROTOCOL);

S_PROTOCOL_HTTP
  .on('s', S_PROTOCOL_SECURE)
  .on(':', S_FULL_PROTOCOL);

domainStates.push(S_PROTOCOL_SECURE);

// Become protocol tokens after a COLON
S_PROTOCOL_FILE.on(':', S_FULL_PROTOCOL);
S_PROTOCOL_SECURE.on(':', S_FULL_PROTOCOL);

// Localhost
var partialLocalhostStates = stateify('localhost', S_START, LOCALHOST, DOMAIN);
Array.prototype.push.apply(domainStates, partialLocalhostStates);

// Everything else
// DOMAINs make more DOMAINs
// Number and character transitions
S_START.on(NUMBERS, S_NUM);
S_NUM
  .on('-', S_DOMAIN_HYPHEN)
  .on(NUMBERS, S_NUM)
  .on(ALPHANUM, S_DOMAIN); // number becomes DOMAIN

S_DOMAIN
  .on('-', S_DOMAIN_HYPHEN)
  .on(ALPHANUM, S_DOMAIN);

// All the generated states should have a jump to DOMAIN
for (var _i = 0; _i < domainStates.length; _i++) {
  domainStates[_i]
    .on('-', S_DOMAIN_HYPHEN)
    .on(ALPHANUM, S_DOMAIN);
}

S_DOMAIN_HYPHEN
  .on('-', S_DOMAIN_HYPHEN)
  .on(NUMBERS, S_DOMAIN)
  .on(ALPHANUM, S_DOMAIN);

// Set default transition: anything without a jump from the start state
// becomes a SYM token
S_START.defaultTransition = makeState(SYM);
685
/**
 Given a string, returns an array of TOKEN instances representing the
 composition of that string.

 Each pass starts at S_START and consumes characters for as long as a jump
 exists, remembering the most recent accepting state. When no jump is left,
 the cursor is rewound to just after that accepting state and a token of its
 class is created from the original (not lowercased) text.

 @method run
 @param {String} str Input string to scan
 @return {Array} Array of TOKEN instances
 */
var run = function run(str) {

  // The state machine only looks at lowercase strings.
  // Only A-Z is lowercased here because lowercasing the entire string causes
  // the length and character position to vary in some non-English strings.
  // This happens only on V8-based runtimes.
  var lowerStr = str.replace(/[A-Z]/g, function (c) {
    return c.toLowerCase();
  });
  var len = str.length;
  var tokens = []; // return value
  var cursor = 0;

  // Tokenize the string
  while (cursor < len) {
    var state = S_START;
    var nextState = null;
    var tokenStart = cursor;
    var latestAccepting = null;
    var sinceAccepts = -1; // characters consumed since the last accepting state

    for (;;) {
      if (cursor >= len) {
        break;
      }
      nextState = state.next(lowerStr[cursor]);
      if (!nextState) {
        break;
      }
      state = nextState;

      // Keep track of the latest accepting state
      if (state.accepts()) {
        sinceAccepts = 0;
        latestAccepting = state;
      } else if (sinceAccepts >= 0) {
        sinceAccepts++;
      }

      cursor++;
    }

    if (sinceAccepts < 0) {
      continue;
    } // Should never happen

    // Roll back to the latest accepting state
    cursor -= sinceAccepts;

    // Get the class for the new token
    var TOKEN = latestAccepting.emit(); // Current token class

    // No more jumps, just make a new token
    tokens.push(new TOKEN(str.substr(tokenStart, cursor - tokenStart)));
  }

  return tokens;
};
750
// Public alias for the scanner's starting state
var start = S_START;

// Frozen public surface of the scanner stage
var scanner = (function () {
  var api = {};
  api.State = CharacterState;
  api.TOKENS = TOKENS;
  api.run = run;
  api.start = start;
  return Object.freeze(api);
}());
759
760 /******************************************************************************
761 Multi-Tokens
762 Tokens composed of arrays of TextTokens
763 ******************************************************************************/
764
// Is the given token a valid domain token, i.e. a DOMAIN or a TLD?
// Open question from the original author: should NUM tokens count as well?
function isDomainToken(token) {
  if (token instanceof DOMAIN) {
    return true;
  }
  return token instanceof TLD;
}
770
/**
 Abstract class used for manufacturing tokens of text tokens. That is, rather
 than the value of a token being a small string of text, its value is an array
 of text tokens.

 Used for grouping together URLs, emails, hashtags, and other potential
 creations.

 @class MultiToken
 @abstract
 */
var MultiToken = createTokenClass();

MultiToken.prototype = {
  /**
   String representing the type for this token
   @property type
   @default 'token'
   */
  type: 'token',

  /**
   Is this multitoken a link?
   @property isLink
   @default false
   */
  isLink: false,

  /**
   Return the string this token represents: the string forms of all contained
   text tokens, concatenated in order.
   @method toString
   @return {String}
   */
  toString: function toString() {
    var pieces = [];
    var idx = 0;
    while (idx < this.v.length) {
      pieces.push(this.v[idx].toString());
      idx++;
    }
    return pieces.join('');
  },

  /**
   What should the value for this token be in the `href` HTML attribute?
   Returns the `.toString` value by default.
   @method toHref
   @return {String}
   */
  toHref: function toHref() {
    return this.toString();
  },

  /**
   Returns a hash of relevant values for this token, which includes keys
   * type - Kind of token ('url', 'email', etc.)
   * value - Original text
   * href - The value that should be added to the anchor tag's href
   attribute
   @method toObject
   @param {String} [protocol] `'http'` by default
   @return {Object}
   */
  toObject: function toObject() {
    var protocol = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 'http';
    var summary = {};

    summary.type = this.type;
    summary.value = this.toString();
    summary.href = this.toHref(protocol);

    return summary;
  }
};
844
/**
 Represents a list of tokens making up a valid email address.
 Its href is the token text prefixed with `mailto:`.
 @class EMAIL
 @extends MultiToken
 */
var EMAIL = inherits(MultiToken, createTokenClass(), {
  type: 'email',
  isLink: true,
  toHref: function toHref() {
    var address = this.toString();
    return 'mailto:' + address;
  }
});
857
/**
 Represents some plain text
 @class TEXT
 @extends MultiToken
 */
var TEXT = inherits(MultiToken, createTokenClass(), {
  type: 'text'
});
864
/**
 Multi-linebreak token - represents a line break
 @class NL
 @extends MultiToken
 */
var NL = inherits(MultiToken, createTokenClass(), {
  type: 'nl'
});
871
/**
 Represents a list of tokens making up a valid URL
 @class URL
 @extends MultiToken
 */
var URL = inherits(MultiToken, createTokenClass(), {
  type: 'url',
  isLink: true,

  /**
   Lowercases relevant parts of the domain and adds the protocol if required.
   Note that this will not escape unsafe HTML characters in the URL.

   The tokens are consumed in four consecutive runs: leading PROTOCOL tokens
   (lowercased), then SLASH tokens (kept as-is), then domain tokens
   (lowercased), then everything that is left (kept as-is). The given protocol
   plus `://` is prepended only when neither a PROTOCOL nor a leading SLASH
   token was seen.

   @method href
   @param {String} [protocol] `'http'` by default
   @return {String}
   */
  toHref: function toHref() {
    var protocol = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 'http';

    var tokens = this.v;
    var pieces = [];
    var sawProtocol = false;
    var sawSlash = false;
    var idx = 0;

    // Lowercase protocol
    for (; tokens[idx] instanceof PROTOCOL; idx++) {
      sawProtocol = true;
      pieces.push(tokens[idx].toString().toLowerCase());
    }

    // Skip slash-slash
    for (; tokens[idx] instanceof SLASH; idx++) {
      sawSlash = true;
      pieces.push(tokens[idx].toString());
    }

    // Lowercase the run of domain tokens that follows
    for (; isDomainToken(tokens[idx]); idx++) {
      pieces.push(tokens[idx].toString().toLowerCase());
    }

    // Leave all other characters as they were written
    for (; idx < tokens.length; idx++) {
      pieces.push(tokens[idx].toString());
    }

    var href = pieces.join('');

    if (sawProtocol || sawSlash) {
      return href;
    }
    return protocol + '://' + href;
  },

  // True when the very first token is a PROTOCOL token
  hasProtocol: function hasProtocol() {
    var first = this.v[0];
    return first instanceof PROTOCOL;
  }
});
936
// Frozen lookup of the multi-token classes, keyed by their public names
var TOKENS$1 = (function () {
  var registry = {};
  registry.Base = MultiToken;
  registry.EMAIL = EMAIL;
  registry.NL = NL;
  registry.TEXT = TEXT;
  registry.URL = URL;
  return Object.freeze(registry);
})();
944
945 /**
946 Not exactly parser, more like the second-stage scanner (although we can
947 theoretically hotswap the code here with a real parser in the future... but
948 for a little URL-finding utility abstract syntax trees may be a little
949 overkill).
950
951 URL format: http://en.wikipedia.org/wiki/URI_scheme
952 Email format: http://en.wikipedia.org/wiki/Email_address (links to RFC in
953 reference)
954
955 @module linkify
956 @submodule parser
957 @main parser
958 */
959
// Thin factory around `new State(...)`; `tokenClass` is handed to the
// constructor unchanged (and is simply undefined when omitted).
var makeState$1 = function makeState$1(tokenClass) {
  var state = new State(tokenClass);
  return state;
};
963
// Every walk through the parser begins in this state.
var S_START$1 = makeState$1();

// ---- URL states ----
// Domains that begin with a protocol are treated slightly differently from
// those that don't. States created with a token class (URL, EMAIL, NL) are
// the ones given something to emit.

// Protocol seen, e.g. 'http:'
var S_PROTOCOL = makeState$1();
// One slash seen, e.g. '/' or 'http:/'
var S_PROTOCOL_SLASH = makeState$1();
// Two slashes seen, e.g. '//' or 'http://'
var S_PROTOCOL_SLASH_SLASH = makeState$1();
// (A) Parsed string ends with a potential domain name
var S_DOMAIN$1 = makeState$1();
// (A) Domain followed by DOT
var S_DOMAIN_DOT = makeState$1();
// (A) Simplest possible URL, with no query string
var S_TLD = makeState$1(URL);
// (A) URL followed by a colon (a port number may come next)
var S_TLD_COLON = makeState$1();
// TLD followed by a port number
var S_TLD_PORT = makeState$1(URL);
// Long URL with optional port and maybe a query string
var S_URL = makeState$1(URL);
// URL followed by some symbols (these will not be part of the final URL)
var S_URL_NON_ACCEPTING = makeState$1();

// URL followed by an opening `{`, `[`, `<` or `(`
var S_URL_OPENBRACE = makeState$1();
var S_URL_OPENBRACKET = makeState$1();
var S_URL_OPENANGLEBRACKET = makeState$1();
var S_URL_OPENPAREN = makeState$1();

// URL followed by an opening bracket and some symbols the URL can end in
var S_URL_OPENBRACE_Q = makeState$1(URL);
var S_URL_OPENBRACKET_Q = makeState$1(URL);
var S_URL_OPENANGLEBRACKET_Q = makeState$1(URL);
var S_URL_OPENPAREN_Q = makeState$1(URL);

// The matching `_Q` state followed by some symbols the URL cannot end in
var S_URL_OPENBRACE_SYMS = makeState$1();
var S_URL_OPENBRACKET_SYMS = makeState$1();
var S_URL_OPENANGLEBRACKET_SYMS = makeState$1();
var S_URL_OPENPAREN_SYMS = makeState$1();

// ---- Email states ----

// (C) Local part + '@' followed by a potential domain name
var S_EMAIL_DOMAIN = makeState$1();
// (C) Domain followed by DOT
var S_EMAIL_DOMAIN_DOT = makeState$1();
// (C) Possible email address (could have more TLDs)
var S_EMAIL = makeState$1(EMAIL);
// (C) Email address followed by a colon (a port number may come next)
var S_EMAIL_COLON = makeState$1();
// (C) Email address with a port
var S_EMAIL_PORT = makeState$1(EMAIL);
// Local part of the email address
var S_LOCALPART = makeState$1();
// Local part of the email address plus '@'
var S_LOCALPART_AT = makeState$1();
// Local part of the email address plus '.' (a local part cannot end in '.')
var S_LOCALPART_DOT = makeState$1();

// ---- Line breaks ----

// A single new line
var S_NL = makeState$1(NL);
1000
// Path from the start state to a protocol (including the '//')
S_START$1
  .on(TNL, S_NL)
  .on(PROTOCOL, S_PROTOCOL)
  .on(SLASH, S_PROTOCOL_SLASH);

S_PROTOCOL.on(SLASH, S_PROTOCOL_SLASH);
S_PROTOCOL_SLASH.on(SLASH, S_PROTOCOL_SLASH_SLASH);

// The very first potential domain name
S_START$1
  .on(TLD, S_DOMAIN$1)
  .on(DOMAIN, S_DOMAIN$1)
  .on(LOCALHOST, S_TLD)
  .on(NUM, S_DOMAIN$1);

// Anything sane that follows a protocol is forced to be a URL
S_PROTOCOL_SLASH_SLASH
  .on(TLD, S_URL)
  .on(DOMAIN, S_URL)
  .on(NUM, S_URL)
  .on(LOCALHOST, S_URL);

// A dot after a domain. No hyphen transitions are registered at this level;
// the original notes only say hyphens are usually parts of domain names.
S_DOMAIN$1.on(DOT, S_DOMAIN_DOT);
S_EMAIL_DOMAIN.on(DOT, S_EMAIL_DOMAIN_DOT);

// After a domain and a dot comes either a TLD (making a URL) or another
// domain segment
S_DOMAIN_DOT
  .on(TLD, S_TLD)
  .on(DOMAIN, S_DOMAIN$1)
  .on(NUM, S_DOMAIN$1)
  .on(LOCALHOST, S_DOMAIN$1);

S_EMAIL_DOMAIN_DOT
  .on(TLD, S_EMAIL)
  .on(DOMAIN, S_EMAIL_DOMAIN)
  .on(NUM, S_EMAIL_DOMAIN)
  .on(LOCALHOST, S_EMAIL_DOMAIN);

// S_TLD accepts, but the URL could be longer, so keep matching greedily.
// The `run` function is expected to roll back to the last accepting state.
S_TLD.on(DOT, S_DOMAIN_DOT);
S_EMAIL.on(DOT, S_EMAIL_DOMAIN_DOT);

// A TLD becomes a full URL after `SLASH` or `COLON NUM SLASH`.
// This is where the protocol and non-protocol paths converge.
S_TLD
  .on(COLON, S_TLD_COLON)
  .on(SLASH, S_URL);
S_TLD_COLON.on(NUM, S_TLD_PORT);
S_TLD_PORT.on(SLASH, S_URL);
S_EMAIL.on(COLON, S_EMAIL_COLON);
S_EMAIL_COLON.on(NUM, S_EMAIL_PORT);
1037
// Token types a URL is definitely allowed to end in
var qsAccepting = [DOMAIN, AT, LOCALHOST, NUM, PLUS, POUND, PROTOCOL, SLASH, TLD, UNDERSCORE, SYM];

// Token types that can follow a URL and be part of the query string but
// cannot be its very last characters. Characters that cannot appear in a
// URL at all are left out of both lists.
var qsNonAccepting = [COLON, DOT, QUERY, PUNCTUATION, CLOSEBRACE, CLOSEBRACKET, CLOSEANGLEBRACKET, CLOSEPAREN, OPENBRACE, OPENBRACKET, OPENANGLEBRACKET, OPENPAREN];

// The bracket states wired below are primarily responsible for deciding
// whether a final closing bracket is included in the URL.

// A URL followed by an opening bracket
S_URL
  .on(OPENBRACE, S_URL_OPENBRACE)
  .on(OPENBRACKET, S_URL_OPENBRACKET)
  .on(OPENANGLEBRACKET, S_URL_OPENANGLEBRACKET)
  .on(OPENPAREN, S_URL_OPENPAREN);

// A URL with extra symbols at the end, followed by an opening bracket
S_URL_NON_ACCEPTING
  .on(OPENBRACE, S_URL_OPENBRACE)
  .on(OPENBRACKET, S_URL_OPENBRACKET)
  .on(OPENANGLEBRACKET, S_URL_OPENANGLEBRACKET)
  .on(OPENPAREN, S_URL_OPENPAREN);

(function wireBracketFamilies() {
  // One row per bracket kind: its closing token, the state right after the
  // opening bracket, the `_Q` state and the `_SYMS` state.
  var families = [
    { close: CLOSEBRACE, opened: S_URL_OPENBRACE, q: S_URL_OPENBRACE_Q, syms: S_URL_OPENBRACE_SYMS },
    { close: CLOSEBRACKET, opened: S_URL_OPENBRACKET, q: S_URL_OPENBRACKET_Q, syms: S_URL_OPENBRACKET_SYMS },
    { close: CLOSEANGLEBRACKET, opened: S_URL_OPENANGLEBRACKET, q: S_URL_OPENANGLEBRACKET_Q, syms: S_URL_OPENANGLEBRACKET_SYMS },
    { close: CLOSEPAREN, opened: S_URL_OPENPAREN, q: S_URL_OPENPAREN_Q, syms: S_URL_OPENPAREN_SYMS }
  ];

  // Each rule is applied to all four families before the next rule runs, so
  // the transitions are registered in the same sequence as a hand-written
  // list would register them.
  var rules = [
    // Closing bracket: this character WILL be included in the URL
    function (f) { f.opened.on(f.close, S_URL); },
    function (f) { f.q.on(f.close, S_URL); },
    function (f) { f.syms.on(f.close, S_URL); },

    // Right after the opening bracket. The final state can still be a `_Q`
    // state if the URL only has a single opening bracket for some reason.
    function (f) { f.opened.on(qsAccepting, f.q); },
    function (f) { f.opened.on(qsNonAccepting, f.syms); },

    // Inside the bracket, after symbols the URL can end in.
    // NOTE(review): non-accepting tokens loop back to the `_Q` state here
    // rather than moving to `_SYMS` - confirm this is intended.
    function (f) { f.q.on(qsAccepting, f.q); },
    function (f) { f.q.on(qsNonAccepting, f.q); },

    // Inside the bracket, after symbols the URL cannot end in
    function (f) { f.syms.on(qsAccepting, f.q); },
    function (f) { f.syms.on(qsNonAccepting, f.syms); }
  ];

  for (var r = 0; r < rules.length; r++) {
    for (var k = 0; k < families.length; k++) {
      rules[r](families[k]);
    }
  }
})();

// Account for the query string
S_URL.on(qsAccepting, S_URL);
S_URL_NON_ACCEPTING.on(qsAccepting, S_URL);

S_URL.on(qsNonAccepting, S_URL_NON_ACCEPTING);
S_URL_NON_ACCEPTING.on(qsNonAccepting, S_URL_NON_ACCEPTING);
1106
// Email-address-specific transitions.
// Note: '/' is not allowed in email addresses, since that would interfere
// with real URLs.

// Token types allowed in the local part of an email address
var localpartAccepting = [DOMAIN, NUM, PLUS, POUND, QUERY, UNDERSCORE, SYM, TLD];

// Some token types in `localpartAccepting` already have transitions on these
// states; per the original note those earlier transitions are not
// overwritten by the ones added here.
S_DOMAIN$1
  .on(localpartAccepting, S_LOCALPART)
  .on(AT, S_LOCALPART_AT);
S_TLD
  .on(localpartAccepting, S_LOCALPART)
  .on(AT, S_LOCALPART_AT);
S_DOMAIN_DOT.on(localpartAccepting, S_LOCALPART);

// Inside a local part: more local-part tokens, an '@' (close to an email
// address now), or a dot.
// TODO: IP addresses and what if the email starts with numbers?
S_LOCALPART
  .on(localpartAccepting, S_LOCALPART)
  .on(AT, S_LOCALPART_AT)
  .on(DOT, S_LOCALPART_DOT);
S_LOCALPART_DOT.on(localpartAccepting, S_LOCALPART);

// After the '@'; the states that follow are wired up earlier in the file
S_LOCALPART_AT
  .on(TLD, S_EMAIL_DOMAIN)
  .on(DOMAIN, S_EMAIL_DOMAIN)
  .on(LOCALHOST, S_EMAIL);
1127
/**
  Groups a flat list of scanner tokens into multi-tokens by walking the
  state machine that starts at `S_START$1`. Each run of tokens that reaches
  an accepting state becomes an instance of the class that state emits; all
  other tokens are collected into TEXT multi-tokens.
  @method run
  @param {Array} tokens
  @return {Array} multi-tokens, in input order
*/
var run$1 = function run$1(tokens) {
  var total = tokens.length;
  var pos = 0;
  var groups = [];
  var pendingText = [];

  while (pos < total) {
    var current = S_START$1.next(tokens[pos]);

    // A token the start state cannot jump anywhere from is plain text
    if (!current) {
      pendingText.push(tokens[pos]);
      pos++;
      continue;
    }

    var groupStart = pos;
    var acceptingState = null;
    var acceptedEnd = -1; // index just past the latest accepted token

    // Follow transitions as far as they go, remembering the most recent
    // accepting state along the way
    while (current) {
      pos++;
      if (current.accepts()) {
        acceptingState = current;
        acceptedEnd = pos;
      }
      if (pos >= total) {
        break;
      }
      current = current.next(tokens[pos]);
    }

    // No accepting state was reached: everything consumed is plain text
    if (acceptedEnd < 0) {
      while (groupStart < pos) {
        pendingText.push(tokens[groupStart]);
        groupStart++;
      }
      continue;
    }

    // Close off the text collected so far before emitting the match
    if (pendingText.length > 0) {
      groups.push(new TEXT(pendingText));
      pendingText = [];
    }

    // Roll back to the latest accepting position; the tokens after it are
    // scanned again from the start state on the next pass
    pos = acceptedEnd;

    var MULTI = acceptingState.emit();
    groups.push(new MULTI(tokens.slice(groupStart, pos)));
  }

  // Finally close off any trailing text
  if (pendingText.length > 0) {
    groups.push(new TEXT(pendingText));
  }

  return groups;
};
1200
// Frozen public surface of the parser stage
var parser = (function () {
  var api = {};
  api.State = State;
  api.TOKENS = TOKENS$1;
  api.run = run$1;
  api.start = S_START$1;
  return Object.freeze(api);
})();
1207
// Fallback for environments without a native Array.isArray: identify arrays
// by their internal class tag.
if (!Array.isArray) {
  Array.isArray = function (arg) {
    var tag = Object.prototype.toString.call(arg);
    return tag === '[object Array]';
  };
}
1213
/**
  Turns a string into multi-tokens in two passes: `run` produces the
  low-level tokens and `run$1` groups them into linkable and non-linkable
  pieces.
  @method tokenize
  @param {String} str
  @return {Array} tokens
*/
var tokenize = function tokenize(str) {
  var scanned = run(str);
  return run$1(scanned);
};
1223
/**
  Returns a list of linkable items in the given string, each as the plain
  object produced by the token's `toObject()`.
  @method find
  @param {String} str
  @param {String} [type] when given, only links of this type are returned
  @return {Array}
*/
var find = function find(str) {
  var type = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : null;

  var tokens = tokenize(str);
  var matches = [];

  for (var idx = 0; idx < tokens.length; idx++) {
    var candidate = tokens[idx];

    // Skip anything that is not a link
    if (!candidate.isLink) {
      continue;
    }

    // Skip links of another type when a type filter was requested
    if (type && candidate.type !== type) {
      continue;
    }

    matches.push(candidate.toObject());
  }

  return matches;
};
1242
/**
  Checks whether the whole string is a single piece of linkable text.
  Note that the text is not trimmed for you.

  An optional second `type` param restricts the check to one kind of link.
  For example,

      test(str, 'email');

  will return `true` if str is a valid email.
*/
var test = function test(str) {
  var type = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : null;

  var tokens = tokenize(str);

  // Anything other than exactly one token cannot be a single link
  if (tokens.length !== 1) {
    return false;
  }

  var only = tokens[0];
  return only.isLink && (!type || only.type === type);
};
1262
// Publish the public API on the namespace object handed to this closure
var linkifyPublicApi = [
  ['find', find],
  ['inherits', inherits],
  ['options', options],
  ['parser', parser],
  ['scanner', scanner],
  ['test', test],
  ['tokenize', tokenize]
];
for (var apiIndex = 0; apiIndex < linkifyPublicApi.length; apiIndex++) {
  exports[linkifyPublicApi[apiIndex][0]] = linkifyPublicApi[apiIndex][1];
}
1270})(window.linkify = window.linkify || {});
1271})();