| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265 |
- /*---------------------------------------------------------------------------------------------
- * Copyright (c) Microsoft Corporation. All rights reserved.
- * Licensed under the MIT License. See License.txt in the project root for license information.
- *--------------------------------------------------------------------------------------------*/
- import { CharacterClassifier } from '../core/characterClassifier.js';
/**
 * A fixed-size, row-major matrix of unsigned 8-bit cells stored in one flat `Uint8Array`.
 */
export class Uint8Matrix {
    /**
     * @param {number} rows - Number of rows.
     * @param {number} cols - Number of columns.
     * @param {number} defaultValue - Value written into every cell on creation.
     */
    constructor(rows, cols, defaultValue) {
        this._data = new Uint8Array(rows * cols).fill(defaultValue);
        this.rows = rows;
        this.cols = cols;
    }
    /**
     * Reads the cell at (`row`, `col`). No bounds checking is performed.
     */
    get(row, col) {
        const offset = row * this.cols + col;
        return this._data[offset];
    }
    /**
     * Writes `value` into the cell at (`row`, `col`). No bounds checking is performed.
     */
    set(row, col, value) {
        const offset = row * this.cols + col;
        this._data[offset] = value;
    }
}
/**
 * A table-driven state machine over character codes.
 *
 * Transitions live in a `Uint8Matrix` addressed as (state, charCode). Every
 * (state, charCode) pair that has no explicit edge maps to 0 (State.Invalid).
 */
export class StateMachine {
    /**
     * @param {Array<[number, number, number]>} edges - Transitions given as
     *   `[fromState, charCode, toState]` triples.
     */
    constructor(edges) {
        // Size the table to the largest state and char code mentioned by any edge.
        let maxCharCode = 0;
        let maxState = 0 /* State.Invalid */;
        for (const [from, chCode, to] of edges) {
            if (chCode > maxCharCode) {
                maxCharCode = chCode;
            }
            if (from > maxState) {
                maxState = from;
            }
            if (to > maxState) {
                maxState = to;
            }
        }
        // Convert the largest index into a count (exclusive upper bound).
        maxCharCode++;
        maxState++;
        const states = new Uint8Matrix(maxState, maxCharCode, 0 /* State.Invalid */);
        for (const [from, chCode, to] of edges) {
            states.set(from, chCode, to);
        }
        this._states = states;
        this._maxCharCode = maxCharCode;
        this._maxState = maxState;
    }
    /**
     * Looks up the state reached from `currentState` on `chCode`.
     *
     * @param {number} currentState - The state to transition from.
     * @param {number} chCode - The character code being consumed.
     * @returns {number} The next state, or 0 (State.Invalid) when there is no
     *   such edge or when either argument lies outside the transition table.
     */
    nextState(currentState, chCode) {
        // The negated range form also rejects NaN / undefined inputs.
        if (!(chCode >= 0 && chCode < this._maxCharCode)) {
            return 0 /* State.Invalid */;
        }
        // Without this check an unknown state would index past the backing
        // typed array and yield `undefined` rather than State.Invalid.
        if (!(currentState >= 0 && currentState < this._maxState)) {
            return 0 /* State.Invalid */;
        }
        return this._states.get(currentState, chCode);
    }
}
// Lazily created state machine that recognizes the prefixes http://, https:// and file://
let _stateMachine = null;
/**
 * Returns the shared scheme-prefix state machine, building it on first use.
 */
function getStateMachine() {
    if (_stateMachine !== null) {
        return _stateMachine;
    }
    const S = {
        Start: 1,
        H: 2,
        HT: 3,
        HTT: 4,
        HTTP: 5,
        F: 6,
        FI: 7,
        FIL: 8,
        BeforeColon: 9,
        AfterColon: 10,
        AlmostThere: 11,
        End: 12,
    };
    const COLON = 58 /* CharCode.Colon */;
    const SLASH = 47 /* CharCode.Slash */;
    // Scheme letters match case-insensitively: one edge for the lower-case
    // char code followed by one for the upper-case char code.
    const letter = (from, lowerCaseLetter, to) => [
        [from, lowerCaseLetter.charCodeAt(0), to],
        [from, lowerCaseLetter.toUpperCase().charCodeAt(0), to],
    ];
    const edges = [
        ...letter(S.Start, 'h', S.H),
        ...letter(S.Start, 'f', S.F),
        ...letter(S.H, 't', S.HT),
        ...letter(S.HT, 't', S.HTT),
        ...letter(S.HTT, 'p', S.HTTP),
        // "http" may be followed by "s" (https) or directly by ":".
        ...letter(S.HTTP, 's', S.BeforeColon),
        [S.HTTP, COLON, S.AfterColon],
        ...letter(S.F, 'i', S.FI),
        ...letter(S.FI, 'l', S.FIL),
        ...letter(S.FIL, 'e', S.BeforeColon),
        [S.BeforeColon, COLON, S.AfterColon],
        [S.AfterColon, SLASH, S.AlmostThere],
        [S.AlmostThere, SLASH, S.End],
    ];
    _stateMachine = new StateMachine(edges);
    return _stateMachine;
}
// Lazily created classifier shared by all link computations.
let _classifier = null;
/**
 * Returns the shared character classifier, building it on first use.
 * Characters default to 0 (CharacterClass.None); the two sets below are marked
 * as 1 (CharacterClass.ForceTermination) and 2 (CharacterClass.CannotEndIn).
 */
function getClassifier() {
    if (_classifier !== null) {
        return _classifier;
    }
    _classifier = new CharacterClassifier(0 /* CharacterClass.None */);
    const target = _classifier;
    // Assigns `charClass` to every UTF-16 code unit of `chars`.
    const markAll = (chars, charClass) => {
        for (let index = 0; index < chars.length; index++) {
            target.set(chars.charCodeAt(index), charClass);
        }
    };
    // allow-any-unicode-next-line
    const FORCE_TERMINATION_CHARACTERS = ' \t<>\'\"、。。、,.:;‘〈「『〔([{「」}])〕』」〉’`~…';
    markAll(FORCE_TERMINATION_CHARACTERS, 1 /* CharacterClass.ForceTermination */);
    const CANNOT_END_WITH_CHARACTERS = '.,;:';
    markAll(CANNOT_END_WITH_CHARACTERS, 2 /* CharacterClass.CannotEndIn */);
    return _classifier;
}
/**
 * Scans the lines of a model for links that start with a scheme prefix
 * accepted by the state machine and reports their ranges and URLs.
 */
export class LinkComputer {
    /**
     * Builds a link object for `line[linkBeginIndex, linkEndIndex)` after trimming
     * characters a link must not end with.
     *
     * @param classifier - Character classifier; class 2 means CharacterClass.CannotEndIn.
     * @param {string} line - Content of the line containing the link.
     * @param {number} lineNumber - Line number reported in the resulting range.
     * @param {number} linkBeginIndex - 0-based index of the link's first character.
     * @param {number} linkEndIndex - 0-based index just past the link's last candidate character.
     * @returns {{range: object, url: string}} Range with 1-based columns
     *   (`endColumn` is one past the last included character) and the link text.
     */
    static _createLink(classifier, line, lineNumber, linkBeginIndex, linkEndIndex) {
        // Do not allow to end link in certain characters...
        let lastIncludedCharIndex = linkEndIndex - 1;
        do {
            const chCode = line.charCodeAt(lastIncludedCharIndex);
            const chClass = classifier.get(chCode);
            if (chClass !== 2 /* CharacterClass.CannotEndIn */) {
                break;
            }
            lastIncludedCharIndex--;
        } while (lastIncludedCharIndex > linkBeginIndex);
        // Handle links enclosed in parens, square brackets and curlys.
        if (linkBeginIndex > 0) {
            const charCodeBeforeLink = line.charCodeAt(linkBeginIndex - 1);
            const lastCharCodeInLink = line.charCodeAt(lastIncludedCharIndex);
            if ((charCodeBeforeLink === 40 /* CharCode.OpenParen */ && lastCharCodeInLink === 41 /* CharCode.CloseParen */)
                || (charCodeBeforeLink === 91 /* CharCode.OpenSquareBracket */ && lastCharCodeInLink === 93 /* CharCode.CloseSquareBracket */)
                || (charCodeBeforeLink === 123 /* CharCode.OpenCurlyBrace */ && lastCharCodeInLink === 125 /* CharCode.CloseCurlyBrace */)) {
                // Do not end in ) if ( is before the link start
                // Do not end in ] if [ is before the link start
                // Do not end in } if { is before the link start
                lastIncludedCharIndex--;
            }
        }
        return {
            range: {
                startLineNumber: lineNumber,
                startColumn: linkBeginIndex + 1,
                endLineNumber: lineNumber,
                endColumn: lastIncludedCharIndex + 2
            },
            url: line.substring(linkBeginIndex, lastIncludedCharIndex + 1)
        };
    }
    /**
     * Computes all links in `model`, line by line.
     *
     * @param model - Object exposing `getLineCount()` and `getLineContent(lineNumber)`
     *   (line numbers are requested from 1 to `getLineCount()` inclusive).
     * @param stateMachine - Object exposing `nextState(state, chCode)`; it is expected
     *   to reach state 12 (State.End) once a scheme prefix has been consumed and to
     *   return 0 (State.Invalid) on a mismatch. Defaults to the shared machine.
     * @returns {Array<{range: object, url: string}>} Links in document order.
     */
    static computeLinks(model, stateMachine = getStateMachine()) {
        const classifier = getClassifier();
        const result = [];
        for (let i = 1, lineCount = model.getLineCount(); i <= lineCount; i++) {
            const line = model.getLineContent(i);
            const len = line.length;
            let j = 0;
            let linkBeginIndex = 0;
            // Character that preceded the current link candidate (0 at line start).
            let linkBeginChCode = 0;
            let state = 1 /* State.Start */;
            // Per-link bracket bookkeeping; all of it is cleared whenever the machine resets.
            let hasOpenParens = false;
            let hasOpenSquareBracket = false;
            let inSquareBrackets = false;
            let hasOpenCurlyBracket = false;
            while (j < len) {
                let resetStateMachine = false;
                const chCode = line.charCodeAt(j);
                if (state === 13 /* State.Accept */) {
                    // Inside a link body: decide whether this character ends the link.
                    let chClass;
                    switch (chCode) {
                        case 40 /* CharCode.OpenParen */:
                            hasOpenParens = true;
                            chClass = 0 /* CharacterClass.None */;
                            break;
                        case 41 /* CharCode.CloseParen */:
                            chClass = (hasOpenParens ? 0 /* CharacterClass.None */ : 1 /* CharacterClass.ForceTermination */);
                            break;
                        case 91 /* CharCode.OpenSquareBracket */:
                            inSquareBrackets = true;
                            hasOpenSquareBracket = true;
                            chClass = 0 /* CharacterClass.None */;
                            break;
                        case 93 /* CharCode.CloseSquareBracket */:
                            inSquareBrackets = false;
                            chClass = (hasOpenSquareBracket ? 0 /* CharacterClass.None */ : 1 /* CharacterClass.ForceTermination */);
                            break;
                        case 123 /* CharCode.OpenCurlyBrace */:
                            hasOpenCurlyBracket = true;
                            chClass = 0 /* CharacterClass.None */;
                            break;
                        case 125 /* CharCode.CloseCurlyBrace */:
                            chClass = (hasOpenCurlyBracket ? 0 /* CharacterClass.None */ : 1 /* CharacterClass.ForceTermination */);
                            break;
                        /* The following three rules make it that ' or " or ` are allowed inside links if the link didn't begin with them */
                        case 39 /* CharCode.SingleQuote */:
                            chClass = (linkBeginChCode === 39 /* CharCode.SingleQuote */ ? 1 /* CharacterClass.ForceTermination */ : 0 /* CharacterClass.None */);
                            break;
                        case 34 /* CharCode.DoubleQuote */:
                            chClass = (linkBeginChCode === 34 /* CharCode.DoubleQuote */ ? 1 /* CharacterClass.ForceTermination */ : 0 /* CharacterClass.None */);
                            break;
                        case 96 /* CharCode.BackTick */:
                            chClass = (linkBeginChCode === 96 /* CharCode.BackTick */ ? 1 /* CharacterClass.ForceTermination */ : 0 /* CharacterClass.None */);
                            break;
                        case 42 /* CharCode.Asterisk */:
                            // `*` terminates a link if the link began with `*`
                            chClass = (linkBeginChCode === 42 /* CharCode.Asterisk */) ? 1 /* CharacterClass.ForceTermination */ : 0 /* CharacterClass.None */;
                            break;
                        case 124 /* CharCode.Pipe */:
                            // `|` terminates a link if the link began with `|`
                            chClass = (linkBeginChCode === 124 /* CharCode.Pipe */) ? 1 /* CharacterClass.ForceTermination */ : 0 /* CharacterClass.None */;
                            break;
                        case 32 /* CharCode.Space */:
                            // ` ` allow space in between [ and ]
                            chClass = (inSquareBrackets ? 0 /* CharacterClass.None */ : 1 /* CharacterClass.ForceTermination */);
                            break;
                        default:
                            chClass = classifier.get(chCode);
                    }
                    // Check if character terminates link
                    if (chClass === 1 /* CharacterClass.ForceTermination */) {
                        result.push(LinkComputer._createLink(classifier, line, i, linkBeginIndex, j));
                        resetStateMachine = true;
                    }
                }
                else if (state === 12 /* State.End */) {
                    // The scheme prefix was just consumed; the link needs at least one more character.
                    let chClass;
                    if (chCode === 91 /* CharCode.OpenSquareBracket */) {
                        // Allow for the authority part to contain ipv6 addresses which contain [ and ]
                        hasOpenSquareBracket = true;
                        chClass = 0 /* CharacterClass.None */;
                    }
                    else {
                        chClass = classifier.get(chCode);
                    }
                    // Check if character terminates link
                    if (chClass === 1 /* CharacterClass.ForceTermination */) {
                        resetStateMachine = true;
                    }
                    else {
                        state = 13 /* State.Accept */;
                    }
                }
                else {
                    // Still matching the scheme prefix.
                    state = stateMachine.nextState(state, chCode);
                    if (state === 0 /* State.Invalid */) {
                        resetStateMachine = true;
                    }
                }
                if (resetStateMachine) {
                    state = 1 /* State.Start */;
                    hasOpenParens = false;
                    hasOpenSquareBracket = false;
                    // Must be cleared with the other bracket flags: otherwise an unclosed `[`
                    // in one link lets spaces extend every later link on the same line.
                    inSquareBrackets = false;
                    hasOpenCurlyBracket = false;
                    // Record where the link started
                    linkBeginIndex = j + 1;
                    linkBeginChCode = chCode;
                }
                j++;
            }
            // A link that runs to the end of the line is terminated by the line end.
            if (state === 13 /* State.Accept */) {
                result.push(LinkComputer._createLink(classifier, line, i, linkBeginIndex, len));
            }
        }
        return result;
    }
}
/**
 * Returns an array of all links contained in the provided
 * document. *Note* that this operation is computationally
 * expensive and should not run in the UI thread.
 *
 * An empty array is returned when `model` does not expose both
 * `getLineCount()` and `getLineContent()`.
 */
export function computeLinks(model) {
    const looksLikeModel = Boolean(model)
        && typeof model.getLineCount === 'function'
        && typeof model.getLineContent === 'function';
    // Unknown caller!
    return looksLikeModel ? LinkComputer.computeLinks(model) : [];
}
|