| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296 |
- /*---------------------------------------------------------------------------------------------
- * Copyright (c) Microsoft Corporation. All rights reserved.
- * Licensed under the MIT License. See License.txt in the project root for license information.
- *--------------------------------------------------------------------------------------------*/
- import { NotSupportedError } from '../../../../../base/common/errors.js';
- import { TokenMetadata } from '../../../encodedTokenAttributes.js';
- import { TextAstNode } from './ast.js';
- import { lengthAdd, lengthDiff, lengthGetColumnCountIfZeroLineCount, lengthToObj, lengthZero, toLength } from './length.js';
- import { SmallImmutableSet } from './smallImmutableSet.js';
/**
 * A single token emitted by the tokenizers in this file: either a run of text
 * or a bracket.
 */
export class Token {
    /**
     * @param length Length of the token, as produced by `toLength` / `lengthDiff`.
     * @param kind The token kind (text tokens in this file use `0`, i.e. `TokenKind.Text`).
     * @param bracketId
     *   If this token is an opening bracket, the id of that opening bracket.
     *   If this token is a closing bracket, the id of the first opening bracket
     *   that is closed by this bracket.
     *   Otherwise `-1`.
     * @param bracketIds
     *   If this token is an opening bracket, a set that contains just `bracketId`.
     *   If this token is a closing bracket, the ids of all opening brackets it closes.
     *   Otherwise the empty set.
     * @param astNode The AST node associated with this token.
     */
    constructor(length, kind, bracketId, bracketIds, astNode) {
        this.length = length;
        this.kind = kind;
        this.bracketId = bracketId;
        this.bracketIds = bracketIds;
        this.astNode = astNode;
    }
}
/**
 * Tokenizer over a text model that adds one token of lookahead (`peek`) and
 * repositioning (`skip`) on top of `NonPeekableTextBufferTokenizer`.
 */
export class TextBufferTokenizer {
    /**
     * @param textModel The text model to tokenize. Its line count and the length
     *   of its last line are captured at construction time.
     * @param bracketTokens Bracket token lookup that is handed to the underlying reader.
     */
    constructor(textModel, bracketTokens) {
        this.textModel = textModel;
        this.bracketTokens = bracketTokens;
        this.reader = new NonPeekableTextBufferTokenizer(this.textModel, this.bracketTokens);
        this._offset = lengthZero;
        // `didPeek` tells whether `peeked` holds the result of a pending peek.
        // `peeked` itself may legitimately be `null` (end of document).
        this.didPeek = false;
        this.peeked = null;
        this.textBufferLineCount = textModel.getLineCount();
        this.textBufferLastLineLength = textModel.getLineLength(this.textBufferLineCount);
    }
    /**
     * The accumulated length of everything that has been read or skipped so far.
     */
    get offset() {
        return this._offset;
    }
    /**
     * The length of the whole text buffer.
     * Lengths count line breaks, not lines, so a buffer with `n` lines spans
     * `n - 1` line breaks plus the columns of its last line.
     */
    get length() {
        return toLength(this.textBufferLineCount - 1, this.textBufferLastLineLength);
    }
    /**
     * Advances the offset by `length` and repositions the underlying reader there.
     * Any pending peeked token is discarded.
     */
    skip(length) {
        this.didPeek = false;
        // Drop the peeked token so that a following `read()` cannot return it.
        this.peeked = null;
        this._offset = lengthAdd(this._offset, length);
        const { lineCount, columnCount } = lengthToObj(this._offset);
        this.reader.setPosition(lineCount, columnCount);
    }
    /**
     * Returns the next token and advances the offset by its length.
     * @returns The next token, or `null` when the underlying reader has no more tokens.
     */
    read() {
        let token;
        if (this.didPeek) {
            // Hand out the pending token exactly once.
            token = this.peeked;
            this.didPeek = false;
            this.peeked = null;
        }
        else {
            token = this.reader.read();
        }
        if (token) {
            this._offset = lengthAdd(this._offset, token.length);
        }
        return token;
    }
    /**
     * Returns the next token without advancing the offset.
     * Repeated calls return the same token until `read()` or `skip()` is called.
     */
    peek() {
        if (!this.didPeek) {
            this.peeked = this.reader.read();
            this.didPeek = true;
        }
        return this.peeked;
    }
}
/**
 * Reads the text model sequentially and splits it into text tokens and
 * bracket tokens.
 * Does not support peek.
 */
class NonPeekableTextBufferTokenizer {
    /**
     * @param textModel The text model to read line tokens from.
     * @param bracketTokens Provides the bracket tokens (and their regexp) per language id.
     */
    constructor(textModel, bracketTokens) {
        this.textModel = textModel;
        this.bracketTokens = bracketTokens;
        // Zero-based index of the current line.
        this.lineIdx = 0;
        // Content of the current line; `null` until `read()` has loaded it.
        this.line = null;
        this.lineCharOffset = 0;
        this.lineTokens = null;
        this.lineTokenOffset = 0;
        /** Must be a zero line token. The end of the document cannot be peeked. */
        this.peekedToken = null;
        this.textBufferLineCount = textModel.getLineCount();
        this.textBufferLastLineLength = textModel.getLineLength(this.textBufferLineCount);
    }
    /**
     * Moves the reader to the given position and drops any pending bracket token.
     * @param lineIdx Zero-based line index.
     * @param column Character offset within that line.
     */
    setPosition(lineIdx, column) {
        // We must not jump into a token!
        if (lineIdx === this.lineIdx) {
            this.lineCharOffset = column;
            // The current line might not have been loaded yet (no `read()` so far,
            // or a previous jump to another line). In that case `read()` computes
            // the token index when it loads the line, so there is nothing to do here.
            if (this.line !== null) {
                this.lineTokenOffset = this.lineCharOffset === 0 ? 0 : this.lineTokens.findTokenIndexAtOffset(this.lineCharOffset);
            }
        }
        else {
            this.lineIdx = lineIdx;
            this.lineCharOffset = column;
            // Forces `read()` to reload the line tokens.
            this.line = null;
        }
        this.peekedToken = null;
    }
    /**
     * Reads the next token.
     * @returns A bracket token, a text token covering the text up to the next
     *   bracket (or up to a length limit), or `null` at the end of the document.
     */
    read() {
        if (this.peekedToken) {
            // A bracket was found behind the text returned by the previous call.
            const token = this.peekedToken;
            this.peekedToken = null;
            this.lineCharOffset += lengthGetColumnCountIfZeroLineCount(token.length);
            return token;
        }
        if (this.lineIdx > this.textBufferLineCount - 1 || (this.lineIdx === this.textBufferLineCount - 1 && this.lineCharOffset >= this.textBufferLastLineLength)) {
            // We are after the end
            return null;
        }
        if (this.line === null) {
            this.lineTokens = this.textModel.tokenization.getLineTokens(this.lineIdx + 1);
            this.line = this.lineTokens.getLineContent();
            this.lineTokenOffset = this.lineCharOffset === 0 ? 0 : this.lineTokens.findTokenIndexAtOffset(this.lineCharOffset);
        }
        const startLineIdx = this.lineIdx;
        const startLineCharOffset = this.lineCharOffset;
        // limits the length of text tokens.
        // If text tokens get too long, incremental updates will be slow
        let lengthHeuristic = 0;
        while (true) {
            const lineTokens = this.lineTokens;
            const tokenCount = lineTokens.getCount();
            let peekedBracketToken = null;
            if (this.lineTokenOffset < tokenCount) {
                const tokenMetadata = lineTokens.getMetadata(this.lineTokenOffset);
                while (this.lineTokenOffset + 1 < tokenCount && tokenMetadata === lineTokens.getMetadata(this.lineTokenOffset + 1)) {
                    // Skip tokens that are identical.
                    // Sometimes, (bracket) identifiers are split up into multiple tokens.
                    this.lineTokenOffset++;
                }
                const isOther = TokenMetadata.getTokenType(tokenMetadata) === 0 /* StandardTokenType.Other */;
                const containsBracketType = TokenMetadata.containsBalancedBrackets(tokenMetadata);
                const endOffset = lineTokens.getEndOffset(this.lineTokenOffset);
                // Is there a bracket token next? Only consume text.
                if (containsBracketType && isOther && this.lineCharOffset < endOffset) {
                    const languageId = lineTokens.getLanguageId(this.lineTokenOffset);
                    const text = this.line.substring(this.lineCharOffset, endOffset);
                    const brackets = this.bracketTokens.getSingleLanguageBracketTokens(languageId);
                    const regexp = brackets.regExpGlobal;
                    if (regexp) {
                        // The regexp is global and shared, so its state has to be reset.
                        regexp.lastIndex = 0;
                        const match = regexp.exec(text);
                        if (match) {
                            peekedBracketToken = brackets.getToken(match[0]);
                            if (peekedBracketToken) {
                                // Consume leading text of the token
                                this.lineCharOffset += match.index;
                            }
                        }
                    }
                }
                lengthHeuristic += endOffset - this.lineCharOffset;
                if (peekedBracketToken) {
                    // Don't skip the entire token, as a single token could contain multiple brackets.
                    if (startLineIdx !== this.lineIdx || startLineCharOffset !== this.lineCharOffset) {
                        // There is text before the bracket
                        this.peekedToken = peekedBracketToken;
                        break;
                    }
                    else {
                        // Consume the peeked token
                        this.lineCharOffset += lengthGetColumnCountIfZeroLineCount(peekedBracketToken.length);
                        return peekedBracketToken;
                    }
                }
                else {
                    // Skip the entire token, as the token contains no brackets at all.
                    this.lineTokenOffset++;
                    this.lineCharOffset = endOffset;
                }
            }
            else {
                // All tokens of the current line are consumed.
                if (this.lineIdx === this.textBufferLineCount - 1) {
                    break;
                }
                this.lineIdx++;
                this.lineTokens = this.textModel.tokenization.getLineTokens(this.lineIdx + 1);
                this.lineTokenOffset = 0;
                this.line = this.lineTokens.getLineContent();
                this.lineCharOffset = 0;
                lengthHeuristic += 33; // max 1000/33 = 30 lines
                // This limits the amount of work to recompute min-indentation
                if (lengthHeuristic > 1000) {
                    // only break (automatically) at the end of line.
                    break;
                }
            }
            if (lengthHeuristic > 1500) {
                // Eventually break regardless of the line length so that
                // very long lines do not cause bad performance.
                // This effectively limits max indentation to 500, as
                // indentation is not computed across multiple text nodes.
                break;
            }
        }
        // If a token contains some proper indentation, it also contains \n{INDENTATION+}(?!{INDENTATION}),
        // unless the line is too long.
        // Thus, the min indentation of the document is the minimum min indentation of every text node.
        const length = lengthDiff(startLineIdx, startLineCharOffset, this.lineIdx, this.lineCharOffset);
        return new Token(length, 0 /* TokenKind.Text */, -1, SmallImmutableSet.getEmpty(), new TextAstNode(length));
    }
}
/**
 * Tokenizer over a plain string. The whole text is tokenized eagerly in the
 * constructor; `read` and `peek` then walk the resulting token list.
 * Does not support `skip`.
 */
export class FastTokenizer {
    /**
     * @param text The text to tokenize.
     * @param brackets Provides `getRegExpStr()` (regexp source matching all brackets,
     *   or a falsy value if there are none) and `getToken(value)` (the token for
     *   matched bracket text).
     */
    constructor(text, brackets) {
        this.text = text;
        this._offset = lengthZero;
        this.idx = 0;
        const regExpStr = brackets.getRegExpStr();
        // Line breaks have to be matched even when there are no brackets,
        // otherwise multi-line text would be reported with a line count of zero.
        const regexp = new RegExp(regExpStr ? regExpStr + '|\n' : '\n', 'gi');
        const createTextToken = (length) => new Token(length, 0 /* TokenKind.Text */, -1, SmallImmutableSet.getEmpty(), new TextAstNode(length));
        const tokens = [];
        let match;
        let curLineCount = 0;
        let lastLineBreakOffset = 0;
        let lastTokenEndOffset = 0;
        let lastTokenEndLine = 0;
        // Short text tokens are very common, so they are created once and shared.
        // Index = column count; one table for zero-line and one for one-line lengths.
        const smallTextTokens0Line = [];
        for (let i = 0; i < 60; i++) {
            smallTextTokens0Line.push(createTextToken(toLength(0, i)));
        }
        const smallTextTokens1Line = [];
        for (let i = 0; i < 60; i++) {
            smallTextTokens1Line.push(createTextToken(toLength(1, i)));
        }
        regexp.lastIndex = 0;
        // If a token contains indentation, it also contains \n{INDENTATION+}(?!{INDENTATION})
        while ((match = regexp.exec(text)) !== null) {
            const curOffset = match.index;
            const value = match[0];
            if (value === '\n') {
                curLineCount++;
                lastLineBreakOffset = curOffset + 1;
                continue;
            }
            if (lastTokenEndOffset !== curOffset) {
                // There is text between the previous bracket and this one.
                let token;
                if (lastTokenEndLine === curLineCount) {
                    const colCount = curOffset - lastTokenEndOffset;
                    token = colCount < smallTextTokens0Line.length
                        ? smallTextTokens0Line[colCount]
                        : createTextToken(toLength(0, colCount));
                }
                else {
                    const lineCount = curLineCount - lastTokenEndLine;
                    const colCount = curOffset - lastLineBreakOffset;
                    token = lineCount === 1 && colCount < smallTextTokens1Line.length
                        ? smallTextTokens1Line[colCount]
                        : createTextToken(toLength(lineCount, colCount));
                }
                tokens.push(token);
            }
            // value is matched by regexp, so the token must exist
            tokens.push(brackets.getToken(value));
            lastTokenEndOffset = curOffset + value.length;
            lastTokenEndLine = curLineCount;
        }
        const offset = text.length;
        if (lastTokenEndOffset !== offset) {
            // Trailing text after the last bracket.
            const length = (lastTokenEndLine === curLineCount)
                ? toLength(0, offset - lastTokenEndOffset)
                : toLength(curLineCount - lastTokenEndLine, offset - lastLineBreakOffset);
            tokens.push(createTextToken(length));
        }
        this.length = toLength(curLineCount, offset - lastLineBreakOffset);
        this.tokens = tokens;
    }
    /**
     * The accumulated length of all tokens returned by `read()` so far.
     */
    get offset() {
        return this._offset;
    }
    /**
     * Returns the next token and advances past it.
     * @returns The next token, or `null` if there is none.
     */
    read() {
        const token = this.tokens[this.idx++] || null;
        if (token) {
            this._offset = lengthAdd(this._offset, token.length);
        }
        return token;
    }
    /**
     * Returns the next token without advancing.
     * @returns The next token, or `null` if there is none.
     */
    peek() {
        return this.tokens[this.idx] || null;
    }
    /**
     * Not supported by this tokenizer.
     * @throws {NotSupportedError} Always.
     */
    skip(length) {
        throw new NotSupportedError();
    }
}
|