| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426 |
- /*---------------------------------------------------------------------------------------------
- * Copyright (c) Microsoft Corporation. All rights reserved.
- * Licensed under the MIT License. See License.txt in the project root for license information.
- *--------------------------------------------------------------------------------------------*/
- import * as arrays from '../../../base/common/arrays.js';
- import { onUnexpectedError } from '../../../base/common/errors.js';
- import { LineTokens } from '../tokens/lineTokens.js';
- import { TokenizationRegistry } from '../languages.js';
- import { nullTokenizeEncoded } from '../languages/nullTokenize.js';
- import { Disposable } from '../../../base/common/lifecycle.js';
- import { StopWatch } from '../../../base/common/stopwatch.js';
- import { countEOL } from '../core/eolCounter.js';
- import { ContiguousMultilineTokensBuilder } from '../tokens/contiguousMultilineTokensBuilder.js';
- import { runWhenIdle } from '../../../base/common/async.js';
- import { setTimeout0 } from '../../../base/common/platform.js';
- /**
- * An array that avoids being sparse by always
- * filling up unused indices with a default value.
- */
/**
 * A growable array that never becomes sparse: any index that was never
 * written explicitly reads back as the configured default value.
 */
class ContiguousGrowingArray {
    constructor(_default) {
        this._default = _default;
        this._store = [];
    }
    /** Returns the value stored at `index`, or the default when out of range. */
    get(index) {
        return index < this._store.length ? this._store[index] : this._default;
    }
    /** Writes `value` at `index`, padding any gap before it with the default. */
    set(index, value) {
        while (index >= this._store.length) {
            this._store.push(this._default);
        }
        this._store[index] = value;
    }
    /** Removes `deleteCount` entries starting at `deleteIndex`; no-op past the end. */
    delete(deleteIndex, deleteCount) {
        if (deleteCount === 0 || deleteIndex >= this._store.length) {
            return;
        }
        this._store.splice(deleteIndex, deleteCount);
    }
    /** Inserts `insertCount` default entries at `insertIndex`; no-op past the end. */
    insert(insertIndex, insertCount) {
        if (insertCount === 0 || insertIndex >= this._store.length) {
            return;
        }
        const defaults = [];
        for (let i = 0; i < insertCount; i++) {
            defaults.push(this._default);
        }
        this._store = arrays.arrayInsert(this._store, insertIndex, defaults);
    }
}
- /**
- * Stores the states at the start of each line and keeps track of which lines
- * must be retokenized. Also uses state equality to quickly validate lines
- * that don't need to be retokenized.
- *
- * For example, when typing on a line, the line gets marked as needing to be tokenized.
- * Once the line is tokenized, the end state is checked for equality against the begin
- * state of the next line. If the states are equal, tokenization doesn't need to run
- * again over the rest of the file. If the states are not equal, the next line gets marked
- * as needing to be tokenized.
- */
/**
 * Stores the begin state of each line and tracks which lines still need to
 * be (re)tokenized. State equality is used to cheaply re-validate lines:
 * when a line's freshly computed end state equals the recorded begin state
 * of the following line, tokenization can skip ahead over already-valid
 * lines instead of re-running over the rest of the file.
 *
 * For example, when typing on a line, that line is marked dirty. Once it is
 * retokenized, its end state is compared against the begin state of the next
 * line; if equal, no further retokenization is required downstream.
 */
export class TokenizationStateStore {
    constructor(tokenizationSupport, initialState) {
        this.tokenizationSupport = tokenizationSupport;
        this.initialState = initialState;
        // `_lineBeginState.get(i)` is the state used to begin tokenizing line `i + 1`.
        this._lineBeginState = new ContiguousGrowingArray(null);
        // `_lineNeedsTokenization.get(i)` is true when line `i + 1` must be retokenized.
        this._lineNeedsTokenization = new ContiguousGrowingArray(true);
        this._firstLineNeedsTokenization = 0;
        this._lineBeginState.set(0, this.initialState);
    }
    /** 0-based index of the first line whose tokens are not up to date. */
    get invalidLineStartIndex() {
        return this._firstLineNeedsTokenization;
    }
    /** Flags the line at `lineIndex` (0-based) as requiring retokenization. */
    markMustBeTokenized(lineIndex) {
        this._lineNeedsTokenization.set(lineIndex, true);
        this._firstLineNeedsTokenization = Math.min(this._firstLineNeedsTokenization, lineIndex);
    }
    getBeginState(lineIndex) {
        return this._lineBeginState.get(lineIndex);
    }
    /**
     * Records the end state produced by tokenizing line `lineIndex` (0-based)
     * and advances the invalid-line frontier as far as state equality allows.
     */
    setEndState(linesLength, lineIndex, endState) {
        this._lineNeedsTokenization.set(lineIndex, false);
        this._firstLineNeedsTokenization = lineIndex + 1;
        if (lineIndex === linesLength - 1) {
            // Last line of the buffer: there is nothing after it to validate.
            return;
        }
        const storedBeginState = this._lineBeginState.get(lineIndex + 1);
        if (storedBeginState === null || !endState.equals(storedBeginState)) {
            // The end state changed, so the next line must be retokenized.
            this._lineBeginState.set(lineIndex + 1, endState);
            this.markMustBeTokenized(lineIndex + 1);
            return;
        }
        // End state unchanged: skip forward to the next line explicitly marked dirty.
        let nextInvalid = lineIndex + 1;
        while (nextInvalid < linesLength && !this._lineNeedsTokenization.get(nextInvalid)) {
            nextInvalid++;
        }
        this._firstLineNeedsTokenization = nextInvalid;
    }
    //#region Editing
    /**
     * Adjusts per-line bookkeeping after a text edit: drops state for the
     * removed lines, inserts defaults for the added ones, and marks the
     * edited line dirty.
     */
    applyEdits(range, eolCount) {
        const removedLineCount = range.endLineNumber - range.startLineNumber;
        this.markMustBeTokenized(range.startLineNumber - 1);
        this._lineBeginState.delete(range.startLineNumber, removedLineCount);
        this._lineNeedsTokenization.delete(range.startLineNumber, removedLineCount);
        this._lineBeginState.insert(range.startLineNumber, eolCount);
        this._lineNeedsTokenization.insert(range.startLineNumber, eolCount);
    }
}
/**
 * Drives tokenization for a text model: reacts to content/language changes,
 * performs background tokenization while the editor is idle, and exposes
 * on-demand tokenization entry points (viewport, forced, speculative
 * single-line tokenization).
 */
export class TextModelTokenization extends Disposable {
    constructor(_textModel, _tokenizationPart, _languageIdCodec) {
        super();
        this._textModel = _textModel;
        this._tokenizationPart = _tokenizationPart;
        this._languageIdCodec = _languageIdCodec;
        // True while an idle callback for background tokenization is pending.
        this._isScheduled = false;
        this._isDisposed = false;
        // Null when tokenization is unavailable for this model (no registered
        // support, or the model is too large to tokenize).
        this._tokenizationStateStore = null;
        this._register(TokenizationRegistry.onDidChange((e) => {
            const languageId = this._textModel.getLanguageId();
            if (e.changedLanguages.indexOf(languageId) === -1) {
                // The registry change does not affect this model's language.
                return;
            }
            // Tokenization support changed: drop all tokens and start over.
            this._resetTokenizationState();
            this._tokenizationPart.clearTokens();
        }));
        this._resetTokenizationState();
    }
    dispose() {
        // Flag first so any in-flight background callback bails out.
        this._isDisposed = true;
        super.dispose();
    }
    //#region TextModel events
    /** Applies content-change events to the state store and resumes background work. */
    handleDidChangeContent(e) {
        if (e.isFlush) {
            // The whole document was replaced: all previous states are meaningless.
            this._resetTokenizationState();
            return;
        }
        if (this._tokenizationStateStore) {
            for (let i = 0, len = e.changes.length; i < len; i++) {
                const change = e.changes[i];
                const [eolCount] = countEOL(change.text);
                this._tokenizationStateStore.applyEdits(change.range, eolCount);
            }
        }
        this._beginBackgroundTokenization();
    }
    handleDidChangeAttached() {
        // Background tokenization only runs while attached to an editor.
        this._beginBackgroundTokenization();
    }
    handleDidChangeLanguage(e) {
        this._resetTokenizationState();
        this._tokenizationPart.clearTokens();
    }
    //#endregion
    /** Rebuilds the tokenization state store for the current language, or clears it. */
    _resetTokenizationState() {
        const [tokenizationSupport, initialState] = initializeTokenization(this._textModel, this._tokenizationPart);
        if (tokenizationSupport && initialState) {
            this._tokenizationStateStore = new TokenizationStateStore(tokenizationSupport, initialState);
        }
        else {
            this._tokenizationStateStore = null;
        }
        this._beginBackgroundTokenization();
    }
    /** Schedules an idle callback to continue background tokenization, if needed. */
    _beginBackgroundTokenization() {
        if (this._isScheduled || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
            return;
        }
        this._isScheduled = true;
        runWhenIdle((deadline) => {
            this._isScheduled = false;
            this._backgroundTokenizeWithDeadline(deadline);
        });
    }
    /**
     * Tokenize until the deadline occurs, but try to yield every 1-2ms.
     */
    _backgroundTokenizeWithDeadline(deadline) {
        // Read the time remaining from the `deadline` immediately because it is unclear
        // if the `deadline` object will be valid after execution leaves this function.
        const endTime = Date.now() + deadline.timeRemaining();
        const execute = () => {
            if (this._isDisposed || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
                // disposed in the meantime or detached or finished
                return;
            }
            this._backgroundTokenizeForAtLeast1ms();
            if (Date.now() < endTime) {
                // There is still time before reaching the deadline, so yield to the browser and then
                // continue execution
                setTimeout0(execute);
            }
            else {
                // The deadline has been reached, so schedule a new idle callback if necessary
                this._beginBackgroundTokenization();
            }
        };
        execute();
    }
    /**
     * Tokenize for at least 1ms.
     */
    _backgroundTokenizeForAtLeast1ms() {
        const lineCount = this._textModel.getLineCount();
        const builder = new ContiguousMultilineTokensBuilder();
        const sw = StopWatch.create(false);
        do {
            if (sw.elapsed() > 1) {
                // the comparison is intentionally > 1 and not >= 1 to ensure that
                // a full millisecond has elapsed, given how microseconds are rounded
                // to milliseconds
                break;
            }
            const tokenizedLineNumber = this._tokenizeOneInvalidLine(builder);
            if (tokenizedLineNumber >= lineCount) {
                // Reached the end of the buffer.
                break;
            }
        } while (this._hasLinesToTokenize());
        this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
    }
    /** Tokenizes the given 1-based line range and publishes the resulting tokens. */
    tokenizeViewport(startLineNumber, endLineNumber) {
        const builder = new ContiguousMultilineTokensBuilder();
        this._tokenizeViewport(builder, startLineNumber, endLineNumber);
        this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
    }
    reset() {
        this._resetTokenizationState();
        this._tokenizationPart.clearTokens();
    }
    /** Synchronously tokenizes all invalid lines up to and including `lineNumber` (1-based). */
    forceTokenization(lineNumber) {
        const builder = new ContiguousMultilineTokensBuilder();
        this._updateTokensUntilLine(builder, lineNumber);
        this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
    }
    /**
     * Returns the standard token type `character` would have if inserted at
     * `position`, by tokenizing a speculative copy of the line. The model is
     * not modified. Falls back to `Other` when tokenization is unavailable.
     */
    getTokenTypeIfInsertingCharacter(position, character) {
        if (!this._tokenizationStateStore) {
            return 0 /* StandardTokenType.Other */;
        }
        this.forceTokenization(position.lineNumber);
        const lineStartState = this._tokenizationStateStore.getBeginState(position.lineNumber - 1);
        if (!lineStartState) {
            return 0 /* StandardTokenType.Other */;
        }
        const languageId = this._textModel.getLanguageId();
        const lineContent = this._textModel.getLineContent(position.lineNumber);
        // Create the text as if `character` was inserted
        const text = (lineContent.substring(0, position.column - 1)
            + character
            + lineContent.substring(position.column - 1));
        const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, lineStartState);
        const lineTokens = new LineTokens(r.tokens, text, this._languageIdCodec);
        if (lineTokens.getCount() === 0) {
            return 0 /* StandardTokenType.Other */;
        }
        const tokenIndex = lineTokens.findTokenIndexAtOffset(position.column - 1);
        return lineTokens.getStandardTokenType(tokenIndex);
    }
    /**
     * Tokenizes the line at `position.lineNumber` as if `length` characters at
     * `position.column` were replaced by `newText`, without modifying the model.
     * Returns the resulting LineTokens, or null when tokenization is unavailable.
     */
    tokenizeLineWithEdit(position, length, newText) {
        const lineNumber = position.lineNumber;
        const column = position.column;
        if (!this._tokenizationStateStore) {
            return null;
        }
        this.forceTokenization(lineNumber);
        const lineStartState = this._tokenizationStateStore.getBeginState(lineNumber - 1);
        if (!lineStartState) {
            return null;
        }
        const curLineContent = this._textModel.getLineContent(lineNumber);
        // Build the line content as it would look after the hypothetical edit.
        const newLineContent = curLineContent.substring(0, column - 1)
            + newText + curLineContent.substring(column - 1 + length);
        const languageId = this._textModel.getLanguageIdAtPosition(lineNumber, 0);
        const result = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, newLineContent, true, lineStartState);
        const lineTokens = new LineTokens(result.tokens, newLineContent, this._languageIdCodec);
        return lineTokens;
    }
    /**
     * A line is "cheap" to tokenize when it is already valid, or is the first
     * invalid line and is short enough to tokenize synchronously.
     */
    isCheapToTokenize(lineNumber) {
        if (!this._tokenizationStateStore) {
            // No tokenization at all, so nothing needs to be computed.
            return true;
        }
        const firstInvalidLineNumber = this._tokenizationStateStore.invalidLineStartIndex + 1;
        if (lineNumber > firstInvalidLineNumber) {
            // Would require tokenizing all lines in between first.
            return false;
        }
        if (lineNumber < firstInvalidLineNumber) {
            // Already tokenized.
            return true;
        }
        if (this._textModel.getLineLength(lineNumber) < 2048 /* Constants.CHEAP_TOKENIZATION_LENGTH_LIMIT */) {
            return true;
        }
        return false;
    }
    _hasLinesToTokenize() {
        if (!this._tokenizationStateStore) {
            return false;
        }
        return (this._tokenizationStateStore.invalidLineStartIndex < this._textModel.getLineCount());
    }
    _isTokenizationComplete() {
        if (!this._tokenizationStateStore) {
            return false;
        }
        return (this._tokenizationStateStore.invalidLineStartIndex >= this._textModel.getLineCount());
    }
    /**
     * Tokenizes the first invalid line and returns its 1-based line number,
     * or lineCount + 1 when there is nothing left to tokenize.
     */
    _tokenizeOneInvalidLine(builder) {
        if (!this._tokenizationStateStore || !this._hasLinesToTokenize()) {
            return this._textModel.getLineCount() + 1;
        }
        const lineNumber = this._tokenizationStateStore.invalidLineStartIndex + 1;
        this._updateTokensUntilLine(builder, lineNumber);
        return lineNumber;
    }
    /** Tokenizes every invalid line up to and including `lineNumber` (1-based). */
    _updateTokensUntilLine(builder, lineNumber) {
        if (!this._tokenizationStateStore) {
            return;
        }
        const languageId = this._textModel.getLanguageId();
        const linesLength = this._textModel.getLineCount();
        const endLineIndex = lineNumber - 1;
        // Validate all states up to and including endLineIndex
        for (let lineIndex = this._tokenizationStateStore.invalidLineStartIndex; lineIndex <= endLineIndex; lineIndex++) {
            const text = this._textModel.getLineContent(lineIndex + 1);
            const lineStartState = this._tokenizationStateStore.getBeginState(lineIndex);
            const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, lineStartState);
            builder.add(lineIndex + 1, r.tokens);
            this._tokenizationStateStore.setEndState(linesLength, lineIndex, r.endState);
            lineIndex = this._tokenizationStateStore.invalidLineStartIndex - 1; // -1 because the outer loop increments it
        }
    }
    /**
     * Tokenizes the viewport lines. When the valid region has not yet reached
     * the viewport, a plausible start state is guessed by tokenizing nearby
     * less-indented lines ("fake lines") leading up to the viewport, and the
     * viewport lines are left marked dirty so real tokenization fixes them later.
     */
    _tokenizeViewport(builder, startLineNumber, endLineNumber) {
        if (!this._tokenizationStateStore) {
            // nothing to do
            return;
        }
        if (endLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
            // nothing to do
            return;
        }
        if (startLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
            // tokenization has reached the viewport start...
            this._updateTokensUntilLine(builder, endLineNumber);
            return;
        }
        // Walk backwards collecting progressively less-indented lines to use as
        // context, stopping as soon as a line with a known begin state is found.
        let nonWhitespaceColumn = this._textModel.getLineFirstNonWhitespaceColumn(startLineNumber);
        const fakeLines = [];
        let initialState = null;
        for (let i = startLineNumber - 1; nonWhitespaceColumn > 1 && i >= 1; i--) {
            const newNonWhitespaceIndex = this._textModel.getLineFirstNonWhitespaceColumn(i);
            if (newNonWhitespaceIndex === 0) {
                // presumably 0 means an empty/whitespace-only line — skip it
                continue;
            }
            if (newNonWhitespaceIndex < nonWhitespaceColumn) {
                fakeLines.push(this._textModel.getLineContent(i));
                nonWhitespaceColumn = newNonWhitespaceIndex;
                initialState = this._tokenizationStateStore.getBeginState(i - 1);
                if (initialState) {
                    break;
                }
            }
        }
        if (!initialState) {
            // No known state found: fall back to the grammar's initial state.
            initialState = this._tokenizationStateStore.initialState;
        }
        const languageId = this._textModel.getLanguageId();
        let state = initialState;
        // Tokenize the fake context lines (collected innermost-last) to refine the state.
        for (let i = fakeLines.length - 1; i >= 0; i--) {
            const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, fakeLines[i], false, state);
            state = r.endState;
        }
        for (let lineNumber = startLineNumber; lineNumber <= endLineNumber; lineNumber++) {
            const text = this._textModel.getLineContent(lineNumber);
            const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, state);
            builder.add(lineNumber, r.tokens);
            // These tokens are only a guess — keep the line marked dirty so the
            // real (sequential) tokenization still revisits it.
            this._tokenizationStateStore.markMustBeTokenized(lineNumber - 1);
            state = r.endState;
        }
    }
}
/**
 * Resolves the tokenization support and its initial state for a model.
 * Returns `[null, null]` when the model is too large for tokenization,
 * when no support is registered for the language, or when the support
 * fails to produce an initial state.
 */
function initializeTokenization(textModel, tokenizationPart) {
    if (textModel.isTooLargeForTokenization()) {
        return [null, null];
    }
    const support = TokenizationRegistry.get(tokenizationPart.getLanguageId());
    if (!support) {
        return [null, null];
    }
    try {
        return [support, support.getInitialState()];
    }
    catch (err) {
        // A broken grammar must not break the model: report and disable tokenization.
        onUnexpectedError(err);
        return [null, null];
    }
}
/**
 * Tokenizes `text` starting from a clone of `state`, falling back to the
 * null tokenizer when no support is given or the support throws. The
 * returned token array is converted to end-offset form before returning.
 */
function safeTokenize(languageIdCodec, languageId, tokenizationSupport, text, hasEOL, state) {
    let result = null;
    if (tokenizationSupport) {
        try {
            // Clone the state: tokenizers may mutate it while tokenizing.
            result = tokenizationSupport.tokenizeEncoded(text, hasEOL, state.clone());
        }
        catch (err) {
            // A misbehaving tokenizer must not break the caller: report and fall back.
            onUnexpectedError(err);
        }
    }
    if (!result) {
        result = nullTokenizeEncoded(languageIdCodec.encodeLanguageId(languageId), state);
    }
    LineTokens.convertToEndOffset(result.tokens, text.length);
    return result;
}
|