/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
import * as arrays from '../../../base/common/arrays.js';
import { onUnexpectedError } from '../../../base/common/errors.js';
import { LineTokens } from '../tokens/lineTokens.js';
import { TokenizationRegistry } from '../languages.js';
import { nullTokenizeEncoded } from '../languages/nullTokenize.js';
import { Disposable } from '../../../base/common/lifecycle.js';
import { StopWatch } from '../../../base/common/stopwatch.js';
import { countEOL } from '../core/eolCounter.js';
import { ContiguousMultilineTokensBuilder } from '../tokens/contiguousMultilineTokensBuilder.js';
import { runWhenIdle } from '../../../base/common/async.js';
import { setTimeout0 } from '../../../base/common/platform.js';
/**
 * An array that avoids being sparse by always
 * filling up unused indices with a default value.
 */
class ContiguousGrowingArray {
    constructor(_default) {
        this._default = _default;
        this._store = [];
    }
    get(index) {
        if (index < this._store.length) {
            return this._store[index];
        }
        return this._default;
    }
    set(index, value) {
        while (index >= this._store.length) {
            this._store[this._store.length] = this._default;
        }
        this._store[index] = value;
    }
    delete(deleteIndex, deleteCount) {
        if (deleteCount === 0 || deleteIndex >= this._store.length) {
            return;
        }
        this._store.splice(deleteIndex, deleteCount);
    }
    insert(insertIndex, insertCount) {
        if (insertCount === 0 || insertIndex >= this._store.length) {
            return;
        }
        const arr = [];
        for (let i = 0; i < insertCount; i++) {
            arr[i] = this._default;
        }
        this._store = arrays.arrayInsert(this._store, insertIndex, arr);
    }
}
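// Illustrative usage of ContiguousGrowingArray (a sketch, not part of the module;
// the results shown follow directly from the implementation above):
//
//   const flags = new ContiguousGrowingArray(true);
//   flags.set(3, false);   // indices 0..2 are backfilled with the default `true`
//   flags.get(1);          // -> true  (default)
//   flags.get(3);          // -> false
//   flags.get(100);        // -> true  (reads past the end return the default)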
/**
 * Stores the states at the start of each line and keeps track of which lines
 * must be retokenized. Also uses state equality to quickly validate lines
 * that don't need to be retokenized.
 *
 * For example, when typing on a line, the line gets marked as needing to be tokenized.
 * Once the line is tokenized, the end state is checked for equality against the begin
 * state of the next line. If the states are equal, tokenization doesn't need to run
 * again over the rest of the file. If the states are not equal, the next line gets marked
 * as needing to be tokenized.
 */
export class TokenizationStateStore {
    constructor(tokenizationSupport, initialState) {
        this.tokenizationSupport = tokenizationSupport;
        this.initialState = initialState;
        /**
         * `lineBeginState[i]` contains the begin state used to tokenize line number `i + 1`.
         */
        this._lineBeginState = new ContiguousGrowingArray(null);
        /**
         * `lineNeedsTokenization[i]` describes if line number `i + 1` needs to be tokenized.
         */
        this._lineNeedsTokenization = new ContiguousGrowingArray(true);
        this._firstLineNeedsTokenization = 0;
        this._lineBeginState.set(0, this.initialState);
    }
    get invalidLineStartIndex() {
        return this._firstLineNeedsTokenization;
    }
    markMustBeTokenized(lineIndex) {
        this._lineNeedsTokenization.set(lineIndex, true);
        this._firstLineNeedsTokenization = Math.min(this._firstLineNeedsTokenization, lineIndex);
    }
    getBeginState(lineIndex) {
        return this._lineBeginState.get(lineIndex);
    }
    setEndState(linesLength, lineIndex, endState) {
        this._lineNeedsTokenization.set(lineIndex, false);
        this._firstLineNeedsTokenization = lineIndex + 1;
        // Check if this was the last line
        if (lineIndex === linesLength - 1) {
            return;
        }
        // Check if the end state has changed
        const previousEndState = this._lineBeginState.get(lineIndex + 1);
        if (previousEndState === null || !endState.equals(previousEndState)) {
            this._lineBeginState.set(lineIndex + 1, endState);
            this.markMustBeTokenized(lineIndex + 1);
            return;
        }
        // Perhaps we can skip tokenizing some lines...
        let i = lineIndex + 1;
        while (i < linesLength) {
            if (this._lineNeedsTokenization.get(i)) {
                break;
            }
            i++;
        }
        this._firstLineNeedsTokenization = i;
    }
    //#region Editing
    applyEdits(range, eolCount) {
        this.markMustBeTokenized(range.startLineNumber - 1);
        this._lineBeginState.delete(range.startLineNumber, range.endLineNumber - range.startLineNumber);
        this._lineNeedsTokenization.delete(range.startLineNumber, range.endLineNumber - range.startLineNumber);
        this._lineBeginState.insert(range.startLineNumber, eolCount);
        this._lineNeedsTokenization.insert(range.startLineNumber, eolCount);
    }
}
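// Illustrative flow for TokenizationStateStore (a sketch; `support`, `initialState`
// and `endState1` stand in for a real tokenization support, its initial state, and
// the state produced by tokenizing line 1):
//
//   const store = new TokenizationStateStore(support, initialState);
//   store.invalidLineStartIndex;                 // -> 0, no line validated yet
//   const begin = store.getBeginState(0);        // begin state of line 1 (the initial state)
//   store.setEndState(lineCount, 0, endState1);  // record line 1's end state; also skips
//                                                // ahead if line 2's stored begin state
//                                                // is equal and line 2 is still valid
//   store.markMustBeTokenized(0);                // an edit on line 1 invalidates it again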
export class TextModelTokenization extends Disposable {
    constructor(_textModel, _tokenizationPart, _languageIdCodec) {
        super();
        this._textModel = _textModel;
        this._tokenizationPart = _tokenizationPart;
        this._languageIdCodec = _languageIdCodec;
        this._isScheduled = false;
        this._isDisposed = false;
        this._tokenizationStateStore = null;
        this._register(TokenizationRegistry.onDidChange((e) => {
            const languageId = this._textModel.getLanguageId();
            if (e.changedLanguages.indexOf(languageId) === -1) {
                return;
            }
            this._resetTokenizationState();
            this._tokenizationPart.clearTokens();
        }));
        this._resetTokenizationState();
    }
    dispose() {
        this._isDisposed = true;
        super.dispose();
    }
    //#region TextModel events
    handleDidChangeContent(e) {
        if (e.isFlush) {
            this._resetTokenizationState();
            return;
        }
        if (this._tokenizationStateStore) {
            for (let i = 0, len = e.changes.length; i < len; i++) {
                const change = e.changes[i];
                const [eolCount] = countEOL(change.text);
                this._tokenizationStateStore.applyEdits(change.range, eolCount);
            }
        }
        this._beginBackgroundTokenization();
    }
    handleDidChangeAttached() {
        this._beginBackgroundTokenization();
    }
    handleDidChangeLanguage(e) {
        this._resetTokenizationState();
        this._tokenizationPart.clearTokens();
    }
    //#endregion
    _resetTokenizationState() {
        const [tokenizationSupport, initialState] = initializeTokenization(this._textModel, this._tokenizationPart);
        if (tokenizationSupport && initialState) {
            this._tokenizationStateStore = new TokenizationStateStore(tokenizationSupport, initialState);
        }
        else {
            this._tokenizationStateStore = null;
        }
        this._beginBackgroundTokenization();
    }
    _beginBackgroundTokenization() {
        if (this._isScheduled || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
            return;
        }
        this._isScheduled = true;
        runWhenIdle((deadline) => {
            this._isScheduled = false;
            this._backgroundTokenizeWithDeadline(deadline);
        });
    }
    /**
     * Tokenize until the deadline occurs, but try to yield every 1-2ms.
     */
    _backgroundTokenizeWithDeadline(deadline) {
        // Read the time remaining from the `deadline` immediately because it is unclear
        // if the `deadline` object will be valid after execution leaves this function.
        const endTime = Date.now() + deadline.timeRemaining();
        const execute = () => {
            if (this._isDisposed || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
                // disposed in the meantime or detached or finished
                return;
            }
            this._backgroundTokenizeForAtLeast1ms();
            if (Date.now() < endTime) {
                // There is still time before reaching the deadline, so yield to the browser and then
                // continue execution
                setTimeout0(execute);
            }
            else {
                // The deadline has been reached, so schedule a new idle callback if necessary
                this._beginBackgroundTokenization();
            }
        };
        execute();
    }
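    // Summary of the scheduling pattern used above and below (descriptive only):
    // `runWhenIdle` grants an idle-deadline budget; within that budget the work is
    // split into ~1ms slices (`_backgroundTokenizeForAtLeast1ms`), and `setTimeout0`
    // yields control back to the browser between slices so input stays responsive.
    // Once the budget is exhausted, a new idle callback is requested via
    // `_beginBackgroundTokenization` if lines remain to be tokenized.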
    /**
     * Tokenize for at least 1ms.
     */
    _backgroundTokenizeForAtLeast1ms() {
        const lineCount = this._textModel.getLineCount();
        const builder = new ContiguousMultilineTokensBuilder();
        const sw = StopWatch.create(false);
        do {
            if (sw.elapsed() > 1) {
                // the comparison is intentionally > 1 and not >= 1 to ensure that
                // a full millisecond has elapsed, given how microseconds are rounded
                // to milliseconds
                break;
            }
            const tokenizedLineNumber = this._tokenizeOneInvalidLine(builder);
            if (tokenizedLineNumber >= lineCount) {
                break;
            }
        } while (this._hasLinesToTokenize());
        this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
    }
    tokenizeViewport(startLineNumber, endLineNumber) {
        const builder = new ContiguousMultilineTokensBuilder();
        this._tokenizeViewport(builder, startLineNumber, endLineNumber);
        this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
    }
    reset() {
        this._resetTokenizationState();
        this._tokenizationPart.clearTokens();
    }
    forceTokenization(lineNumber) {
        const builder = new ContiguousMultilineTokensBuilder();
        this._updateTokensUntilLine(builder, lineNumber);
        this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
    }
    getTokenTypeIfInsertingCharacter(position, character) {
        if (!this._tokenizationStateStore) {
            return 0 /* StandardTokenType.Other */;
        }
        this.forceTokenization(position.lineNumber);
        const lineStartState = this._tokenizationStateStore.getBeginState(position.lineNumber - 1);
        if (!lineStartState) {
            return 0 /* StandardTokenType.Other */;
        }
        const languageId = this._textModel.getLanguageId();
        const lineContent = this._textModel.getLineContent(position.lineNumber);
        // Create the text as if `character` was inserted
        const text = (lineContent.substring(0, position.column - 1)
            + character
            + lineContent.substring(position.column - 1));
        const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, lineStartState);
        const lineTokens = new LineTokens(r.tokens, text, this._languageIdCodec);
        if (lineTokens.getCount() === 0) {
            return 0 /* StandardTokenType.Other */;
        }
        const tokenIndex = lineTokens.findTokenIndexAtOffset(position.column - 1);
        return lineTokens.getStandardTokenType(tokenIndex);
    }
    tokenizeLineWithEdit(position, length, newText) {
        const lineNumber = position.lineNumber;
        const column = position.column;
        if (!this._tokenizationStateStore) {
            return null;
        }
        this.forceTokenization(lineNumber);
        const lineStartState = this._tokenizationStateStore.getBeginState(lineNumber - 1);
        if (!lineStartState) {
            return null;
        }
        const curLineContent = this._textModel.getLineContent(lineNumber);
        const newLineContent = curLineContent.substring(0, column - 1)
            + newText + curLineContent.substring(column - 1 + length);
        const languageId = this._textModel.getLanguageIdAtPosition(lineNumber, 0);
        const result = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, newLineContent, true, lineStartState);
        const lineTokens = new LineTokens(result.tokens, newLineContent, this._languageIdCodec);
        return lineTokens;
    }
    isCheapToTokenize(lineNumber) {
        if (!this._tokenizationStateStore) {
            return true;
        }
        const firstInvalidLineNumber = this._tokenizationStateStore.invalidLineStartIndex + 1;
        if (lineNumber > firstInvalidLineNumber) {
            return false;
        }
        if (lineNumber < firstInvalidLineNumber) {
            return true;
        }
        if (this._textModel.getLineLength(lineNumber) < 2048 /* Constants.CHEAP_TOKENIZATION_LENGTH_LIMIT */) {
            return true;
        }
        return false;
    }
    _hasLinesToTokenize() {
        if (!this._tokenizationStateStore) {
            return false;
        }
        return (this._tokenizationStateStore.invalidLineStartIndex < this._textModel.getLineCount());
    }
    _isTokenizationComplete() {
        if (!this._tokenizationStateStore) {
            return false;
        }
        return (this._tokenizationStateStore.invalidLineStartIndex >= this._textModel.getLineCount());
    }
    _tokenizeOneInvalidLine(builder) {
        if (!this._tokenizationStateStore || !this._hasLinesToTokenize()) {
            return this._textModel.getLineCount() + 1;
        }
        const lineNumber = this._tokenizationStateStore.invalidLineStartIndex + 1;
        this._updateTokensUntilLine(builder, lineNumber);
        return lineNumber;
    }
    _updateTokensUntilLine(builder, lineNumber) {
        if (!this._tokenizationStateStore) {
            return;
        }
        const languageId = this._textModel.getLanguageId();
        const linesLength = this._textModel.getLineCount();
        const endLineIndex = lineNumber - 1;
        // Validate all states up to and including endLineIndex
        for (let lineIndex = this._tokenizationStateStore.invalidLineStartIndex; lineIndex <= endLineIndex; lineIndex++) {
            const text = this._textModel.getLineContent(lineIndex + 1);
            const lineStartState = this._tokenizationStateStore.getBeginState(lineIndex);
            const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, lineStartState);
            builder.add(lineIndex + 1, r.tokens);
            this._tokenizationStateStore.setEndState(linesLength, lineIndex, r.endState);
            lineIndex = this._tokenizationStateStore.invalidLineStartIndex - 1; // -1 because the outer loop increments it
        }
    }
    _tokenizeViewport(builder, startLineNumber, endLineNumber) {
        if (!this._tokenizationStateStore) {
            // nothing to do
            return;
        }
        if (endLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
            // nothing to do
            return;
        }
        if (startLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
            // tokenization has reached the viewport start...
            this._updateTokensUntilLine(builder, endLineNumber);
            return;
        }
        let nonWhitespaceColumn = this._textModel.getLineFirstNonWhitespaceColumn(startLineNumber);
        const fakeLines = [];
        let initialState = null;
        for (let i = startLineNumber - 1; nonWhitespaceColumn > 1 && i >= 1; i--) {
            const newNonWhitespaceIndex = this._textModel.getLineFirstNonWhitespaceColumn(i);
            if (newNonWhitespaceIndex === 0) {
                continue;
            }
            if (newNonWhitespaceIndex < nonWhitespaceColumn) {
                fakeLines.push(this._textModel.getLineContent(i));
                nonWhitespaceColumn = newNonWhitespaceIndex;
                initialState = this._tokenizationStateStore.getBeginState(i - 1);
                if (initialState) {
                    break;
                }
            }
        }
        if (!initialState) {
            initialState = this._tokenizationStateStore.initialState;
        }
        const languageId = this._textModel.getLanguageId();
        let state = initialState;
        for (let i = fakeLines.length - 1; i >= 0; i--) {
            const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, fakeLines[i], false, state);
            state = r.endState;
        }
        for (let lineNumber = startLineNumber; lineNumber <= endLineNumber; lineNumber++) {
            const text = this._textModel.getLineContent(lineNumber);
            const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, state);
            builder.add(lineNumber, r.tokens);
            this._tokenizationStateStore.markMustBeTokenized(lineNumber - 1);
            state = r.endState;
        }
    }
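    // Note on _tokenizeViewport (descriptive summary only): when the viewport starts
    // beyond the last validated line, the method guesses a plausible start state rather
    // than tokenizing everything up to it. It walks backwards from the viewport,
    // collecting "fake" lines with strictly decreasing indentation until it reaches a
    // line whose begin state is already known (or falls back to the initial state),
    // tokenizes those fake lines to approximate a state, and then tokenizes the
    // viewport lines from that guessed state. The viewport lines stay marked as
    // needing tokenization, so the guess is later replaced by exact tokens.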
}
function initializeTokenization(textModel, tokenizationPart) {
    if (textModel.isTooLargeForTokenization()) {
        return [null, null];
    }
    const tokenizationSupport = TokenizationRegistry.get(tokenizationPart.getLanguageId());
    if (!tokenizationSupport) {
        return [null, null];
    }
    let initialState;
    try {
        initialState = tokenizationSupport.getInitialState();
    }
    catch (e) {
        onUnexpectedError(e);
        return [null, null];
    }
    return [tokenizationSupport, initialState];
}
function safeTokenize(languageIdCodec, languageId, tokenizationSupport, text, hasEOL, state) {
    let r = null;
    if (tokenizationSupport) {
        try {
            r = tokenizationSupport.tokenizeEncoded(text, hasEOL, state.clone());
        }
        catch (e) {
            onUnexpectedError(e);
        }
    }
    if (!r) {
        r = nullTokenizeEncoded(languageIdCodec.encodeLanguageId(languageId), state);
    }
    LineTokens.convertToEndOffset(r.tokens, text.length);
    return r;
}