4bcf438542a978c421775ea3de8101781bbc1c8c0c321511dfd169a6b5cafecb33650cfbd53317628dfd98eff5c99e70749048b020c6a71ba22dea4697668a 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  5. import { NotSupportedError } from '../../../../../base/common/errors.js';
  6. import { TokenMetadata } from '../../../encodedTokenAttributes.js';
  7. import { TextAstNode } from './ast.js';
  8. import { lengthAdd, lengthDiff, lengthGetColumnCountIfZeroLineCount, lengthToObj, lengthZero, toLength } from './length.js';
  9. import { SmallImmutableSet } from './smallImmutableSet.js';
  /**
   * A single unit handed out by the tokenizers in this file: either a run of plain text
   * or a bracket.
   */
  export class Token {
    /**
     * @param length Length of the token.
     * @param kind Kind of the token; text tokens created in this file pass `0` (`TokenKind.Text`).
     * @param bracketId
     *   If this token is an opening bracket, this is the id of the opening bracket.
     *   If this token is a closing bracket, this is the id of the first opening bracket
     *   that is closed by this bracket.
     *   Otherwise, it is -1.
     * @param bracketIds
     *   If this token is an opening bracket, this just contains `bracketId`.
     *   If this token is a closing bracket, this lists all opening bracket ids that it closes.
     *   Otherwise, it is empty.
     * @param astNode AST node stored with the token; text tokens created in this file
     *   pass a `TextAstNode` of the same length.
     */
    constructor(length, kind, bracketId, bracketIds, astNode) {
      // Plain field-by-field assignment keeps the property order stable across instances.
      this.length = length;
      this.kind = kind;
      this.bracketId = bracketId;
      this.bracketIds = bracketIds;
      this.astNode = astNode;
    }
  }
  /**
   * Tokenizer over a text model. Wraps a `NonPeekableTextBufferTokenizer` and adds
   * one-token lookahead (`peek`) plus tracking of the offset consumed so far.
   */
  export class TextBufferTokenizer {
    /**
     * @param textModel Model to tokenize. This class calls `getLineCount()` and
     *   `getLineLength(lineNumber)` on it and hands it to the underlying reader.
     * @param bracketTokens Bracket token lookup, forwarded to the underlying reader.
     */
    constructor(textModel, bracketTokens) {
      this.textModel = textModel;
      this.bracketTokens = bracketTokens;
      this.reader = new NonPeekableTextBufferTokenizer(this.textModel, this.bracketTokens);
      this._offset = lengthZero;
      // `didPeek` is the single source of truth for "a lookahead token is buffered";
      // `peeked` may legitimately be null (end of document) while `didPeek` is true.
      this.didPeek = false;
      this.peeked = null;
      this.textBufferLineCount = textModel.getLineCount();
      this.textBufferLastLineLength = textModel.getLineLength(this.textBufferLineCount);
    }

    /** Offset of the next token to be read, i.e. the summed length of everything consumed. */
    get offset() {
      return this._offset;
    }

    /**
     * Total length of the document.
     * The line component counts line breaks (line count - 1), matching how token lengths
     * and `offset` are measured, so that `offset` equals `length` once everything is read.
     */
    get length() {
      return toLength(this.textBufferLineCount - 1, this.textBufferLastLineLength);
    }

    /**
     * Advances the current offset by `length` without producing tokens and repositions
     * the underlying reader. Any buffered lookahead token is discarded.
     */
    skip(length) {
      this.didPeek = false;
      this.peeked = null;
      this._offset = lengthAdd(this._offset, length);
      const position = lengthToObj(this._offset);
      this.reader.setPosition(position.lineCount, position.columnCount);
    }

    /**
     * Consumes and returns the next token, or `null` when the underlying reader is exhausted.
     */
    read() {
      let token;
      if (this.didPeek) {
        // Hand out the buffered token exactly once. Keying on `didPeek` (and clearing
        // `peeked`) prevents a stale token from being returned again by a later read.
        token = this.peeked;
        this.didPeek = false;
        this.peeked = null;
      } else {
        token = this.reader.read();
      }
      if (token) {
        this._offset = lengthAdd(this._offset, token.length);
      }
      return token;
    }

    /**
     * Returns the next token without consuming it (or `null` at the end).
     * Repeated calls return the same token until `read` or `skip` is called.
     */
    peek() {
      if (!this.didPeek) {
        this.peeked = this.reader.read();
        this.didPeek = true;
      }
      return this.peeked;
    }
  }
  /**
   * Forward-only reader that walks the tokenized lines of a text model and produces
   * text tokens and bracket tokens.
   * Does not support peek.
   */
  class NonPeekableTextBufferTokenizer {
    /**
     * @param textModel Model to read; this class uses `getLineCount()`,
     *   `getLineLength(lineNumber)` and `tokenization.getLineTokens(lineNumber)`.
     * @param bracketTokens Provides `getSingleLanguageBracketTokens(languageId)`.
     */
    constructor(textModel, bracketTokens) {
      this.textModel = textModel;
      this.bracketTokens = bracketTokens;
      // Zero-based index of the current line.
      this.lineIdx = 0;
      // Content of the current line, or null while the line has not been loaded yet.
      this.line = null;
      this.lineCharOffset = 0;
      this.lineTokens = null;
      this.lineTokenOffset = 0;
      /** Must be a zero line token. The end of the document cannot be peeked. */
      this.peekedToken = null;
      this.textBufferLineCount = textModel.getLineCount();
      this.textBufferLastLineLength = textModel.getLineLength(this.textBufferLineCount);
    }

    /**
     * Moves the reader to the given zero-based line index and column.
     * Any internally buffered bracket token is dropped.
     */
    setPosition(lineIdx, column) {
      // We must not jump into a token!
      if (lineIdx === this.lineIdx) {
        this.lineCharOffset = column;
        // Only consult `lineTokens` when they belong to the current line. While `line`
        // is null (nothing read yet, or a previous jump to this line), `lineTokens` is
        // null or stale; `read()` computes the token index when it loads the line.
        if (this.line !== null) {
          this.lineTokenOffset = this.lineCharOffset === 0 ? 0 : this.lineTokens.findTokenIndexAtOffset(this.lineCharOffset);
        }
      } else {
        this.lineIdx = lineIdx;
        this.lineCharOffset = column;
        // Forces `read()` to reload the line tokens and content.
        this.line = null;
      }
      this.peekedToken = null;
    }

    /**
     * Returns the next token: a bracket token obtained from the language's bracket tokens,
     * a text `Token` covering the text up to the next bracket (or up to a length limit),
     * or `null` once the position is at or after the end of the document.
     */
    read() {
      if (this.peekedToken) {
        // A bracket found while producing the previous text token.
        const token = this.peekedToken;
        this.peekedToken = null;
        this.lineCharOffset += lengthGetColumnCountIfZeroLineCount(token.length);
        return token;
      }
      const lastLineIdx = this.textBufferLineCount - 1;
      if (this.lineIdx > lastLineIdx || (this.lineIdx === lastLineIdx && this.lineCharOffset >= this.textBufferLastLineLength)) {
        // We are after the end
        return null;
      }
      if (this.line === null) {
        // `getLineTokens` takes a one-based line number.
        this.lineTokens = this.textModel.tokenization.getLineTokens(this.lineIdx + 1);
        this.line = this.lineTokens.getLineContent();
        this.lineTokenOffset = this.lineCharOffset === 0 ? 0 : this.lineTokens.findTokenIndexAtOffset(this.lineCharOffset);
      }
      const startLineIdx = this.lineIdx;
      const startLineCharOffset = this.lineCharOffset;
      // limits the length of text tokens.
      // If text tokens get too long, incremental updates will be slow
      let lengthHeuristic = 0;
      while (true) {
        const lineTokens = this.lineTokens;
        const tokenCount = lineTokens.getCount();
        let peekedBracketToken = null;
        if (this.lineTokenOffset < tokenCount) {
          const tokenMetadata = lineTokens.getMetadata(this.lineTokenOffset);
          while (this.lineTokenOffset + 1 < tokenCount && tokenMetadata === lineTokens.getMetadata(this.lineTokenOffset + 1)) {
            // Skip tokens that are identical.
            // Sometimes, (bracket) identifiers are split up into multiple tokens.
            this.lineTokenOffset++;
          }
          const isOther = TokenMetadata.getTokenType(tokenMetadata) === 0 /* StandardTokenType.Other */;
          const containsBracketType = TokenMetadata.containsBalancedBrackets(tokenMetadata);
          const endOffset = lineTokens.getEndOffset(this.lineTokenOffset);
          // Is there a bracket token next? Only consume text.
          if (containsBracketType && isOther && this.lineCharOffset < endOffset) {
            const languageId = lineTokens.getLanguageId(this.lineTokenOffset);
            const text = this.line.substring(this.lineCharOffset, endOffset);
            const brackets = this.bracketTokens.getSingleLanguageBracketTokens(languageId);
            const regexp = brackets.regExpGlobal;
            if (regexp) {
              // The regexp is shared and global, so its scan position must be reset.
              regexp.lastIndex = 0;
              const match = regexp.exec(text);
              if (match) {
                peekedBracketToken = brackets.getToken(match[0]);
                if (peekedBracketToken) {
                  // Consume leading text of the token
                  this.lineCharOffset += match.index;
                }
              }
            }
          }
          lengthHeuristic += endOffset - this.lineCharOffset;
          if (peekedBracketToken) {
            // Don't skip the entire token, as a single token could contain multiple brackets.
            if (startLineIdx !== this.lineIdx || startLineCharOffset !== this.lineCharOffset) {
              // There is text before the bracket
              this.peekedToken = peekedBracketToken;
              break;
            } else {
              // Consume the peeked token
              this.lineCharOffset += lengthGetColumnCountIfZeroLineCount(peekedBracketToken.length);
              return peekedBracketToken;
            }
          } else {
            // Skip the entire token, as the token contains no brackets at all.
            this.lineTokenOffset++;
            this.lineCharOffset = endOffset;
          }
        } else {
          // All tokens of the current line are consumed: continue on the next line, if any.
          if (this.lineIdx === lastLineIdx) {
            break;
          }
          this.lineIdx++;
          this.lineTokens = this.textModel.tokenization.getLineTokens(this.lineIdx + 1);
          this.lineTokenOffset = 0;
          this.line = this.lineTokens.getLineContent();
          this.lineCharOffset = 0;
          lengthHeuristic += 33; // max 1000/33 = 30 lines
          // This limits the amount of work to recompute min-indentation
          if (lengthHeuristic > 1000) {
            // only break (automatically) at the end of line.
            break;
          }
        }
        if (lengthHeuristic > 1500) {
          // Eventually break regardless of the line length so that
          // very long lines do not cause bad performance.
          // This effectively limits max indentation to 500, as
          // indentation is not computed across multiple text nodes.
          break;
        }
      }
      // If a token contains some proper indentation, it also contains \n{INDENTATION+}(?!{INDENTATION}),
      // unless the line is too long.
      // Thus, the min indentation of the document is the minimum min indentation of every text node.
      const length = lengthDiff(startLineIdx, startLineCharOffset, this.lineIdx, this.lineCharOffset);
      return new Token(length, 0 /* TokenKind.Text */, -1, SmallImmutableSet.getEmpty(), new TextAstNode(length));
    }
  }
  /**
   * Tokenizer over a plain string. The whole text is split into text and bracket tokens
   * eagerly in the constructor; `read` and `peek` then just walk the resulting array.
   */
  export class FastTokenizer {
    /**
     * @param text Text to tokenize.
     * @param brackets Provides `getRegExpStr()` (regexp source matching all brackets,
     *   possibly empty) and `getToken(value)` (bracket token for a matched value).
     */
    constructor(text, brackets) {
      this.text = text;
      this._offset = lengthZero;
      this.idx = 0;

      const regExpStr = brackets.getRegExpStr();
      // Always scan for line breaks, even when there are no brackets to match:
      // otherwise multi-line text would be measured as if it were a single line.
      const regexp = new RegExp(regExpStr ? `${regExpStr}|\n` : '\n', 'gi');

      const createTextToken = (length) => new Token(length, 0 /* TokenKind.Text */, -1, SmallImmutableSet.getEmpty(), new TextAstNode(length));

      // Pre-built text tokens for the most common short lengths, so that they can be
      // shared instead of being allocated for every small gap between brackets.
      const smallTextTokens0Line = [];
      const smallTextTokens1Line = [];
      for (let i = 0; i < 60; i++) {
        smallTextTokens0Line.push(createTextToken(toLength(0, i)));
        smallTextTokens1Line.push(createTextToken(toLength(1, i)));
      }

      const tokens = [];
      let curLineCount = 0;
      let lastLineBreakOffset = 0;
      let lastTokenEndOffset = 0;
      let lastTokenEndLine = 0;

      let match;
      // If a token contains indentation, it also contains \n{INDENTATION+}(?!{INDENTATION})
      while ((match = regexp.exec(text)) !== null) {
        const curOffset = match.index;
        const value = match[0];
        if (value === '\n') {
          curLineCount++;
          lastLineBreakOffset = curOffset + 1;
          continue;
        }
        if (lastTokenEndOffset !== curOffset) {
          // There is text between the previous bracket (or the start) and this bracket.
          const lineCount = curLineCount - lastTokenEndLine;
          const colCount = lineCount === 0
            ? curOffset - lastTokenEndOffset
            : curOffset - lastLineBreakOffset;
          let cache = null;
          if (lineCount === 0) {
            cache = smallTextTokens0Line;
          } else if (lineCount === 1) {
            cache = smallTextTokens1Line;
          }
          if (cache !== null && colCount < cache.length) {
            tokens.push(cache[colCount]);
          } else {
            tokens.push(createTextToken(toLength(lineCount, colCount)));
          }
        }
        // value is matched by regexp, so the token must exist
        tokens.push(brackets.getToken(value));
        lastTokenEndOffset = curOffset + value.length;
        lastTokenEndLine = curLineCount;
      }

      // Trailing text after the last bracket.
      const offset = text.length;
      if (lastTokenEndOffset !== offset) {
        const length = (lastTokenEndLine === curLineCount)
          ? toLength(0, offset - lastTokenEndOffset)
          : toLength(curLineCount - lastTokenEndLine, offset - lastLineBreakOffset);
        tokens.push(createTextToken(length));
      }

      this.length = toLength(curLineCount, offset - lastLineBreakOffset);
      this.tokens = tokens;
    }

    /**
     * Note: this class only assigns `_offset` in the constructor, so this stays at
     * `lengthZero` regardless of how many tokens were read.
     */
    get offset() {
      return this._offset;
    }

    /** Consumes and returns the next token, or `null` when all tokens were read. */
    read() {
      return this.tokens[this.idx++] || null;
    }

    /** Returns the next token without consuming it, or `null` when all tokens were read. */
    peek() {
      return this.tokens[this.idx] || null;
    }

    /**
     * Not supported by this tokenizer.
     * @throws {NotSupportedError} always.
     */
    skip(length) {
      throw new NotSupportedError();
    }
  }