a1dd471d34b53377e7f6ed96004e7d42d1f7318a5326f270903a2831919bd8fa1b6cfb5697d136524df70520d431c6a0124857c10d10cb9583ff80206c0922 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  /**
   * Decorator-application helper (TypeScript-emitted shape).
   *
   * Called with two arguments it decorates `target` itself; called with three
   * or four arguments it decorates the member `key` of `target`. Decorators are
   * applied from the last array entry to the first, and a decorator's truthy
   * return value replaces the running result.
   *
   * @param decorators Array of decorator functions (falsy entries are skipped).
   * @param target The class, or the object owning the decorated member.
   * @param key Member name (member form only).
   * @param desc Property descriptor; `null` means "look it up on `target`".
   * @returns The final decorated value or descriptor.
   */
  var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
      var argCount = arguments.length;
      var result;
      if (argCount < 3) {
          result = target;
      }
      else if (desc === null) {
          // A null descriptor is resolved from the target and reused below.
          desc = Object.getOwnPropertyDescriptor(target, key);
          result = desc;
      }
      else {
          result = desc;
      }
      if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
          result = Reflect.decorate(decorators, target, key, desc);
      }
      else {
          for (var index = decorators.length - 1; index >= 0; index--) {
              var decorator = decorators[index];
              if (!decorator) {
                  continue;
              }
              var produced;
              if (argCount < 3) {
                  produced = decorator(result);
              }
              else if (argCount > 3) {
                  produced = decorator(target, key, result);
              }
              else {
                  produced = decorator(target, key);
              }
              result = produced || result;
          }
      }
      // Only the four-argument form writes the resulting descriptor back.
      if (argCount > 3 && result) {
          Object.defineProperty(target, key, result);
      }
      return result;
  };
  /**
   * Parameter-decorator adapter: binds `paramIndex` so that `decorator` can be
   * invoked through the `(target, key)` decorator signature.
   *
   * @param paramIndex Index of the decorated parameter.
   * @param decorator Function called as `decorator(target, key, paramIndex)`.
   * @returns A function of `(target, key)`; its own return value is undefined.
   */
  var __param = (this && this.__param) || function (paramIndex, decorator) {
      return function (target, key) {
          decorator(target, key, paramIndex);
      };
  };
  14. import * as languages from '../../../common/languages.js';
  15. import { NullState, nullTokenizeEncoded, nullTokenize } from '../../../common/languages/nullTokenize.js';
  16. import * as monarchCommon from './monarchCommon.js';
  17. import { IConfigurationService } from '../../../../platform/configuration/common/configuration.js';
  const CACHE_STACK_DEPTH = 5;
  /**
   * Hands out stack elements, reusing one shared instance per distinct
   * (parent chain, state) pair as long as the parent is shallower than the
   * configured cache depth.
   */
  class MonarchStackElementFactory {
      /**
       * @param maxCacheDepth Parents at or beyond this depth are never cached.
       */
      constructor(maxCacheDepth) {
          this._maxCacheDepth = maxCacheDepth;
          // Prototype-less dictionary keyed by the stack element id.
          this._entries = Object.create(null);
      }
      /**
       * Creates (or reuses) an element through the shared factory instance.
       */
      static create(parent, state) {
          return this._INSTANCE.create(parent, state);
      }
      /**
       * @param parent Parent stack element, or `null` for a root element.
       * @param state Tokenizer state name of the new element.
       * @returns A cached element when one exists for this key, else a new one.
       */
      create(parent, state) {
          // no caching above a certain depth
          if (parent !== null && parent.depth >= this._maxCacheDepth) {
              return new MonarchStackElement(parent, state);
          }
          const parentId = MonarchStackElement.getStackElementId(parent);
          const cacheKey = parentId.length > 0 ? parentId + '|' + state : parentId + state;
          const cached = this._entries[cacheKey];
          if (cached) {
              return cached;
          }
          const created = new MonarchStackElement(parent, state);
          this._entries[cacheKey] = created;
          return created;
      }
  }
  MonarchStackElementFactory._INSTANCE = new MonarchStackElementFactory(CACHE_STACK_DEPTH);
  /**
   * One frame of the tokenizer state stack: a state name plus a link to the
   * parent frame (`null` for the bottom frame). `depth` counts frames from
   * the bottom, starting at 1.
   */
  class MonarchStackElement {
      constructor(parent, state) {
          this.parent = parent;
          this.state = state;
          this.depth = (this.parent ? this.parent.depth : 0) + 1;
      }
      /**
       * Builds the id of a stack: the state names from `element` down to the
       * bottom frame, separated by '|'. Returns '' for `null`.
       */
      static getStackElementId(element) {
          let id = '';
          for (let frame = element; frame !== null; frame = frame.parent) {
              // A separator is only added once something has been written.
              id = id.length > 0 ? id + '|' + frame.state : id + frame.state;
          }
          return id;
      }
      /**
       * Structural comparison of two stacks, walking both parent chains in
       * lock-step. Reaching the very same frame object ends the walk early.
       */
      static _equals(a, b) {
          let left = a;
          let right = b;
          for (; left !== null && right !== null; left = left.parent, right = right.parent) {
              if (left === right) {
                  return true;
              }
              if (left.state !== right.state) {
                  return false;
              }
          }
          // Equal only when both chains ran out at the same time.
          return left === null && right === null;
      }
      equals(other) {
          return MonarchStackElement._equals(this, other);
      }
      /** Returns a stack with `state` pushed on top of this frame. */
      push(state) {
          return MonarchStackElementFactory.create(this, state);
      }
      /** Returns the parent frame (`null` for the bottom frame). */
      pop() {
          return this.parent;
      }
      /** Returns the bottom frame of this stack. */
      popall() {
          let bottom = this;
          for (; bottom.parent; bottom = bottom.parent) {
              // keep walking down
          }
          return bottom;
      }
      /** Returns a stack where this frame is replaced by one for `state`. */
      switchTo(state) {
          return MonarchStackElementFactory.create(this.parent, state);
      }
  }
  /**
   * Pairs the id of an embedded (nested) language with the tokenizer state
   * of that language.
   */
  class EmbeddedLanguageData {
      /**
       * @param languageId Id of the embedded language.
       * @param state State object of the embedded tokenizer; must provide
       *   `clone()` and `equals(other)`.
       */
      constructor(languageId, state) {
          this.languageId = languageId;
          this.state = state;
      }
      /**
       * @returns `true` when both the language id and the nested state match.
       */
      equals(other) {
          return (this.languageId === other.languageId
              && this.state.equals(other.state));
      }
      /**
       * Returns a copy holding a clone of the nested state. When the state's
       * `clone()` returns the state itself, this object is returned as-is to
       * save an allocation.
       */
      clone() {
          const stateClone = this.state.clone();
          // save an object
          if (stateClone === this.state) {
              return this;
          }
          // Use the cloned state: passing `this.state` here would make the
          // "clone" share its state object with the original.
          return new EmbeddedLanguageData(this.languageId, stateClone);
      }
  }
  /**
   * Hands out line states, reusing one shared instance per stack id for
   * states that carry no embedded-language data and whose stack is shallower
   * than the configured cache depth.
   */
  class MonarchLineStateFactory {
      /**
       * @param maxCacheDepth Stacks at or beyond this depth are never cached.
       */
      constructor(maxCacheDepth) {
          this._maxCacheDepth = maxCacheDepth;
          // Prototype-less dictionary keyed by the stack element id.
          this._entries = Object.create(null);
      }
      /**
       * Creates (or reuses) a line state through the shared factory instance.
       */
      static create(stack, embeddedLanguageData) {
          return this._INSTANCE.create(stack, embeddedLanguageData);
      }
      /**
       * @param stack Tokenizer state stack for the line.
       * @param embeddedLanguageData Embedded-language data, or `null`.
       * @returns A cached line state when eligible and present, else a new one.
       */
      create(stack, embeddedLanguageData) {
          // no caching when embedding, and no caching above a certain depth
          const bypassCache = embeddedLanguageData !== null
              || (stack !== null && stack.depth >= this._maxCacheDepth);
          if (bypassCache) {
              return new MonarchLineState(stack, embeddedLanguageData);
          }
          const cacheKey = MonarchStackElement.getStackElementId(stack);
          const cached = this._entries[cacheKey];
          if (cached) {
              return cached;
          }
          const created = new MonarchLineState(stack, null);
          this._entries[cacheKey] = created;
          return created;
      }
  }
  MonarchLineStateFactory._INSTANCE = new MonarchLineStateFactory(CACHE_STACK_DEPTH);
  /**
   * Tokenizer state at a line boundary: the Monarch state stack plus, when
   * the line ends inside an embedded language, that language's data.
   */
  class MonarchLineState {
      /**
       * @param stack Stack element on top of the state stack.
       * @param embeddedLanguageData Embedded-language data, or `null`.
       */
      constructor(stack, embeddedLanguageData) {
          this.stack = stack;
          this.embeddedLanguageData = embeddedLanguageData;
      }
      /**
       * Returns a copy of this state. When there is no embedded-language data,
       * or cloning it yields the same object, this state is returned as-is.
       */
      clone() {
          const embeddedlanguageDataClone = this.embeddedLanguageData ? this.embeddedLanguageData.clone() : null;
          // save an object
          if (embeddedlanguageDataClone === this.embeddedLanguageData) {
              return this;
          }
          // Hand the cloned data to the new state: passing the original
          // would leave both line states sharing one data object.
          return MonarchLineStateFactory.create(this.stack, embeddedlanguageDataClone);
      }
      /**
       * @returns `true` when `other` is a MonarchLineState with an equal stack
       *   and equal (or equally absent) embedded-language data.
       */
      equals(other) {
          if (!(other instanceof MonarchLineState)) {
              return false;
          }
          if (!this.stack.equals(other.stack)) {
              return false;
          }
          if (this.embeddedLanguageData === null && other.embeddedLanguageData === null) {
              return true;
          }
          if (this.embeddedLanguageData === null || other.embeddedLanguageData === null) {
              return false;
          }
          return this.embeddedLanguageData.equals(other.embeddedLanguageData);
      }
  }
  /**
   * Collects tokens as `languages.Token` objects for the non-encoded
   * tokenization path.
   */
  class MonarchClassicTokensCollector {
      constructor() {
          this._tokens = [];
          this._languageId = null;
          this._lastTokenType = null;
          this._lastTokenLanguage = null;
      }
      /** Sets the language id attached to subsequently emitted tokens. */
      enterLanguage(languageId) {
          this._languageId = languageId;
      }
      /**
       * Records a token starting at `startOffset`. A token whose type and
       * language equal those of the previously emitted token is dropped.
       */
      emit(startOffset, type) {
          const repeatsPrevious = this._lastTokenType === type
              && this._lastTokenLanguage === this._languageId;
          if (repeatsPrevious) {
              return;
          }
          this._lastTokenType = type;
          this._lastTokenLanguage = this._languageId;
          this._tokens.push(new languages.Token(startOffset, type, this._languageId));
      }
      /**
       * Tokenizes `embeddedLanguageLine` with the tokenizer registered for the
       * embedded language and appends its tokens shifted by `offsetDelta`.
       * Without a registered tokenizer a single empty-type token is emitted
       * and the incoming nested state is returned unchanged.
       *
       * @returns The end state of the embedded tokenizer.
       */
      nestedLanguageTokenize(embeddedLanguageLine, hasEOL, embeddedLanguageData, offsetDelta) {
          const { languageId, state } = embeddedLanguageData;
          const support = languages.TokenizationRegistry.get(languageId);
          if (!support) {
              this.enterLanguage(languageId);
              this.emit(offsetDelta, '');
              return state;
          }
          const nested = support.tokenize(embeddedLanguageLine, hasEOL, state);
          if (offsetDelta === 0) {
              this._tokens = this._tokens.concat(nested.tokens);
          }
          else {
              for (const nestedToken of nested.tokens) {
                  this._tokens.push(new languages.Token(nestedToken.offset + offsetDelta, nestedToken.type, nestedToken.language));
              }
          }
          // Forget the previous token so the next emit is never de-duplicated
          // against a token from before the nested section.
          this._lastTokenType = null;
          this._lastTokenLanguage = null;
          this._languageId = null;
          return nested.endState;
      }
      /** Wraps the collected tokens and `endState` in a TokenizationResult. */
      finalize(endState) {
          return new languages.TokenizationResult(this._tokens, endState);
      }
  }
  /**
   * Collects tokens in the encoded form: a flat sequence of
   * (start offset, metadata) number pairs, finalized into a Uint32Array.
   */
  class MonarchModernTokensCollector {
      /**
       * @param languageService Provides `languageIdCodec.encodeLanguageId`.
       * @param theme Provides `match(encodedLanguageId, type)` returning the
       *   metadata number for a token type.
       */
      constructor(languageService, theme) {
          this._languageService = languageService;
          this._theme = theme;
          // Already-merged tokens that precede `_tokens` (set after nesting).
          this._prependTokens = null;
          this._tokens = [];
          this._currentLanguageId = 0 /* LanguageId.Null */;
          this._lastTokenMetadata = 0;
      }
      /** Sets the (encoded) language used for subsequently emitted tokens. */
      enterLanguage(languageId) {
          this._currentLanguageId = this._languageService.languageIdCodec.encodeLanguageId(languageId);
      }
      /**
       * Records a token starting at `startOffset`. A token whose metadata
       * equals that of the previously emitted token is dropped.
       */
      emit(startOffset, type) {
          const metadata = this._theme.match(this._currentLanguageId, type);
          if (metadata === this._lastTokenMetadata) {
              return;
          }
          this._lastTokenMetadata = metadata;
          this._tokens.push(startOffset, metadata);
      }
      /**
       * Concatenates `a`, `b` and `c` (in that order) into a Uint32Array.
       * `a` and `c` may be `null`; `b` is a plain array. When only `a` or only
       * `c` has content, that array itself is returned without copying.
       */
      static _merge(a, b, c) {
          const aLen = a === null ? 0 : a.length;
          const bLen = b.length;
          const cLen = c === null ? 0 : c.length;
          if (bLen === 0) {
              if (aLen === 0 && cLen === 0) {
                  return new Uint32Array(0);
              }
              if (aLen === 0) {
                  return c;
              }
              if (cLen === 0) {
                  return a;
              }
          }
          const merged = new Uint32Array(aLen + bLen + cLen);
          if (a !== null) {
              merged.set(a);
          }
          merged.set(b, aLen);
          if (c !== null) {
              merged.set(c, aLen + bLen);
          }
          return merged;
      }
      /**
       * Tokenizes `embeddedLanguageLine` with the encoded tokenizer registered
       * for the embedded language, shifts the resulting offsets by
       * `offsetDelta` (in place) and merges them behind the tokens collected
       * so far. Without a registered tokenizer a single empty-type token is
       * emitted and the incoming nested state is returned unchanged.
       *
       * @returns The end state of the embedded tokenizer.
       */
      nestedLanguageTokenize(embeddedLanguageLine, hasEOL, embeddedLanguageData, offsetDelta) {
          const { languageId, state } = embeddedLanguageData;
          const support = languages.TokenizationRegistry.get(languageId);
          if (!support) {
              this.enterLanguage(languageId);
              this.emit(offsetDelta, '');
              return state;
          }
          const nested = support.tokenizeEncoded(embeddedLanguageLine, hasEOL, state);
          const nestedTokens = nested.tokens;
          if (offsetDelta !== 0) {
              // Even slots hold offsets, odd slots hold metadata.
              const count = nestedTokens.length;
              for (let slot = 0; slot < count; slot += 2) {
                  nestedTokens[slot] += offsetDelta;
              }
          }
          this._prependTokens = MonarchModernTokensCollector._merge(this._prependTokens, this._tokens, nestedTokens);
          this._tokens = [];
          this._currentLanguageId = 0;
          this._lastTokenMetadata = 0;
          return nested.endState;
      }
      /** Wraps all collected tokens and `endState` in an EncodedTokenizationResult. */
      finalize(endState) {
          const allTokens = MonarchModernTokensCollector._merge(this._prependTokens, this._tokens, null);
          return new languages.EncodedTokenizationResult(allTokens, endState);
      }
  }
  /**
   * Tokenizer driven by a compiled Monarch lexer. It tokenizes one line at a
   * time, threading a MonarchLineState from line to line, and delegates to
   * the tokenizer of another language while inside an embedded section.
   */
  let MonarchTokenizer = class MonarchTokenizer {
      /**
       * @param languageService Used to encode language ids and to resolve
       *   embedded language names / mime types to language ids.
       * @param standaloneThemeService Supplies the token theme for encoded
       *   tokenization.
       * @param languageId Id of the language this tokenizer handles.
       * @param lexer Compiled Monarch lexer (states, rules, default token...).
       * @param _configurationService Source of 'editor.maxTokenizationLineLength'.
       */
      constructor(languageService, standaloneThemeService, languageId, lexer, _configurationService) {
          this._configurationService = _configurationService;
          this._languageService = languageService;
          this._standaloneThemeService = standaloneThemeService;
          this._languageId = languageId;
          this._lexer = lexer;
          this._embeddedLanguages = Object.create(null);
          this.embeddedLoaded = Promise.resolve(undefined);
          // Set up listening for embedded modes
          let emitting = false;
          this._tokenizationRegistryListener = languages.TokenizationRegistry.onDidChange((e) => {
              if (emitting) {
                  return;
              }
              let isOneOfMyEmbeddedModes = false;
              for (let i = 0, len = e.changedLanguages.length; i < len; i++) {
                  const language = e.changedLanguages[i];
                  if (this._embeddedLanguages[language]) {
                      isOneOfMyEmbeddedModes = true;
                      break;
                  }
              }
              if (isOneOfMyEmbeddedModes) {
                  emitting = true;
                  try {
                      languages.TokenizationRegistry.fire([this._languageId]);
                  }
                  finally {
                      // Always clear the re-entrancy guard, even if a listener
                      // throws, so later registry changes are still handled.
                      emitting = false;
                  }
              }
          });
          this._maxTokenizationLineLength = this._configurationService.getValue('editor.maxTokenizationLineLength', {
              overrideIdentifier: this._languageId
          });
          // Keep the subscription so dispose() can release it.
          this._configurationListener = this._configurationService.onDidChangeConfiguration(e => {
              if (e.affectsConfiguration('editor.maxTokenizationLineLength')) {
                  this._maxTokenizationLineLength = this._configurationService.getValue('editor.maxTokenizationLineLength', {
                      overrideIdentifier: this._languageId
                  });
              }
          });
      }
      /**
       * Releases the registry and configuration subscriptions made in the
       * constructor.
       */
      dispose() {
          this._tokenizationRegistryListener.dispose();
          // Guarded in case a configuration service returns no disposable.
          if (this._configurationListener) {
              this._configurationListener.dispose();
          }
      }
      /**
       * Reports whether all embedded languages seen so far have tokenizers.
       *
       * @returns `{ loaded: true }`, or `{ loaded: false, promise }` where the
       *   promise settles once the pending embedded tokenizers have loaded.
       */
      getLoadStatus() {
          const promises = [];
          for (const nestedLanguageId in this._embeddedLanguages) {
              const tokenizationSupport = languages.TokenizationRegistry.get(nestedLanguageId);
              if (tokenizationSupport) {
                  // The nested language is already loaded
                  if (tokenizationSupport instanceof MonarchTokenizer) {
                      const nestedModeStatus = tokenizationSupport.getLoadStatus();
                      if (nestedModeStatus.loaded === false) {
                          promises.push(nestedModeStatus.promise);
                      }
                  }
                  continue;
              }
              if (!languages.TokenizationRegistry.isResolved(nestedLanguageId)) {
                  // The nested language is in the process of being loaded
                  promises.push(languages.TokenizationRegistry.getOrCreate(nestedLanguageId));
              }
          }
          if (promises.length === 0) {
              return {
                  loaded: true
              };
          }
          return {
              loaded: false,
              promise: Promise.all(promises).then(_ => undefined)
          };
      }
      /**
       * @returns The line state for the first line: a one-element stack in the
       *   lexer's start state and no embedded-language data.
       */
      getInitialState() {
          const rootState = MonarchStackElementFactory.create(null, this._lexer.start);
          return MonarchLineStateFactory.create(rootState, null);
      }
      /**
       * Tokenizes a line into token objects. Lines at or above the configured
       * maximum length are not tokenized and get the null tokenization.
       */
      tokenize(line, hasEOL, lineState) {
          if (line.length >= this._maxTokenizationLineLength) {
              return nullTokenize(this._languageId, lineState);
          }
          const tokensCollector = new MonarchClassicTokensCollector();
          const endLineState = this._tokenize(line, hasEOL, lineState, tokensCollector);
          return tokensCollector.finalize(endLineState);
      }
      /**
       * Tokenizes a line into the encoded (offset, metadata) form. Lines at or
       * above the configured maximum length get the null tokenization.
       */
      tokenizeEncoded(line, hasEOL, lineState) {
          if (line.length >= this._maxTokenizationLineLength) {
              return nullTokenizeEncoded(this._languageService.languageIdCodec.encodeLanguageId(this._languageId), lineState);
          }
          const tokensCollector = new MonarchModernTokensCollector(this._languageService, this._standaloneThemeService.getColorTheme().tokenTheme);
          const endLineState = this._tokenize(line, hasEOL, lineState, tokensCollector);
          return tokensCollector.finalize(endLineState);
      }
      /**
       * Dispatches to nested or own tokenization depending on whether the
       * incoming line state is inside an embedded language.
       */
      _tokenize(line, hasEOL, lineState, collector) {
          if (lineState.embeddedLanguageData) {
              return this._nestedTokenize(line, hasEOL, lineState, 0, collector);
          }
          else {
              return this._myTokenize(line, hasEOL, lineState, 0, collector);
          }
      }
      /**
       * Finds the smallest offset in `line` at which a rule of the current
       * state with `nextEmbedded: '@pop'` matches.
       *
       * @returns That offset, or -1 when no such rule matches on this line.
       * @throws When the state is undefined or has no '@pop' embedded rule.
       */
      _findLeavingNestedLanguageOffset(line, state) {
          let rules = this._lexer.tokenizer[state.stack.state];
          if (!rules) {
              rules = monarchCommon.findRules(this._lexer, state.stack.state); // do parent matching
              if (!rules) {
                  throw monarchCommon.createError(this._lexer, 'tokenizer state is not defined: ' + state.stack.state);
              }
          }
          let popOffset = -1;
          let hasEmbeddedPopRule = false;
          for (const rule of rules) {
              if (!monarchCommon.isIAction(rule.action) || rule.action.nextEmbedded !== '@pop') {
                  continue;
              }
              hasEmbeddedPopRule = true;
              let regex = rule.regex;
              const regexSource = rule.regex.source;
              // Strip the anchoring '^(?:' ... ')' wrapper so the pattern can
              // be searched for anywhere in the line.
              if (regexSource.startsWith('^(?:') && regexSource.endsWith(')')) {
                  const flags = (regex.ignoreCase ? 'i' : '') + (regex.unicode ? 'u' : '');
                  regex = new RegExp(regexSource.slice(4, -1), flags);
              }
              const result = line.search(regex);
              if (result === -1 || (result !== 0 && rule.matchOnlyAtLineStart)) {
                  continue;
              }
              if (popOffset === -1 || result < popOffset) {
                  popOffset = result;
              }
          }
          if (!hasEmbeddedPopRule) {
              throw monarchCommon.createError(this._lexer, 'no rule containing nextEmbedded: "@pop" in tokenizer embedded state: ' + state.stack.state);
          }
          return popOffset;
      }
      /**
       * Tokenizes a line that starts inside an embedded language: the part
       * before the leaving offset goes to the embedded tokenizer, the rest is
       * tokenized by this tokenizer.
       */
      _nestedTokenize(line, hasEOL, lineState, offsetDelta, tokensCollector) {
          const popOffset = this._findLeavingNestedLanguageOffset(line, lineState);
          if (popOffset === -1) {
              // tokenization will not leave nested language
              const nestedEndState = tokensCollector.nestedLanguageTokenize(line, hasEOL, lineState.embeddedLanguageData, offsetDelta);
              return MonarchLineStateFactory.create(lineState.stack, new EmbeddedLanguageData(lineState.embeddedLanguageData.languageId, nestedEndState));
          }
          const nestedLanguageLine = line.substring(0, popOffset);
          if (nestedLanguageLine.length > 0) {
              // tokenize with the nested language
              tokensCollector.nestedLanguageTokenize(nestedLanguageLine, false, lineState.embeddedLanguageData, offsetDelta);
          }
          const restOfTheLine = line.substring(popOffset);
          return this._myTokenize(restOfTheLine, hasEOL, lineState, offsetDelta + popOffset, tokensCollector);
      }
      /** Returns the rule's name for error messages, or '(unknown)' without a rule. */
      _safeRuleName(rule) {
          if (rule) {
              return rule.name;
          }
          return '(unknown)';
      }
      /**
       * Runs the Monarch rules of this language over `lineWithoutLF`, emitting
       * tokens (offsets shifted by `offsetDelta`) into `tokensCollector`.
       *
       * @returns The line state after the line; when an embedded language is
       *   entered, the state produced by tokenizing the remainder with it.
       * @throws Lexer errors for undefined states, stack misuse, malformed
       *   group rules, missing brackets, or a rule that makes no progress.
       */
      _myTokenize(lineWithoutLF, hasEOL, lineState, offsetDelta, tokensCollector) {
          tokensCollector.enterLanguage(this._languageId);
          const lineWithoutLFLength = lineWithoutLF.length;
          const line = (hasEOL && this._lexer.includeLF ? lineWithoutLF + '\n' : lineWithoutLF);
          const lineLength = line.length;
          let embeddedLanguageData = lineState.embeddedLanguageData;
          let stack = lineState.stack;
          let pos = 0;
          let groupMatching = null;
          // See https://github.com/microsoft/monaco-editor/issues/1235
          // Evaluate rules at least once for an empty line
          let forceEvaluation = true;
          while (forceEvaluation || pos < lineLength) {
              const pos0 = pos;
              const stackLen0 = stack.depth;
              const groupLen0 = groupMatching ? groupMatching.groups.length : 0;
              const state = stack.state;
              let matches = null;
              let matched = null;
              let action = null;
              let rule = null;
              let enteringEmbeddedLanguage = null;
              // check if we need to process group matches first
              if (groupMatching) {
                  matches = groupMatching.matches;
                  const groupEntry = groupMatching.groups.shift();
                  matched = groupEntry.matched;
                  action = groupEntry.action;
                  rule = groupMatching.rule;
                  // cleanup if necessary
                  if (groupMatching.groups.length === 0) {
                      groupMatching = null;
                  }
              }
              else {
                  // otherwise we match on the token stream
                  if (!forceEvaluation && pos >= lineLength) {
                      // nothing to do
                      break;
                  }
                  forceEvaluation = false;
                  // get the rules for this state
                  let rules = this._lexer.tokenizer[state];
                  if (!rules) {
                      rules = monarchCommon.findRules(this._lexer, state); // do parent matching
                      if (!rules) {
                          throw monarchCommon.createError(this._lexer, 'tokenizer state is not defined: ' + state);
                      }
                  }
                  // try each rule until we match
                  const restOfLine = line.substring(pos);
                  for (const candidate of rules) {
                      if (pos === 0 || !candidate.matchOnlyAtLineStart) {
                          matches = restOfLine.match(candidate.regex);
                          if (matches) {
                              matched = matches[0];
                              action = candidate.action;
                              // Remember the matching rule so that error
                              // messages and group matching can name it.
                              rule = candidate;
                              break;
                          }
                      }
                  }
              }
              // We matched 'rule' with 'matches' and 'action'
              if (!matches) {
                  matches = [''];
                  matched = '';
              }
              if (!action) {
                  // bad: we didn't match anything, and there is no action to take
                  // we need to advance the stream or we get progress trouble
                  if (pos < lineLength) {
                      matches = [line.charAt(pos)];
                      matched = matches[0];
                  }
                  action = this._lexer.defaultToken;
              }
              if (matched === null) {
                  // should never happen, needed for strict null checking
                  break;
              }
              // advance stream
              pos += matched.length;
              // maybe call action function (used for 'cases')
              while (monarchCommon.isFuzzyAction(action) && monarchCommon.isIAction(action) && action.test) {
                  action = action.test(matched, matches, state, pos === lineLength);
              }
              let result = null;
              // set the result: either a string or an array of actions
              if (typeof action === 'string' || Array.isArray(action)) {
                  result = action;
              }
              else if (action.group) {
                  result = action.group;
              }
              else if (action.token !== null && action.token !== undefined) {
                  // do $n replacements?
                  if (action.tokenSubst) {
                      result = monarchCommon.substituteMatches(this._lexer, action.token, matched, matches, state);
                  }
                  else {
                      result = action.token;
                  }
                  // enter embedded language?
                  if (action.nextEmbedded) {
                      if (action.nextEmbedded === '@pop') {
                          if (!embeddedLanguageData) {
                              throw monarchCommon.createError(this._lexer, 'cannot pop embedded language if not inside one');
                          }
                          embeddedLanguageData = null;
                      }
                      else if (embeddedLanguageData) {
                          throw monarchCommon.createError(this._lexer, 'cannot enter embedded language from within an embedded language');
                      }
                      else {
                          enteringEmbeddedLanguage = monarchCommon.substituteMatches(this._lexer, action.nextEmbedded, matched, matches, state);
                      }
                  }
                  // state transformations
                  if (action.goBack) { // back up the stream..
                      pos = Math.max(0, pos - action.goBack);
                  }
                  if (action.switchTo && typeof action.switchTo === 'string') {
                      let nextState = monarchCommon.substituteMatches(this._lexer, action.switchTo, matched, matches, state); // switch state without a push...
                      if (nextState[0] === '@') {
                          nextState = nextState.substring(1); // peel off starting '@'
                      }
                      if (!monarchCommon.findRules(this._lexer, nextState)) {
                          throw monarchCommon.createError(this._lexer, 'trying to switch to a state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
                      }
                      else {
                          stack = stack.switchTo(nextState);
                      }
                  }
                  else if (action.transform && typeof action.transform === 'function') {
                      throw monarchCommon.createError(this._lexer, 'action.transform not supported');
                  }
                  else if (action.next) {
                      if (action.next === '@push') {
                          if (stack.depth >= this._lexer.maxStack) {
                              throw monarchCommon.createError(this._lexer, 'maximum tokenizer stack size reached: [' +
                                  stack.state + ',' + stack.parent.state + ',...]');
                          }
                          else {
                              stack = stack.push(state);
                          }
                      }
                      else if (action.next === '@pop') {
                          if (stack.depth <= 1) {
                              throw monarchCommon.createError(this._lexer, 'trying to pop an empty stack in rule: ' + this._safeRuleName(rule));
                          }
                          else {
                              stack = stack.pop();
                          }
                      }
                      else if (action.next === '@popall') {
                          stack = stack.popall();
                      }
                      else {
                          let nextState = monarchCommon.substituteMatches(this._lexer, action.next, matched, matches, state);
                          if (nextState[0] === '@') {
                              nextState = nextState.substring(1); // peel off starting '@'
                          }
                          if (!monarchCommon.findRules(this._lexer, nextState)) {
                              throw monarchCommon.createError(this._lexer, 'trying to set a next state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
                          }
                          else {
                              stack = stack.push(nextState);
                          }
                      }
                  }
                  if (action.log && typeof (action.log) === 'string') {
                      monarchCommon.log(this._lexer, this._lexer.languageId + ': ' + monarchCommon.substituteMatches(this._lexer, action.log, matched, matches, state));
                  }
              }
              // check result
              if (result === null) {
                  throw monarchCommon.createError(this._lexer, 'lexer rule has no well-defined action in rule: ' + this._safeRuleName(rule));
              }
              const computeNewStateForEmbeddedLanguage = (enteringEmbeddedLanguage) => {
                  // support language names, mime types, and language ids
                  const languageId = (this._languageService.getLanguageIdByLanguageName(enteringEmbeddedLanguage)
                      || this._languageService.getLanguageIdByMimeType(enteringEmbeddedLanguage)
                      || enteringEmbeddedLanguage);
                  const embeddedLanguageData = this._getNestedEmbeddedLanguageData(languageId);
                  if (pos < lineLength) {
                      // there is content from the embedded language on this line
                      const restOfLine = lineWithoutLF.substring(pos);
                      return this._nestedTokenize(restOfLine, hasEOL, MonarchLineStateFactory.create(stack, embeddedLanguageData), offsetDelta + pos, tokensCollector);
                  }
                  else {
                      return MonarchLineStateFactory.create(stack, embeddedLanguageData);
                  }
              };
              // is the result a group match?
              if (Array.isArray(result)) {
                  if (groupMatching && groupMatching.groups.length > 0) {
                      throw monarchCommon.createError(this._lexer, 'groups cannot be nested: ' + this._safeRuleName(rule));
                  }
                  if (matches.length !== result.length + 1) {
                      throw monarchCommon.createError(this._lexer, 'matched number of groups does not match the number of actions in rule: ' + this._safeRuleName(rule));
                  }
                  let totalLen = 0;
                  for (let i = 1; i < matches.length; i++) {
                      totalLen += matches[i].length;
                  }
                  if (totalLen !== matched.length) {
                      throw monarchCommon.createError(this._lexer, 'with groups, all characters should be matched in consecutive groups in rule: ' + this._safeRuleName(rule));
                  }
                  groupMatching = {
                      rule: rule,
                      matches: matches,
                      groups: []
                  };
                  for (let i = 0; i < result.length; i++) {
                      groupMatching.groups[i] = {
                          action: result[i],
                          matched: matches[i + 1]
                      };
                  }
                  pos -= matched.length;
                  // call recursively to initiate first result match
                  continue;
              }
              else {
                  // regular result
                  // check for '@rematch'
                  if (result === '@rematch') {
                      pos -= matched.length;
                      matched = ''; // better set the next state too..
                      matches = null;
                      result = '';
                      // Even though `@rematch` was specified, if `nextEmbedded` also specified,
                      // a state transition should occur.
                      if (enteringEmbeddedLanguage !== null) {
                          return computeNewStateForEmbeddedLanguage(enteringEmbeddedLanguage);
                      }
                  }
                  // check progress
                  if (matched.length === 0) {
                      if (lineLength === 0 || stackLen0 !== stack.depth || state !== stack.state || (!groupMatching ? 0 : groupMatching.groups.length) !== groupLen0) {
                          continue;
                      }
                      else {
                          throw monarchCommon.createError(this._lexer, 'no progress in tokenizer in rule: ' + this._safeRuleName(rule));
                      }
                  }
                  // return the result (and check for brace matching)
                  // todo: for efficiency we could pre-sanitize tokenPostfix and substitutions
                  let tokenType = null;
                  if (monarchCommon.isString(result) && result.startsWith('@brackets')) {
                      const rest = result.substring('@brackets'.length);
                      const bracket = findBracket(this._lexer, matched);
                      if (!bracket) {
                          throw monarchCommon.createError(this._lexer, '@brackets token returned but no bracket defined as: ' + matched);
                      }
                      tokenType = monarchCommon.sanitize(bracket.token + rest);
                  }
                  else {
                      const token = (result === '' ? '' : result + this._lexer.tokenPostfix);
                      tokenType = monarchCommon.sanitize(token);
                  }
                  // Do not emit a token for the appended '\n' (includeLF).
                  if (pos0 < lineWithoutLFLength) {
                      tokensCollector.emit(pos0 + offsetDelta, tokenType);
                  }
              }
              if (enteringEmbeddedLanguage !== null) {
                  return computeNewStateForEmbeddedLanguage(enteringEmbeddedLanguage);
              }
          }
          return MonarchLineStateFactory.create(stack, embeddedLanguageData);
      }
      /**
       * Builds the embedded-language data for `languageId`. For a registered
       * language other than this one, loading of its tokenizer is requested
       * and the language is remembered as embedded. The data starts from the
       * embedded tokenizer's initial state when one is available, else from
       * NullState.
       */
      _getNestedEmbeddedLanguageData(languageId) {
          if (!this._languageService.isRegisteredLanguageId(languageId)) {
              return new EmbeddedLanguageData(languageId, NullState);
          }
          if (languageId !== this._languageId) {
              // Fire language loading event
              languages.TokenizationRegistry.getOrCreate(languageId);
              this._embeddedLanguages[languageId] = true;
          }
          const tokenizationSupport = languages.TokenizationRegistry.get(languageId);
          if (tokenizationSupport) {
              return new EmbeddedLanguageData(languageId, tokenizationSupport.getInitialState());
          }
          return new EmbeddedLanguageData(languageId, NullState);
      }
  };
  MonarchTokenizer = __decorate([
      __param(4, IConfigurationService)
  ], MonarchTokenizer);
  742. export { MonarchTokenizer };
  /**
   * Looks up `matched` among the lexer's `brackets` entries.
   *
   * @param lexer Lexer whose `brackets` array is searched; the text is first
   *   passed through `monarchCommon.fixCase` for this lexer.
   * @param matched The matched text; a falsy value yields `null`.
   * @returns `{ token, bracketType }` of the first entry whose `open` (type 1)
   *   or `close` (type -1) equals the text, or `null` when none does.
   */
  function findBracket(lexer, matched) {
      if (!matched) {
          return null;
      }
      const needle = monarchCommon.fixCase(lexer, matched);
      for (const entry of lexer.brackets) {
          // `open` takes precedence when an entry uses the same text for both.
          const isOpen = entry.open === needle;
          if (isOpen || entry.close === needle) {
              return {
                  token: entry.token,
                  bracketType: isOpen
                      ? 1 /* monarchCommon.MonarchBracket.Open */
                      : -1 /* monarchCommon.MonarchBracket.Close */
              };
          }
      }
      return null;
  }