9583e8fa9c2d0416d2e1f6fb14afcc782a5ff0cdeadf843012e5ca0b9787717747d8a542b5599f06cdac9f920de7ac83fa7c02488c665a9179b19f01b90197 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  5. import { Iterable } from '../../../base/common/iterator.js';
  6. import { LinkedList } from '../../../base/common/linkedList.js';
  /**
   * The characters that separate words by default. `createWordRegExp` escapes each
   * of them and puts them (together with whitespace) into the negated character
   * class of the word pattern, unless the caller explicitly allows one in words.
   */
  export const USUAL_WORD_SEPARATORS = '`~!@#$%^&*()-=+[{]}\\|;:\'",.<>/?';
  /**
   * Build the global word-matching regular expression from the default word separators.
   *
   * The pattern has two alternatives: a decimal-number group, and a group matching a
   * run of characters that are neither whitespace nor one of the separators. Any
   * separator listed in `allowInWords` is left out of the excluded set, so it can
   * appear inside a word.
   *
   * With no allowed separators the result looks like this:
   * /(-?\d*\.\d\w*)|([^\`\~\!\@\#\$\%\^\&\*\(\)\-\=\+\[\{\]\}\\\|\;\:\'\"\,\.\<\>\/\?\s]+)/g
   */
  function createWordRegExp(allowInWords = '') {
    // every separator that stays excluded from words, backslash-escaped for the character class
    const excluded = Array.from(USUAL_WORD_SEPARATORS)
      .filter((separator) => allowInWords.indexOf(separator) < 0)
      .map((separator) => '\\' + separator)
      .join('');
    const pattern = '(-?\\d*\\.\\d\\w*)|([^' + excluded + '\\s]+)';
    return new RegExp(pattern, 'g');
  }
  // Default word pattern, built with no separators allowed inside words.
  // The first group catches decimal numbers (optional minus sign, optional leading
  // digits, a dot, at least one digit, then any trailing word characters); the second
  // group catches every other run of non-separator, non-whitespace characters, which
  // includes plain integers. The expression is global, so it carries `lastIndex`
  // state from one use to the next.
  export const DEFAULT_WORD_REGEXP = createWordRegExp();
  /**
   * Normalize a word definition into a global regular expression whose `lastIndex` is 0.
   *
   * - A value that is not a RegExp (including `null`/`undefined`) yields `DEFAULT_WORD_REGEXP`.
   * - A RegExp that already has the `g` flag is returned as the same object.
   * - A RegExp without `g` is re-created from its source with `g` added. All of its
   *   other flags are carried over, so flags such as `s` (dotAll) are no longer lost
   *   and the pattern keeps its meaning. The sticky flag `y` is still left out, as
   *   it always was.
   *
   * Note that `lastIndex` is reset on whatever object is returned, i.e. also on the
   * shared default expression or on the caller's own global expression.
   *
   * @param {RegExp | null | undefined} [wordDefinition] Candidate word pattern.
   * @returns {RegExp} A global regular expression with `lastIndex === 0`.
   */
  export function ensureValidWordDefinition(wordDefinition) {
    let result = DEFAULT_WORD_REGEXP;
    if (wordDefinition instanceof RegExp) {
      if (wordDefinition.global) {
        result = wordDefinition;
      } else {
        // `flags` cannot contain `g` here; each flag letter occurs at most once,
        // so a single replace is enough to drop the sticky flag.
        const carriedFlags = wordDefinition.flags.replace('y', '');
        result = new RegExp(wordDefinition.source, 'g' + carriedFlags);
      }
    }
    result.lastIndex = 0;
    return result;
  }
  // Default search limits for getWordAtText, used when the caller passes no config.
  // The object is stored as the first entry of a LinkedList and is read back by
  // getWordAtText through Iterable.first.
  const _defaultConfig = new LinkedList();
  _defaultConfig.unshift({
    // texts longer than this are cut down to a sub-string around the column before matching
    maxLen: 1000,
    // how many characters the regex start index moves back on each search round
    windowSize: 15,
    // elapsed milliseconds (measured with Date.now()) after which the search loop stops
    timeBudget: 150
  });
  /**
   * Locate the match of `wordDefinition` that covers a given column of `text`.
   *
   * The column is converted to a 0-based index into `text` as `column - 1 - textOffset`.
   * Rather than running the expression over the whole text, the search starts
   * `config.windowSize` characters before that index and moves its start back by
   * another `windowSize` on every round. It stops when a round finds nothing after an
   * earlier round found a match, when the start of the text has been searched, or
   * when `config.timeBudget` milliseconds have elapsed.
   *
   * @param {number} column Column to look up.
   * @param {RegExp} wordDefinition Word pattern. It is passed through
   *   `ensureValidWordDefinition` first, because the search depends on `exec`
   *   advancing `lastIndex`, which only happens for a global expression.
   * @param {string} text Text to search in.
   * @param {number} textOffset Offset subtracted from `column` to index into `text`
   *   and added back to the match index when columns are reported.
   * @param {{ maxLen: number, windowSize: number, timeBudget: number }} [config]
   *   Search limits; defaults to the first entry of `_defaultConfig`.
   * @returns {{ word: string, startColumn: number, endColumn: number } | null}
   *   The covering word with its start column and (exclusive) end column, or `null`.
   */
  export function getWordAtText(column, wordDefinition, text, textOffset, config) {
    // Ensure the regex has the 'g' flag, otherwise the exec loop below never advances
    wordDefinition = ensureValidWordDefinition(wordDefinition);
    if (!config) {
      config = Iterable.first(_defaultConfig);
    }
    if (text.length > config.maxLen) {
      // don't throw strings that long at the regexp
      // but use a sub-string in which a word must occur
      // (whole-number half width, so an odd maxLen cannot yield fractional offsets and columns)
      const halfLen = Math.floor(config.maxLen / 2);
      let start = column - halfLen;
      if (start < 0) {
        start = 0;
      } else {
        textOffset += start;
      }
      text = text.substring(start, column + halfLen);
      return getWordAtText(column, wordDefinition, text, textOffset, config);
    }
    const t1 = Date.now();
    const pos = column - 1 - textOffset;
    let prevRegexIndex = -1;
    let match = null;
    for (let i = 1; ; i++) {
      // check time budget
      if (Date.now() - t1 >= config.timeBudget) {
        break;
      }
      // reset the index at which the regexp should start matching, also know where it
      // should stop so that subsequent searches don't repeat previous searches
      const regexIndex = pos - config.windowSize * i;
      wordDefinition.lastIndex = Math.max(0, regexIndex);
      const thisMatch = _findRegexMatchEnclosingPosition(wordDefinition, text, pos, prevRegexIndex);
      if (!thisMatch && match) {
        // stop: we have something
        break;
      }
      match = thisMatch;
      // stop: searched at start
      if (regexIndex <= 0) {
        break;
      }
      prevRegexIndex = regexIndex;
    }
    // always hand the expression back in a clean state, whether or not a word was found
    wordDefinition.lastIndex = 0;
    if (!match) {
      return null;
    }
    return {
      word: match[0],
      startColumn: textOffset + 1 + match.index,
      endColumn: textOffset + 1 + match.index + match[0].length
    };
  }
  /**
   * Run `wordDefinition` over `text`, starting at the expression's current
   * `lastIndex`, and return the first match that covers `pos`.
   *
   * A match covers `pos` when it starts at or before `pos` and the expression's
   * `lastIndex` after the match is at or after `pos`.
   *
   * @param {RegExp} wordDefinition Word pattern; its `lastIndex` is read and advanced.
   * @param {string} text Text to search in.
   * @param {number} pos 0-based index into `text` that the match must cover.
   * @param {number} stopPos When greater than 0, the search gives up as soon as a
   *   non-covering match starts after this index.
   * @returns {RegExpExecArray | null} The covering match, or `null` when there is none.
   */
  function _findRegexMatchEnclosingPosition(wordDefinition, text, pos, stopPos) {
    // Without the global or sticky flag `exec` ignores `lastIndex` and would return
    // the same match on every call, so only a single attempt is possible.
    const canAdvance = wordDefinition.global || wordDefinition.sticky;
    let match;
    while ((match = wordDefinition.exec(text)) !== null) {
      const matchIndex = match.index || 0;
      if (matchIndex <= pos && wordDefinition.lastIndex >= pos) {
        return match;
      }
      if (stopPos > 0 && matchIndex > stopPos) {
        return null;
      }
      if (!canAdvance) {
        return null;
      }
      if (match[0].length === 0) {
        // a zero-width match leaves lastIndex where it was; step over it manually
        // so the loop cannot spin on the same position forever
        wordDefinition.lastIndex++;
      }
    }
    return null;
  }