fd1465a6b5daf0ae3389bd80977da9ec0b4a354ca7d032030a5447ac9f676fba37f0587eb4501615bbab626835ded2c497c4663eaf486e89b24417799b6830 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  5. import * as strings from '../../../base/common/strings.js';
  6. import { getMapForWordSeparators } from '../core/wordCharacterClassifier.js';
  7. import { Position } from '../core/position.js';
  8. import { Range } from '../core/range.js';
  9. import { FindMatch, SearchData } from '../model.js';
  10. const LIMIT_FIND_COUNT = 999;
  11. export class SearchParams {
  12. constructor(searchString, isRegex, matchCase, wordSeparators) {
  13. this.searchString = searchString;
  14. this.isRegex = isRegex;
  15. this.matchCase = matchCase;
  16. this.wordSeparators = wordSeparators;
  17. }
  18. parseSearchRequest() {
  19. if (this.searchString === '') {
  20. return null;
  21. }
  22. // Try to create a RegExp out of the params
  23. let multiline;
  24. if (this.isRegex) {
  25. multiline = isMultilineRegexSource(this.searchString);
  26. }
  27. else {
  28. multiline = (this.searchString.indexOf('\n') >= 0);
  29. }
  30. let regex = null;
  31. try {
  32. regex = strings.createRegExp(this.searchString, this.isRegex, {
  33. matchCase: this.matchCase,
  34. wholeWord: false,
  35. multiline: multiline,
  36. global: true,
  37. unicode: true
  38. });
  39. }
  40. catch (err) {
  41. return null;
  42. }
  43. if (!regex) {
  44. return null;
  45. }
  46. let canUseSimpleSearch = (!this.isRegex && !multiline);
  47. if (canUseSimpleSearch && this.searchString.toLowerCase() !== this.searchString.toUpperCase()) {
  48. // casing might make a difference
  49. canUseSimpleSearch = this.matchCase;
  50. }
  51. return new SearchData(regex, this.wordSeparators ? getMapForWordSeparators(this.wordSeparators) : null, canUseSimpleSearch ? this.searchString : null);
  52. }
  53. }
  54. export function isMultilineRegexSource(searchString) {
  55. if (!searchString || searchString.length === 0) {
  56. return false;
  57. }
  58. for (let i = 0, len = searchString.length; i < len; i++) {
  59. const chCode = searchString.charCodeAt(i);
  60. if (chCode === 10 /* CharCode.LineFeed */) {
  61. return true;
  62. }
  63. if (chCode === 92 /* CharCode.Backslash */) {
  64. // move to next char
  65. i++;
  66. if (i >= len) {
  67. // string ends with a \
  68. break;
  69. }
  70. const nextChCode = searchString.charCodeAt(i);
  71. if (nextChCode === 110 /* CharCode.n */ || nextChCode === 114 /* CharCode.r */ || nextChCode === 87 /* CharCode.W */) {
  72. return true;
  73. }
  74. }
  75. }
  76. return false;
  77. }
  78. export function createFindMatch(range, rawMatches, captureMatches) {
  79. if (!captureMatches) {
  80. return new FindMatch(range, null);
  81. }
  82. const matches = [];
  83. for (let i = 0, len = rawMatches.length; i < len; i++) {
  84. matches[i] = rawMatches[i];
  85. }
  86. return new FindMatch(range, matches);
  87. }
  88. class LineFeedCounter {
  89. constructor(text) {
  90. const lineFeedsOffsets = [];
  91. let lineFeedsOffsetsLen = 0;
  92. for (let i = 0, textLen = text.length; i < textLen; i++) {
  93. if (text.charCodeAt(i) === 10 /* CharCode.LineFeed */) {
  94. lineFeedsOffsets[lineFeedsOffsetsLen++] = i;
  95. }
  96. }
  97. this._lineFeedsOffsets = lineFeedsOffsets;
  98. }
  99. findLineFeedCountBeforeOffset(offset) {
  100. const lineFeedsOffsets = this._lineFeedsOffsets;
  101. let min = 0;
  102. let max = lineFeedsOffsets.length - 1;
  103. if (max === -1) {
  104. // no line feeds
  105. return 0;
  106. }
  107. if (offset <= lineFeedsOffsets[0]) {
  108. // before first line feed
  109. return 0;
  110. }
  111. while (min < max) {
  112. const mid = min + ((max - min) / 2 >> 0);
  113. if (lineFeedsOffsets[mid] >= offset) {
  114. max = mid - 1;
  115. }
  116. else {
  117. if (lineFeedsOffsets[mid + 1] >= offset) {
  118. // bingo!
  119. min = mid;
  120. max = mid;
  121. }
  122. else {
  123. min = mid + 1;
  124. }
  125. }
  126. }
  127. return min + 1;
  128. }
  129. }
  130. export class TextModelSearch {
  131. static findMatches(model, searchParams, searchRange, captureMatches, limitResultCount) {
  132. const searchData = searchParams.parseSearchRequest();
  133. if (!searchData) {
  134. return [];
  135. }
  136. if (searchData.regex.multiline) {
  137. return this._doFindMatchesMultiline(model, searchRange, new Searcher(searchData.wordSeparators, searchData.regex), captureMatches, limitResultCount);
  138. }
  139. return this._doFindMatchesLineByLine(model, searchRange, searchData, captureMatches, limitResultCount);
  140. }
  141. /**
  142. * Multiline search always executes on the lines concatenated with \n.
  143. * We must therefore compensate for the count of \n in case the model is CRLF
  144. */
  145. static _getMultilineMatchRange(model, deltaOffset, text, lfCounter, matchIndex, match0) {
  146. let startOffset;
  147. let lineFeedCountBeforeMatch = 0;
  148. if (lfCounter) {
  149. lineFeedCountBeforeMatch = lfCounter.findLineFeedCountBeforeOffset(matchIndex);
  150. startOffset = deltaOffset + matchIndex + lineFeedCountBeforeMatch /* add as many \r as there were \n */;
  151. }
  152. else {
  153. startOffset = deltaOffset + matchIndex;
  154. }
  155. let endOffset;
  156. if (lfCounter) {
  157. const lineFeedCountBeforeEndOfMatch = lfCounter.findLineFeedCountBeforeOffset(matchIndex + match0.length);
  158. const lineFeedCountInMatch = lineFeedCountBeforeEndOfMatch - lineFeedCountBeforeMatch;
  159. endOffset = startOffset + match0.length + lineFeedCountInMatch /* add as many \r as there were \n */;
  160. }
  161. else {
  162. endOffset = startOffset + match0.length;
  163. }
  164. const startPosition = model.getPositionAt(startOffset);
  165. const endPosition = model.getPositionAt(endOffset);
  166. return new Range(startPosition.lineNumber, startPosition.column, endPosition.lineNumber, endPosition.column);
  167. }
  168. static _doFindMatchesMultiline(model, searchRange, searcher, captureMatches, limitResultCount) {
  169. const deltaOffset = model.getOffsetAt(searchRange.getStartPosition());
  170. // We always execute multiline search over the lines joined with \n
  171. // This makes it that \n will match the EOL for both CRLF and LF models
  172. // We compensate for offset errors in `_getMultilineMatchRange`
  173. const text = model.getValueInRange(searchRange, 1 /* EndOfLinePreference.LF */);
  174. const lfCounter = (model.getEOL() === '\r\n' ? new LineFeedCounter(text) : null);
  175. const result = [];
  176. let counter = 0;
  177. let m;
  178. searcher.reset(0);
  179. while ((m = searcher.next(text))) {
  180. result[counter++] = createFindMatch(this._getMultilineMatchRange(model, deltaOffset, text, lfCounter, m.index, m[0]), m, captureMatches);
  181. if (counter >= limitResultCount) {
  182. return result;
  183. }
  184. }
  185. return result;
  186. }
  187. static _doFindMatchesLineByLine(model, searchRange, searchData, captureMatches, limitResultCount) {
  188. const result = [];
  189. let resultLen = 0;
  190. // Early case for a search range that starts & stops on the same line number
  191. if (searchRange.startLineNumber === searchRange.endLineNumber) {
  192. const text = model.getLineContent(searchRange.startLineNumber).substring(searchRange.startColumn - 1, searchRange.endColumn - 1);
  193. resultLen = this._findMatchesInLine(searchData, text, searchRange.startLineNumber, searchRange.startColumn - 1, resultLen, result, captureMatches, limitResultCount);
  194. return result;
  195. }
  196. // Collect results from first line
  197. const text = model.getLineContent(searchRange.startLineNumber).substring(searchRange.startColumn - 1);
  198. resultLen = this._findMatchesInLine(searchData, text, searchRange.startLineNumber, searchRange.startColumn - 1, resultLen, result, captureMatches, limitResultCount);
  199. // Collect results from middle lines
  200. for (let lineNumber = searchRange.startLineNumber + 1; lineNumber < searchRange.endLineNumber && resultLen < limitResultCount; lineNumber++) {
  201. resultLen = this._findMatchesInLine(searchData, model.getLineContent(lineNumber), lineNumber, 0, resultLen, result, captureMatches, limitResultCount);
  202. }
  203. // Collect results from last line
  204. if (resultLen < limitResultCount) {
  205. const text = model.getLineContent(searchRange.endLineNumber).substring(0, searchRange.endColumn - 1);
  206. resultLen = this._findMatchesInLine(searchData, text, searchRange.endLineNumber, 0, resultLen, result, captureMatches, limitResultCount);
  207. }
  208. return result;
  209. }
  210. static _findMatchesInLine(searchData, text, lineNumber, deltaOffset, resultLen, result, captureMatches, limitResultCount) {
  211. const wordSeparators = searchData.wordSeparators;
  212. if (!captureMatches && searchData.simpleSearch) {
  213. const searchString = searchData.simpleSearch;
  214. const searchStringLen = searchString.length;
  215. const textLength = text.length;
  216. let lastMatchIndex = -searchStringLen;
  217. while ((lastMatchIndex = text.indexOf(searchString, lastMatchIndex + searchStringLen)) !== -1) {
  218. if (!wordSeparators || isValidMatch(wordSeparators, text, textLength, lastMatchIndex, searchStringLen)) {
  219. result[resultLen++] = new FindMatch(new Range(lineNumber, lastMatchIndex + 1 + deltaOffset, lineNumber, lastMatchIndex + 1 + searchStringLen + deltaOffset), null);
  220. if (resultLen >= limitResultCount) {
  221. return resultLen;
  222. }
  223. }
  224. }
  225. return resultLen;
  226. }
  227. const searcher = new Searcher(searchData.wordSeparators, searchData.regex);
  228. let m;
  229. // Reset regex to search from the beginning
  230. searcher.reset(0);
  231. do {
  232. m = searcher.next(text);
  233. if (m) {
  234. result[resultLen++] = createFindMatch(new Range(lineNumber, m.index + 1 + deltaOffset, lineNumber, m.index + 1 + m[0].length + deltaOffset), m, captureMatches);
  235. if (resultLen >= limitResultCount) {
  236. return resultLen;
  237. }
  238. }
  239. } while (m);
  240. return resultLen;
  241. }
  242. static findNextMatch(model, searchParams, searchStart, captureMatches) {
  243. const searchData = searchParams.parseSearchRequest();
  244. if (!searchData) {
  245. return null;
  246. }
  247. const searcher = new Searcher(searchData.wordSeparators, searchData.regex);
  248. if (searchData.regex.multiline) {
  249. return this._doFindNextMatchMultiline(model, searchStart, searcher, captureMatches);
  250. }
  251. return this._doFindNextMatchLineByLine(model, searchStart, searcher, captureMatches);
  252. }
  253. static _doFindNextMatchMultiline(model, searchStart, searcher, captureMatches) {
  254. const searchTextStart = new Position(searchStart.lineNumber, 1);
  255. const deltaOffset = model.getOffsetAt(searchTextStart);
  256. const lineCount = model.getLineCount();
  257. // We always execute multiline search over the lines joined with \n
  258. // This makes it that \n will match the EOL for both CRLF and LF models
  259. // We compensate for offset errors in `_getMultilineMatchRange`
  260. const text = model.getValueInRange(new Range(searchTextStart.lineNumber, searchTextStart.column, lineCount, model.getLineMaxColumn(lineCount)), 1 /* EndOfLinePreference.LF */);
  261. const lfCounter = (model.getEOL() === '\r\n' ? new LineFeedCounter(text) : null);
  262. searcher.reset(searchStart.column - 1);
  263. const m = searcher.next(text);
  264. if (m) {
  265. return createFindMatch(this._getMultilineMatchRange(model, deltaOffset, text, lfCounter, m.index, m[0]), m, captureMatches);
  266. }
  267. if (searchStart.lineNumber !== 1 || searchStart.column !== 1) {
  268. // Try again from the top
  269. return this._doFindNextMatchMultiline(model, new Position(1, 1), searcher, captureMatches);
  270. }
  271. return null;
  272. }
  273. static _doFindNextMatchLineByLine(model, searchStart, searcher, captureMatches) {
  274. const lineCount = model.getLineCount();
  275. const startLineNumber = searchStart.lineNumber;
  276. // Look in first line
  277. const text = model.getLineContent(startLineNumber);
  278. const r = this._findFirstMatchInLine(searcher, text, startLineNumber, searchStart.column, captureMatches);
  279. if (r) {
  280. return r;
  281. }
  282. for (let i = 1; i <= lineCount; i++) {
  283. const lineIndex = (startLineNumber + i - 1) % lineCount;
  284. const text = model.getLineContent(lineIndex + 1);
  285. const r = this._findFirstMatchInLine(searcher, text, lineIndex + 1, 1, captureMatches);
  286. if (r) {
  287. return r;
  288. }
  289. }
  290. return null;
  291. }
  292. static _findFirstMatchInLine(searcher, text, lineNumber, fromColumn, captureMatches) {
  293. // Set regex to search from column
  294. searcher.reset(fromColumn - 1);
  295. const m = searcher.next(text);
  296. if (m) {
  297. return createFindMatch(new Range(lineNumber, m.index + 1, lineNumber, m.index + 1 + m[0].length), m, captureMatches);
  298. }
  299. return null;
  300. }
  301. static findPreviousMatch(model, searchParams, searchStart, captureMatches) {
  302. const searchData = searchParams.parseSearchRequest();
  303. if (!searchData) {
  304. return null;
  305. }
  306. const searcher = new Searcher(searchData.wordSeparators, searchData.regex);
  307. if (searchData.regex.multiline) {
  308. return this._doFindPreviousMatchMultiline(model, searchStart, searcher, captureMatches);
  309. }
  310. return this._doFindPreviousMatchLineByLine(model, searchStart, searcher, captureMatches);
  311. }
  312. static _doFindPreviousMatchMultiline(model, searchStart, searcher, captureMatches) {
  313. const matches = this._doFindMatchesMultiline(model, new Range(1, 1, searchStart.lineNumber, searchStart.column), searcher, captureMatches, 10 * LIMIT_FIND_COUNT);
  314. if (matches.length > 0) {
  315. return matches[matches.length - 1];
  316. }
  317. const lineCount = model.getLineCount();
  318. if (searchStart.lineNumber !== lineCount || searchStart.column !== model.getLineMaxColumn(lineCount)) {
  319. // Try again with all content
  320. return this._doFindPreviousMatchMultiline(model, new Position(lineCount, model.getLineMaxColumn(lineCount)), searcher, captureMatches);
  321. }
  322. return null;
  323. }
  324. static _doFindPreviousMatchLineByLine(model, searchStart, searcher, captureMatches) {
  325. const lineCount = model.getLineCount();
  326. const startLineNumber = searchStart.lineNumber;
  327. // Look in first line
  328. const text = model.getLineContent(startLineNumber).substring(0, searchStart.column - 1);
  329. const r = this._findLastMatchInLine(searcher, text, startLineNumber, captureMatches);
  330. if (r) {
  331. return r;
  332. }
  333. for (let i = 1; i <= lineCount; i++) {
  334. const lineIndex = (lineCount + startLineNumber - i - 1) % lineCount;
  335. const text = model.getLineContent(lineIndex + 1);
  336. const r = this._findLastMatchInLine(searcher, text, lineIndex + 1, captureMatches);
  337. if (r) {
  338. return r;
  339. }
  340. }
  341. return null;
  342. }
  343. static _findLastMatchInLine(searcher, text, lineNumber, captureMatches) {
  344. let bestResult = null;
  345. let m;
  346. searcher.reset(0);
  347. while ((m = searcher.next(text))) {
  348. bestResult = createFindMatch(new Range(lineNumber, m.index + 1, lineNumber, m.index + 1 + m[0].length), m, captureMatches);
  349. }
  350. return bestResult;
  351. }
  352. }
  353. function leftIsWordBounday(wordSeparators, text, textLength, matchStartIndex, matchLength) {
  354. if (matchStartIndex === 0) {
  355. // Match starts at start of string
  356. return true;
  357. }
  358. const charBefore = text.charCodeAt(matchStartIndex - 1);
  359. if (wordSeparators.get(charBefore) !== 0 /* WordCharacterClass.Regular */) {
  360. // The character before the match is a word separator
  361. return true;
  362. }
  363. if (charBefore === 13 /* CharCode.CarriageReturn */ || charBefore === 10 /* CharCode.LineFeed */) {
  364. // The character before the match is line break or carriage return.
  365. return true;
  366. }
  367. if (matchLength > 0) {
  368. const firstCharInMatch = text.charCodeAt(matchStartIndex);
  369. if (wordSeparators.get(firstCharInMatch) !== 0 /* WordCharacterClass.Regular */) {
  370. // The first character inside the match is a word separator
  371. return true;
  372. }
  373. }
  374. return false;
  375. }
  376. function rightIsWordBounday(wordSeparators, text, textLength, matchStartIndex, matchLength) {
  377. if (matchStartIndex + matchLength === textLength) {
  378. // Match ends at end of string
  379. return true;
  380. }
  381. const charAfter = text.charCodeAt(matchStartIndex + matchLength);
  382. if (wordSeparators.get(charAfter) !== 0 /* WordCharacterClass.Regular */) {
  383. // The character after the match is a word separator
  384. return true;
  385. }
  386. if (charAfter === 13 /* CharCode.CarriageReturn */ || charAfter === 10 /* CharCode.LineFeed */) {
  387. // The character after the match is line break or carriage return.
  388. return true;
  389. }
  390. if (matchLength > 0) {
  391. const lastCharInMatch = text.charCodeAt(matchStartIndex + matchLength - 1);
  392. if (wordSeparators.get(lastCharInMatch) !== 0 /* WordCharacterClass.Regular */) {
  393. // The last character in the match is a word separator
  394. return true;
  395. }
  396. }
  397. return false;
  398. }
  399. export function isValidMatch(wordSeparators, text, textLength, matchStartIndex, matchLength) {
  400. return (leftIsWordBounday(wordSeparators, text, textLength, matchStartIndex, matchLength)
  401. && rightIsWordBounday(wordSeparators, text, textLength, matchStartIndex, matchLength));
  402. }
  403. export class Searcher {
  404. constructor(wordSeparators, searchRegex) {
  405. this._wordSeparators = wordSeparators;
  406. this._searchRegex = searchRegex;
  407. this._prevMatchStartIndex = -1;
  408. this._prevMatchLength = 0;
  409. }
  410. reset(lastIndex) {
  411. this._searchRegex.lastIndex = lastIndex;
  412. this._prevMatchStartIndex = -1;
  413. this._prevMatchLength = 0;
  414. }
  415. next(text) {
  416. const textLength = text.length;
  417. let m;
  418. do {
  419. if (this._prevMatchStartIndex + this._prevMatchLength === textLength) {
  420. // Reached the end of the line
  421. return null;
  422. }
  423. m = this._searchRegex.exec(text);
  424. if (!m) {
  425. return null;
  426. }
  427. const matchStartIndex = m.index;
  428. const matchLength = m[0].length;
  429. if (matchStartIndex === this._prevMatchStartIndex && matchLength === this._prevMatchLength) {
  430. if (matchLength === 0) {
  431. // the search result is an empty string and won't advance `regex.lastIndex`, so `regex.exec` will stuck here
  432. // we attempt to recover from that by advancing by two if surrogate pair found and by one otherwise
  433. if (strings.getNextCodePoint(text, textLength, this._searchRegex.lastIndex) > 0xFFFF) {
  434. this._searchRegex.lastIndex += 2;
  435. }
  436. else {
  437. this._searchRegex.lastIndex += 1;
  438. }
  439. continue;
  440. }
  441. // Exit early if the regex matches the same range twice
  442. return null;
  443. }
  444. this._prevMatchStartIndex = matchStartIndex;
  445. this._prevMatchLength = matchLength;
  446. if (!this._wordSeparators || isValidMatch(this._wordSeparators, text, textLength, matchStartIndex, matchLength)) {
  447. return m;
  448. }
  449. } while (m);
  450. return null;
  451. }
  452. }