uri.js 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  5. import * as paths from './path.js';
  6. import { isWindows } from './platform.js';
  // Shape of a scheme, after https://tools.ietf.org/html/rfc3986#section-3.1:
  // ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ). The pattern is more lenient than
  // the RFC because `\w` also admits digits and `_` in the first position.
  const _schemePattern = /^\w[\w\d+.-]*$/;
  // Path starts with at least one slash.
  const _singleSlashStart = /^\//;
  // Path starts with two slashes.
  const _doubleSlashStart = /^\/\//;
  /**
   * Checks the components of `ret` and throws when they cannot form a valid URI.
   *
   * @param ret Object carrying `scheme`, `authority`, `path`, `query` and `fragment`.
   * @param _strict When truthy, an empty scheme is rejected.
   * @throws {Error} A `[UriError]` when the scheme is missing (strict only), when the
   * scheme contains illegal characters, or when the path does not fit the authority.
   */
  function _validateUri(ret, _strict) {
    const { scheme, authority, path } = ret;
    // scheme, must be set
    if (_strict && !scheme) {
      throw new Error(`[UriError]: Scheme is missing: {scheme: "", authority: "${ret.authority}", path: "${ret.path}", query: "${ret.query}", fragment: "${ret.fragment}"}`);
    }
    if (scheme && !_schemePattern.test(scheme)) {
      throw new Error('[UriError]: Scheme contains illegal characters.');
    }
    // path, http://tools.ietf.org/html/rfc3986#section-3.3
    // An empty path is always acceptable.
    if (!path) {
      return;
    }
    if (authority) {
      // With an authority the path must begin with a slash.
      if (!_singleSlashStart.test(path)) {
        throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character');
      }
      return;
    }
    // Without an authority the path must not begin with "//".
    if (_doubleSlashStart.test(path)) {
      throw new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")');
    }
  }
  /**
   * Migration shim for URIs that were created without a scheme: outside of
   * strict mode a missing scheme falls back to `file`, which should cause the
   * least damage. In strict mode the scheme is returned untouched.
   *
   * @param scheme The scheme as given by the caller, possibly empty.
   * @param _strict Whether strict mode is on.
   * @returns `'file'` when the scheme is missing and strict mode is off, otherwise `scheme`.
   */
  function _schemeFix(scheme, _strict) {
    return (_strict || scheme) ? scheme : 'file';
  }
  /**
   * Implements a bit of https://tools.ietf.org/html/rfc3986#section-5.
   *
   * The slash character acts as the 'default base' because constructing URIs
   * relative to other URIs is not supported. This also means that paths are
   * altered and potentially broken, see
   * https://tools.ietf.org/html/rfc3986#section-5.1.4
   *
   * @param scheme The (already fixed) scheme of the URI.
   * @param path The path as given, possibly empty.
   * @returns For `https`, `http` and `file` a path that starts with a slash; for
   * every other scheme the path unchanged.
   */
  function _referenceResolution(scheme, path) {
    const wantsRootedPath = scheme === 'https' || scheme === 'http' || scheme === 'file';
    if (!wantsRootedPath) {
      return path;
    }
    if (!path) {
      return _slash;
    }
    return path[0] === _slash ? path : _slash + path;
  }
  const _empty = '';
  const _slash = '/';
  // Splits a URI string into its parts. `URI.parse` reads capture group 2 as the
  // scheme, 4 as the authority, 5 as the path, 7 as the query and 9 as the fragment.
  const _regexp = /^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/;
  /**
   * Uniform Resource Identifier (URI) http://tools.ietf.org/html/rfc3986.
   * This class is a simple parser which creates the basic component parts
   * (http://tools.ietf.org/html/rfc3986#section-3) with minimal validation
   * and encoding.
   *
   * ```txt
   *       foo://example.com:8042/over/there?name=ferret#nose
   *       \_/   \______________/\_________/ \_________/ \__/
   *        |           |            |            |        |
   *     scheme     authority       path        query   fragment
   *        |   _____________________|__
   *       / \ /                        \
   *       urn:example:animal:ferret:nose
   * ```
   */
  export class URI {
    /**
     * Tells whether `thing` is a URI: either an instance of this class or an
     * object that has string `authority`, `fragment`, `path`, `query`, `scheme`
     * and `fsPath` properties plus `with` and `toString` functions.
     *
     * @param thing Any value.
     * @returns `true` when `thing` looks like a URI, `false` otherwise.
     */
    static isUri(thing) {
      if (thing instanceof URI) {
        return true;
      }
      if (!thing) {
        return false;
      }
      return typeof thing.authority === 'string'
        && typeof thing.fragment === 'string'
        && typeof thing.path === 'string'
        && typeof thing.query === 'string'
        && typeof thing.scheme === 'string'
        && typeof thing.fsPath === 'string'
        && typeof thing.with === 'function'
        && typeof thing.toString === 'function';
    }
    /**
     * Creates a URI either from a components object (first argument is an
     * object; its components are copied without validation) or from the
     * individual components, which are scheme-fixed, path-resolved and validated.
     *
     * @internal
     */
    constructor(schemeOrData, authority, path, query, fragment, _strict = false) {
      // `typeof null` is 'object' as well. A null scheme must not be mistaken
      // for a components object, otherwise reading `null.scheme` throws a
      // TypeError; it is handled like any other missing scheme instead.
      if (schemeOrData !== null && typeof schemeOrData === 'object') {
        this.scheme = schemeOrData.scheme || _empty;
        this.authority = schemeOrData.authority || _empty;
        this.path = schemeOrData.path || _empty;
        this.query = schemeOrData.query || _empty;
        this.fragment = schemeOrData.fragment || _empty;
        // no validation because it's this URI
        // that creates uri components.
        // _validateUri(this);
      }
      else {
        this.scheme = _schemeFix(schemeOrData, _strict);
        this.authority = authority || _empty;
        this.path = _referenceResolution(this.scheme, path || _empty);
        this.query = query || _empty;
        this.fragment = fragment || _empty;
        _validateUri(this, _strict);
      }
    }
    // ---- filesystem path -----------------------
    /**
     * Returns a string representing the corresponding file system path of this URI.
     * Will handle UNC paths, normalizes windows drive letters to lower-case, and uses the
     * platform specific path separator.
     *
     * * Will *not* validate the path for invalid characters and semantics.
     * * Will *not* look at the scheme of this URI.
     * * The result shall *not* be used for display purposes but for accessing a file on disk.
     *
     *
     * The *difference* to `URI#path` is the use of the platform specific separator and the handling
     * of UNC paths. See the below sample of a file-uri with an authority (UNC path).
     *
     * ```ts
        const u = URI.parse('file://server/c$/folder/file.txt')
        u.authority === 'server'
        u.path === '/c$/folder/file.txt'
        u.fsPath === '\\server\c$\folder\file.txt'
    ```
     *
     * Using `URI#path` to read a file (using fs-apis) would not be enough because parts of the path,
     * namely the server name, would be missing. Therefore `URI#fsPath` exists - it's sugar to ease working
     * with URIs that represent files on disk (`file` scheme).
     */
    get fsPath() {
      // if (this.scheme !== 'file') {
      // 	console.warn(`[UriError] calling fsPath with scheme ${this.scheme}`);
      // }
      return uriToFsPath(this, false);
    }
    // ---- modify to new -------------------------
    /**
     * Derives a URI from this one with some components replaced.
     *
     * @param change The components to replace. A component that is `undefined`
     * keeps its current value, a component that is `null` is cleared.
     * @returns This very URI when `change` is falsy or changes nothing, otherwise
     * a new URI.
     */
    with(change) {
      if (!change) {
        return this;
      }
      // `undefined` keeps the current component, `null` resets it to empty.
      const pick = (next, current) => {
        if (next === undefined) {
          return current;
        }
        return next === null ? _empty : next;
      };
      const scheme = pick(change.scheme, this.scheme);
      const authority = pick(change.authority, this.authority);
      const path = pick(change.path, this.path);
      const query = pick(change.query, this.query);
      const fragment = pick(change.fragment, this.fragment);
      if (scheme === this.scheme
        && authority === this.authority
        && path === this.path
        && query === this.query
        && fragment === this.fragment) {
        return this;
      }
      return new Uri(scheme, authority, path, query, fragment);
    }
    // ---- parse & validate ------------------------
    /**
     * Creates a new URI from a string, e.g. `http://www.example.com/some/path`,
     * `file:///usr/home`, or `scheme:with/path`.
     *
     * @param value A string which represents an URI (see `URI#toString`).
     * @param _strict When truthy, a missing scheme is an error instead of
     * falling back to `file`.
     */
    static parse(value, _strict = false) {
      const match = _regexp.exec(value);
      if (!match) {
        return new Uri(_empty, _empty, _empty, _empty, _empty);
      }
      // The scheme is taken verbatim, all other components are percent-decoded.
      return new Uri(match[2] || _empty, percentDecode(match[4] || _empty), percentDecode(match[5] || _empty), percentDecode(match[7] || _empty), percentDecode(match[9] || _empty), _strict);
    }
    /**
     * Creates a new URI from a file system path, e.g. `c:\my\files`,
     * `/usr/home`, or `\\server\share\some\path`.
     *
     * The *difference* between `URI#parse` and `URI#file` is that the latter treats the argument
     * as path, not as stringified-uri. E.g. `URI.file(path)` is **not the same as**
     * `URI.parse('file://' + path)` because the path might contain characters that are
     * interpreted (# and ?). See the following sample:
     * ```ts
    const good = URI.file('/coding/c#/project1');
    good.scheme === 'file';
    good.path === '/coding/c#/project1';
    good.fragment === '';
    const bad = URI.parse('file://' + '/coding/c#/project1');
    bad.scheme === 'file';
    bad.path === '/coding/c'; // path is now broken
    bad.fragment === '/project1';
    ```
     *
     * @param path A file system path (see `URI#fsPath`)
     */
    static file(path) {
      let authority = _empty;
      // normalize to fwd-slashes on windows,
      // on other systems bwd-slashes are valid
      // filename character, eg /f\oo/ba\r.txt
      if (isWindows) {
        path = path.replace(/\\/g, _slash);
      }
      // check for authority as used in UNC shares
      // or use the path as given
      if (path[0] === _slash && path[1] === _slash) {
        const idx = path.indexOf(_slash, 2);
        if (idx === -1) {
          authority = path.substring(2);
          path = _slash;
        }
        else {
          authority = path.substring(2, idx);
          path = path.substring(idx) || _slash;
        }
      }
      return new Uri('file', authority, path, _empty, _empty);
    }
    /**
     * Creates a new URI from its components and validates the result.
     *
     * @param components Object with `scheme`, `authority`, `path`, `query` and `fragment`.
     * @returns The new URI.
     */
    static from(components) {
      const result = new Uri(components.scheme, components.authority, components.path, components.query, components.fragment);
      _validateUri(result, true);
      return result;
    }
    /**
     * Join a URI path with path fragments and normalizes the resulting path.
     *
     * @param uri The input URI.
     * @param pathFragment The path fragment to add to the URI path.
     * @returns The resulting URI.
     * @throws {Error} When `uri` has an empty path.
     */
    static joinPath(uri, ...pathFragment) {
      if (!uri.path) {
        throw new Error(`[UriError]: cannot call joinPath on URI without path`);
      }
      let newPath;
      if (isWindows && uri.scheme === 'file') {
        // On windows, file URIs are joined as win32 file system paths.
        newPath = URI.file(paths.win32.join(uriToFsPath(uri, true), ...pathFragment)).path;
      }
      else {
        newPath = paths.posix.join(uri.path, ...pathFragment);
      }
      return uri.with({ path: newPath });
    }
    // ---- printing/externalize ---------------------------
    /**
     * Creates a string representation for this URI. It's guaranteed that calling
     * `URI.parse` with the result of this function creates an URI which is equal
     * to this URI.
     *
     * * The result shall *not* be used for display purposes but for externalization or transport.
     * * The result is percent-encoded; the encoding mostly ignores the
     *   scheme-specific encoding rules.
     *
     * @param skipEncoding Do not encode the result, default is `false`
     */
    toString(skipEncoding = false) {
      return _asFormatted(this, skipEncoding);
    }
    /**
     * @returns This URI itself.
     */
    toJSON() {
      return this;
    }
    /**
     * Turns serialized URI data back into a URI.
     *
     * @param data Falsy values and URI instances are returned as they are; any
     * other value is treated as a components object.
     * @returns The revived URI, or `data` itself.
     */
    static revive(data) {
      if (!data) {
        return data;
      }
      else if (data instanceof URI) {
        return data;
      }
      else {
        const result = new Uri(data);
        result._formatted = data.external;
        // The serialized fsPath is only reused when its separator marker
        // matches the marker of the current platform.
        result._fsPath = data._sep === _pathSepMarker ? data.fsPath : null;
        return result;
      }
    }
  }
  // Marker that is serialized next to a cached fsPath: `1` on windows and
  // `undefined` elsewhere. `URI.revive` compares it before reusing that fsPath.
  const _pathSepMarker = isWindows ? 1 : undefined;
  /**
   * This class exists so that URI is compatible with vscode.Uri (API).
   * On top of URI it caches the encoded string form and the file system path.
   */
  class Uri extends URI {
    constructor(...args) {
      super(...args);
      // Lazily computed caches, filled by `toString()` and `fsPath`.
      this._formatted = null;
      this._fsPath = null;
    }
    /**
     * Same as `URI#fsPath`, computed on first access and then cached.
     */
    get fsPath() {
      if (this._fsPath) {
        return this._fsPath;
      }
      this._fsPath = uriToFsPath(this, false);
      return this._fsPath;
    }
    /**
     * Same as `URI#toString`. Only the encoded form is cached.
     *
     * @param skipEncoding Do not encode the result, default is `false`
     */
    toString(skipEncoding = false) {
      if (skipEncoding) {
        // we don't cache that
        return _asFormatted(this, true);
      }
      if (!this._formatted) {
        this._formatted = _asFormatted(this, false);
      }
      return this._formatted;
    }
    /**
     * Creates a plain object for serialization. It holds the marshalling id,
     * the cached state (when present) and every non-empty component.
     */
    toJSON() {
      const res = {
        $mid: 1 /* MarshalledId.Uri */
      };
      // cached state
      if (this._fsPath) {
        res.fsPath = this._fsPath;
        res._sep = _pathSepMarker;
      }
      if (this._formatted) {
        res.external = this._formatted;
      }
      // uri components, only the non-empty ones and in this order
      for (const key of ['path', 'scheme', 'authority', 'query', 'fragment']) {
        if (this[key]) {
          res[key] = this[key];
        }
      }
      return res;
    }
  }
  // Percent-encoded forms of the reserved characters
  // (https://tools.ietf.org/html/rfc3986#section-2.2) and of the space
  // character, keyed by char code.
  const encodeTable = {
    58: '%3A', // :
    47: '%2F', // /
    63: '%3F', // ?
    35: '%23', // #
    91: '%5B', // [
    93: '%5D', // ]
    64: '%40', // @
    33: '%21', // !
    36: '%24', // $
    38: '%26', // &
    39: '%27', // '
    40: '%28', // (
    41: '%29', // )
    42: '%2A', // *
    43: '%2B', // +
    44: '%2C', // ,
    59: '%3B', // ;
    61: '%3D', // =
    32: '%20', // space
  };
  /**
   * Percent-encodes a URI component. Unreserved characters are kept, characters
   * listed in `encodeTable` are replaced by their table entry, and every other
   * run of characters is handed to the native `encodeURIComponent`.
   *
   * @param uriComponent The component to encode.
   * @param isPath When truthy, `/` is kept as is.
   * @param isAuthority When truthy, `[`, `]` and `:` are kept as is.
   * @returns The encoded component, or `uriComponent` itself when nothing needed encoding.
   */
  function encodeURIComponentFast(uriComponent, isPath, isAuthority) {
    // Stays undefined for as long as the input can be returned untouched.
    let encoded = undefined;
    // Start of a pending run for the native encoder, -1 when there is none.
    let nativeRunStart = -1;
    // Encodes the pending run, which ends right before `end`, if there is one.
    const flushNativeRun = (end) => {
      if (nativeRunStart === -1) {
        return;
      }
      encoded += encodeURIComponent(uriComponent.substring(nativeRunStart, end));
      nativeRunStart = -1;
    };
    // unreserved characters: https://tools.ietf.org/html/rfc3986#section-2.3
    // plus the characters that the flags allow to pass.
    const isKept = (code) => (code >= 97 /* a */ && code <= 122 /* z */)
      || (code >= 65 /* A */ && code <= 90 /* Z */)
      || (code >= 48 /* 0 */ && code <= 57 /* 9 */)
      || code === 45 /* - */
      || code === 46 /* . */
      || code === 95 /* _ */
      || code === 126 /* ~ */
      || (isPath && code === 47 /* / */)
      || (isAuthority && (code === 91 /* [ */ || code === 93 /* ] */ || code === 58 /* : */));
    const length = uriComponent.length;
    for (let index = 0; index < length; index++) {
      const code = uriComponent.charCodeAt(index);
      if (isKept(code)) {
        flushNativeRun(index);
        // Copy only once a new string has been started.
        if (encoded !== undefined) {
          encoded += uriComponent.charAt(index);
        }
        continue;
      }
      // encoding needed, start the new string with everything seen so far
      if (encoded === undefined) {
        encoded = uriComponent.substring(0, index);
      }
      const escaped = encodeTable[code];
      if (escaped !== undefined) {
        flushNativeRun(index);
        encoded += escaped;
      }
      else if (nativeRunStart === -1) {
        // use native encode only when needed
        nativeRunStart = index;
      }
    }
    flushNativeRun(length);
    return encoded !== undefined ? encoded : uriComponent;
  }
  /**
   * Encodes only the characters `#` and `?`, using their `encodeTable` entries,
   * and leaves everything else as it is.
   *
   * @param path The string to encode.
   * @returns The string with every `#` and `?` percent-encoded.
   */
  function encodeURIComponentMinimal(path) {
    return path.replace(/[#?]/g, (char) => encodeTable[char.charCodeAt(0)]);
  }
  /**
   * Compute `fsPath` for the given uri
   *
   * @param uri Object with `scheme`, `authority` and `path`.
   * @param keepDriveLetterCasing When falsy, a windows drive letter is lower-cased.
   * @returns The file system path, with backslashes as separator when `isWindows` is set.
   */
  export function uriToFsPath(uri, keepDriveLetterCasing) {
    const { scheme, authority, path } = uri;
    const isUncPath = authority && path.length > 1 && scheme === 'file';
    // Slash, one ascii letter, colon - e.g. the path of file:///c:/far/boo
    const startsWithDrive = /^\/[A-Za-z]:/.test(path);
    let fsPath;
    if (isUncPath) {
      // unc path: file://shares/c$/far/boo
      fsPath = `//${authority}${path}`;
    }
    else if (!startsWithDrive) {
      // other path
      fsPath = path;
    }
    else if (keepDriveLetterCasing) {
      // windows drive letter, only the leading slash is dropped
      fsPath = path.slice(1);
    }
    else {
      // windows drive letter: file:///c:/far/boo
      fsPath = path[1].toLowerCase() + path.slice(2);
    }
    return isWindows ? fsPath.replace(/\//g, '\\') : fsPath;
  }
  /**
   * Create the external version of a uri
   *
   * @param uri Object with `scheme`, `authority`, `path`, `query` and `fragment`.
   * @param skipEncoding When falsy, components are encoded with
   * `encodeURIComponentFast`. When truthy, `encodeURIComponentMinimal` is used
   * and the fragment is written as it is.
   * @returns The string form of the uri.
   */
  function _asFormatted(uri, skipEncoding) {
    const encode = skipEncoding ? encodeURIComponentMinimal : encodeURIComponentFast;
    const { scheme, path, query, fragment } = uri;
    let { authority } = uri;
    const pieces = [];
    if (scheme) {
      pieces.push(scheme, ':');
    }
    if (authority || scheme === 'file') {
      pieces.push(_slash, _slash);
    }
    if (authority) {
      const atIndex = authority.indexOf('@');
      if (atIndex !== -1) {
        // <user>@<auth>
        const userinfo = authority.slice(0, atIndex);
        authority = authority.slice(atIndex + 1);
        const passwordIndex = userinfo.lastIndexOf(':');
        if (passwordIndex === -1) {
          pieces.push(encode(userinfo, false, false));
        }
        else {
          // <user>:<pass>@<auth>
          pieces.push(encode(userinfo.slice(0, passwordIndex), false, false));
          pieces.push(':');
          pieces.push(encode(userinfo.slice(passwordIndex + 1), false, true));
        }
        pieces.push('@');
      }
      authority = authority.toLowerCase();
      const portIndex = authority.lastIndexOf(':');
      if (portIndex === -1) {
        pieces.push(encode(authority, false, true));
      }
      else {
        // <auth>:<port>, the port (with its colon) is written without encoding
        pieces.push(encode(authority.slice(0, portIndex), false, true));
        pieces.push(authority.slice(portIndex));
      }
    }
    if (path) {
      // lower-case windows drive letters in /C:/fff or C:/fff
      const normalizedPath = path.replace(/^(\/?)([A-Z]):/, (_whole, slash, drive) => `${slash}${drive.toLowerCase()}:`);
      // encode the rest of the path
      pieces.push(encode(normalizedPath, true, false));
    }
    if (query) {
      pieces.push('?', encode(query, false, false));
    }
    if (fragment) {
      pieces.push('#', skipEncoding ? fragment : encodeURIComponentFast(fragment, false, false));
    }
    return pieces.join('');
  }
  // --- decode
  /**
   * Decodes a percent-encoded string and tolerates broken escape sequences.
   * Whenever `decodeURIComponent` rejects the remaining input, its first three
   * characters are kept as they are and decoding continues behind them.
   *
   * This is a loop rather than a recursion on purpose: with one stack frame per
   * rejected triple, a long run of undecodable escapes overflows the call stack.
   *
   * @param str The string to decode.
   * @returns The decoded string; undecodable parts are returned unchanged.
   */
  function decodeURIComponentGraceful(str) {
    // Text that was given up on and is passed through verbatim.
    let keptAsIs = '';
    let remaining = str;
    for (;;) {
      try {
        return keptAsIs + decodeURIComponent(remaining);
      }
      catch (_a) {
        if (remaining.length > 3) {
          keptAsIs += remaining.slice(0, 3);
          remaining = remaining.slice(3);
          continue;
        }
        return keptAsIs + remaining;
      }
    }
  }
  // One or more consecutive `%XX` sequences, where X is any ascii letter or digit.
  const _rEncodedAsHex = /(%[0-9A-Za-z][0-9A-Za-z])+/g;
  /**
   * Decodes every run of `%XX` sequences in `str` with `decodeURIComponentGraceful`.
   *
   * @param str The string to decode.
   * @returns The decoded string, or `str` itself when it contains no such sequence.
   */
  function percentDecode(str) {
    const hasEscapes = str.match(_rEncodedAsHex) !== null;
    return hasEscapes
      ? str.replace(_rEncodedAsHex, (run) => decodeURIComponentGraceful(run))
      : str;
  }