-
-
Notifications
You must be signed in to change notification settings - Fork 32k
url: handle "unsafe" characters properly in pathToFileURL
#54545
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0bf1481
a962bb8
ff4bc8e
c2e4091
551907e
6cf5db1
7dd02ee
87f2f61
ce38d50
27b6477
fcfd1f7
94ce4a2
8e2ebb9
dcb75fa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1498,44 +1498,75 @@ function fileURLToPath(path, options = kEmptyObject) { | |
return (windows ?? isWindows) ? getPathFromURLWin32(path) : getPathFromURLPosix(path); | ||
} | ||
|
||
// The following characters are percent-encoded when converting from file path | ||
// to URL: | ||
// - %: The percent character is the only character not encoded by the | ||
// `pathname` setter. | ||
// - \: Backslash is encoded on non-windows platforms since it's a valid | ||
// character but the `pathname` setter replaces it with a forward slash. | ||
// - LF: The newline character is stripped out by the `pathname` setter. | ||
// (See whatwg/url#419) | ||
// - CR: The carriage return character is also stripped out by the `pathname` | ||
// setter. | ||
// - TAB: The tab character is also stripped out by the `pathname` setter. | ||
// RFC1738 defines the following chars as "unsafe" for URLs | ||
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues | ||
const percentRegEx = /%/g; | ||
const backslashRegEx = /\\/g; | ||
const newlineRegEx = /\n/g; | ||
const carriageReturnRegEx = /\r/g; | ||
const tabRegEx = /\t/g; | ||
const questionRegex = /\?/g; | ||
const quoteRegEx = /"/g; | ||
const hashRegex = /#/g; | ||
const spaceRegEx = / /g; | ||
const questionMarkRegex = /\?/g; | ||
const openSquareBracketRegEx = /\[/g; | ||
const backslashRegEx = /\\/g; | ||
const closeSquareBracketRegEx = /]/g; | ||
const caretRegEx = /\^/g; | ||
const verticalBarRegEx = /\|/g; | ||
const tildeRegEx = /~/g; | ||
|
||
function encodePathChars(filepath, options = kEmptyObject) { | ||
const windows = options?.windows; | ||
if (StringPrototypeIndexOf(filepath, '%') !== -1) | ||
if (StringPrototypeIncludes(filepath, '%')) { | ||
filepath = RegExpPrototypeSymbolReplace(percentRegEx, filepath, '%25'); | ||
// In posix, backslash is a valid character in paths: | ||
if (!(windows ?? isWindows) && StringPrototypeIndexOf(filepath, '\\') !== -1) | ||
filepath = RegExpPrototypeSymbolReplace(backslashRegEx, filepath, '%5C'); | ||
if (StringPrototypeIndexOf(filepath, '\n') !== -1) | ||
} | ||
|
||
if (StringPrototypeIncludes(filepath, '\t')) { | ||
filepath = RegExpPrototypeSymbolReplace(tabRegEx, filepath, '%09'); | ||
} | ||
if (StringPrototypeIncludes(filepath, '\n')) { | ||
filepath = RegExpPrototypeSymbolReplace(newlineRegEx, filepath, '%0A'); | ||
if (StringPrototypeIndexOf(filepath, '\r') !== -1) | ||
} | ||
if (StringPrototypeIncludes(filepath, '\r')) { | ||
filepath = RegExpPrototypeSymbolReplace(carriageReturnRegEx, filepath, '%0D'); | ||
if (StringPrototypeIndexOf(filepath, '\t') !== -1) | ||
filepath = RegExpPrototypeSymbolReplace(tabRegEx, filepath, '%09'); | ||
} | ||
if (StringPrototypeIncludes(filepath, ' ')) { | ||
filepath = RegExpPrototypeSymbolReplace(spaceRegEx, filepath, '%20'); | ||
} | ||
if (StringPrototypeIncludes(filepath, '"')) { | ||
filepath = RegExpPrototypeSymbolReplace(quoteRegEx, filepath, '%22'); | ||
} | ||
if (StringPrototypeIncludes(filepath, '#')) { | ||
filepath = RegExpPrototypeSymbolReplace(hashRegex, filepath, '%23'); | ||
} | ||
if (StringPrototypeIncludes(filepath, '?')) { | ||
filepath = RegExpPrototypeSymbolReplace(questionMarkRegex, filepath, '%3F'); | ||
} | ||
if (StringPrototypeIncludes(filepath, '[')) { | ||
filepath = RegExpPrototypeSymbolReplace(openSquareBracketRegEx, filepath, '%5B'); | ||
} | ||
// Backslashes must be special-cased on Windows, where they are treated as a path separator. | ||
if (!options.windows && StringPrototypeIncludes(filepath, '\\')) { | ||
filepath = RegExpPrototypeSymbolReplace(backslashRegEx, filepath, '%5C'); | ||
} | ||
if (StringPrototypeIncludes(filepath, ']')) { | ||
filepath = RegExpPrototypeSymbolReplace(closeSquareBracketRegEx, filepath, '%5D'); | ||
} | ||
if (StringPrototypeIncludes(filepath, '^')) { | ||
filepath = RegExpPrototypeSymbolReplace(caretRegEx, filepath, '%5E'); | ||
} | ||
if (StringPrototypeIncludes(filepath, '|')) { | ||
filepath = RegExpPrototypeSymbolReplace(verticalBarRegEx, filepath, '%7C'); | ||
} | ||
if (StringPrototypeIncludes(filepath, '~')) { | ||
filepath = RegExpPrototypeSymbolReplace(tildeRegEx, filepath, '%7E'); | ||
} | ||
|
||
return filepath; | ||
} | ||
|
||
function pathToFileURL(filepath, options = kEmptyObject) { | ||
const windows = options?.windows; | ||
if ((windows ?? isWindows) && StringPrototypeStartsWith(filepath, '\\\\')) { | ||
const windows = options?.windows ?? isWindows; | ||
if (windows && StringPrototypeStartsWith(filepath, '\\\\')) { | ||
const outURL = new URL('file://'); | ||
// UNC path format: \\server\share\resource | ||
// Handle extended UNC path and standard UNC path | ||
|
@@ -1566,20 +1597,9 @@ function pathToFileURL(filepath, options = kEmptyObject) { | |
); | ||
return outURL; | ||
} | ||
let resolved = (windows ?? isWindows) ? path.win32.resolve(filepath) : path.posix.resolve(filepath); | ||
|
||
// Call encodePathChars first to avoid encoding % again for ? and #. | ||
resolved = encodePathChars(resolved, { windows }); | ||
const resolved = windows ? path.win32.resolve(filepath) : path.posix.resolve(filepath); | ||
|
||
// Question mark and hash characters should be included in the pathname. | ||
// Therefore, encoding is required to avoid parsing them in different states. | ||
// This is done as an optimization to avoid creating a URL instance and | ||
// later triggering the pathname setter, which impacts performance. | ||
if (StringPrototypeIndexOf(resolved, '?') !== -1) | ||
resolved = RegExpPrototypeSymbolReplace(questionRegex, resolved, '%3F'); | ||
if (StringPrototypeIndexOf(resolved, '#') !== -1) | ||
resolved = RegExpPrototypeSymbolReplace(hashRegex, resolved, '%23'); | ||
return new URL(`file://${resolved}`); | ||
return new URL(`file://${encodePathChars(resolved, { windows })}`); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Having a third option in the URL constructor, preferably a symbol, and later doing the same operation in C++ would be the fastest solution imho. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You're probably correct, but that's very much out of my depth :) |
||
} | ||
|
||
function toPathIfFileURL(fileURLOrPath) { | ||
|
Uh oh!
There was an error while loading. Please reload this page.