nsURLHelper.cpp - mozsearch

mozilla-central/netwerk/base/nsURLHelper.cpp (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* vim:set ts=4 sw=2 sts=2 et cindent: */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsURLHelper.h"

#include "mozilla/Encoding.h"

#include "mozilla/RangedPtr.h"

#include "mozilla/TextUtils.h"

#include <algorithm>

#include <iterator>

#include "nsASCIIMask.h"

#include "nsIFile.h"

#include "nsIURLParser.h"

#include "nsCOMPtr.h"

#include "nsCRT.h"

#include "nsNetCID.h"

#include "mozilla/Preferences.h"

#include "prnetdb.h"

#include "mozilla/StaticPrefs_network.h"

#include "mozilla/Tokenizer.h"

#include "nsEscape.h"

#include "nsDOMString.h"

#include "mozilla/net/rust_helper.h"

#include "mozilla/net/DNS.h"

using namespace mozilla;

//----------------------------------------------------------------------------

// Init/Shutdown

//----------------------------------------------------------------------------

static bool gInitialized = false;

static StaticRefPtr<nsIURLParser> gNoAuthURLParser;

static StaticRefPtr<nsIURLParser> gAuthURLParser;

static StaticRefPtr<nsIURLParser> gStdURLParser;

static void InitGlobals() {

  nsCOMPtr<nsIURLParser> parser;

  parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);

  NS_ASSERTION(parser, "failed getting 'noauth' url parser");

  if (parser) {

    gNoAuthURLParser = parser;

  parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID);

  NS_ASSERTION(parser, "failed getting 'auth' url parser");

  if (parser) {

    gAuthURLParser = parser;

  parser = do_GetService(NS_STDURLPARSER_CONTRACTID);

  NS_ASSERTION(parser, "failed getting 'std' url parser");

  if (parser) {

    gStdURLParser = parser;

  gInitialized = true;

void net_ShutdownURLHelper() {

  if (gInitialized) {

    gInitialized = false;

  gNoAuthURLParser = nullptr;

  gAuthURLParser = nullptr;

  gStdURLParser = nullptr;

//----------------------------------------------------------------------------

// nsIURLParser getters

//----------------------------------------------------------------------------

nsIURLParser* net_GetAuthURLParser() {

  if (!gInitialized) InitGlobals();

  return gAuthURLParser;

nsIURLParser* net_GetNoAuthURLParser() {

  if (!gInitialized) InitGlobals();

  return gNoAuthURLParser;

nsIURLParser* net_GetStdURLParser() {

  if (!gInitialized) InitGlobals();

  return gStdURLParser;

//---------------------------------------------------------------------------

// GetFileFromURLSpec implementations

//---------------------------------------------------------------------------

nsresult net_GetURLSpecFromDir(nsIFile* aFile, nsACString& result) {

  nsAutoCString escPath;

  nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);

  if (NS_FAILED(rv)) return rv;

  if (escPath.Last() != '/') {

    escPath += '/';

  result = escPath;

  return NS_OK;

nsresult net_GetURLSpecFromFile(nsIFile* aFile, nsACString& result) {

  nsAutoCString escPath;

  nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);

  if (NS_FAILED(rv)) return rv;

  // if this file references a directory, then we need to ensure that the

  // URL ends with a slash.  this is important since it affects the rules

  // for relative URL resolution when this URL is used as a base URL.

  // if the file does not exist, then we make no assumption about its type,

  // and simply leave the URL unmodified.

  if (escPath.Last() != '/') {

    bool dir;

    rv = aFile->IsDirectory(&dir);

    if (NS_SUCCEEDED(rv) && dir) escPath += '/';

  result = escPath;

  return NS_OK;

//----------------------------------------------------------------------------

// file:// URL parsing

//----------------------------------------------------------------------------

nsresult net_ParseFileURL(const nsACString& inURL, nsACString& outDirectory,

                          nsACString& outFileBaseName,

                          nsACString& outFileExtension) {

  nsresult rv;

  if (inURL.Length() >

      (uint32_t)StaticPrefs::network_standard_url_max_length()) {

    return NS_ERROR_MALFORMED_URI;

  outDirectory.Truncate();

  outFileBaseName.Truncate();

  outFileExtension.Truncate();

  const nsPromiseFlatCString& flatURL = PromiseFlatCString(inURL);

  const char* url = flatURL.get();

  nsAutoCString scheme;

  rv = net_ExtractURLScheme(flatURL, scheme);

  if (NS_FAILED(rv)) return rv;

  if (!scheme.EqualsLiteral("file")) {

    NS_ERROR("must be a file:// url");

    return NS_ERROR_UNEXPECTED;

  nsIURLParser* parser = net_GetNoAuthURLParser();

  NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);

  uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos;

  int32_t pathLen, filepathLen, directoryLen, basenameLen, extensionLen;

  // invoke the parser to extract the URL path

  rv = parser->ParseURL(url, flatURL.Length(), nullptr,

                        nullptr,           // don't care about scheme

                        nullptr, nullptr,  // don't care about authority

                        &pathPos, &pathLen);

  if (NS_FAILED(rv)) return rv;

  // invoke the parser to extract filepath from the path

  rv = parser->ParsePath(url + pathPos, pathLen, &filepathPos, &filepathLen,

                         nullptr, nullptr,   // don't care about query

                         nullptr, nullptr);  // don't care about ref

  if (NS_FAILED(rv)) return rv;

  filepathPos += pathPos;

  // invoke the parser to extract the directory and filename from filepath

  rv = parser->ParseFilePath(url + filepathPos, filepathLen, &directoryPos,

                             &directoryLen, &basenamePos, &basenameLen,

                             &extensionPos, &extensionLen);

  if (NS_FAILED(rv)) return rv;

  if (directoryLen > 0) {

    outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen);

  if (basenameLen > 0) {

    outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen);

  if (extensionLen > 0) {

    outFileExtension =

        Substring(inURL, filepathPos + extensionPos, extensionLen);

  // since we are using a no-auth url parser, there will never be a host

  // XXX not strictly true... file://localhost/foo/bar.html is a valid URL

  return NS_OK;

//----------------------------------------------------------------------------

// path manipulation functions

//----------------------------------------------------------------------------

// Replace all /./ with a / while resolving URLs

// But only till #?

void net_CoalesceDirs(netCoalesceFlags flags, char* path) {

  /* Stolen from the old netlib's mkparse.c.

   * modifies a url of the form   /foo/../foo1  ->  /foo1

   *                       and    /foo/./foo1   ->  /foo/foo1

   *                       and    /foo/foo1/..  ->  /foo/

*/

  char* fwdPtr = path;

  char* urlPtr = path;

  char* lastslash = path;

  uint32_t traversal = 0;

  uint32_t special_ftp_len = 0;

  MOZ_ASSERT(*path == '/', "We expect the path to begin with /");

  if (*path != '/') {

    return;

  /* Remember if this url is a special ftp one: */

  if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT) {

    /* some schemes (for example ftp) have the speciality that

       the path can begin // or /%2F to mark the root of the

       servers filesystem, a simple / only marks the root relative

       to the user loging in. We remember the length of the marker */

    if (nsCRT::strncasecmp(path, "/%2F", 4) == 0) {

      special_ftp_len = 4;

    } else if (strncmp(path, "//", 2) == 0) {

      special_ftp_len = 2;

  /* find the last slash before # or ? */

  for (; (*fwdPtr != '\0') && (*fwdPtr != '?') && (*fwdPtr != '#'); ++fwdPtr) {

  /* found nothing, but go back one only */

  /* if there is something to go back to */

  if (fwdPtr != path && *fwdPtr == '\0') {

    --fwdPtr;

  /* search the slash */

  for (; (fwdPtr != path) && (*fwdPtr != '/'); --fwdPtr) {

  lastslash = fwdPtr;

  fwdPtr = path;

  /* replace all %2E or %2e with . in the path */

  /* but stop at lastslash if non null */

  for (; (*fwdPtr != '\0') && (*fwdPtr != '?') && (*fwdPtr != '#') &&

         (*lastslash == '\0' || fwdPtr != lastslash);

       ++fwdPtr) {

    if (*fwdPtr == '%' && *(fwdPtr + 1) == '2' &&

        (*(fwdPtr + 2) == 'E' || *(fwdPtr + 2) == 'e')) {

      *urlPtr++ = '.';

      ++fwdPtr;

      ++fwdPtr;

    } else {

      *urlPtr++ = *fwdPtr;

  // Copy remaining stuff past the #?;

  for (; *fwdPtr != '\0'; ++fwdPtr) {

    *urlPtr++ = *fwdPtr;

  *urlPtr = '\0';  // terminate the url

  // start again, this time for real

  fwdPtr = path;

  urlPtr = path;

  for (; (*fwdPtr != '\0') && (*fwdPtr != '?') && (*fwdPtr != '#'); ++fwdPtr) {

    if (*fwdPtr == '/' && *(fwdPtr + 1) == '.' && *(fwdPtr + 2) == '/') {

      // remove . followed by slash

      ++fwdPtr;

    } else if (*fwdPtr == '/' && *(fwdPtr + 1) == '.' && *(fwdPtr + 2) == '.' &&

               (*(fwdPtr + 3) == '/' ||

                *(fwdPtr + 3) == '\0' ||  // This will take care of

                *(fwdPtr + 3) == '?' ||   // something like foo/bar/..#sometag

                *(fwdPtr + 3) == '#')) {

      // remove foo/..

      // reverse the urlPtr to the previous slash if possible

      // if url does not allow relative root then drop .. above root

      // otherwise retain them in the path

      if (traversal > 0 || !(flags & NET_COALESCE_ALLOW_RELATIVE_ROOT)) {

        if (urlPtr != path) urlPtr--;  // we must be going back at least by one

        for (; *urlPtr != '/' && urlPtr != path; urlPtr--) {

          ;  // null body

        --traversal;  // count back

        // forward the fwdPtr past the ../

        fwdPtr += 2;

        // if we have reached the beginning of the path

        // while searching for the previous / and we remember

        // that it is an url that begins with /%2F then

        // advance urlPtr again by 3 chars because /%2F already

        // marks the root of the path

        if (urlPtr == path && special_ftp_len > 3) {

          ++urlPtr;

          ++urlPtr;

          ++urlPtr;

        // special case if we have reached the end

        // to preserve the last /

        if (*fwdPtr == '.' && *(fwdPtr + 1) == '\0') ++urlPtr;

      } else {

        // there are to much /.. in this path, just copy them instead.

        // forward the urlPtr past the /.. and copying it

        // However if we remember it is an url that starts with

        // /%2F and urlPtr just points at the "F" of "/%2F" then do

        // not overwrite it with the /, just copy .. and move forward

        // urlPtr.

        if (special_ftp_len > 3 && urlPtr == path + special_ftp_len - 1) {

          ++urlPtr;

        } else {

          *urlPtr++ = *fwdPtr;

        ++fwdPtr;

        *urlPtr++ = *fwdPtr;

        ++fwdPtr;

        *urlPtr++ = *fwdPtr;

    } else {

      // count the hierachie, but only if we do not have reached

      // the root of some special urls with a special root marker

      if (*fwdPtr == '/' && *(fwdPtr + 1) != '.' &&

          (special_ftp_len != 2 || *(fwdPtr + 1) != '/')) {

        traversal++;

      // copy the url incrementaly

      *urlPtr++ = *fwdPtr;

/*

   *  Now lets remove trailing . case

   *     /foo/foo1/.   ->  /foo/foo1/

*/

  if ((urlPtr > (path + 1)) && (*(urlPtr - 1) == '.') &&

      (*(urlPtr - 2) == '/')) {

    urlPtr--;

  // Before we start copying past ?#, we must make sure we don't overwrite

  // the first / character.  If fwdPtr is also unchanged, just copy everything

  // (this shouldn't happen unless we could get in here without a leading

  // slash).

  if (urlPtr == path && fwdPtr != path) {

    urlPtr++;

  // Copy remaining stuff past the #?;

  for (; *fwdPtr != '\0'; ++fwdPtr) {

    *urlPtr++ = *fwdPtr;

  *urlPtr = '\0';  // terminate the url

//----------------------------------------------------------------------------

// scheme fu

//----------------------------------------------------------------------------

static bool net_IsValidSchemeChar(const char aChar) {

  return mozilla::net::rust_net_is_valid_scheme_char(aChar);

/* Extract URI-Scheme if possible */

nsresult net_ExtractURLScheme(const nsACString& inURI, nsACString& scheme) {

  nsACString::const_iterator start, end;

  inURI.BeginReading(start);

  inURI.EndReading(end);

  // Strip C0 and space from begining

  while (start != end) {

    if ((uint8_t)*start > 0x20) {

      break;

    start++;

  Tokenizer p(Substring(start, end), "\r\n\t");

  p.Record();

  if (!p.CheckChar(IsAsciiAlpha)) {

    // First char must be alpha

    return NS_ERROR_MALFORMED_URI;

  while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {

    // Skip valid scheme characters or \r\n\t

  if (!p.CheckChar(':')) {

    return NS_ERROR_MALFORMED_URI;

  p.Claim(scheme);

  scheme.StripTaggedASCII(ASCIIMask::MaskCRLFTab());

  ToLowerCase(scheme);

  return NS_OK;

bool net_IsValidScheme(const nsACString& scheme) {

  return mozilla::net::rust_net_is_valid_scheme(&scheme);

bool net_IsAbsoluteURL(const nsACString& uri) {

  nsACString::const_iterator start, end;

  uri.BeginReading(start);

  uri.EndReading(end);

  // Strip C0 and space from begining

  while (start != end) {

    if ((uint8_t)*start > 0x20) {

      break;

    start++;

  Tokenizer p(Substring(start, end), "\r\n\t");

  // First char must be alpha

  if (!p.CheckChar(IsAsciiAlpha)) {

    return false;

  while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {

    // Skip valid scheme characters or \r\n\t

  if (!p.CheckChar(':')) {

    return false;

  p.SkipWhites();

  if (!p.CheckChar('/')) {

    return false;

  p.SkipWhites();

  if (p.CheckChar('/')) {

    // aSpec is really absolute. Ignore aBaseURI in this case

    return true;

  return false;

void net_FilterURIString(const nsACString& input, nsACString& result) {

  result.Truncate();

  const auto* start = input.BeginReading();

  const auto* end = input.EndReading();

  // Trim off leading and trailing invalid chars.

  auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; };

  const auto* newStart = std::find_if(start, end, charFilter);

  const auto* newEnd =

      std::find_if(std::reverse_iterator<decltype(end)>(end),

                   std::reverse_iterator<decltype(newStart)>(newStart),

                   charFilter)

          .base();

  // Check if chars need to be stripped.

  bool needsStrip = false;

  const ASCIIMaskArray& mask = ASCIIMask::MaskCRLFTab();

  for (const auto* itr = start; itr != end; ++itr) {

    if (ASCIIMask::IsMasked(mask, *itr)) {

      needsStrip = true;

      break;

  // Just use the passed in string rather than creating new copies if no

  // changes are necessary.

  if (newStart == start && newEnd == end && !needsStrip) {

    result = input;

    return;

  result.Assign(Substring(newStart, newEnd));

  if (needsStrip) {

    result.StripTaggedASCII(mask);

nsresult net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags,

                                const ASCIIMaskArray& aFilterMask,

                                nsACString& aResult) {

  aResult.Truncate();

  const auto* start = aInput.BeginReading();

  const auto* end = aInput.EndReading();

  // Trim off leading and trailing invalid chars.

  auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; };

  const auto* newStart = std::find_if(start, end, charFilter);

  const auto* newEnd =

      std::find_if(std::reverse_iterator<decltype(end)>(end),

                   std::reverse_iterator<decltype(newStart)>(newStart),

                   charFilter)

          .base();

  return NS_EscapeAndFilterURL(Substring(newStart, newEnd), aFlags,

                               &aFilterMask, aResult, fallible);

#if defined(XP_WIN)

bool net_NormalizeFileURL(const nsACString& aURL, nsCString& aResultBuf) {

  bool writing = false;

  nsACString::const_iterator beginIter, endIter;

  aURL.BeginReading(beginIter);

  aURL.EndReading(endIter);

  const char *s, *begin = beginIter.get();

  for (s = begin; s != endIter.get(); ++s) {

    if (*s == '\\') {

      writing = true;

      if (s > begin) aResultBuf.Append(begin, s - begin);

      aResultBuf += '/';

      begin = s + 1;

    if (*s == '#') {

      // Don't normalize any backslashes following the hash.

      s = endIter.get();

      break;

  if (writing && s > begin) aResultBuf.Append(begin, s - begin);

  return writing;

#endif

//----------------------------------------------------------------------------

// miscellaneous (i.e., stuff that should really be elsewhere)

//----------------------------------------------------------------------------

static inline void ToLower(char& c) {

  if ((unsigned)(c - 'A') <= (unsigned)('Z' - 'A')) c += 'a' - 'A';

void net_ToLowerCase(char* str, uint32_t length) {

  for (char* end = str + length; str < end; ++str) ToLower(*str);

void net_ToLowerCase(char* str) {

  for (; *str; ++str) ToLower(*str);

char* net_FindCharInSet(const char* iter, const char* stop, const char* set) {

  for (; iter != stop && *iter; ++iter) {

    for (const char* s = set; *s; ++s) {

      if (*iter == *s) return (char*)iter;

  return (char*)iter;

char* net_FindCharNotInSet(const char* iter, const char* stop,

                           const char* set) {

repeat:

  for (const char* s = set; *s; ++s) {

    if (*iter == *s) {

      if (++iter == stop) break;

      goto repeat;

  return (char*)iter;

char* net_RFindCharNotInSet(const char* stop, const char* iter,

                            const char* set) {

  --iter;

  --stop;

  if (iter == stop) return (char*)iter;

repeat:

  for (const char* s = set; *s; ++s) {

    if (*iter == *s) {

      if (--iter == stop) break;

      goto repeat;

  return (char*)iter;

#define HTTP_LWS " \t"

// Return the index of the closing quote of the string, if any

static uint32_t net_FindStringEnd(const nsCString& flatStr,

                                  uint32_t stringStart, char stringDelim) {

  NS_ASSERTION(stringStart < flatStr.Length() &&

                   flatStr.CharAt(stringStart) == stringDelim &&

                   (stringDelim == '"' || stringDelim == '\''),

               "Invalid stringStart");

  const char set[] = {stringDelim, '\\', '\0'};

  do {

    // stringStart points to either the start quote or the last

    // escaped char (the char following a '\\')

    // Write to searchStart here, so that when we get back to the

    // top of the loop right outside this one we search from the

    // right place.

    uint32_t stringEnd = flatStr.FindCharInSet(set, stringStart + 1);

    if (stringEnd == uint32_t(kNotFound)) return flatStr.Length();

    if (flatStr.CharAt(stringEnd) == '\\') {

      // Hit a backslash-escaped char.  Need to skip over it.

      stringStart = stringEnd + 1;

      if (stringStart == flatStr.Length()) return stringStart;

      // Go back to looking for the next escape or the string end

      continue;

    return stringEnd;

  } while (true);

  MOZ_ASSERT_UNREACHABLE("How did we get here?");

  return flatStr.Length();

static uint32_t net_FindMediaDelimiter(const nsCString& flatStr,

                                       uint32_t searchStart, char delimiter) {

  do {

    // searchStart points to the spot from which we should start looking

    // for the delimiter.

    const char delimStr[] = {delimiter, '"', '\0'};

    uint32_t curDelimPos = flatStr.FindCharInSet(delimStr, searchStart);

    if (curDelimPos == uint32_t(kNotFound)) return flatStr.Length();

    char ch = flatStr.CharAt(curDelimPos);

    if (ch == delimiter) {

      // Found delimiter

      return curDelimPos;

    // We hit the start of a quoted string.  Look for its end.

    searchStart = net_FindStringEnd(flatStr, curDelimPos, ch);

    if (searchStart == flatStr.Length()) return searchStart;

    ++searchStart;

    // searchStart now points to the first char after the end of the

    // string, so just go back to the top of the loop and look for

    // |delimiter| again.

  } while (true);

  MOZ_ASSERT_UNREACHABLE("How did we get here?");

  return flatStr.Length();

// aOffset should be added to aCharsetStart and aCharsetEnd if this

// function sets them.

static void net_ParseMediaType(const nsACString& aMediaTypeStr,

                               nsACString& aContentType,

                               nsACString& aContentCharset, int32_t aOffset,

                               bool* aHadCharset, int32_t* aCharsetStart,

                               int32_t* aCharsetEnd, bool aStrict) {

  const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);

  const char* start = flatStr.get();

  const char* end = start + flatStr.Length();

  // Trim LWS leading and trailing whitespace from type.

  const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);

  const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";");

  const char* charset = "";

  const char* charsetEnd = charset;

  int32_t charsetParamStart = 0;

  int32_t charsetParamEnd = 0;

  uint32_t consumed = typeEnd - type;

  // Iterate over parameters

  bool typeHasCharset = false;

  uint32_t paramStart = flatStr.FindChar(';', typeEnd - start);

  if (paramStart != uint32_t(kNotFound)) {

    // We have parameters.  Iterate over them.

    uint32_t curParamStart = paramStart + 1;

    do {

      uint32_t curParamEnd =

          net_FindMediaDelimiter(flatStr, curParamStart, ';');

      const char* paramName = net_FindCharNotInSet(

          start + curParamStart, start + curParamEnd, HTTP_LWS);

      static const char charsetStr[] = "charset=";

      if (nsCRT::strncasecmp(paramName, charsetStr, sizeof(charsetStr) - 1) ==

          0) {

        charset = paramName + sizeof(charsetStr) - 1;

        charsetEnd = start + curParamEnd;

        typeHasCharset = true;

        charsetParamStart = curParamStart - 1;

        charsetParamEnd = curParamEnd;

      consumed = curParamEnd;

      curParamStart = curParamEnd + 1;

    } while (curParamStart < flatStr.Length());

  bool charsetNeedsQuotedStringUnescaping = false;

  if (typeHasCharset) {

    // Trim LWS leading and trailing whitespace from charset.

    charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);

    if (*charset == '"') {

      charsetNeedsQuotedStringUnescaping = true;

      charsetEnd =

          start + net_FindStringEnd(flatStr, charset - start, *charset);

      charset++;

      NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");

    } else {

      charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";");

  // if the server sent "*/*", it is meaningless, so do not store it.

  // also, if type is the same as aContentType, then just update the

  // charset.  however, if charset is empty and aContentType hasn't

  // changed, then don't wipe-out an existing aContentCharset.  We

  // also want to reject a mime-type if it does not include a slash.

  // some servers give junk after the charset parameter, which may

  // include a comma, so this check makes us a bit more tolerant.

  if (type != typeEnd && memchr(type, '/', typeEnd - type) != nullptr &&

      (aStrict ? (net_FindCharNotInSet(start + consumed, end, HTTP_LWS) == end)

               : (strncmp(type, "*/*", typeEnd - type) != 0))) {

    // Common case here is that aContentType is empty

    bool eq = !aContentType.IsEmpty() &&

              aContentType.Equals(Substring(type, typeEnd),

                                  nsCaseInsensitiveCStringComparator);

    if (!eq) {

      aContentType.Assign(type, typeEnd - type);

      ToLowerCase(aContentType);

    if ((!eq && *aHadCharset) || typeHasCharset) {

      *aHadCharset = true;

      if (charsetNeedsQuotedStringUnescaping) {

        // parameters using the "quoted-string" syntax need

        // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)

        aContentCharset.Truncate();

        for (const char* c = charset; c != charsetEnd; c++) {

          if (*c == '\\' && c + 1 != charsetEnd) {

            // eat escape

            c++;

          aContentCharset.Append(*c);

      } else {

        aContentCharset.Assign(charset, charsetEnd - charset);

      if (typeHasCharset) {

        *aCharsetStart = charsetParamStart + aOffset;

        *aCharsetEnd = charsetParamEnd + aOffset;

    // Only set a new charset position if this is a different type

    // from the last one we had and it doesn't already have a

    // charset param.  If this is the same type, we probably want

    // to leave the charset position on its first occurrence.

    if (!eq && !typeHasCharset) {

      int32_t charsetStart = int32_t(paramStart);

      if (charsetStart == kNotFound) charsetStart = flatStr.Length();

      *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;

#undef HTTP_LWS

void net_ParseContentType(const nsACString& aHeaderStr,

                          nsACString& aContentType, nsACString& aContentCharset,

                          bool* aHadCharset) {

  int32_t dummy1, dummy2;

  net_ParseContentType(aHeaderStr, aContentType, aContentCharset, aHadCharset,

                       &dummy1, &dummy2);

void net_ParseContentType(const nsACString& aHeaderStr,

                          nsACString& aContentType, nsACString& aContentCharset,

                          bool* aHadCharset, int32_t* aCharsetStart,

                          int32_t* aCharsetEnd) {

//

  // Augmented BNF (from RFC 2616 section 3.7):

//

  //   header-value = media-type *( LWS "," LWS media-type )

  //   media-type   = type "/" subtype *( LWS ";" LWS parameter )

  //   type         = token

  //   subtype      = token

  //   parameter    = attribute "=" value

  //   attribute    = token

  //   value        = token | quoted-string

//

//

  // Examples:

//

  //   text/html

  //   text/html, text/html

  //   text/html,text/html; charset=ISO-8859-1

  //   text/html,text/html; charset="ISO-8859-1"

  //   text/html;charset=ISO-8859-1, text/html

  //   text/html;charset='ISO-8859-1', text/html

  //   application/octet-stream

//

  *aHadCharset = false;

  const nsCString& flatStr = PromiseFlatCString(aHeaderStr);

  // iterate over media-types.  Note that ',' characters can happen

  // inside quoted strings, so we need to watch out for that.

  uint32_t curTypeStart = 0;

  do {

    // curTypeStart points to the start of the current media-type.  We want

    // to look for its end.

    uint32_t curTypeEnd = net_FindMediaDelimiter(flatStr, curTypeStart, ',');

    // At this point curTypeEnd points to the spot where the media-type

    // starting at curTypeEnd ends.  Time to parse that!

    net_ParseMediaType(

        Substring(flatStr, curTypeStart, curTypeEnd - curTypeStart),

        aContentType, aContentCharset, curTypeStart, aHadCharset, aCharsetStart,

        aCharsetEnd, false);

    // And let's move on to the next media-type

    curTypeStart = curTypeEnd + 1;

  } while (curTypeStart < flatStr.Length());

void net_ParseRequestContentType(const nsACString& aHeaderStr,

                                 nsACString& aContentType,

                                 nsACString& aContentCharset,

                                 bool* aHadCharset) {

//

  // Augmented BNF (from RFC 7231 section 3.1.1.1):

//

  //   media-type   = type "/" subtype *( OWS ";" OWS parameter )

  //   type         = token

  //   subtype      = token

  //   parameter    = token "=" ( token / quoted-string )

//

  // Examples:

//

  //   text/html

  //   text/html; charset=ISO-8859-1

  //   text/html; charset="ISO-8859-1"

  //   application/octet-stream

//

  aContentType.Truncate();

  aContentCharset.Truncate();

  *aHadCharset = false;

  const nsCString& flatStr = PromiseFlatCString(aHeaderStr);

  // At this point curTypeEnd points to the spot where the media-type

  // starting at curTypeEnd ends.  Time to parse that!

  nsAutoCString contentType, contentCharset;

  bool hadCharset = false;

  int32_t dummy1, dummy2;

  uint32_t typeEnd = net_FindMediaDelimiter(flatStr, 0, ',');

  if (typeEnd != flatStr.Length()) {

    // We have some stuff left at the end, so this is not a valid

    // request Content-Type header.

    return;

  net_ParseMediaType(flatStr, contentType, contentCharset, 0, &hadCharset,

                     &dummy1, &dummy2, true);

  aContentType = contentType;

  aContentCharset = contentCharset;

  *aHadCharset = hadCharset;

bool net_IsValidHostName(const nsACString& host) {

  // The host name is limited to 253 ascii characters.

  if (host.Length() > 253) {

    return false;

  const char* end = host.EndReading();

  // Use explicit whitelists to select which characters we are

  // willing to send to lower-level DNS logic. This is more

  // self-documenting, and can also be slightly faster than the

  // blacklist approach, since DNS names are the common case, and

  // the commonest characters will tend to be near the start of

  // the list.

  // Whitelist for DNS names (RFC 1035) with extra characters added

  // for pragmatic reasons "$+_"

  // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2

  if (net_FindCharNotInSet(host.BeginReading(), end,

                           "abcdefghijklmnopqrstuvwxyz"

                           ".-0123456789"

                           "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end) {

    return true;

  // Might be a valid IPv6 link-local address containing a percent sign

  return mozilla::net::HostIsIPLiteral(host);

bool net_IsValidIPv4Addr(const nsACString& aAddr) {

  return mozilla::net::rust_net_is_valid_ipv4_addr(&aAddr);

bool net_IsValidIPv6Addr(const nsACString& aAddr) {

  return mozilla::net::rust_net_is_valid_ipv6_addr(&aAddr);

bool net_GetDefaultStatusTextForCode(uint16_t aCode, nsACString& aOutText) {

  switch (aCode) {

      // start with the most common

    case 200:

      aOutText.AssignLiteral("OK");

      break;

    case 404:

      aOutText.AssignLiteral("Not Found");

      break;

    case 301:

      aOutText.AssignLiteral("Moved Permanently");

      break;

    case 304:

      aOutText.AssignLiteral("Not Modified");

      break;

    case 307:

      aOutText.AssignLiteral("Temporary Redirect");

      break;

    case 500:

      aOutText.AssignLiteral("Internal Server Error");

      break;

      // also well known

    case 100:

      aOutText.AssignLiteral("Continue");

      break;

    case 101:

      aOutText.AssignLiteral("Switching Protocols");

      break;

    case 201:

      aOutText.AssignLiteral("Created");

      break;

    case 202:

      aOutText.AssignLiteral("Accepted");

      break;

    case 203:

      aOutText.AssignLiteral("Non Authoritative");

      break;

    case 204:

      aOutText.AssignLiteral("No Content");

      break;

    case 205:

      aOutText.AssignLiteral("Reset Content");

      break;

    case 206:

      aOutText.AssignLiteral("Partial Content");

      break;

    case 207:

      aOutText.AssignLiteral("Multi-Status");

      break;

    case 208:

      aOutText.AssignLiteral("Already Reported");

      break;

    case 300:

      aOutText.AssignLiteral("Multiple Choices");

      break;

    case 302:

      aOutText.AssignLiteral("Found");

      break;

    case 303:

      aOutText.AssignLiteral("See Other");

      break;

    case 305:

      aOutText.AssignLiteral("Use Proxy");

      break;

    case 308:

      aOutText.AssignLiteral("Permanent Redirect");

      break;

    case 400:

      aOutText.AssignLiteral("Bad Request");

      break;

    case 401:

      aOutText.AssignLiteral("Unauthorized");

      break;

    case 402:

      aOutText.AssignLiteral("Payment Required");

      break;

    case 403:

      aOutText.AssignLiteral("Forbidden");

      break;

    case 405:

      aOutText.AssignLiteral("Method Not Allowed");

      break;

    case 406:

      aOutText.AssignLiteral("Not Acceptable");

      break;

    case 407:

      aOutText.AssignLiteral("Proxy Authentication Required");

      break;

    case 408:

      aOutText.AssignLiteral("Request Timeout");

      break;

    case 409:

      aOutText.AssignLiteral("Conflict");

      break;

    case 410:

      aOutText.AssignLiteral("Gone");

      break;

    case 411:

      aOutText.AssignLiteral("Length Required");

      break;

    case 412:

      aOutText.AssignLiteral("Precondition Failed");

      break;

    case 413:

      aOutText.AssignLiteral("Request Entity Too Large");

      break;

    case 414:

      aOutText.AssignLiteral("Request URI Too Long");

      break;

    case 415:

      aOutText.AssignLiteral("Unsupported Media Type");

      break;

    case 416:

      aOutText.AssignLiteral("Requested Range Not Satisfiable");

      break;

    case 417:

      aOutText.AssignLiteral("Expectation Failed");

      break;

    case 418:

      aOutText.AssignLiteral("I'm a teapot");

      break;

    case 421:

      aOutText.AssignLiteral("Misdirected Request");

      break;

    case 422:

      aOutText.AssignLiteral("Unprocessable Entity");

      break;

    case 423:

      aOutText.AssignLiteral("Locked");

      break;

    case 424:

      aOutText.AssignLiteral("Failed Dependency");

      break;

    case 425:

      aOutText.AssignLiteral("Too Early");

      break;

    case 426:

      aOutText.AssignLiteral("Upgrade Required");

      break;

    case 428:

      aOutText.AssignLiteral("Precondition Required");

      break;

    case 429:

      aOutText.AssignLiteral("Too Many Requests");

      break;

    case 431:

      aOutText.AssignLiteral("Request Header Fields Too Large");

      break;

    case 451:

      aOutText.AssignLiteral("Unavailable For Legal Reasons");

      break;

    case 501:

      aOutText.AssignLiteral("Not Implemented");

      break;

    case 502:

      aOutText.AssignLiteral("Bad Gateway");

      break;

    case 503:

      aOutText.AssignLiteral("Service Unavailable");

      break;

    case 504:

      aOutText.AssignLiteral("Gateway Timeout");

      break;

    case 505:

      aOutText.AssignLiteral("HTTP Version Unsupported");

      break;

    case 506:

      aOutText.AssignLiteral("Variant Also Negotiates");

      break;

    case 507:

      aOutText.AssignLiteral("Insufficient Storage ");

      break;

    case 508:

      aOutText.AssignLiteral("Loop Detected");

      break;

    case 510:

      aOutText.AssignLiteral("Not Extended");

      break;

    case 511:

      aOutText.AssignLiteral("Network Authentication Required");

      break;

    default:

      aOutText.AssignLiteral("No Reason Phrase");

      return false;

  return true;

static auto MakeNameMatcher(const nsACString& aName) {

  return [&aName](const auto& param) { return param.mKey.Equals(aName); };

static void AssignMaybeInvalidUTF8String(const nsACString& aSource,

                                         nsACString& aDest) {

  if (NS_FAILED(UTF_8_ENCODING->DecodeWithoutBOMHandling(aSource, aDest))) {

    MOZ_CRASH("Out of memory when converting URL params.");

namespace mozilla {

bool URLParams::Has(const nsACString& aName) {

  return std::any_of(mParams.cbegin(), mParams.cend(), MakeNameMatcher(aName));

bool URLParams::Has(const nsACString& aName, const nsACString& aValue) {

  return std::any_of(

      mParams.cbegin(), mParams.cend(), [&aName, &aValue](const auto& param) {

        return param.mKey.Equals(aName) && param.mValue.Equals(aValue);

});

void URLParams::Get(const nsACString& aName, nsACString& aRetval) {

  aRetval.SetIsVoid(true);

  const auto end = mParams.cend();

  const auto it = std::find_if(mParams.cbegin(), end, MakeNameMatcher(aName));

  if (it != end) {

    aRetval.Assign(it->mValue);

void URLParams::GetAll(const nsACString& aName, nsTArray<nsCString>& aRetval) {

  aRetval.Clear();

  for (uint32_t i = 0, len = mParams.Length(); i < len; ++i) {

    if (mParams[i].mKey.Equals(aName)) {

      aRetval.AppendElement(mParams[i].mValue);

void URLParams::Append(const nsACString& aName, const nsACString& aValue) {

  Param* param = mParams.AppendElement();

  param->mKey = aName;

  param->mValue = aValue;

void URLParams::Set(const nsACString& aName, const nsACString& aValue) {

  Param* param = nullptr;

  for (uint32_t i = 0, len = mParams.Length(); i < len;) {

    if (!mParams[i].mKey.Equals(aName)) {

      ++i;

      continue;

    if (!param) {

      param = &mParams[i];

      ++i;

      continue;

    // Remove duplicates.

    mParams.RemoveElementAt(i);

    --len;

  if (!param) {

    param = mParams.AppendElement();

    param->mKey = aName;

  param->mValue = aValue;

void URLParams::Delete(const nsACString& aName) {

  mParams.RemoveElementsBy(

      [&aName](const auto& param) { return param.mKey.Equals(aName); });

void URLParams::Delete(const nsACString& aName, const nsACString& aValue) {

  mParams.RemoveElementsBy([&aName, &aValue](const auto& param) {

    return param.mKey.Equals(aName) && param.mValue.Equals(aValue);

});

/* static */

void URLParams::DecodeString(const nsACString& aInput, nsACString& aOutput) {

  const char* const end = aInput.EndReading();

  for (const char* iter = aInput.BeginReading(); iter != end;) {

    // replace '+' with U+0020

    if (*iter == '+') {

      aOutput.Append(' ');

      ++iter;

      continue;

    // Percent decode algorithm

    if (*iter == '%') {

      const char* const first = iter + 1;

      const char* const second = first + 1;

      const auto asciiHexDigit = [](char x) {

        return (x >= 0x41 && x <= 0x46) || (x >= 0x61 && x <= 0x66) ||

               (x >= 0x30 && x <= 0x39);

};

      const auto hexDigit = [](char x) {

        return x >= 0x30 && x <= 0x39

                   ? x - 0x30

                   : (x >= 0x41 && x <= 0x46 ? x - 0x37 : x - 0x57);

};

      if (first != end && second != end && asciiHexDigit(*first) &&

          asciiHexDigit(*second)) {

        aOutput.Append(hexDigit(*first) * 16 + hexDigit(*second));

        iter = second + 1;

      } else {

        aOutput.Append('%');

        ++iter;

      continue;

    aOutput.Append(*iter);

    ++iter;

  AssignMaybeInvalidUTF8String(aOutput, aOutput);

/* static */

bool URLParams::ParseNextInternal(const char*& aStart, const char* const aEnd,

                                  bool aShouldDecode, nsACString* aOutputName,

                                  nsACString* aOutputValue) {

  nsDependentCSubstring string;

  const char* const iter = std::find(aStart, aEnd, '&');

  if (iter != aEnd) {

    string.Rebind(aStart, iter);

    aStart = iter + 1;

  } else {

    string.Rebind(aStart, aEnd);

    aStart = aEnd;

  if (string.IsEmpty()) {

    return false;

  const auto* const eqStart = string.BeginReading();

  const auto* const eqEnd = string.EndReading();

  const auto* const eqIter = std::find(eqStart, eqEnd, '=');

  nsDependentCSubstring name;

  nsDependentCSubstring value;

  if (eqIter != eqEnd) {

    name.Rebind(eqStart, eqIter);

    value.Rebind(eqIter + 1, eqEnd);

  } else {

    name.Rebind(string, 0);

  if (aShouldDecode) {

    DecodeString(name, *aOutputName);

    DecodeString(value, *aOutputValue);

    return true;

  AssignMaybeInvalidUTF8String(name, *aOutputName);

  AssignMaybeInvalidUTF8String(value, *aOutputValue);

  return true;

/* static */

bool URLParams::Extract(const nsACString& aInput, const nsACString& aName,

                        nsACString& aValue) {

  aValue.SetIsVoid(true);

  return !URLParams::Parse(

      aInput, true,

      [&aName, &aValue](const nsACString& name, nsCString&& value) {

        if (aName == name) {

          aValue = std::move(value);

          return false;

        return true;

});

void URLParams::ParseInput(const nsACString& aInput) {

  // Remove all the existing data before parsing a new input.

  DeleteAll();

  URLParams::Parse(aInput, true, [this](nsCString&& name, nsCString&& value) {

    mParams.AppendElement(Param{std::move(name), std::move(value)});

    return true;

});

void URLParams::SerializeString(const nsACString& aInput, nsACString& aValue) {

  const unsigned char* p = (const unsigned char*)aInput.BeginReading();

  const unsigned char* end = p + aInput.Length();

  while (p != end) {

    // ' ' to '+'

    if (*p == 0x20) {

      aValue.Append(0x2B);

      // Percent Encode algorithm

    } else if (*p == 0x2A || *p == 0x2D || *p == 0x2E ||

               (*p >= 0x30 && *p <= 0x39) || (*p >= 0x41 && *p <= 0x5A) ||

               *p == 0x5F || (*p >= 0x61 && *p <= 0x7A)) {

      aValue.Append(*p);

    } else {

      aValue.AppendPrintf("%%%.2X", *p);

    ++p;

void URLParams::Serialize(nsACString& aValue, bool aEncode) const {

  aValue.Truncate();

  bool first = true;

  for (uint32_t i = 0, len = mParams.Length(); i < len; ++i) {

    if (first) {

      first = false;

    } else {

      aValue.Append('&');

    // XXX Actually, it's not necessary to build a new string object. Generally,

    // such cases could just convert each codepoint one-by-one.

    if (aEncode) {

      SerializeString(mParams[i].mKey, aValue);

      aValue.Append('=');

      SerializeString(mParams[i].mValue, aValue);

    } else {

      aValue.Append(mParams[i].mKey);

      aValue.Append('=');

      aValue.Append(mParams[i].mValue);

void URLParams::Sort() {

  mParams.StableSort([](const Param& lhs, const Param& rhs) {

    // FIXME(emilio, bug 1888901): The URLSearchParams.sort() spec requires

    // comparing by utf-16 code points... That's a bit unfortunate, maybe we

    // can optimize the string conversions here?

    return Compare(NS_ConvertUTF8toUTF16(lhs.mKey),

                   NS_ConvertUTF8toUTF16(rhs.mKey));

});

}  // namespace mozilla