nsFind.cpp - mozsearch

mozilla-central/toolkit/components/find/nsFind.cpp (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* vim: set ts=8 sts=2 et sw=2 tw=80: */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// #define DEBUG_FIND 1

#include "nsFind.h"

#include "mozilla/Likely.h"

#include "nsIContent.h"

#include "nsINode.h"

#include "nsIFrame.h"

#include "nsIFormControl.h"

#include "nsTextFragment.h"

#include "nsString.h"

#include "nsAtom.h"

#include "nsServiceManagerUtils.h"

#include "nsUnicharUtils.h"

#include "nsUnicodeProperties.h"

#include "nsCRT.h"

#include "nsRange.h"

#include "nsReadableUtils.h"

#include "nsContentUtils.h"

#include "mozilla/DebugOnly.h"

#include "mozilla/TextEditor.h"

#include "mozilla/dom/ChildIterator.h"

#include "mozilla/dom/TreeIterator.h"

#include "mozilla/dom/Element.h"

#include "mozilla/dom/HTMLOptionElement.h"

#include "mozilla/dom/HTMLSelectElement.h"

#include "mozilla/dom/Text.h"

#include "mozilla/intl/Segmenter.h"

#include "mozilla/intl/UnicodeProperties.h"

#include "mozilla/StaticPrefs_browser.h"

using namespace mozilla;

using namespace mozilla::dom;

using namespace mozilla::unicode;

// Yikes!  Casting a char to unichar can fill with ones!

#define CHAR_TO_UNICHAR(c) ((char16_t)(unsigned char)c)

#define CH_SHY ((char16_t)0xAD)

// nsFind::Find casts CH_SHY to char before calling StripChars

// This works correctly if and only if CH_SHY <= 255

static_assert(CH_SHY <= 255, "CH_SHY is not an ascii character");

NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsFind)

  NS_INTERFACE_MAP_ENTRY(nsIFind)

  NS_INTERFACE_MAP_ENTRY(nsISupports)

NS_INTERFACE_MAP_END

NS_IMPL_CYCLE_COLLECTING_ADDREF(nsFind)

NS_IMPL_CYCLE_COLLECTING_RELEASE(nsFind)

NS_IMPL_CYCLE_COLLECTION(nsFind)

#ifdef DEBUG_FIND

#  define DEBUG_FIND_PRINTF(...) printf(__VA_ARGS__)

#else

#  define DEBUG_FIND_PRINTF(...) /* nothing */

#endif

static nsIContent& AnonymousSubtreeRootParentOrHost(const nsINode& aNode) {

  MOZ_ASSERT(aNode.IsInNativeAnonymousSubtree());

  return *aNode.GetClosestNativeAnonymousSubtreeRootParentOrHost();

static void DumpNode(const nsINode* aNode) {

#ifdef DEBUG_FIND

  if (!aNode) {

    printf(">>>> Node: NULL\n");

    return;

  nsString nodeName = aNode->NodeName();

  if (aNode->IsText()) {

    nsAutoString newText;

    aNode->AsText()->AppendTextTo(newText);

    printf(">>>> Text node (node name %s): '%s'\n",

           NS_LossyConvertUTF16toASCII(nodeName).get(),

           NS_LossyConvertUTF16toASCII(newText).get());

  } else {

    printf(">>>> Node: %s\n", NS_LossyConvertUTF16toASCII(nodeName).get());

#endif

static bool IsBlockNode(const nsIContent* aContent) {

  if (aContent->IsElement() && aContent->AsElement()->IsDisplayContents()) {

    return false;

  // FIXME(emilio): This is dubious...

  if (aContent->IsAnyOfHTMLElements(nsGkAtoms::img, nsGkAtoms::hr,

                                    nsGkAtoms::th, nsGkAtoms::td)) {

    return true;

  nsIFrame* frame = aContent->GetPrimaryFrame();

  if (!frame) {

    return false;

  const auto& disp = *frame->StyleDisplay();

  // We also treat internal table frames as "blocks" for the purpose of

  // locating boundaries for searches (see

  // https://bugzilla.mozilla.org/show_bug.cgi?id=1645990).

  return disp.IsBlockOutsideStyle() || disp.IsInternalTableStyleExceptCell();

static bool IsDisplayedNode(const nsINode* aNode) {

  if (!aNode->IsContent()) {

    return false;

  if (aNode->AsContent()->GetPrimaryFrame()) {

    return true;

  // If there's no frame, it's not displayed, unless it's display: contents.

  return aNode->IsElement() && aNode->AsElement()->IsDisplayContents();

static bool IsRubyAnnotationNode(const nsINode* aNode) {

  if (!aNode->IsContent()) {

    return false;

  nsIFrame* frame = aNode->AsContent()->GetPrimaryFrame();

  if (!frame) {

    return false;

  StyleDisplay display = frame->StyleDisplay()->mDisplay;

  return StyleDisplay::RubyText == display ||

         StyleDisplay::RubyTextContainer == display;

static bool IsFindableNode(const nsINode* aNode) {

  if (!IsDisplayedNode(aNode)) {

    return false;

  nsIFrame* frame = aNode->AsContent()->GetPrimaryFrame();

  if (!frame) {

    // display: contents

    return true;

  if (frame->StyleUI()->IsInert()) {

    return false;

  if (!frame->StyleVisibility()->IsVisible()) {

    return false;

  const bool isContentVisibilityHidden =

      frame->HidesContent(nsIFrame::IncludeContentVisibility::Hidden) ||

      frame->IsHiddenByContentVisibilityOnAnyAncestor(

          nsIFrame::IncludeContentVisibility::Hidden);

  if (isContentVisibilityHidden) {

    return frame->IsHiddenUntilFoundOrClosedDetails();

  return true;

static bool ShouldFindAnonymousContent(const nsIContent& aContent) {

  MOZ_ASSERT(aContent.IsInNativeAnonymousSubtree());

  nsIContent& host = AnonymousSubtreeRootParentOrHost(aContent);

  if (const auto* formControl = nsIFormControl::FromNode(&host)) {

    if (formControl->IsTextControl(/* aExcludePassword = */ true)) {

      // Only editable NAC in textfields should be findable. That is, we want to

      // find "bar" in `<input value="bar">`, but not in `<input

      // placeholder="bar">`.

//

      // TODO(emilio): Ideally we could lift this restriction, but we hide the

      // placeholder text at paint-time instead of with CSS visibility, which

      // means that we won't skip it even if invisible. We should probably fix

      // that.

      return aContent.IsEditable();

    // We want to avoid finding in password inputs anyway, as it is confusing.

    if (formControl->ControlType() == FormControlType::InputPassword) {

      return false;

  return true;

static bool SkipNode(const nsIContent* aContent) {

  const nsIContent* content = aContent;

  while (content) {

    if (!IsDisplayedNode(content) || content->IsComment() ||

        content->IsAnyOfHTMLElements(nsGkAtoms::select)) {

      DEBUG_FIND_PRINTF("Skipping node: ");

      DumpNode(content);

      return true;

    // Skip option nodes if their select is a combo box, or if they

    // have no select (somehow).

    if (const auto* option = HTMLOptionElement::FromNode(content)) {

      auto* select = HTMLSelectElement::FromNodeOrNull(option->GetParent());

      if (!select || select->IsCombobox()) {

        DEBUG_FIND_PRINTF("Skipping node: ");

        DumpNode(content);

        return true;

    if (StaticPrefs::browser_find_ignore_ruby_annotations() &&

        IsRubyAnnotationNode(content)) {

      DEBUG_FIND_PRINTF("Skipping node: ");

      DumpNode(content);

      return true;

    if (content->IsInNativeAnonymousSubtree() &&

        !ShouldFindAnonymousContent(*content)) {

      DEBUG_FIND_PRINTF("Skipping node: ");

      DumpNode(content);

      return true;

    // Only climb to the nearest block node

    if (IsBlockNode(content)) {

      return false;

    content = content->GetFlattenedTreeParent();

  return false;

static const nsIContent* GetBlockParent(const Text& aNode) {

  for (const nsIContent* current = aNode.GetFlattenedTreeParent(); current;

       current = current->GetFlattenedTreeParent()) {

    if (IsBlockNode(current)) {

      return current;

  return nullptr;

static bool NonTextNodeForcesBreak(const nsINode& aNode) {

  nsIFrame* frame =

      aNode.IsContent() ? aNode.AsContent()->GetPrimaryFrame() : nullptr;

  // TODO(emilio): Maybe we should treat <br> more like a space instead of a

  // forced break? Unclear...

  return frame && frame->IsBrFrame();

static bool ForceBreakBetweenText(const Text& aPrevious, const Text& aNext) {

  return GetBlockParent(aPrevious) != GetBlockParent(aNext);

struct nsFind::State final {

  State(bool aFindBackward, nsIContent& aRoot, const RangeBoundary& aStartPoint)

      : mFindBackward(aFindBackward),

        mInitialized(false),

        mFoundBreak(false),

        mIterOffset(-1),

        mIterator(aRoot),

        mStartPoint(aStartPoint) {}

  void PositionAt(Text& aNode) { mIterator.Seek(aNode); }

  bool ForcedBreak() const { return mFoundBreak; }

  Text* GetCurrentNode() const {

    if (MOZ_UNLIKELY(!mInitialized)) {

      return nullptr;

    nsINode* node = mIterator.GetCurrent();

    MOZ_ASSERT(!node || node->IsText());

    return node ? node->GetAsText() : nullptr;

  Text* GetNextNode(bool aAlreadyMatching) {

    if (MOZ_UNLIKELY(!mInitialized)) {

      MOZ_ASSERT(!aAlreadyMatching);

      Initialize();

    } else {

      Advance(Initializing::No, aAlreadyMatching);

      mIterOffset = -1;  // mIterOffset only really applies to the first node.

    return GetCurrentNode();

 private:

  enum class Initializing { No, Yes };

  // Advance to the next visible text-node.

  void Advance(Initializing, bool aAlreadyMatching);

  // Sets up the first node position and offset.

  void Initialize();

  // Returns whether the node should be used (true) or skipped over (false)

  static bool AnalyzeNode(const nsINode& aNode, const Text* aPrev,

                          bool aAlreadyMatching, bool* aForcedBreak) {

    if (!aNode.IsText()) {

      *aForcedBreak = *aForcedBreak || NonTextNodeForcesBreak(aNode);

      return false;

    if (SkipNode(aNode.AsText())) {

      return false;

    *aForcedBreak = *aForcedBreak ||

                    (aPrev && ForceBreakBetweenText(*aPrev, *aNode.AsText()));

    if (*aForcedBreak) {

      // If we've already found a break, we can stop searching and just use this

      // node, regardless of the subtree we're on. There's no point to continue

      // a match across different blocks, regardless of which subtree you're

      // looking into.

      return true;

    // TODO(emilio): We can't represent ranges that span native anonymous /

    // shadow tree boundaries, but if we did the following check could / should

    // be removed.

    if (aAlreadyMatching && aPrev &&

        !nsContentUtils::IsInSameAnonymousTree(&aNode, aPrev)) {

      // As an optimization, if we were finding inside an native-anonymous

      // subtree (like a pseudo-element), we know those trees are "atomic" and

      // can't have any other subtrees in between, so we can just break the

      // match here.

      if (aPrev->IsInNativeAnonymousSubtree()) {

        *aForcedBreak = true;

        return true;

      // Otherwise we can skip the node and keep looking past this subtree.

      return false;

    return true;

  const bool mFindBackward;

  // Whether we've called GetNextNode() at least once.

  bool mInitialized;

 public:

  // Whether we've found a forced break from the last node to the current one.

  bool mFoundBreak;

  // An offset into the text of the first node we're starting to search at.

  int mIterOffset;

  TreeIterator<StyleChildrenIterator> mIterator;

  // These are only needed for the first GetNextNode() call.

  const RangeBoundary& mStartPoint;

};

void nsFind::State::Advance(Initializing aInitializing, bool aAlreadyMatching) {

  MOZ_ASSERT(mInitialized);

  // The Advance() call during Initialize() calls us in a partial state, where

  // mIterator may not be pointing to a text node yet. aInitializing prevents

  // tripping the invariants of GetCurrentNode().

  const Text* prev =

      aInitializing == Initializing::Yes ? nullptr : GetCurrentNode();

  mFoundBreak = false;

  while (true) {

    nsIContent* current =

        mFindBackward ? mIterator.GetPrev() : mIterator.GetNext();

    if (!current) {

      return;

    if (AnalyzeNode(*current, prev, aAlreadyMatching, &mFoundBreak)) {

      break;

void nsFind::State::Initialize() {

  MOZ_ASSERT(!mInitialized);

  mInitialized = true;

  mIterOffset = mFindBackward ? -1 : 0;

  nsINode* container = mStartPoint.GetContainer();

  // Set up ourselves at the first node we want to start searching at.

  nsIContent* beginning = mStartPoint.GetChildAtOffset();

  if (beginning) {

    mIterator.Seek(*beginning);

    // If the start point is pointing to a node, when looking backwards we'd

    // start looking at the children of that node, and we don't really want

    // that. When looking forwards, we look at the next sibling afterwards.

    if (mFindBackward) {

      mIterator.GetPrevSkippingChildren();

  } else if (container && container->IsContent()) {

    // Text-only range, or pointing to past the end of the node, for example.

    mIterator.Seek(*container->AsContent());

  nsINode* current = mIterator.GetCurrent();

  if (!current) {

    return;

  const bool kAlreadyMatching = false;

  if (!AnalyzeNode(*current, nullptr, kAlreadyMatching, &mFoundBreak)) {

    Advance(Initializing::Yes, kAlreadyMatching);

    current = mIterator.GetCurrent();

    if (!current) {

      return;

  if (current != container) {

    return;

  mIterOffset = int(

      *mStartPoint.Offset(RangeBoundary::OffsetFilter::kValidOrInvalidOffsets));

class MOZ_STACK_CLASS nsFind::StateRestorer final {

 public:

  explicit StateRestorer(State& aState)

      : mState(aState),

        mIterOffset(aState.mIterOffset),

        mFoundBreak(aState.mFoundBreak),

        mCurrNode(aState.GetCurrentNode()) {}

  ~StateRestorer() {

    mState.mFoundBreak = mFoundBreak;

    mState.mIterOffset = mIterOffset;

    if (mCurrNode) {

      mState.PositionAt(*mCurrNode);

 private:

  State& mState;

  int32_t mIterOffset;

  bool mFoundBreak;

  Text* mCurrNode;

};

NS_IMETHODIMP

nsFind::GetFindBackwards(bool* aFindBackward) {

  if (!aFindBackward) {

    return NS_ERROR_NULL_POINTER;

  *aFindBackward = mFindBackward;

  return NS_OK;

NS_IMETHODIMP

nsFind::SetFindBackwards(bool aFindBackward) {

  mFindBackward = aFindBackward;

  return NS_OK;

NS_IMETHODIMP

nsFind::GetCaseSensitive(bool* aCaseSensitive) {

  if (!aCaseSensitive) {

    return NS_ERROR_NULL_POINTER;

  *aCaseSensitive = mCaseSensitive;

  return NS_OK;

NS_IMETHODIMP

nsFind::SetCaseSensitive(bool aCaseSensitive) {

  mCaseSensitive = aCaseSensitive;

  return NS_OK;

/* attribute boolean entireWord; */

NS_IMETHODIMP

nsFind::GetEntireWord(bool* aEntireWord) {

  if (!aEntireWord) return NS_ERROR_NULL_POINTER;

  *aEntireWord = mWordStartBounded && mWordEndBounded;

  return NS_OK;

NS_IMETHODIMP

nsFind::SetEntireWord(bool aEntireWord) {

  mWordStartBounded = mWordEndBounded = aEntireWord;

  return NS_OK;

NS_IMETHODIMP

nsFind::GetMatchDiacritics(bool* aMatchDiacritics) {

  if (!aMatchDiacritics) {

    return NS_ERROR_NULL_POINTER;

  *aMatchDiacritics = mMatchDiacritics;

  return NS_OK;

NS_IMETHODIMP

nsFind::SetMatchDiacritics(bool aMatchDiacritics) {

  mMatchDiacritics = aMatchDiacritics;

  return NS_OK;

// Here begins the find code. A ten-thousand-foot view of how it works: Find

// needs to be able to compare across inline (but not block) nodes, e.g. find

// for "abc" should match a<b>b</b>c. So after we've searched a node, we're not

// done with it; in the case of a partial match we may need to reset the

// iterator to go back to a previously visited node, so we always save the

// "match anchor" node and offset.

//

// Text nodes store their text in an nsTextFragment, which is effectively a

// union of a one-byte string or a two-byte string. Single and double strings

// are intermixed in the dom. We don't have string classes which can deal with

// intermixed strings, so all the handling is done explicitly here.

char32_t nsFind::DecodeChar(const char16_t* t2b, int32_t* index) const {

  char32_t c = t2b[*index];

  if (mFindBackward) {

    if (*index >= 1 && NS_IS_SURROGATE_PAIR(t2b[*index - 1], t2b[*index])) {

      c = SURROGATE_TO_UCS4(t2b[*index - 1], t2b[*index]);

      (*index)--;

  } else {

    if (NS_IS_SURROGATE_PAIR(t2b[*index], t2b[*index + 1])) {

      c = SURROGATE_TO_UCS4(t2b[*index], t2b[*index + 1]);

      (*index)++;

  return c;

bool nsFind::BreakInBetween(char32_t x, char32_t y) {

  nsAutoStringN<4> text;

  AppendUCS4ToUTF16(x, text);

  const uint32_t x16Len = text.Length();

  AppendUCS4ToUTF16(y, text);

  mWordBreakIter.Reset(text);

  return *mWordBreakIter.Seek(x16Len - 1) == x16Len;

char32_t nsFind::PeekNextChar(State& aState, bool aAlreadyMatching) const {

  // We need to restore the necessary state before this function returns.

  StateRestorer restorer(aState);

  while (true) {

    const Text* text = aState.GetNextNode(aAlreadyMatching);

    if (!text || aState.ForcedBreak()) {

      return L'\0';

    const nsTextFragment& frag = text->TextFragment();

    uint32_t len = frag.GetLength();

    if (!len) {

      continue;

    const char16_t* t2b = nullptr;

    const char* t1b = nullptr;

    if (frag.Is2b()) {

      t2b = frag.Get2b();

    } else {

      t1b = frag.Get1b();

    int32_t index = mFindBackward ? len - 1 : 0;

    return t1b ? CHAR_TO_UNICHAR(t1b[index]) : DecodeChar(t2b, &index);

#define NBSP_CHARCODE (CHAR_TO_UNICHAR(160))

// Bug 1943846: The char16_t version of nsCRT::IsAsciiSpace is not working as

// expected for characters with UTF32 > 0xFFFF. Add a check for UTF32 characters

static bool IsSpace(char16_t aChar) {

  return nsCRT::IsAsciiSpace(aChar) || aChar == NBSP_CHARCODE;

static bool IsSpace(char32_t aChar) {

  return aChar <= 0xffff && IsSpace(char16_t(aChar));

#define OVERFLOW_PINDEX (mFindBackward ? pindex < 0 : pindex > patLen)

#define DONE_WITH_PINDEX (mFindBackward ? pindex <= 0 : pindex >= patLen)

// Take nodes out of the tree with NextNode, until null (NextNode will return 0

// at the end of our range).

NS_IMETHODIMP

nsFind::Find(const nsAString& aPatText, nsRange* aSearchRange,

             nsRange* aStartPoint, nsRange* aEndPoint, nsRange** aRangeRet) {

  DEBUG_FIND_PRINTF("============== nsFind::Find('%s'%s, %p, %p, %p)\n",

                    NS_LossyConvertUTF16toASCII(aPatText).get(),

                    mFindBackward ? " (backward)" : " (forward)",

                    (void*)aSearchRange, (void*)aStartPoint, (void*)aEndPoint);

  NS_ENSURE_ARG(aSearchRange);

  NS_ENSURE_ARG(aStartPoint);

  NS_ENSURE_ARG(aEndPoint);

  NS_ENSURE_ARG_POINTER(aRangeRet);

  *aRangeRet = nullptr;

  if (RefPtr findResult = FindFromRangeBoundaries(

          aPatText, aStartPoint->StartRef(), aEndPoint->EndRef())) {

    findResult.forget(aRangeRet);

  return NS_OK;

already_AddRefed<nsRange> nsFind::FindFromRangeBoundaries(

    const nsAString& aPatText, const mozilla::RangeBoundary& aStartPoint,

    const mozilla::RangeBoundary& aEndPoint) {

  if (!aStartPoint.IsSetAndInComposedDoc() ||

      !aEndPoint.IsSetAndInComposedDoc()) {

    return nullptr;

  MOZ_ASSERT(aStartPoint.IsSetAndValid());

  MOZ_ASSERT(aEndPoint.IsSetAndValid());

  // set up node index cache. If there is no cache given from the outside

  // (for repeated calls to `Find()`), set up a local one.

  nsContentUtils::NodeIndexCache localCache;

  AutoRestore<nsContentUtils::NodeIndexCache*> restoreCache(mNodeIndexCache);

  if (!mNodeIndexCache) {

    mNodeIndexCache = &localCache;

#if MOZ_DIAGNOSTIC_ASSERT_ENABLED

  auto cmp =

      nsContentUtils::ComparePoints(aStartPoint, aEndPoint, mNodeIndexCache);

  MOZ_DIAGNOSTIC_ASSERT(cmp, "Start and end points in different trees?");

  MOZ_DIAGNOSTIC_ASSERT(*cmp != 1, "Start point must not be after end point");

#endif

  if (MOZ_UNLIKELY(aStartPoint == aEndPoint)) {

    return nullptr;

  const Document* document = aStartPoint.GetContainer()->GetComposedDoc();

  if (!document) {

    return nullptr;

  Element* root = document->GetRootElement();

  if (!root) {

    return nullptr;

  nsAutoString patAutoStr(aPatText);

  if (!mCaseSensitive) {

    ToFoldedCase(patAutoStr);

  if (!mMatchDiacritics) {

    ToNaked(patAutoStr);

  // Ignore soft hyphens in the pattern

  static const char16_t kShy[] = {CH_SHY, 0};

  patAutoStr.StripChars(kShy);

  const char16_t* patStr = patAutoStr.get();

  const int32_t patLen = int(patAutoStr.Length()) - 1;

  // If this function is called with an empty string, we should early exit.

  if (patLen < 0) {

    return nullptr;

  const int32_t patternStart = mFindBackward ? patLen : 0;

  // current offset into the pattern -- reset to beginning/end:

  int32_t pindex = patternStart;

  // Current offset into the fragment

  int32_t findex = 0;

  // Direction to move pindex and ptr*

  const int incr = mFindBackward ? -1 : 1;

  const nsTextFragment* frag = nullptr;

  int32_t fragLen = 0;

  // Pointers into the current fragment:

  const char16_t* t2b = nullptr;

  const char* t1b = nullptr;

  // Keep track of when we're in whitespace:

  // (only matters when we're matching)

  bool inWhitespace = false;

  // Place to save the range start point in case we find a match:

  Text* matchAnchorNode = nullptr;

  int32_t matchAnchorOffset = 0;

  char32_t matchAnchorChar = 0;

  // Get the end point, so we know when to end searches:

  const RangeBoundary& endPoint = mFindBackward ? aStartPoint : aEndPoint;

  char32_t c = 0;

  char32_t patc = 0;

  char32_t prevCharInMatch = 0;

  State state(mFindBackward, *root, mFindBackward ? aEndPoint : aStartPoint);

  Text* current = nullptr;

  auto EndPartialMatch = [&]() -> bool {

    // If we didn't match, go back to the beginning of patStr, and set findex

    // back to the next char after we started the current match.

//

    // There's no need to do this if we're still at the beginning of the pattern

    // (this can happen e.g. with whitespace, and prevents exponential

    // complexity when scanning a pattern that starts with whitespace).

    const bool restart = !!matchAnchorNode && pindex != patternStart;

    if (restart) {  // we're ending a partial match

      findex = matchAnchorOffset;

      state.mIterOffset = matchAnchorOffset;

      c = matchAnchorChar;

      // +incr will be added to findex when we continue

      // Are we going back to a previous node?

      if (matchAnchorNode != state.GetCurrentNode()) {

        frag = nullptr;

        state.PositionAt(*matchAnchorNode);

        DEBUG_FIND_PRINTF("Repositioned anchor node\n");

      DEBUG_FIND_PRINTF(

          "Ending a partial match; findex -> %d, mIterOffset -> %d\n", findex,

          state.mIterOffset);

    matchAnchorNode = nullptr;

    matchAnchorOffset = 0;

    matchAnchorChar = 0;

    inWhitespace = false;

    prevCharInMatch = 0;

    pindex = patternStart;

    DEBUG_FIND_PRINTF("Setting findex back to %d, pindex to %d\n", findex,

                      pindex);

    return restart;

};

  while (true) {

    DEBUG_FIND_PRINTF("Loop (pindex = %d)...\n", pindex);

    // If this is our first time on a new node, reset the pointers:

    if (!frag) {

      current = state.GetNextNode(!!matchAnchorNode);

      if (!current) {

        DEBUG_FIND_PRINTF("Reached the end, matching: %d\n", !!matchAnchorNode);

        if (EndPartialMatch()) {

          continue;

        return nullptr;

      // We have a new text content. See if we need to force a break due to

      // <br>, different blocks or what not.

      if (state.ForcedBreak()) {

        DEBUG_FIND_PRINTF("Forced break!\n");

        if (EndPartialMatch()) {

          continue;

        // This ensures word breaking thinks it has a new word, which is

        // effectively what we want.

        c = 0;

      frag = &current->TextFragment();

      fragLen = int32_t(frag->GetLength());

      // Set our starting point in this node. If we're going back to the anchor

      // node, which means that we just ended a partial match, use the saved

      // offset:

//

      // FIXME(emilio): How could current ever be the anchor node, if we had not

      // seen current so far?

      if (current == matchAnchorNode) {

        findex = matchAnchorOffset + (mFindBackward ? 1 : 0);

      } else if (state.mIterOffset >= 0) {

        findex = state.mIterOffset - (mFindBackward ? 1 : 0);

      } else {

        findex = mFindBackward ? (fragLen - 1) : 0;

      // Offset can only apply to the first node:

      state.mIterOffset = -1;

      DEBUG_FIND_PRINTF("Starting from offset %d of %d\n", findex, fragLen);

      // If this is outside the bounds of the string, then skip this node:

      if (findex < 0 || findex > fragLen - 1) {

        DEBUG_FIND_PRINTF(

            "At the end of a text node -- skipping to the next\n");

        frag = nullptr;

        continue;

      if (frag->Is2b()) {

        t2b = frag->Get2b();

        t1b = nullptr;

#ifdef DEBUG_FIND

        nsAutoString str2(t2b, fragLen);

        DEBUG_FIND_PRINTF("2 byte, '%s'\n",

                          NS_LossyConvertUTF16toASCII(str2).get());

#endif

      } else {

        t1b = frag->Get1b();

        t2b = nullptr;

#ifdef DEBUG_FIND

        nsAutoCString str1(t1b, fragLen);

        DEBUG_FIND_PRINTF("1 byte, '%s'\n", str1.get());

#endif

    } else {

      // Still on the old node. Advance the pointers, then see if we need to

      // pull a new node.

      findex += incr;

      DEBUG_FIND_PRINTF("Same node -- (%d, %d)\n", pindex, findex);

      if (mFindBackward ? (findex < 0) : (findex >= fragLen)) {

        DEBUG_FIND_PRINTF(

            "Will need to pull a new node: mAO = %d, frag len=%d\n",

            matchAnchorOffset, fragLen);

        // Done with this node.  Pull a new one.

        frag = nullptr;

        continue;

    // Have we gone past the endpoint yet? If we have, and we're not in the

    // middle of a match, return.

    if (auto cmp = nsContentUtils::ComparePoints(

            RawRangeBoundary(state.GetCurrentNode(), findex), endPoint,

            mNodeIndexCache)) {

      if ((mFindBackward && *cmp < 0) || (!mFindBackward && *cmp > 0)) {

        DEBUG_FIND_PRINTF("Reached the end and not in the middle of a match\n");

        return nullptr;

    // Save the previous character for word boundary detection

    char32_t prevChar = c;

    // The two characters we'll be comparing are c and patc. If not matching

    // diacritics, don't leave c set to a combining diacritical mark. (patc is

    // already guaranteed to not be a combining diacritical mark.)

    c = (t2b ? DecodeChar(t2b, &findex) : CHAR_TO_UNICHAR(t1b[findex]));

    if (!mMatchDiacritics && IsCombiningDiacritic(c) &&

        !intl::UnicodeProperties::IsMathOrMusicSymbol(prevChar)) {

      continue;

    patc = DecodeChar(patStr, &pindex);

    DEBUG_FIND_PRINTF(

        "Comparing '%c'=%#x to '%c'=%#x (%d of %d), findex=%d%s\n", (char)c,

        (int)c, (char)patc, (int)patc, pindex, patLen, findex,

        inWhitespace ? " (inWhitespace)" : "");

    // Do we need to go back to non-whitespace mode? If inWhitespace, then this

    // space in the pat str has already matched at least one space in the

    // document.

    if (inWhitespace && !IsSpace(c)) {

      inWhitespace = false;

      pindex += incr;

#ifdef DEBUG

      // This shouldn't happen -- if we were still matching, and we were at the

      // end of the pat string, then we should have caught it in the last

      // iteration and returned success.

      if (OVERFLOW_PINDEX) {

        NS_ASSERTION(false, "Missed a whitespace match");

#endif

      patc = DecodeChar(patStr, &pindex);

    if (!inWhitespace && IsSpace(patc)) {

      inWhitespace = true;

    } else if (!inWhitespace) {

      if (!mCaseSensitive) {

        c = ToFoldedCase(c);

      if (!mMatchDiacritics) {

        c = ToNaked(c);

    if (c == CH_SHY) {

      // ignore soft hyphens in the document

      continue;

    if (pindex != patternStart && c != patc && !inWhitespace) {

      // A non-matching '\n' between CJ characters is ignored

      if (c == '\n' && t2b && IS_CJ_CHAR(prevCharInMatch)) {

        int32_t nindex = findex + incr;

        if (mFindBackward ? (nindex >= 0) : (nindex < fragLen)) {

          if (IS_CJ_CHAR(t2b[nindex])) {

            continue;

      // We also ignore ZWSP and other default-ignorable characters.

      if (IsDefaultIgnorable(c)) {

        continue;

    // Figure whether the previous char is a word-breaking one,

    // if we care about word boundaries.

    bool wordBreakPrev = true;

    if (mWordStartBounded && prevChar) {

      if (prevChar == NBSP_CHARCODE) {

        prevChar = CHAR_TO_UNICHAR(' ');

      wordBreakPrev = BreakInBetween(prevChar, c);

    // Compare. Match if we're in whitespace and c is whitespace, or if the

    // characters match and at least one of the following is true:

    // a) we're not matching the entire word

    // b) a match has already been stored

    // c) the previous character is a different "class" than the current

    // character.

    if ((c == patc && (!(mWordStartBounded || mWordEndBounded) ||

                       matchAnchorNode || wordBreakPrev)) ||

        (inWhitespace && IsSpace(c))) {

      prevCharInMatch = c;

      if (inWhitespace) {

        DEBUG_FIND_PRINTF("YES (whitespace)(%d of %d)\n", pindex, patLen);

      } else {

        DEBUG_FIND_PRINTF("YES! '%c' == '%c' (%d of %d)\n", c, patc, pindex,

                          patLen);

      // Save the range anchors if we haven't already:

      if (!matchAnchorNode) {

        matchAnchorNode = state.GetCurrentNode();

        matchAnchorOffset = findex;

        if (!IS_IN_BMP(c)) {

          matchAnchorOffset -= incr;

        matchAnchorChar = c;

      // Are we done?

      if (DONE_WITH_PINDEX) {

        // Matched the whole string!

        DEBUG_FIND_PRINTF("Found a match!\n");

        // Make the range:

        // Check for word break (if necessary)

        if (mWordEndBounded || inWhitespace) {

          int32_t nextfindex = findex + incr;

          char32_t nextChar;

          // If still in array boundaries, get nextChar.

          if (mFindBackward ? (nextfindex >= 0) : (nextfindex < fragLen)) {

            if (t2b) {

              nextChar = DecodeChar(t2b, &nextfindex);

            } else {

              nextChar = CHAR_TO_UNICHAR(t1b[nextfindex]);

          } else {

            // Get next character from the next node.

            nextChar = PeekNextChar(state, !!matchAnchorNode);

          if (nextChar == NBSP_CHARCODE) {

            nextChar = CHAR_TO_UNICHAR(' ');

          // If a word break isn't there when it needs to be, reset search.

          if (mWordEndBounded && nextChar && !BreakInBetween(c, nextChar)) {

            matchAnchorNode = nullptr;

            continue;

          if (inWhitespace && IsSpace(nextChar)) {

            // If the next character is also an space, keep going, this space

            // will collapse.

            continue;

        int32_t matchStartOffset;

        int32_t matchEndOffset;

        // convert char index to range point:

        int32_t mao = matchAnchorOffset + (mFindBackward ? 1 : 0);

        Text* startParent;

        Text* endParent;

        if (mFindBackward) {

          startParent = current;

          endParent = matchAnchorNode;

          matchStartOffset = findex;

          matchEndOffset = mao;

        } else {

          startParent = matchAnchorNode;

          endParent = current;

          matchStartOffset = mao;

          matchEndOffset = findex + 1;

        RefPtr<nsRange> range = nsRange::Create(current);

        if (startParent && endParent && IsFindableNode(startParent) &&

            IsFindableNode(endParent)) {

          IgnoredErrorResult rv;

          range->SetStart(*startParent, matchStartOffset, rv);

          if (!rv.Failed()) {

            range->SetEnd(*endParent, matchEndOffset, rv);

          // https://html.spec.whatwg.org/#interaction-with-details-and-hidden=until-found

          NS_DispatchToMainThread(NS_NewRunnableFunction(

              "RevealHiddenUntilFound",

              [node = RefPtr(startParent)]()

                  MOZ_CAN_RUN_SCRIPT_BOUNDARY_LAMBDA {

                    node->RevealAncestorClosedDetails();

                    node->RevealAncestorHiddenUntilFoundAndFireBeforematchEvent(

                        IgnoreErrors());

                  }));

          if (!rv.Failed()) {

            return range.forget();

        // This match is no good, continue on in document

        matchAnchorNode = nullptr;

      if (matchAnchorNode) {

        // Not done, but still matching. Advance and loop around for the next

        // characters. But don't advance from a space to a non-space:

        if (!inWhitespace || DONE_WITH_PINDEX ||

            IsSpace(patStr[pindex + incr])) {

          pindex += incr;

          inWhitespace = false;

          DEBUG_FIND_PRINTF("Advancing pindex to %d\n", pindex);

        continue;

    DEBUG_FIND_PRINTF("NOT: %c == %c\n", c, patc);

    EndPartialMatch();