// nsUnicharUtils.h - mozsearch

// Enable keyboard shortcuts

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsUnicharUtils_h__

#define nsUnicharUtils_h__

#include "nsString.h"

/*
 * IS_CJ_CHAR(u): true when `u` falls in one of these inclusive ranges:
 *   0x2e80..0x312f, 0x3190..0xabff, 0xf900..0xfaff, 0xff00..0xffef
 * (presumably "CJ" = Chinese/Japanese text; the name is the only hint here).
 *
 * The gaps between the ranges leave out the Korean blocks:
 *   (0x3131u <= (u) && (u) <= 0x318eu) => Hangul Compatibility Jamo
 *   (0xac00u <= (u) && (u) <= 0xd7a3u) => Hangul Syllables
 *
 * `u` is expanded several times, so pass an expression without side effects.
 *
 * Each continuation backslash must be followed directly by the next line of
 * the definition: a blank line after a backslash ends the macro early and
 * leaves the rest of the expression stranded at file scope.
 */
#define IS_CJ_CHAR(u)                                                          \
  ((0x2e80u <= (u) && (u) <= 0x312fu) || (0x3190u <= (u) && (u) <= 0xabffu) || \
   (0xf900u <= (u) && (u) <= 0xfaffu) || (0xff00u <= (u) && (u) <= 0xffefu))

/*
 * Single-value classification macros.  The range checks (UPPER, LOWER, ALPHA)
 * expand `u` more than once, so the argument should be free of side effects.
 */

/* True when u equals 0x200B (the value this header calls ZERO WIDTH SPACE). */
#define IS_ZERO_WIDTH_SPACE(u) (0x200B == (u))

/* True when u compares below 0x80. */
#define IS_ASCII(u) (0x80 > (u))

/* True when u is within 'A'..'Z' inclusive. */
#define IS_ASCII_UPPER(u) (((u) >= 'A') && ((u) <= 'Z'))

/* True when u is within 'a'..'z' inclusive. */
#define IS_ASCII_LOWER(u) (((u) >= 'a') && ((u) <= 'z'))

/* True when u passes either the upper-case or the lower-case ASCII check. */
#define IS_ASCII_ALPHA(u) (IS_ASCII_UPPER(u) || IS_ASCII_LOWER(u))

/* True only for the space character ' ' itself (no tabs or newlines). */
#define IS_ASCII_SPACE(u) ((u) == ' ')

// Whole-string case conversion.
//
// NOTE(review): the implementations are not in this header.  Judging by the
// signatures, the one-argument forms modify aString in place (it is taken by
// non-const reference) -- confirm against the .cpp.
void ToLowerCase(nsAString& aString);

void ToLowerCaseASCII(nsAString& aString);

void ToUpperCase(nsAString& aString);

// Two-argument forms: aSource is read-only and aDest is the only writable
// argument, so the converted text is presumably written to aDest.
// NOTE(review): whether aSource and aDest may alias is not visible here.
void ToLowerCase(const nsAString& aSource, nsAString& aDest);

void ToLowerCaseASCII(const nsAString& aSource, nsAString& aDest);

void ToUpperCase(const nsAString& aSource, nsAString& aDest);

// Single-value case mapping: takes one uint32_t and returns one uint32_t.
// NOTE(review): presumably aChar is a Unicode code point and values with no
// case mapping come back unchanged -- IsUpperCase()/IsLowerCase() in this
// header rely on comparing the result with the input.
uint32_t ToLowerCase(uint32_t aChar);

uint32_t ToUpperCase(uint32_t aChar);

uint32_t ToTitleCase(uint32_t aChar);

// Raw-buffer case conversion: aIn is read-only, aOut is writable, and aLen is
// a size_t length.
// NOTE(review): presumably aLen counts char16_t units and both buffers must
// hold at least aLen units; in-place use (aIn == aOut) is not documented
// here -- confirm against the implementation.
void ToLowerCase(const char16_t* aIn, char16_t* aOut, size_t aLen);

void ToLowerCaseASCII(const char16_t* aIn, char16_t* aOut, size_t aLen);

void ToUpperCase(const char16_t* aIn, char16_t* aOut, size_t aLen);

// Per-character ASCII case conversion, overloaded for char, char16_t and
// char32_t; each overload returns the same type it takes.
// NOTE(review): presumably only 'A'..'Z' / 'a'..'z' are mapped and every
// other value is returned unchanged -- confirm against the implementation.
char ToLowerCaseASCII(const char aChar);

char16_t ToLowerCaseASCII(const char16_t aChar);

char32_t ToLowerCaseASCII(const char32_t aChar);

char ToUpperCaseASCII(const char aChar);

char16_t ToUpperCaseASCII(const char16_t aChar);

char32_t ToUpperCaseASCII(const char32_t aChar);

// True when lower-casing c produces a different value, i.e. ToLowerCase()
// maps c to something other than itself.
inline bool IsUpperCase(uint32_t c) {
  const uint32_t lowered = ToLowerCase(c);
  return lowered != c;
}

// True when upper-casing c produces a different value, i.e. ToUpperCase()
// maps c to something other than itself.
inline bool IsLowerCase(uint32_t c) {
  const uint32_t raised = ToUpperCase(c);
  return raised != c;
}

#ifdef MOZILLA_INTERNAL_API

// Case folding, in the same three shapes as the case-conversion functions
// above: single value, string taken by non-const reference, and raw buffer
// with a length.
// NOTE(review): the exact folding rules are defined in the implementation,
// not here.
uint32_t ToFoldedCase(uint32_t aChar);

void ToFoldedCase(nsAString& aString);

void ToFoldedCase(const char16_t* aIn, char16_t* aOut, size_t aLen);

// NOTE(review): "naked" presumably means the character with diacritics
// removed (the CaseInsensitiveUTF8CharsEqual comment in this header pairs
// "naked" with diacritic-insensitive matching) -- confirm against the .cpp.
uint32_t ToNaked(uint32_t aChar);

void ToNaked(nsAString& aString);

// Case-insensitive comparison callbacks; this header passes the first one to
// Equals() and FindInReadable() as the comparator argument.
// The parameters are unnamed here.
// NOTE(review): presumably (left, right, leftLength, rightLength) with a
// negative / zero / positive result -- confirm against the implementation.

// Variant operating on char16_t data.
int32_t nsCaseInsensitiveStringComparator(const char16_t*, const char16_t*,

                                          size_t, size_t);

// Variant operating on char data (UTF-8, per the name).
int32_t nsCaseInsensitiveUTF8StringComparator(const char*, const char*, size_t,

                                              size_t);

/**
 * Comparator object whose Equals() compares two values case-insensitively.
 * NOTE(review): the name suggests it is meant to be handed to array lookups
 * that accept a comparator object -- confirm against the callers.
 */
class nsCaseInsensitiveStringArrayComparator {
 public:
  /**
   * Returns a.Equals(b, nsCaseInsensitiveStringComparator).
   * A must therefore provide an Equals() member accepting a B and a
   * comparator function.
   */
  template <class A, class B>
  bool Equals(const A& a, const B& b) const {
    return a.Equals(b, nsCaseInsensitiveStringComparator);
  }
};

// Comparison callback with the same signature as
// nsCaseInsensitiveStringComparator.
// NOTE(review): per the name, case is presumably ignored only for ASCII
// letters -- confirm against the implementation.
int32_t nsASCIICaseInsensitiveStringComparator(const char16_t*, const char16_t*,

                                               size_t, size_t);

/**
 * Case-insensitive search for aPattern between aSearchStart and aSearchEnd.
 * Forwards to FindInReadable() with nsCaseInsensitiveStringComparator and
 * returns its result.
 * Both iterators are passed on by non-const reference.
 * NOTE(review): FindInReadable() presumably updates them to bracket the
 * match on success -- confirm against its documentation.
 */
inline bool CaseInsensitiveFindInReadable(
    const nsAString& aPattern, nsAString::const_iterator& aSearchStart,
    nsAString::const_iterator& aSearchEnd) {
  return FindInReadable(aPattern, aSearchStart, aSearchEnd,
                        nsCaseInsensitiveStringComparator);
}

/**
 * Case-insensitive search for aPattern anywhere in aHay.
 * Builds iterators spanning all of aHay and forwards to FindInReadable()
 * with nsCaseInsensitiveStringComparator.  Only the boolean result is
 * returned; the iterators are local, so no match position is reported.
 */
inline bool CaseInsensitiveFindInReadable(const nsAString& aPattern,
                                          const nsAString& aHay) {
  nsAString::const_iterator searchBegin, searchEnd;
  return FindInReadable(aPattern, aHay.BeginReading(searchBegin),
                        aHay.EndReading(searchEnd),
                        nsCaseInsensitiveStringComparator);
}

#endif  // MOZILLA_INTERNAL_API

// Case-insensitive comparison of two char16_t buffers that share a single
// length argument.
// NOTE(review): presumably len counts char16_t units and the result is
// negative / zero / positive -- confirm against the implementation.
int32_t CaseInsensitiveCompare(const char16_t* a, const char16_t* b,

                               size_t len);

// Case-insensitive comparison of two char buffers, each with its own length
// given in bytes (per the parameter names).
// NOTE(review): the input is presumably UTF-8 -- confirm against the .cpp.
int32_t CaseInsensitiveCompare(const char* aLeft, const char* aRight,

                               size_t aLeftBytes, size_t aRightBytes);

/**
 * Calculates the lower-case of the codepoint of the UTF8 sequence starting at
 * aStr.  Sets aNext to the byte following the end of the sequence.
 *
 * If the sequence is invalid, or if computing the codepoint would take us off
 * the end of the string (as marked by aEnd), returns -1 and does not set
 * aNext.  Because the return type is uint32_t, that -1 reaches the caller
 * converted to unsigned, so compare against uint32_t(-1).
 *
 * Note that this function doesn't check that aStr < aEnd -- it assumes
 * you've done that already.
 */
uint32_t GetLowerUTF8Codepoint(const char* aStr, const char* aEnd,

                               const char** aNext);

/**
 * Compares one UTF-8 character from each of two strings, ignoring case.
 *
 * This function determines whether the UTF-8 sequence pointed to by aLeft is
 * case insensitively equal to the UTF-8 sequence pointed to by aRight (or
 * optionally, case and diacritic insensitively equal), as defined by having
 * matching (naked) lower-cased codepoints.
 *
 * aLeftEnd marks the first memory location past aLeft that is not part of
 * aLeft; aRightEnd similarly marks the end of aRight.
 *
 * The function assumes that aLeft < aLeftEnd and aRight < aRightEnd.
 *
 * The function stores the addresses of the next characters in the sequence
 * into aLeftNext and aRightNext.  It's up to the caller to make sure that the
 * returned pointers are valid -- i.e. the function may return aLeftNext >=
 * aLeftEnd or aRightNext >= aRightEnd.
 *
 * If the function encounters invalid text, it sets aErr to true and returns
 * false, possibly leaving aLeftNext and aRightNext uninitialized.  If the
 * function returns true, aErr is guaranteed to be false and both aLeftNext and
 * aRightNext are guaranteed to be initialized.
 *
 * aMatchDiacritics defaults to true.  If aMatchDiacritics is false, the
 * comparison is neither case-sensitive nor diacritic-sensitive.
 */
bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,

                                   const char* aLeftEnd, const char* aRightEnd,

                                   const char** aLeftNext,

                                   const char** aRightNext, bool* aErr,

                                   bool aMatchDiacritics = true);

namespace mozilla {

/**
 * Hash a UTF8 string as though it were a UTF16 string.
 *
 * The value returned is the same as if we converted the string to UTF16 and
 * then ran HashString() on the result.
 *
 * The given |length| is in bytes.
 *
 * NOTE(review): aErr is not described here; presumably it is set to true
 * when aUTF8 is not valid UTF-8 -- confirm against the implementation.
 */
uint32_t HashUTF8AsUTF16(const char* aUTF8, size_t aLength, bool* aErr);

// NOTE(review): undocumented in this header.  Per the name, it presumably
// reports whether the value u belongs to the set of characters around which
// a segment break (line break in source text) is dropped -- confirm against
// the implementation.
bool IsSegmentBreakSkipChar(uint32_t u);

/**
 * Return true for all Punctuation categories (Unicode general category P?),
 * and also for Symbol categories (S?) except for Modifier Symbol, which is
 * kept together with any adjacent letter/number. (Bug 1066756)
 *
 * Takes a single char16_t.
 */
bool IsPunctuationForWordSelect(char16_t aCh);

}  // namespace mozilla

#endif /* nsUnicharUtils_h__ */