nsImapGenericParser.cpp

comm-central/mailnews/imap/src/nsImapGenericParser.cpp

Enable keyboard shortcuts

Source code

Go to header file

Revision control

Copy as Markdown

Other Tools

HG Web

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsImapGenericParser.h"

#include "nsString.h"

#include "plstr.h"

#include "prmem.h"

////////////////// nsImapGenericParser /////////////////////////

nsImapGenericParser::nsImapGenericParser()

    : fNextToken(nullptr),

      fCurrentLine(nullptr),

      fLineOfTokens(nullptr),

      fStartOfLineOfTokens(nullptr),

      fCurrentTokenPlaceHolder(nullptr),

      fAtEndOfLine(false),

      fParserState(stateOK) {}

// Releases the two buffers this object frees itself: the tokenizer's copy of
// the line and the current line.
nsImapGenericParser::~nsImapGenericParser() {
  PR_FREEIF(fStartOfLineOfTokens);
  PR_FREEIF(fCurrentLine);
}

// Called when an allocation fails: flags the parser as disconnected.
void nsImapGenericParser::HandleMemoryFailure() {
  SetConnected(false);
}

// Drops the current line and its tokenizer copy and returns every tokenizer
// cursor to the "nothing read yet" state.  The parser state flags are not
// touched.
void nsImapGenericParser::ResetLexAnalyzer() {
  PR_FREEIF(fCurrentLine);
  PR_FREEIF(fStartOfLineOfTokens);

  fCurrentTokenPlaceHolder = nullptr;
  fStartOfLineOfTokens = nullptr;
  fLineOfTokens = nullptr;
  fCurrentLine = nullptr;
  fNextToken = nullptr;

  fAtEndOfLine = false;
}

bool nsImapGenericParser::LastCommandSuccessful() {

  return fParserState == stateOK;

// Raises (error == true) or clears (error == false) the syntax-error flag in
// the parser state.  Raising the flag also trips an assertion.
// |msg| is accepted but not used by this implementation.
void nsImapGenericParser::SetSyntaxError(bool error, const char* msg) {
  if (!error) {
    fParserState &= ~stateSyntaxErrorFlag;
  } else {
    fParserState |= stateSyntaxErrorFlag;
  }
  NS_ASSERTION(!error, "syntax error in generic parser");
}

// Records the connection status in the parser state: the disconnected flag
// is raised when |connected| is false and cleared when it is true.
void nsImapGenericParser::SetConnected(bool connected) {
  if (!connected) {
    fParserState |= stateDisconnectedFlag;
  } else {
    fParserState &= ~stateDisconnectedFlag;
  }
}

// Discards tokens until the end of the current line is reached or the
// connection is lost.
void nsImapGenericParser::skip_to_CRLF() {
  for (;;) {
    if (!Connected() || fAtEndOfLine) return;
    AdvanceToNextToken();
  }
}

// Skips the remainder of a parenthesized group.
//
// On entry fNextToken should point just after the group's opening "(".
// On return fNextToken points at the first character after the matching
// ")".  Only call AdvanceToNextToken() to get the token AFTER the one left in
// fNextToken.
void nsImapGenericParser::skip_to_close_paren() {
  // The caller already consumed one "(", so one ")" is owed from the start.
  int depth = 1;

  while (ContinueParse()) {
    // Scan the current token, tracking nested parens.
    for (const char* cursor = fNextToken; cursor && *cursor; ++cursor) {
      const char ch = *cursor;

      if (ch == '(') {
        ++depth;
        continue;
      }

      if (ch == ')') {
        if (--depth == 0) {
          fNextToken = cursor + 1;
          // Nothing left in this token after the ")": fetch the next one.
          if (!fNextToken || !*fNextToken) AdvanceToNextToken();
          return;
        }
        continue;
      }

      if (ch == '{' || ch == '"') {
        // A literal or quoted string may itself contain parens: let
        // CreateString() consume it, then throw the result away.
        fNextToken = cursor;
        char* skipped = CreateString();
        PR_FREEIF(skipped);
        break;  // move to next token
      }
    }

    if (ContinueParse()) AdvanceToNextToken();
  }
}

// Moves fNextToken to the next WHITESPACE-delimited token.  A new line is
// fetched first when there is no current line or the current one is
// exhausted.  When the line has no further token, fAtEndOfLine is set and
// fNextToken is pointed at CRLF.  Nothing is tokenized once the parser is
// disconnected.
void nsImapGenericParser::AdvanceToNextToken() {
  const bool needNewLine = !fCurrentLine || fAtEndOfLine;
  if (needNewLine) AdvanceToNextLine();

  if (!Connected()) return;

  if (!fStartOfLineOfTokens) {
    // First token of this line: set up the tokenizer on a duplicate of the
    // line, so that fCurrentLine itself stays intact.
    fStartOfLineOfTokens = PL_strdup(fCurrentLine);
    if (!fStartOfLineOfTokens) {
      HandleMemoryFailure();
      return;
    }
    fCurrentTokenPlaceHolder = fStartOfLineOfTokens;
    fLineOfTokens = fStartOfLineOfTokens;
  }

  fNextToken = NS_strtok(WHITESPACE, &fCurrentTokenPlaceHolder);
  if (fNextToken) return;

  // No token left on this line.
  fAtEndOfLine = true;
  fNextToken = CRLF;
}

// Discards the current line (and the tokenizer's copy of it) and asks
// GetNextLineForParser() for the next one.
//
// On success fNextToken is cleared and fAtEndOfLine reports whether the new
// line consists of WHITESPACE characters only; the tokenizer itself is set up
// lazily by the next AdvanceToNextToken().
//
// On failure -- either GetNextLineForParser() reports failure, or it reports
// success without providing a line (treated as a memory failure) -- the
// parser is flagged accordingly and the tokenizer is put into a safe
// end-of-line state.
void nsImapGenericParser::AdvanceToNextLine() {
  PR_FREEIF(fCurrentLine);
  PR_FREEIF(fStartOfLineOfTokens);

  bool ok = GetNextLineForParser(&fCurrentLine);
  if (!ok || !fCurrentLine) {
    if (!ok)
      SetConnected(false);
    else
      HandleMemoryFailure();

    // The token buffer was freed above, so none of the tokenizer cursors may
    // keep pointing into it.  Reset them on both failure paths and park
    // fNextToken on the constant CRLF.
    fStartOfLineOfTokens = nullptr;
    fLineOfTokens = nullptr;
    fCurrentTokenPlaceHolder = nullptr;
    fAtEndOfLine = true;
    fNextToken = CRLF;
    return;
  }

  fNextToken = nullptr;

  // Determine if there are any tokens (without calling AdvanceToNextToken);
  // otherwise we are already at end of line.
  NS_ASSERTION(strlen(WHITESPACE) == 3, "assume 3 chars of whitespace");
  char* firstToken = fCurrentLine;
  while (*firstToken &&
         (*firstToken == WHITESPACE[0] || *firstToken == WHITESPACE[1] ||
          *firstToken == WHITESPACE[2]))
    firstToken++;
  fAtEndOfLine = (*firstToken == '\0');
}

// Advances |fLineOfTokens| by |bytesToAdvance| bytes and restarts
// tokenization from that position.  If the tokenizer has not been set up for
// the current line yet, it is initialized first; if that fails, nothing else
// is done.
void nsImapGenericParser::AdvanceTokenizerStartingPoint(
    int32_t bytesToAdvance) {
  NS_ASSERTION(bytesToAdvance >= 0, "bytesToAdvance must not be negative");

  // The tokenizer was not yet initialized, do it now.
  if (!fStartOfLineOfTokens) AdvanceToNextToken();
  if (!fStartOfLineOfTokens) return;

  // The last call to AdvanceToNextToken() cleared the token separator to
  // '\0' iff |fCurrentTokenPlaceHolder| is set.  Put the original separator
  // back, taking it from the untouched |fCurrentLine|.
  if (fCurrentTokenPlaceHolder) {
    int separatorIndex = fCurrentTokenPlaceHolder - fStartOfLineOfTokens - 1;
    if (separatorIndex >= 0) {
      fStartOfLineOfTokens[separatorIndex] = fCurrentLine[separatorIndex];
    }
  }

  NS_ASSERTION(bytesToAdvance + (fLineOfTokens - fStartOfLineOfTokens) <=
                   (int32_t)strlen(fCurrentLine),
               "cannot advance beyond end of fLineOfTokens");

  fLineOfTokens += bytesToAdvance;
  fCurrentTokenPlaceHolder = fLineOfTokens;
}

// Parses an astring at fNextToken and returns it as a newly allocated string.
// RFC3501:  astring = 1*ASTRING-CHAR / string
//           string  = quoted / literal
// The first character selects the form: '{' starts a literal, '"' a quoted
// string, anything else is read as an atom that may contain ']'.
// This function leaves us off with fCurrentTokenPlaceHolder immediately after
// the end of the Astring.  Call AdvanceToNextToken() to get the token after
// it.
char* nsImapGenericParser::CreateAstring() {
  switch (*fNextToken) {
    case '{':
      return CreateLiteral();
    case '"':
      return CreateQuoted();
    default:
      return CreateAtom(true);
  }
}

// Creates an atom from the leading atom characters of fNextToken and returns
// it as a newly allocated string.  Returns nullptr after a memory failure, or
// after flagging a syntax error when fNextToken starts with a non-atom
// character.
// When the atom ends before the token does, the tokenizer is repositioned
// just behind the atom.  Call AdvanceToNextToken() to get the next token
// after the atom.
// RFC3501:  atom            = 1*ATOM-CHAR
//           ASTRING-CHAR    = ATOM-CHAR / resp-specials
//           ATOM-CHAR       = <any CHAR except atom-specials>
//           atom-specials   = "(" / ")" / "{" / SP / CTL / list-wildcards /
//                             quoted-specials / resp-specials
//           list-wildcards  = "%" / "*"
//           quoted-specials = DQUOTE / "\"
//           resp-specials   = "]"
// "Characters are 7-bit US-ASCII unless otherwise specified." [RFC3501, 1.2.]
char* nsImapGenericParser::CreateAtom(bool isAstring) {
  char* rv = PL_strdup(fNextToken);
  if (!rv) {
    HandleMemoryFailure();
    return nullptr;
  }

  // Find the first character that cannot be part of the atom.  We stop at
  // NUL, CTL, SP, '"', '%', '(', ')', '*', '\\' and '{'; ']' stops the scan
  // too unless we are reading an astring.
  char* last = rv;
  for (;; ++last) {
    const char c = *last;
    // Of the characters up to and including '*', only these five are allowed.
    const bool allowedLowChar =
        c == '!' || c == '#' || c == '$' || c == '&' || c == '\'';
    if (c <= '*' && !allowedLowChar) break;
    if (c == '\\' || c == '{') break;
    if (c == ']' && !isAstring) break;
  }

  if (last == rv) {
    SetSyntaxError(true, "no atom characters found");
    PL_strfree(rv);
    return nullptr;
  }

  if (*last) {
    // Not the whole token was consumed: cut the atom off here and make the
    // tokenizer resume right behind it.
    *last = '\0';
    AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + (last - rv));
  }
  return rv;
}

// CreateNilString returns either nullptr (for "NIL") or a newly allocated
// string.
// Call with fNextToken pointing to the thing which we think is the nilstring.
// "NIL" is matched case-insensitively on the first three characters only.
// This function leaves us off with fCurrentTokenPlaceHolder immediately after
// the end of the string.
// Regardless of type, call AdvanceToNextToken() to get the token after it.
// RFC3501:   nstring  = string / nil
//            nil      = "NIL"
char* nsImapGenericParser::CreateNilString() {
  if (PL_strncasecmp(fNextToken, "NIL", 3) != 0) {
    return CreateString();
  }

  // If there is text after "NIL" in fNextToken, resume tokenizing right
  // behind the "NIL" (equivalent handling as in CreateQuoted).
  if (fNextToken[3]) {
    AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + 3);
  }
  return nullptr;
}

// Creates a string, which can be either quoted or literal, but not an atom.
// A token starting with anything other than '{' or '"' is flagged as a
// syntax error and yields nullptr.
// This function leaves us off with fCurrentTokenPlaceHolder immediately after
// the end of the String.  Call AdvanceToNextToken() to get the token after
// it.
char* nsImapGenericParser::CreateString() {
  switch (*fNextToken) {
    case '{':
      return CreateLiteral();
    case '"':
      return CreateQuoted();
    default:
      break;
  }

  SetSyntaxError(true, "string does not start with '{' or '\"'");
  return nullptr;
}

// This function sets fCurrentTokenPlaceHolder immediately after the end of the

// closing quote.  Call AdvanceToNextToken() to get the token after it.

// QUOTED_CHAR     ::= <any TEXT_CHAR except quoted_specials> /

//                     "\" quoted_specials

// TEXT_CHAR       ::= <any CHAR except CR and LF>

// quoted_specials ::= <"> / "\"

// Note that according to RFC 1064 and RFC 2060, CRs and LFs are not allowed

// inside a quoted string.  It is sufficient to read from the current line only.

char* nsImapGenericParser::CreateQuoted(bool /*skipToEnd*/) {

  // one char past opening '"'

  char* currentChar = fCurrentLine + (fNextToken - fStartOfLineOfTokens) + 1;

  int escapeCharsCut = 0;

  nsCString returnString(currentChar);

  int charIndex;

  for (charIndex = 0; returnString.CharAt(charIndex) != '"'; charIndex++) {

    if (!returnString.CharAt(charIndex)) {

      SetSyntaxError(true, "no closing '\"' found in quoted");

      return nullptr;

    if (returnString.CharAt(charIndex) == '\\') {

      // eat the escape character, but keep the escaped character

      returnString.Cut(charIndex, 1);

      escapeCharsCut++;

  // +2 because of the start and end quotes

  AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + charIndex +

                                escapeCharsCut + 2);

  returnString.SetLength(charIndex);

  return ToNewCString(returnString);

// This function leaves us off with fCurrentTokenPlaceHolder immediately after

// the end of the literal string.  Call AdvanceToNextToken() to get the token

// after the literal string.

// RFC3501:  literal = "{" number "}" CRLF *CHAR8

//                       ; Number represents the number of CHAR8s

//           CHAR8   = %x01-ff

//                       ; any OCTET except NUL, %x00

char* nsImapGenericParser::CreateLiteral() {

  int32_t numberOfCharsInMessage = atoi(fNextToken + 1);

  uint32_t numBytes = numberOfCharsInMessage + 1;

  NS_ASSERTION(numBytes, "overflow!");

  if (!numBytes) return nullptr;

  char* returnString = (char*)PR_Malloc(numBytes);

  if (!returnString) {

    HandleMemoryFailure();

    return nullptr;

  int32_t currentLineLength = 0;

  int32_t charsReadSoFar = 0;

  int32_t bytesToCopy = 0;

  while (charsReadSoFar < numberOfCharsInMessage) {

    AdvanceToNextLine();

    if (!ContinueParse()) break;

    currentLineLength = strlen(fCurrentLine);

    bytesToCopy = (currentLineLength > numberOfCharsInMessage - charsReadSoFar

                       ? numberOfCharsInMessage - charsReadSoFar

                       : currentLineLength);

    NS_ASSERTION(bytesToCopy, "zero-length line?");

    memcpy(returnString + charsReadSoFar, fCurrentLine, bytesToCopy);

    charsReadSoFar += bytesToCopy;

  if (ContinueParse()) {

    if (currentLineLength == bytesToCopy) {

      // We have consumed the entire line.

      // Consider the input  "{4}\r\n"  "L1\r\n"  " A2\r\n"  which is read

      // line-by-line.  Reading an Astring, this should result in "L1\r\n".

      // Note that the second line is "L1\r\n", where the "\r\n" is part of

      // the literal.  Hence, we now read the next line to ensure that the

      // next call to AdvanceToNextToken() leads to fNextToken=="A2" in our

      // example.

      AdvanceToNextLine();

    } else

      AdvanceTokenizerStartingPoint(bytesToCopy);

  returnString[charsReadSoFar] = 0;

  return returnString;

// Call this to create a buffer containing all characters within

// a given set of parentheses.

// Call this with fNextToken[0]=='(', that is, the open paren

// of the group.

// It will allocate and return all characters up to and including the

// corresponding closing paren, and leave the parser in the right place

// afterwards.

char* nsImapGenericParser::CreateParenGroup() {

  NS_ASSERTION(fNextToken[0] == '(', "we don't have a paren group!");

  int numOpenParens = 0;

  AdvanceTokenizerStartingPoint(fNextToken - fLineOfTokens);

  // Build up a buffer containing the paren group.

  nsCString returnString;

  char* parenGroupStart = fCurrentTokenPlaceHolder;

  NS_ASSERTION(parenGroupStart[0] == '(', "we don't have a paren group (2)!");

  while (*fCurrentTokenPlaceHolder) {

    if (*fCurrentTokenPlaceHolder == '{')  // literal

      // Ensure it is a properly formatted literal.

      NS_ASSERTION(!strcmp("}\r\n", fCurrentTokenPlaceHolder +

                                        strlen(fCurrentTokenPlaceHolder) - 3),

                   "not a literal");

      // Append previous characters and the "{xx}\r\n" to buffer.

      returnString.Append(parenGroupStart);

      // Append literal itself.

      AdvanceToNextToken();

      if (!ContinueParse()) break;

      char* lit = CreateLiteral();

      NS_ASSERTION(lit, "syntax error or out of memory");

      if (!lit) break;

      returnString.Append(lit);

      PR_Free(lit);

      if (!ContinueParse()) break;

      parenGroupStart = fCurrentTokenPlaceHolder;

    } else if (*fCurrentTokenPlaceHolder == '"')  // quoted

      // Append the _escaped_ version of the quoted string:

      // just skip it (because the quoted string must be on the same line).

      AdvanceToNextToken();

      if (!ContinueParse()) break;

      char* q = CreateQuoted();

      if (!q) break;

      PR_Free(q);

      if (!ContinueParse()) break;

    } else {

      // Append this character to the buffer.

      char c = *fCurrentTokenPlaceHolder++;

      if (c == '(')

        numOpenParens++;

      else if (c == ')') {

        numOpenParens--;

        if (numOpenParens == 0) break;

  if (numOpenParens != 0 || !ContinueParse()) {

    SetSyntaxError(true, "closing ')' not found in paren group");

    return nullptr;

  returnString.Append(parenGroupStart,

                      fCurrentTokenPlaceHolder - parenGroupStart);

  AdvanceToNextToken();

  return ToNewCString(returnString);