Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef _MORKPARSER_
#define _MORKPARSER_ 1
#ifndef _MORK_
# include "mork.h"
#endif
#ifndef _MORKBLOB_
# include "morkBlob.h"
#endif
#ifndef _MORKSINK_
# include "morkSink.h"
#endif
#ifndef _MORKYARN_
# include "morkYarn.h"
#endif
#ifndef _MORKCELL_
# include "morkCell.h"
#endif
// 456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789
/*=============================================================================
* morkPlace: stream byte position and stream line count
*/
// morkPlace is a plain value type pairing a byte offset with a line count.
// Both members are public; a default-constructed or cleared place is (0, 0).
class morkPlace {
 public:
  mork_pos mPlace_Pos;    // byte offset in an input stream
  mork_line mPlace_Line;  // line count in an input stream

  // Construct a place at byte offset zero on line zero.
  morkPlace() : mPlace_Pos(0), mPlace_Line(0) {}

  // Construct a place at the given byte offset and line count.
  morkPlace(mork_pos inPos, mork_line inLine)
      : mPlace_Pos(inPos), mPlace_Line(inLine) {}

  // Copying is a memberwise copy. Places are assigned as well as
  // copy-constructed, so both copy operations are declared together: relying
  // on an implicitly generated copy assignment next to a user-provided copy
  // constructor is deprecated and draws -Wdeprecated-copy warnings.
  morkPlace(const morkPlace& inPlace) = default;
  morkPlace& operator=(const morkPlace& inPlace) = default;

  // Reset this place to byte offset zero on line zero.
  void ClearPlace() {
    mPlace_Pos = 0;
    mPlace_Line = 0;
  }

  // Overwrite both the byte offset and the line count.
  void SetPlace(mork_pos inPos, mork_line inLine) {
    mPlace_Pos = inPos;
    mPlace_Line = inLine;
  }
};
/*=============================================================================
* morkGlitch: stream place and error comment describing a parsing error
*/
// morkGlitch bundles the stream place of a parsing problem with a comment
// string describing it. The comment pointer is stored as given; this class
// neither copies nor frees the string.
class morkGlitch {
 public:
  morkPlace mGlitch_Place;      // place in stream where problem happened
  const char* mGlitch_Comment;  // null-terminated ASCII C string, or null

  // An empty glitch: default-constructed place and a null comment.
  morkGlitch() : mGlitch_Place(), mGlitch_Comment(0) {}

  // A glitch at inPlace described by inComment.
  morkGlitch(const morkPlace& inPlace, const char* inComment)
      : mGlitch_Place(inPlace), mGlitch_Comment(inComment) {}
};
/*=============================================================================
* morkMid: all possible ways needed to express an alias ID in Mork syntax
*/
/*| morkMid: an abstraction of all the variations we might need to support
**| in order to present an ID through the parser interface most cheaply and
**| with minimum transformation away from the original text format.
**|
**|| An ID can have one of four forms:
**| 1) idHex (mMid_Oid.mOid_Id <- idHex)
**| 2) idHex:^scopeHex (mMid_Oid.mOid_Id <- idHex, mOid_Scope <- scopeHex)
**| 3) idHex:scopeName (mMid_Oid.mOid_Id <- idHex, mMid_Buf <- scopeName)
**| 4) columnName (mMid_Buf <- columnName, for columns in cells only)
**|
**|| Typically, mMid_Oid.mOid_Id will hold a nonzero integer value for
**| an ID, but we might have an optional scope specified by either an integer
**| in hex format, or a string name. (Note that while the first ID can be
**| scoped variably, any integer ID for a scope is assumed always located in
**| the same scope, so the second ID need not be disambiguated.)
**|
**|| The only time mMid_Oid.mOid_Id is left unspecified (morkId_kMinusOne, the
**| value HasNoId() tests for) is when mMid_Buf alone is nonzero, to indicate
**| that an explicit string appeared instead of an alias.
**| This case happens to make the representation of columns in cells somewhat
**| easier to represent, since columns can just appear as a string name; and
**| this unifies those interfaces with row and table APIs expecting IDs.
**|
**|| So when the parser passes an instance of morkMid to a subclass, the
**| mMid_Oid.mOid_Id slot should usually be nonzero. And the other two
**| slots, mMid_Oid.mOid_Scope and mMid_Buf, might both be zero, or at
**| most one of them will be nonzero to indicate an explicit scope; the
**| parser is responsible for ensuring at most one of these is nonzero.
|*/
// morkMid carries an object ID (with optional scope) and/or a pointer to a
// buf holding a name. An ID equal to morkId_kMinusOne means "no ID given".
class morkMid {
 public:
  mdbOid mMid_Oid;          // mOid_Scope is zero when not specified
  const morkBuf* mMid_Buf;  // points to some specific buf subclass

  // A new mid starts out in the cleared state: no scope, no ID, no buf.
  morkMid() { this->ClearMid(); }

  // Memberwise copy; the buf pointer is shared, not duplicated.
  morkMid(const morkMid& other)
      : mMid_Oid(other.mMid_Oid), mMid_Buf(other.mMid_Buf) {}

  // Reset to the cleared state, then point mMid_Buf at ioCoil.
  void InitMidWithCoil(morkCoil* ioCoil) {
    this->ClearMid();
    mMid_Buf = ioCoil;
  }

  // Return to the cleared state: zero scope, unspecified ID, null buf.
  void ClearMid() {
    mMid_Buf = 0;
    mMid_Oid.mOid_Id = morkId_kMinusOne;
    mMid_Oid.mOid_Scope = 0;
  }

  // True when the ID slot still holds the "unspecified" marker.
  mork_bool HasNoId() const { return mMid_Oid.mOid_Id == morkId_kMinusOne; }

  // True when the ID slot holds anything other than the marker.
  mork_bool HasSomeId() const { return mMid_Oid.mOid_Id != morkId_kMinusOne; }
};
/*=============================================================================
* morkSpan: start and end stream byte position and stream line count
*/
class morkSpan {
public:
morkPlace mSpan_Start;
morkPlace mSpan_End;
public: // methods
public: // inlines
morkSpan() {} // use inline empty constructor for each place
morkPlace* AsPlace() { return &mSpan_Start; }
const morkPlace* AsConstPlace() const { return &mSpan_Start; }
void SetSpan(mork_pos inFromPos, mork_line inFromLine, mork_pos inToPos,
mork_line inToLine) {
mSpan_Start.SetPlace(inFromPos, inFromLine);
mSpan_End.SetPlace(inToPos, inToLine);
}
// setting end, useful to terminate a span using current port span end:
void SetEndWithEnd(const morkSpan& inSpan) // end <- span.end
{
mSpan_End = inSpan.mSpan_End;
}
// setting start, useful to initiate a span using current port span end:
void SetStartWithEnd(const morkSpan& inSpan) // start <- span.end
{
mSpan_Start = inSpan.mSpan_End;
}
void ClearSpan() {
mSpan_Start.mPlace_Pos = 0;
mSpan_Start.mPlace_Line = 0;
mSpan_End.mPlace_Pos = 0;
mSpan_End.mPlace_Line = 0;
}
morkSpan(mork_pos inFromPos, mork_line inFromLine, mork_pos inToPos,
mork_line inToLine)
: mSpan_Start(inFromPos, inFromLine),
mSpan_End(inToPos, inToLine) { /* empty implementation */
}
};
/*=============================================================================
* morkParser: for parsing Mork text syntax
*/
/* parse at least 0.5K (512 bytes) at once */
#define morkParser_kMinGranularity 512
/* parse at most 64K at once */
#define morkParser_kMaxGranularity (64 * 1024)
/* mNode_Derived code for parsers; compared against in morkParser::IsParser() */
#define morkDerived_kParser /*i*/ 0x5073 /* ascii 'Ps' */
/* expected mParser_Tag value; compared against in GoodParserTag() */
#define morkParser_kTag /*i*/ 0x70417253 /* ascii 'pArS' */
// These are states for the simple parsing virtual machine. Needless to say,
// these must be distinct, and preferably in a contiguous integer range.
// Don't change these constants without looking at switch statements in code.
#define morkParser_kCellState 0 /* cell is tightest scope */
#define morkParser_kMetaState 1 /* meta is tightest scope */
#define morkParser_kRowState 2 /* row is tightest scope */
#define morkParser_kTableState 3 /* table is tightest scope */
#define morkParser_kDictState 4 /* dict is tightest scope */
#define morkParser_kPortState 5 /* port is tightest scope */
#define morkParser_kStartState 6 /* parsing has not yet begun */
#define morkParser_kDoneState 7 /* parsing is complete */
#define morkParser_kBrokenState 8 /* parsing is too broken to work */
// morkParser is an abstract morkNode subclass. Concrete parsers derive from
// it and implement MidToYarn() plus the pure virtual On*() callbacks, which
// are invoked in the pattern given by the grammar comment further down.
// ParseMore() is the public, non-virtual entry point.
class morkParser /*d*/ : public morkNode {
  // public: // slots inherited from morkNode (meant to inform only)
  // nsIMdbHeap* mNode_Heap;
  // mork_base mNode_Base; // must equal morkBase_kNode
  // mork_derived mNode_Derived; // depends on specific node subclass
  // mork_access mNode_Access; // kOpen, kClosing, kShut, or kDead
  // mork_usage mNode_Usage; // kHeap, kStack, kMember, kGlobal, kNone
  // mork_able mNode_Mutable; // can this node be modified?
  // mork_load mNode_Load; // is this node clean or dirty?
  // mork_uses mNode_Uses; // refcount for strong refs
  // mork_refs mNode_Refs; // refcount for strong refs + weak refs

  // ````` ````` ````` ````` ````` ````` ````` `````
 protected:  // protected morkParser members
  nsIMdbHeap* mParser_Heap;    // refcounted heap used for allocation
  morkStream* mParser_Stream;  // refcounted input stream

  mork_u4 mParser_Tag;                 // must equal morkParser_kTag
  mork_count mParser_MoreGranularity;  // constructor inBytesPerParseSegment

  mork_u4 mParser_State;  // state where parser should resume

  // after finding ends of group transactions, we can re-seek the start:
  mork_pos mParser_GroupContentStartPos;  // start of this group

  morkMid mParser_TableMid;  // table mid if inside a table
  morkMid mParser_RowMid;    // row mid if inside a row
  morkMid mParser_CellMid;   // cell mid if inside a cell
  mork_gid mParser_GroupId;  // group ID if inside a group

  mork_bool mParser_InPort;     // called OnNewPort but not OnPortEnd?
  mork_bool mParser_InDict;     // called OnNewDict but not OnDictEnd?
  mork_bool mParser_InCell;     // called OnNewCell but not OnCellEnd?
  mork_bool mParser_InMeta;     // called OnNewMeta but not OnMetaEnd?
  mork_bool mParser_InPortRow;  // called OnNewPortRow but not OnPortRowEnd?
  mork_bool mParser_InRow;      // called OnNewRow but not OnRowEnd?
  mork_bool mParser_InTable;    // called OnNewTable but not OnTableEnd?
  mork_bool mParser_InGroup;    // called OnNewGroup but not OnGroupEnd?

  mork_change mParser_AtomChange;   // driven by mParser_Change
  mork_change mParser_CellChange;   // driven by mParser_Change
  mork_change mParser_RowChange;    // driven by mParser_Change
  mork_change mParser_TableChange;  // driven by mParser_Change

  mork_change mParser_Change;  // driven by modifier in text
  mork_bool mParser_IsBroken;  // has the parse become broken?
  mork_bool mParser_IsDone;    // has the parse finished?
  mork_bool mParser_DoMore;    // mParser_MoreGranularity not exhausted?

  morkMid mParser_Mid;  // current alias being parsed
  // note that mParser_Mid.mMid_Buf points at mParser_ScopeCoil below:

  // blob coils allocated in mParser_Heap
  morkCoil mParser_ScopeCoil;   // place to accumulate ID scope blobs
  morkCoil mParser_ValueCoil;   // place to accumulate value blobs
  morkCoil mParser_ColumnCoil;  // place to accumulate column blobs
  morkCoil mParser_StringCoil;  // place to accumulate string blobs

  morkSpool mParser_ScopeSpool;   // writes to mParser_ScopeCoil
  morkSpool mParser_ValueSpool;   // writes to mParser_ValueCoil
  morkSpool mParser_ColumnSpool;  // writes to mParser_ColumnCoil
  morkSpool mParser_StringSpool;  // writes to mParser_StringCoil

  // yarns allocated in mParser_Heap
  morkYarn mParser_MidYarn;  // place to receive from MidToYarn()

  // span showing current ongoing file position status:
  morkSpan mParser_PortSpan;  // span of current db port file

  // various spans denoting nested subspaces inside the file's port span:
  morkSpan mParser_GroupSpan;  // span of current transaction group
  morkSpan mParser_DictSpan;
  morkSpan mParser_AliasSpan;
  morkSpan mParser_MetaSpan;
  morkSpan mParser_TableSpan;
  morkSpan mParser_RowSpan;
  morkSpan mParser_CellSpan;
  morkSpan mParser_ColumnSpan;
  morkSpan mParser_SlotSpan;

 private:  // convenience inlines
  // The "here" position is the byte offset at the end of the port span.
  mork_pos HerePos() const { return mParser_PortSpan.mSpan_End.mPlace_Pos; }
  void SetHerePos(mork_pos inPos) {
    mParser_PortSpan.mSpan_End.mPlace_Pos = inPos;
  }
  // Line breaks are counted on the end place of the port span.
  void CountLineBreak() { ++mParser_PortSpan.mSpan_End.mPlace_Line; }

  // { ===== begin morkNode interface =====
 public:  // morkNode virtual methods
  void CloseMorkNode(morkEnv* ev) override;  // CloseParser() only if open
  virtual ~morkParser();  // assert that CloseParser() executed earlier

 public:  // morkParser construction & destruction
  morkParser(morkEnv* ev, const morkUsage& inUsage, nsIMdbHeap* ioHeap,
             morkStream* ioStream,  // the readonly stream for input bytes
             mdb_count inBytesPerParseSegment,  // target for ParseMore()
             nsIMdbHeap* ioSlotHeap);

  void CloseParser(morkEnv* ev);  // called by CloseMorkNode();

 private:  // copying is not allowed
  // Deleted rather than declared-but-undefined, so that an accidental copy is
  // rejected at compile time instead of surfacing as a link error.
  morkParser(const morkParser& other) = delete;
  morkParser& operator=(const morkParser& other) = delete;

 public:  // dynamic type identification
  // True when this node is a node at all and its derived code is the parser
  // code.
  mork_bool IsParser() const {
    return IsNode() && mNode_Derived == morkDerived_kParser;
  }
  // } ===== end morkNode methods =====

 public:  // errors and warnings
  static void UnexpectedEofError(morkEnv* ev);
  static void EofInsteadOfHexError(morkEnv* ev);
  static void ExpectedEqualError(morkEnv* ev);
  static void ExpectedHexDigitError(morkEnv* ev, int c);
  static void NonParserTypeError(morkEnv* ev);
  static void UnexpectedByteInMetaWarning(morkEnv* ev);

 public:  // other type methods
  // True when the tag slot still holds the expected parser tag.
  mork_bool GoodParserTag() const { return mParser_Tag == morkParser_kTag; }
  void NonGoodParserError(morkEnv* ev);
  void NonUsableParserError(morkEnv* ev);
  // call when IsNode() or GoodParserTag() is false

  // ````` ````` ````` ````` ````` ````` ````` `````
 public:  // in virtual morkParser methods, data flow subclass to parser
  virtual void MidToYarn(
      morkEnv* ev,
      const morkMid& inMid,  // typically an alias to concat with strings
      mdbYarn* outYarn) = 0;
  // The parser might ask that some aliases be turned into yarns, so they
  // can be concatenated into longer blobs under some circumstances. This
  // is an alternative to using a long and complex callback for many parts
  // for a single cell value.

  // ````` ````` ````` ````` ````` ````` ````` `````
 public:  // out virtual morkParser methods, data flow parser to subclass
  // The virtual methods below will be called in a pattern corresponding
  // to the following grammar isomorphic to the Mork grammar. There should
  // be no exceptions, so subclasses can rely on seeing an appropriate "end"
  // method whenever some "new" method has been seen earlier. In the event
  // that some error occurs that causes content to be flushed, or sudden early
  // termination of a larger containing entity, we will always call a more
  // enclosed "end" method before we call an "end" method with greater scope.

  // Note the "mp" prefix stands for "Mork Parser":

  // mp:Start     ::= OnNewPort mp:PortItem* OnPortEnd
  // mp:PortItem  ::= mp:Content | mp:Group | OnPortGlitch
  // mp:Group     ::= OnNewGroup mp:GroupItem* mp:GroupEnd
  // mp:GroupItem ::= mp:Content | OnGroupGlitch
  // mp:GroupEnd  ::= OnGroupCommitEnd | OnGroupAbortEnd
  // mp:Content   ::= mp:PortRow | mp:Dict | mp:Table | mp:Row
  // mp:PortRow   ::= OnNewPortRow mp:RowItem* OnPortRowEnd
  // mp:Dict      ::= OnNewDict mp:DictItem* OnDictEnd
  // mp:DictItem  ::= OnAlias | OnAliasGlitch | mp:Meta | OnDictGlitch
  // mp:Table     ::= OnNewTable mp:TableItem* OnTableEnd
  // mp:TableItem ::= mp:Row | mp:MetaTable | OnTableGlitch
  // mp:MetaTable ::= OnNewMeta mp:MetaItem* mp:Row OnMetaEnd
  // mp:Meta      ::= OnNewMeta mp:MetaItem* OnMetaEnd
  // mp:MetaItem  ::= mp:Cell | OnMetaGlitch
  // mp:Row       ::= OnMinusRow? OnNewRow mp:RowItem* OnRowEnd
  // mp:RowItem   ::= mp:Cell | mp:Meta | OnRowGlitch
  // mp:Cell      ::= OnMinusCell? OnNewCell mp:CellItem? OnCellEnd
  // mp:CellItem  ::= mp:Slot | OnCellForm | OnCellGlitch
  // mp:Slot      ::= OnValue | OnValueMid | OnRowMid | OnTableMid

  // NOTE(review): OnPortRowGlitch and OnRowPos are declared below but do not
  // appear in the grammar above -- confirm where they fit.

  // Note that in interfaces below, mork_change parameters kAdd and kNil
  // both mean about the same thing by default. Only kCut is interesting,
  // because this usually means to remove members instead of adding them.

  virtual void OnNewPort(morkEnv* ev, const morkPlace& inPlace) = 0;
  virtual void OnPortGlitch(morkEnv* ev, const morkGlitch& inGlitch) = 0;
  virtual void OnPortEnd(morkEnv* ev, const morkSpan& inSpan) = 0;

  virtual void OnNewGroup(morkEnv* ev, const morkPlace& inPlace,
                          mork_gid inGid) = 0;
  virtual void OnGroupGlitch(morkEnv* ev, const morkGlitch& inGlitch) = 0;
  virtual void OnGroupCommitEnd(morkEnv* ev, const morkSpan& inSpan) = 0;
  virtual void OnGroupAbortEnd(morkEnv* ev, const morkSpan& inSpan) = 0;

  virtual void OnNewPortRow(morkEnv* ev, const morkPlace& inPlace,
                            const morkMid& inMid, mork_change inChange) = 0;
  virtual void OnPortRowGlitch(morkEnv* ev, const morkGlitch& inGlitch) = 0;
  virtual void OnPortRowEnd(morkEnv* ev, const morkSpan& inSpan) = 0;

  virtual void OnNewTable(morkEnv* ev, const morkPlace& inPlace,
                          const morkMid& inMid, mork_bool inCutAllRows) = 0;
  virtual void OnTableGlitch(morkEnv* ev, const morkGlitch& inGlitch) = 0;
  virtual void OnTableEnd(morkEnv* ev, const morkSpan& inSpan) = 0;

  virtual void OnNewMeta(morkEnv* ev, const morkPlace& inPlace) = 0;
  virtual void OnMetaGlitch(morkEnv* ev, const morkGlitch& inGlitch) = 0;
  virtual void OnMetaEnd(morkEnv* ev, const morkSpan& inSpan) = 0;

  virtual void OnMinusRow(morkEnv* ev) = 0;
  virtual void OnNewRow(morkEnv* ev, const morkPlace& inPlace,
                        const morkMid& inMid, mork_bool inCutAllCols) = 0;
  virtual void OnRowPos(morkEnv* ev, mork_pos inRowPos) = 0;
  virtual void OnRowGlitch(morkEnv* ev, const morkGlitch& inGlitch) = 0;
  virtual void OnRowEnd(morkEnv* ev, const morkSpan& inSpan) = 0;

  virtual void OnNewDict(morkEnv* ev, const morkPlace& inPlace) = 0;
  virtual void OnDictGlitch(morkEnv* ev, const morkGlitch& inGlitch) = 0;
  virtual void OnDictEnd(morkEnv* ev, const morkSpan& inSpan) = 0;

  virtual void OnAlias(morkEnv* ev, const morkSpan& inSpan,
                       const morkMid& inMid) = 0;
  virtual void OnAliasGlitch(morkEnv* ev, const morkGlitch& inGlitch) = 0;

  virtual void OnMinusCell(morkEnv* ev) = 0;
  virtual void OnNewCell(morkEnv* ev, const morkPlace& inPlace,
                         const morkMid* inMid, const morkBuf* inBuf) = 0;
  // Exactly one of inMid and inBuf is nil, and the other is non-nil.
  // When hex ID syntax is used for a column, then inMid is not nil, and
  // when a naked string names a column, then inBuf is not nil.

  virtual void OnCellGlitch(morkEnv* ev, const morkGlitch& inGlitch) = 0;
  virtual void OnCellForm(morkEnv* ev, mork_cscode inCharsetFormat) = 0;
  virtual void OnCellEnd(morkEnv* ev, const morkSpan& inSpan) = 0;

  virtual void OnValue(morkEnv* ev, const morkSpan& inSpan,
                       const morkBuf& inBuf) = 0;
  virtual void OnValueMid(morkEnv* ev, const morkSpan& inSpan,
                          const morkMid& inMid) = 0;
  virtual void OnRowMid(morkEnv* ev, const morkSpan& inSpan,
                        const morkMid& inMid) = 0;
  virtual void OnTableMid(morkEnv* ev, const morkSpan& inSpan,
                          const morkMid& inMid) = 0;

  // ````` ````` ````` ````` ````` ````` ````` `````
 protected:  // protected parser helper methods
  void ParseChunk(morkEnv* ev);  // find parse continuation and resume
  void StartParse(morkEnv* ev);  // prepare for parsing
  void StopParse(morkEnv* ev);   // terminate parsing & call needed methods

  int NextChar(morkEnv* ev);  // next non-white content

  // One handler per morkParser_k*State value.
  void OnCellState(morkEnv* ev);
  void OnMetaState(morkEnv* ev);
  void OnRowState(morkEnv* ev);
  void OnTableState(morkEnv* ev);
  void OnDictState(morkEnv* ev);
  void OnPortState(morkEnv* ev);
  void OnStartState(morkEnv* ev);

  void ReadCell(morkEnv* ev);
  void ReadRow(morkEnv* ev, int c);
  void ReadRowPos(morkEnv* ev);
  void ReadTable(morkEnv* ev);
  void ReadTableMeta(morkEnv* ev);
  void ReadDict(morkEnv* ev);
  mork_bool ReadContent(morkEnv* ev, mork_bool inInsideGroup);
  void ReadGroup(morkEnv* ev);
  mork_bool ReadEndGroupId(morkEnv* ev);
  mork_bool ReadAt(morkEnv* ev, mork_bool inInsideGroup);
  mork_bool FindGroupEnd(morkEnv* ev);
  void ReadMeta(morkEnv* ev, int inEndMeta);
  void ReadAlias(morkEnv* ev);
  mork_id ReadHex(morkEnv* ev, int* outNextChar);
  morkBuf* ReadValue(morkEnv* ev);
  morkBuf* ReadName(morkEnv* ev, int c);
  mork_bool ReadMid(morkEnv* ev, morkMid* outMid);
  void ReadDictForm(morkEnv* ev);
  void ReadCellForm(morkEnv* ev, int c);

  mork_bool MatchPattern(morkEnv* ev, const char* inPattern);

  void EndSpanOnThisByte(morkEnv* ev, morkSpan* ioSpan);
  void EndSpanOnLastByte(morkEnv* ev, morkSpan* ioSpan);
  void StartSpanOnLastByte(morkEnv* ev, morkSpan* ioSpan);

  void StartSpanOnThisByte(morkEnv* ev, morkSpan* ioSpan);

  // Disabled inline no-op variants of the four span helpers above:
  // void EndSpanOnThisByte(morkEnv* ev, morkSpan* ioSpan)
  // { MORK_USED_2(ev,ioSpan); }

  // void EndSpanOnLastByte(morkEnv* ev, morkSpan* ioSpan)
  // { MORK_USED_2(ev,ioSpan); }

  // void StartSpanOnLastByte(morkEnv* ev, morkSpan* ioSpan)
  // { MORK_USED_2(ev,ioSpan); }

  // void StartSpanOnThisByte(morkEnv* ev, morkSpan* ioSpan)
  // { MORK_USED_2(ev,ioSpan); }

  int eat_line_break(morkEnv* ev, int inLast);
  int eat_line_continue(morkEnv* ev);  // last char was '\\'
  int eat_comment(morkEnv* ev);        // last char was '/'

  // ````` ````` ````` ````` ````` ````` ````` `````
 public:  // public non-poly morkParser methods
  mdb_count ParseMore(      // return count of bytes consumed now
      morkEnv* ev,          // context
      mork_pos* outPos,     // current byte pos in the stream afterwards
      mork_bool* outDone,   // is parsing finished?
      mork_bool* outBroken  // is parsing irreparably dead and broken?
  );

 public:  // typesafe refcounting inlines calling inherited morkNode methods
  // Both helpers forward to the morkNode slot functions. The slot pointer is
  // reinterpreted as a morkNode** because pointer-to-pointer types do not
  // convert implicitly, even though morkParser derives from morkNode.
  static void SlotWeakParser(morkParser* me, morkEnv* ev, morkParser** ioSlot) {
    morkNode::SlotWeakNode(static_cast<morkNode*>(me), ev,
                           reinterpret_cast<morkNode**>(ioSlot));
  }

  static void SlotStrongParser(morkParser* me, morkEnv* ev,
                               morkParser** ioSlot) {
    morkNode::SlotStrongNode(static_cast<morkNode*>(me), ev,
                             reinterpret_cast<morkNode**>(ioSlot));
  }
};
// 456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789
#endif /* _MORKPARSER_ */