OggCodecState.h - mozsearch

firefox-main/dom/media/ogg/OggCodecState.h (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* vim:set ts=2 sw=2 sts=2 et cindent: */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "Intervals.h"

#if !defined(OggCodecState_h_)

#  define OggCodecState_h_

#  include <ogg/ogg.h>

// For MOZ_SAMPLE_TYPE_*

#  include <nsClassHashtable.h>

#  include <nsDeque.h>

#  include <nsTArray.h>

#  include <vorbis/codec.h>

#  include "FlacFrameParser.h"

#  include "OggRLBoxTypes.h"

#  include "VideoUtils.h"

// When the following macro is defined, we validate that we're predicting the
// number of Vorbis samples in each packet correctly. Comment it out to
// disable the validation.

#  define VALIDATE_VORBIS_SAMPLE_CALCULATION

#  ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION

#    include <map>

#  endif

struct OpusMSDecoder;

namespace mozilla {

// Human-readable justifications for inspecting tainted (sandbox-originated)
// data without treating the inspection as safety-critical.
// NOTE(review): presumably passed as the "reason" argument when unwrapping
// RLBox tainted values -- confirm at the use sites.

// Justification for inspections that exist only for debugging purposes.
inline constexpr char RLBOX_SAFE_DEBUG_ASSERTION[] =
    "Tainted data is being inspected only for debugging purposes. This is not "
    "a condition that is critical for safety of the renderer.";
// Justification for inspections that only check the internal state of libogg
// structures.
inline constexpr char RLBOX_OGG_STATE_ASSERT_REASON[] =
    "Tainted data is being inspected only to check the internal state of "
    "libogg structures. This is not a condition that is critical for safety of "
    "the renderer.";
// Justification for reading the serial number of a page reported by libogg.
inline constexpr char RLBOX_OGG_PAGE_SERIAL_REASON[] =
    "We are checking the serial of the page. If libogg is operating correctly, "
    "we check serial numbers to make sure the Firefox renderer is correctly "
    "passing streams to the correct source. If libogg has been corrupted, it "
    "could return an incorrect serial, however this would mean that an OGG "
    "file has intentionally corrupted data across multiple logical streams. "
    "This however cannot compromise memory safety of the renderer.";

class OpusParser;

// Deleter used by OggPacketPtr. Releases both the payload buffer referenced
// by the packet and the ogg_packet structure itself.
struct OggPacketDeletePolicy {
  // aPacket: the packet to destroy. Its `packet` buffer is released with
  // delete[] and the structure with delete, so both must have been allocated
  // with the matching forms of new.
  void operator()(ogg_packet* aPacket) const {
    delete[] aPacket->packet;
    delete aPacket;
  }
};

using OggPacketPtr = UniquePtr<ogg_packet, OggPacketDeletePolicy>;

// Deallocates a packet, used in OggPacketQueue below.

class OggPacketDeallocator : public nsDequeFunctor<ogg_packet> {
  // Destroys aPacket using the same policy as OggPacketPtr, so packets that
  // are still queued are freed exactly like packets owned by a UniquePtr.
  void operator()(ogg_packet* aPacket) override {
    OggPacketDeletePolicy()(aPacket);
  }
};

// A queue of ogg_packets. When we read a page, we extract the page's packets

// and buffer them in the owning stream's OggCodecState. This is because

// if we're skipping up to the next keyframe in very large frame sized videos,

// there may be several megabytes of data between keyframes, and the

// ogg_stream_state would end up resizing its buffer every time we added a

// new 4KB page to the bitstream, which kills performance on Windows. This

// also gives us the option to timestamp packets rather than decoded

// frames/samples, reducing the amount of frames/samples we must decode to

// determine start-time at a particular offset, and gives us finer control

// over memory usage.

class OggPacketQueue : private nsDeque<ogg_packet> {
 public:
  // Hands the underlying deque an OggPacketDeallocator.
  // NOTE(review): the deque is expected to run it on every packet still
  // queued when Erase() is called -- confirm against nsDeque.
  OggPacketQueue() : nsDeque(new OggPacketDeallocator()) {}

  // Erases whatever is still queued.
  ~OggPacketQueue() { Erase(); }

  // Returns true when no packets are queued.
  bool IsEmpty() const { return nsDeque<ogg_packet>::GetSize() == 0; }

  // Adds aPacket to the queue. Defined out of line.
  void Append(OggPacketPtr aPacket);

  // Removes the packet at the front of the queue and transfers its ownership
  // to the caller.
  OggPacketPtr PopFront() {
    return OggPacketPtr(nsDeque<ogg_packet>::PopFront());
  }

  // Returns the packet at the front of the queue without removing it; the
  // queue keeps ownership.
  ogg_packet* PeekFront() { return nsDeque<ogg_packet>::PeekFront(); }

  // Removes a packet via nsDeque::Pop() and transfers its ownership to the
  // caller.
  OggPacketPtr Pop() { return OggPacketPtr(nsDeque<ogg_packet>::Pop()); }

  // Returns the packet stored at aIndex without removing it; the queue keeps
  // ownership.
  ogg_packet* operator[](size_t aIndex) const {
    return nsDeque<ogg_packet>::ObjectAt(aIndex);
  }

  // Number of packets currently queued.
  size_t Length() const { return nsDeque<ogg_packet>::GetSize(); }

  // Inserts aPacket at the front of the queue; the queue takes ownership.
  void PushFront(OggPacketPtr aPacket) {
    nsDeque<ogg_packet>::PushFront(aPacket.release());
  }

  // Removes every queued packet.
  void Erase() { nsDeque<ogg_packet>::Erase(); }
};

// Encapsulates the data required for decoding an ogg bitstream and for

// converting granulepos to timestamps.

class OggCodecState {

 public:

  using MetadataTags = mozilla::MetadataTags;

  // Ogg types we know about

  enum CodecType {

    TYPE_VORBIS = 0,

    TYPE_OPUS,

    TYPE_SKELETON,

    TYPE_FLAC,

    TYPE_UNKNOWN

};

  virtual ~OggCodecState();

  // Factory for creating nsCodecStates. Use instead of constructor.

  // aPage should be a beginning-of-stream page.

  static UniquePtr<OggCodecState> Create(rlbox_sandbox_ogg* aSandbox,

                                         tainted_opaque_ogg<ogg_page*> aPage,

                                         uint32_t aSerial);

  virtual CodecType GetType() { return TYPE_UNKNOWN; }

  // Reads a header packet. Returns false if an error was encountered

  // while reading header packets. Callers should check DoneReadingHeaders()

  // to determine if the last header has been read.

  // This function takes ownership of the packet and is responsible for

  // releasing it or queuing it for later processing.

  virtual bool DecodeHeader(OggPacketPtr aPacket) {

    return (mDoneReadingHeaders = true);

  // Build a hash table with tag metadata parsed from the stream.

  virtual UniquePtr<MetadataTags> GetTags() { return nullptr; }

  using TimeUnit = media::TimeUnit;

  // Returns the end time that a granulepos represents.

  virtual TimeUnit Time(int64_t aGranulepos) { return TimeUnit::Invalid(); }

  // Returns the start time that a granulepos represents.

  virtual TimeUnit StartTime(int64_t aGranulepos) {

    return TimeUnit::Invalid();

  // Returns the duration of the given packet, if it can be determined.

  virtual TimeUnit PacketDuration(ogg_packet* aPacket) {

    return TimeUnit::Invalid();

  // Returns the start time of the given packet, if it can be determined.

  virtual TimeUnit PacketStartTime(ogg_packet* aPacket) {

    if (aPacket->granulepos < 0) {

      return TimeUnit::Invalid();

    TimeUnit endTime = Time(aPacket->granulepos);

    TimeUnit duration = PacketDuration(aPacket);

    // When looping, it's possible to find header packets there because the

    // demuxing restarts from the beginning of the stream. Just skip and retry

    // with the next packet.

    if (!duration.IsValid()) {

      return TimeUnit::Invalid();

    if (duration > endTime) {

      // Audio preskip may eat a whole packet or more.

      return TimeUnit::Zero();

    return endTime - duration;

  // Initializes the codec state.

  virtual bool Init() { return true; }

  // Returns true when this bitstream has finished reading all its

  // header packets.

  bool DoneReadingHeaders() { return mDoneReadingHeaders; }

  // Deactivates the bitstream. Only the primary video and audio bitstreams

  // should be active.

  void Deactivate() {

    mActive = false;

    mDoneReadingHeaders = true;

    Reset();

  // Resets decoding state.

  virtual nsresult Reset();

  // Returns true if the OggCodecState thinks this packet is a header

  // packet. Note this does not verify the validity of the header packet,

  // it just guarantees that the packet is marked as a header packet (i.e.

  // it is definintely not a data packet). Do not use this to identify

  // streams, use it to filter header packets from data packets while

  // decoding.

  virtual bool IsHeader(ogg_packet* aPacket) { return false; }

  // Returns true if the OggCodecState thinks this packet represents a

  // keyframe, from which decoding can restart safely.

  virtual bool IsKeyframe(ogg_packet* aPacket) { return true; }

  // Returns true if there is a packet available for dequeueing in the stream.

  bool IsPacketReady();

  // Returns the next raw packet in the stream, or nullptr if there are no more

  // packets buffered in the packet queue. More packets can be buffered by

  // inserting one or more pages into the stream by calling PageIn().

  // The packet will have a valid granulepos.

  OggPacketPtr PacketOut();

  // Returns the next raw packet in the stream, or nullptr if there are no more

  // packets buffered in the packet queue, without consuming it.

  // The packet will have a valid granulepos.

  ogg_packet* PacketPeek();

  // Moves all raw packets from aOther to the front of the current packet queue.

  void PushFront(OggPacketQueue&& aOther);

  // Returns the next packet in the stream as a MediaRawData, or nullptr

  // if there are no more packets buffered in the packet queue. More packets

  // can be buffered by inserting one or more pages into the stream by calling

  // PageIn(). The packet will have a valid granulepos.

  virtual already_AddRefed<MediaRawData> PacketOutAsMediaRawData();

  // Extracts all packets from the page, and inserts them into the packet

  // queue. They can be extracted by calling PacketOut(). Packets from an

  // inactive stream are not buffered, i.e. this call has no effect for

  // inactive streams. Multiple pages may need to be inserted before

  // PacketOut() starts to return packets, as granulepos may need to be

  // captured.

  virtual nsresult PageIn(tainted_opaque_ogg<ogg_page*> aPage);

  // Returns the maximum number of microseconds which a keyframe can be offset

  // from any given interframe.b

  virtual TimeUnit MaxKeyframeOffset() { return TimeUnit::Zero(); }

  // Number of packets read.

  uint64_t mPacketCount;

  // Serial number of the bitstream.

  uint32_t mSerial;

  // Ogg specific state.

  tainted_opaque_ogg<ogg_stream_state*> mState;

  // Queue of as yet undecoded packets. Packets are guaranteed to have

  // a valid granulepos.

  OggPacketQueue mPackets;

  // Is the bitstream active; whether we're decoding and playing this bitstream.

  bool mActive;

  // True when all headers packets have been read.

  bool mDoneReadingHeaders;

  // All invocations of libogg functionality from the demuxer is sandboxed using

  // wasm library sandboxes on supported platforms. This is the sandbox

  // instance.

  rlbox_sandbox_ogg* mSandbox;

  virtual const TrackInfo* GetInfo() const {

    MOZ_RELEASE_ASSERT(false, "Can't be called directly");

    return nullptr;

  // Validation utility for vorbis-style tag names.

  static bool IsValidVorbisTagName(nsCString& aName);

  // Utility method to parse and add a vorbis-style comment

  // to a metadata hash table. Most Ogg-encapsulated codecs

  // use the vorbis comment format for metadata.

  static bool AddVorbisComment(UniquePtr<MetadataTags>& aTags,

                               const char* aComment, uint32_t aLength);

 protected:

  // Constructs a new OggCodecState. aActive denotes whether the stream is

  // active. For streams of unsupported or unknown types, aActive should be

  // false.

  OggCodecState(rlbox_sandbox_ogg* aSandbox,

                tainted_opaque_ogg<ogg_page*> aBosPage, uint32_t aSerial,

                bool aActive);

  // Deallocates all packets stored in mUnstamped, and clears the array.

  void ClearUnstamped();

  // Extracts packets out of mState until a data packet with a non -1

  // granulepos is encountered, or no more packets are readable. Header

  // packets are pushed into the packet queue immediately, and data packets

  // are buffered in mUnstamped. Once a non -1 granulepos packet is read

  // the granulepos of the packets in mUnstamped can be inferred, and they

  // can be pushed over to mPackets. Used by PageIn() implementations in

  // subclasses.

  nsresult PacketOutUntilGranulepos(bool& aFoundGranulepos);

  // Temporary buffer in which to store packets while we're reading packets

  // in order to capture granulepos.

  nsTArray<OggPacketPtr> mUnstamped;

  bool SetCodecSpecificConfig(MediaByteBuffer* aBuffer,

                              OggPacketQueue& aHeaders);

 private:

  bool InternalInit();

};

// OggCodecState specialization for Vorbis streams; GetType() reports
// TYPE_VORBIS and GetInfo() exposes the AudioInfo held in mInfo.
class VorbisState : public OggCodecState {
 public:
  explicit VorbisState(rlbox_sandbox_ogg* aSandbox,
                       tainted_opaque_ogg<ogg_page*> aBosPage,
                       uint32_t aSerial);
  virtual ~VorbisState();

  CodecType GetType() override { return TYPE_VORBIS; }
  bool DecodeHeader(OggPacketPtr aPacket) override;
  TimeUnit Time(int64_t aGranulepos) override;
  TimeUnit PacketDuration(ogg_packet* aPacket) override;
  bool Init() override;
  nsresult Reset() override;
  bool IsHeader(ogg_packet* aPacket) override;
  nsresult PageIn(tainted_opaque_ogg<ogg_page*> aPage) override;
  const TrackInfo* GetInfo() const override { return &mInfo; }

  // Return a hash table with tag metadata.
  UniquePtr<MetadataTags> GetTags() override;

 private:
  // Track description handed out by GetInfo().
  AudioInfo mInfo;
  // libvorbis structures, value-initialized here.
  // NOTE(review): presumably populated while decoding headers and in Init() --
  // confirm in the .cpp.
  vorbis_info mVorbisInfo = {};
  vorbis_comment mComment = {};
  vorbis_dsp_state mDsp = {};
  vorbis_block mBlock = {};
  // NOTE(review): presumably the header packets retained for building the
  // codec-specific config -- confirm in the .cpp.
  OggPacketQueue mHeaders;

  // Returns the end time that a granulepos represents.
  static TimeUnit Time(vorbis_info* aInfo, int64_t aGranulePos);

  // Reconstructs the granulepos of Vorbis packets stored in the mUnstamped
  // array.
  void ReconstructVorbisGranulepos();

  // The "block size" of the previously decoded Vorbis packet, or 0 if we've
  // not yet decoded anything. This is used to calculate the number of samples
  // in a Vorbis packet, since each Vorbis packet depends on the previous
  // packet while being decoded.
  long mPrevVorbisBlockSize;

  // Granulepos (end sample) of the last decoded Vorbis packet. This is used
  // to calculate the Vorbis granulepos when we don't find a granulepos to
  // back-propagate from.
  int64_t mGranulepos;

#  ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION
  // When validating that we've correctly predicted Vorbis packets' number
  // of samples, we store each packet's predicted number of samples in this
  // map, and verify we decode the predicted number of samples.
  std::map<ogg_packet*, long> mVorbisPacketSamples;
#  endif

  // Records that aPacket is predicted to have aSamples samples.
  // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION
  // is not defined.
  void RecordVorbisPacketSamples(ogg_packet* aPacket, long aSamples);

  // Verifies that aPacket has had its number of samples predicted.
  // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION
  // is not defined.
  void AssertHasRecordedPacketSamples(ogg_packet* aPacket);

 public:
  // Asserts that the number of samples predicted for aPacket is aSamples.
  // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION
  // is not defined.
  void ValidateVorbisPacketSamples(ogg_packet* aPacket, long aSamples);
};

// OggCodecState specialization for Opus streams; GetType() reports TYPE_OPUS
// and GetInfo() exposes the AudioInfo held in mInfo.
class OpusState : public OggCodecState {
 public:
  explicit OpusState(rlbox_sandbox_ogg* aSandbox,
                     tainted_opaque_ogg<ogg_page*> aBosPage, uint32_t aSerial);
  virtual ~OpusState();

  CodecType GetType() override { return TYPE_OPUS; }
  bool DecodeHeader(OggPacketPtr aPacket) override;
  TimeUnit Time(int64_t aGranulepos) override;
  TimeUnit PacketDuration(ogg_packet* aPacket) override;
  bool Init() override;
  nsresult Reset() override;
  // Overload of Reset() taking a flag.
  // NOTE(review): aStart presumably indicates a reset to the start of the
  // stream -- confirm in the .cpp.
  nsresult Reset(bool aStart);
  bool IsHeader(ogg_packet* aPacket) override;
  nsresult PageIn(tainted_opaque_ogg<ogg_page*> aPage) override;
  already_AddRefed<MediaRawData> PacketOutAsMediaRawData() override;
  const TrackInfo* GetInfo() const override { return &mInfo; }

  // Returns the end time that a granulepos represents.
  static TimeUnit Time(int aPreSkip, int64_t aGranulepos);

  // Construct and return a table of tags from the metadata header.
  UniquePtr<MetadataTags> GetTags() override;

 private:
  // Parser for the Opus headers (OpusParser is only forward-declared here).
  UniquePtr<OpusParser> mParser;
  // Raw pointer to the Opus multistream decoder.
  // NOTE(review): ownership and lifetime are not visible in this header --
  // confirm in the .cpp.
  OpusMSDecoder* mDecoder;

  // Granule position (end sample) of the last decoded Opus packet. This is
  // used to calculate the amount we should trim from the last packet.
  int64_t mPrevPacketGranulepos;

  // Reconstructs the granulepos of Opus packets stored in the
  // mUnstamped array. mUnstamped must be filled with consecutive packets from
  // the stream, with the last packet having a known granulepos. Using this
  // known granulepos, and the known frame numbers, we recover the granulepos
  // of all frames in the array. This enables us to determine their timestamps.
  bool ReconstructOpusGranulepos();

  // Granule position (end sample) of the last decoded Opus page. This is
  // used to calculate the Opus per-packet granule positions on the last page,
  // where we may need to trim some samples from the end.
  int64_t mPrevPageGranulepos;

  // Track description handed out by GetInfo().
  AudioInfo mInfo;
  // NOTE(review): presumably the header packets retained for building the
  // codec-specific config -- confirm in the .cpp.
  OggPacketQueue mHeaders;
};

// Packs two 16-bit version components into a single 32-bit version number:
// major goes in the upper 16 bits and minor in the lower 16 bits. The
// arguments are neither masked nor range-checked.
#  define SKELETON_VERSION(major, minor) (((major) << 16) | (minor))

// Kinds of message header field. FieldPatternType pairs each kind with the
// textual pattern used to recognize it. The numbering starts at zero and is
// contiguous.
enum EMsgHeaderType {
  eContentType = 0,
  eRole = 1,
  eName = 2,
  eLanguage = 3,
  eTitle = 4,
  eDisplayHint = 5,
  eAltitude = 6,
  eTrackOrder = 7,
  eTrackDependencies = 8
};

// Associates a textual pattern with the message header type it identifies.
struct FieldPatternType {
  // Pattern text to look for.
  // NOTE(review): presumably a header-name prefix matched against message
  // header lines -- confirm against the table that uses this struct.
  const char* mPatternToRecognize;
  // Header type denoted by the pattern.
  EMsgHeaderType mMsgHeaderType;
};

// Stores the message information for different logical bitstream.

struct MessageField {
  // String values keyed by a 32-bit integer.
  // NOTE(review): the key is presumably an EMsgHeaderType value -- confirm at
  // the insertion sites.
  nsClassHashtable<nsUint32HashKey, nsCString> mValuesStore;
};

class SkeletonState : public OggCodecState {

 public:

  explicit SkeletonState(rlbox_sandbox_ogg* aSandbox,

                         tainted_opaque_ogg<ogg_page*> aBosPage,

                         uint32_t aSerial);

  ~SkeletonState();

  nsClassHashtable<nsUint32HashKey, MessageField> mMsgFieldStore;

  CodecType GetType() override { return TYPE_SKELETON; }

  bool DecodeHeader(OggPacketPtr aPacket) override;

  TimeUnit Time(int64_t aGranulepos) override { return TimeUnit::Invalid(); }

  bool IsHeader(ogg_packet* aPacket) override { return true; }

  // Return true if the given time (in milliseconds) is within

  // the presentation time defined in the skeleton track.

  bool IsPresentable(int64_t aTime) { return aTime >= mPresentationTime; }

  // Stores the offset of the page on which a keyframe starts,

  // and its presentation time.

  class nsKeyPoint {

   public:

    nsKeyPoint() : mOffset(INT64_MAX), mTime(TimeUnit::Invalid()) {}

    nsKeyPoint(int64_t aOffset, TimeUnit aTime)

        : mOffset(aOffset), mTime(aTime) {}

    // Offset from start of segment/link-in-the-chain in bytes.

    int64_t mOffset;

    // Presentation time

    TimeUnit mTime;

    bool IsNull() { return mOffset == INT64_MAX && !mTime.IsValid(); }

};

  // Stores a keyframe's byte-offset, presentation time and the serialno

  // of the stream it belongs to.

  class nsSeekTarget {

   public:

    nsSeekTarget() : mSerial(0) {}

    nsKeyPoint mKeyPoint;

    uint32_t mSerial;

    bool IsNull() { return mKeyPoint.IsNull() && mSerial == 0; }

};

  // Determines from the seek index the keyframe which you must seek back to

  // in order to get all keyframes required to render all streams with

  // serialnos in aTracks, at time aTarget.

  nsresult IndexedSeekTarget(const TimeUnit& aTarget,

                             nsTArray<uint32_t>& aTracks,

                             nsSeekTarget& aResult);

  bool HasIndex() const { return mIndex.Count() > 0; }

  // Returns the duration of the active tracks in the media, if we have

  // an index. aTracks must be filled with the serialnos of the active tracks.

  // The duration is calculated as the greatest end time of all active tracks,

  // minus the smalled start time of all the active tracks.

  nsresult GetDuration(const nsTArray<uint32_t>& aTracks, TimeUnit& aDuration);

 private:

  // Decodes an index packet. Returns false on failure.

  bool DecodeIndex(ogg_packet* aPacket);

  // Decodes an fisbone packet. Returns false on failure.

  bool DecodeFisbone(ogg_packet* aPacket);

  // Gets the keypoint you must seek to in order to get the keyframe required

  // to render the stream at time aTarget on stream with serial aSerialno.

  nsresult IndexedSeekTargetForTrack(uint32_t aSerialno,

                                     const TimeUnit& aTarget,

                                     nsKeyPoint& aResult);

  // Version of the decoded skeleton track, as per the SKELETON_VERSION macro.

  uint32_t mVersion;

  // Presentation time of the resource in milliseconds

  int64_t mPresentationTime;

  // Length of the resource in bytes.

  int64_t mLength;

  // Stores the keyframe index and duration information for a particular

  // stream.

  class nsKeyFrameIndex {

   public:

    nsKeyFrameIndex(const TimeUnit& aStartTime, const TimeUnit& aEndTime)

        : mStartTime(aStartTime), mEndTime(aEndTime) {

      MOZ_COUNT_CTOR(nsKeyFrameIndex);

    MOZ_COUNTED_DTOR(nsKeyFrameIndex)

    void Add(int64_t aOffset, const TimeUnit& aTime) {

      mKeyPoints.AppendElement(nsKeyPoint(aOffset, aTime));

    const nsKeyPoint& Get(uint32_t aIndex) const { return mKeyPoints[aIndex]; }

    uint32_t Length() const { return mKeyPoints.Length(); }

    // Presentation time of the first sample in this stream in usecs.

    const TimeUnit mStartTime;

    // End time of the last sample in this stream in usecs.

    const TimeUnit mEndTime;

   private:

    nsTArray<nsKeyPoint> mKeyPoints;

};

  // Maps Ogg serialnos to the index-keypoint list.

  nsClassHashtable<nsUint32HashKey, nsKeyFrameIndex> mIndex;

};

// OggCodecState specialization for FLAC streams; GetType() reports TYPE_FLAC.
class FlacState : public OggCodecState {
 public:
  explicit FlacState(rlbox_sandbox_ogg* aSandbox,
                     tainted_opaque_ogg<ogg_page*> aBosPage, uint32_t aSerial);

  CodecType GetType() override { return TYPE_FLAC; }
  bool DecodeHeader(OggPacketPtr aPacket) override;
  TimeUnit Time(int64_t aGranulepos) override;
  TimeUnit PacketDuration(ogg_packet* aPacket) override;
  bool IsHeader(ogg_packet* aPacket) override;
  nsresult PageIn(tainted_opaque_ogg<ogg_page*> aPage) override;

  // Return a hash table with tag metadata.
  UniquePtr<MetadataTags> GetTags() override;

  // Returns the track description for this stream. Defined out of line.
  const TrackInfo* GetInfo() const override;

 private:
  // NOTE(review): presumably reconstructs the granulepos of the packets held
  // in mUnstamped, like the Vorbis and Opus counterparts -- confirm in the
  // .cpp.
  bool ReconstructFlacGranulepos(void);

  // FLAC frame/header parser owned by this state object.
  FlacFrameParser mParser;
};

}  // namespace mozilla

#endif