SpeechSynthesis.cpp

mozilla-central/dom/media/webspeech/synth/SpeechSynthesis.cpp (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* vim:set ts=2 sw=2 sts=2 et cindent: */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsISupportsPrimitives.h"

#include "nsSpeechTask.h"

#include "mozilla/Logging.h"

#include "mozilla/dom/Element.h"

#include "mozilla/dom/SpeechSynthesisBinding.h"

#include "mozilla/dom/WindowGlobalChild.h"

#include "SpeechSynthesis.h"

#include "nsContentUtils.h"

#include "nsSynthVoiceRegistry.h"

#include "mozilla/dom/Document.h"

#include "nsIDocShell.h"

#include "nsGlobalWindowInner.h"

#undef LOG

// Returns the log module used by this file. The module is named
// "SpeechSynthesis" and is held in a function-local static, so it is created
// on first use.
mozilla::LogModule* GetSpeechSynthLog() {
  static mozilla::LazyLogModule sLog("SpeechSynthesis");

  return sLog;
}

// File-local logging shorthand: forwards to MOZ_LOG with the
// "SpeechSynthesis" module returned by GetSpeechSynthLog().
#define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg)

namespace mozilla::dom {

// Cycle-collection participant for SpeechSynthesis.
NS_IMPL_CYCLE_COLLECTION_CLASS(SpeechSynthesis)

// Unlink phase: on top of what DOMEventTargetHelper unlinks, drop the current
// task, the queued utterances, the cached voices and the weak-reference
// bookkeeping.
NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN_INHERITED(SpeechSynthesis,

                                                DOMEventTargetHelper)

  NS_IMPL_CYCLE_COLLECTION_UNLINK(mCurrentTask)

  NS_IMPL_CYCLE_COLLECTION_UNLINK(mSpeechQueue)

  // The voice cache is emptied by hand rather than through the UNLINK macro.
  tmp->mVoiceCache.Clear();

  NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE

NS_IMPL_CYCLE_COLLECTION_UNLINK_END

NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN_INHERITED(SpeechSynthesis,

                                                  DOMEventTargetHelper)

  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mCurrentTask)

  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSpeechQueue)

  for (SpeechSynthesisVoice* voice : tmp->mVoiceCache.Values()) {

    cb.NoteXPCOMChild(voice);

NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END

// Interface map: SpeechSynthesis adds nsIObserver and
// nsISupportsWeakReference, and defers every other interface to
// DOMEventTargetHelper.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesis)

  NS_INTERFACE_MAP_ENTRY(nsIObserver)

  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)

NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper)

// Reference counting is inherited from DOMEventTargetHelper.
NS_IMPL_ADDREF_INHERITED(SpeechSynthesis, DOMEventTargetHelper)

NS_IMPL_RELEASE_INHERITED(SpeechSynthesis, DOMEventTargetHelper)

// Constructs the SpeechSynthesis object owned by the inner window aParent.
//
// aParent must be non-null: it is dereferenced to record the window ID in
// mInnerID. The queue starts out not held (mHoldQueue == false). Must be
// called on the main thread (asserted).
//
// When the observer service is available, this object registers itself for
// the "inner-window-destroyed", "synth-voices-changed" and
// "synth-voices-error" topics, all of which are handled in Observe().
SpeechSynthesis::SpeechSynthesis(nsPIDOMWindowInner* aParent)
    : DOMEventTargetHelper(aParent),
      mHoldQueue(false),
      mInnerID(aParent->WindowID()) {
  MOZ_ASSERT(NS_IsMainThread());

  nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
  if (obs) {
    // The trailing `true` is nsIObserverService::AddObserver's ownsWeak flag.
    obs->AddObserver(this, "inner-window-destroyed", true);
    obs->AddObserver(this, "synth-voices-changed", true);
    obs->AddObserver(this, "synth-voices-error", true);
  }
}

// Nothing to release by hand; the compiler-generated destructor is used.
SpeechSynthesis::~SpeechSynthesis() = default;

// Creates the JS reflector for this object by delegating to the generated
// SpeechSynthesis binding.
JSObject* SpeechSynthesis::WrapObject(JSContext* aCx,
                                      JS::Handle<JSObject*> aGivenProto) {
  return SpeechSynthesis_Binding::Wrap(aCx, this, aGivenProto);
}

// Returns true when at least one queued utterance has not started yet.
bool SpeechSynthesis::Pending() const {
  // If we don't have any task, nothing is pending. If we have only one task,
  // check if that task is currently pending. If we have more than one task,
  // then the tasks after the first one are definitely pending.
  return mSpeechQueue.Length() > 1 ||
         (mSpeechQueue.Length() == 1 &&
          (!mCurrentTask || mCurrentTask->IsPending()));
}

// Returns true when this object has a speaking task for a queued utterance,
// or when the voice registry reports that speech is in progress.
bool SpeechSynthesis::Speaking() const {
  // Check global speaking state if there is no active speaking task.
  return (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) ||
         nsSynthVoiceRegistry::GetInstance()->IsSpeaking();
}

// Returns true when any of the following holds: the queue is being held
// (mHoldQueue), the current task reports IsPrePaused(), or the utterance at
// the head of the queue reports IsPaused().
bool SpeechSynthesis::Paused() const {
  return mHoldQueue || (mCurrentTask && mCurrentTask->IsPrePaused()) ||
         (!mSpeechQueue.IsEmpty() && mSpeechQueue.ElementAt(0)->IsPaused());
}

// Returns true when no utterance is queued.
bool SpeechSynthesis::HasEmptyQueue() const {
  // IsEmpty() is what the rest of this file uses for the same check.
  return mSpeechQueue.IsEmpty();
}

// Returns true when at least one voice is known.
//
// The local voice cache is consulted first. Only when it is empty is the
// voice registry asked for its count; a failing registry call is warned
// about and treated as "no voices".
bool SpeechSynthesis::HasVoices() const {
  uint32_t voiceCount = mVoiceCache.Count();
  if (voiceCount == 0) {
    nsresult rv =
        nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount);
    if (NS_WARN_IF(NS_FAILED(rv))) {
      return false;
    }
  }

  return voiceCount != 0;
}

// Appends aUtterance to the speech queue and starts speaking it when it is
// the only queued item and nothing prevents it.
//
// Does nothing once mInnerID has been reset to 0, which Observe() does when
// the owning inner window is destroyed.
void SpeechSynthesis::Speak(SpeechSynthesisUtterance& aUtterance) {
  if (!mInnerID) {
    return;
  }

  mSpeechQueue.AppendElement(&aUtterance);

  if (mSpeechQueue.Length() == 1) {
    // The queue just went from empty to non-empty: flag the window as having
    // active speech synthesis for BFCache purposes. OnEnd() removes the flag
    // when the queue drains.
    RefPtr<WindowGlobalChild> wgc =
        WindowGlobalChild::GetByInnerWindowId(mInnerID);
    if (wgc) {
      wgc->BlockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS);
    }

    // If we only have one item in the queue, we aren't pre-paused, and
    // we have voices available, speak it.
    if (!mCurrentTask && !mHoldQueue && HasVoices()) {
      AdvanceQueue();
    }
  }
}

// Starts speaking the utterance at the head of the queue.
//
// Logs the queue length and returns early when the queue is empty. The head
// utterance stays in the queue; OnEnd() removes it. The task returned by the
// voice registry is stored in mCurrentTask and, if non-null, pointed back at
// this object.
void SpeechSynthesis::AdvanceQueue() {
  LOG(LogLevel::Debug,
      ("SpeechSynthesis::AdvanceQueue length=%zu", mSpeechQueue.Length()));

  if (mSpeechQueue.IsEmpty()) {
    return;
  }

  RefPtr<SpeechSynthesisUtterance> utterance = mSpeechQueue.ElementAt(0);

  // docLang is taken from the document's HTML element. It stays empty when
  // the owner window, its document or that element is unavailable.
  nsAutoString docLang;
  nsCOMPtr<nsPIDOMWindowInner> window = GetOwnerWindow();
  if (Document* doc = window ? window->GetExtantDoc() : nullptr) {
    if (Element* elm = doc->GetHtmlElement()) {
      elm->GetLang(docLang);
    }
  }

  mCurrentTask =
      nsSynthVoiceRegistry::GetInstance()->SpeakUtterance(*utterance, docLang);

  if (mCurrentTask) {
    mCurrentTask->SetSpeechSynthesis(this);
  }
}

// Drops all queued utterances and cancels the current task, if any.
void SpeechSynthesis::Cancel() {
  if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) {
    // Remove all queued utterances except for current one, we will remove it
    // in OnEnd
    mSpeechQueue.RemoveLastElements(mSpeechQueue.Length() - 1);
  } else {
    mSpeechQueue.Clear();
  }

  if (mCurrentTask) {
    mCurrentTask->Cancel();
  }
}

// Pauses speech. No-op when already paused.
//
// When a queued utterance has a speaking task, that task is paused.
// Otherwise the queue is held, so nothing starts until Resume() is called.
void SpeechSynthesis::Pause() {
  if (Paused()) {
    return;
  }

  if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) {
    // NOTE(review): presumably HasSpeakingTask() implies mCurrentTask is
    // non-null; confirm against its definition.
    mCurrentTask->Pause();
  } else {
    mHoldQueue = true;
  }
}

// Resumes speech. No-op when not paused.
//
// Releases the queue hold, then either resumes the current task or, when
// there is none, starts the next queued utterance.
void SpeechSynthesis::Resume() {
  if (!Paused()) {
    return;
  }

  mHoldQueue = false;

  if (mCurrentTask) {
    mCurrentTask->Resume();
  } else {
    AdvanceQueue();
  }
}

// Called when aTask has finished; aTask is asserted to be the current task.
//
// Removes the head utterance from the queue, clears mCurrentTask and moves
// on to the next queued utterance, if any.
void SpeechSynthesis::OnEnd(const nsSpeechTask* aTask) {
  MOZ_ASSERT(mCurrentTask == aTask);

  if (!mSpeechQueue.IsEmpty()) {
    mSpeechQueue.RemoveElementAt(0);
    if (mSpeechQueue.IsEmpty()) {
      // The queue just drained: remove the BFCache flag that Speak() set.
      RefPtr<WindowGlobalChild> wgc =
          WindowGlobalChild::GetByInnerWindowId(mInnerID);
      if (wgc) {
        wgc->UnblockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS);
      }
    }
  }

  mCurrentTask = nullptr;
  AdvanceQueue();
}

// Fills aResult with one SpeechSynthesisVoice per voice in the registry.
//
// aResult is always cleared first. It is left empty when fingerprinting
// resistance applies to RFPTarget::SpeechSynthesis, or when the registry
// cannot report its voice count. Voices whose URI cannot be fetched are
// skipped with a warning.
//
// Voice objects already in mVoiceCache are reused by URI. Afterwards the
// cache is rebuilt to hold exactly the voices that were returned.
void SpeechSynthesis::GetVoices(
    nsTArray<RefPtr<SpeechSynthesisVoice> >& aResult) {
  aResult.Clear();
  uint32_t voiceCount = 0;

  nsCOMPtr<nsPIDOMWindowInner> window = GetOwnerWindow();
  nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr;

  if (nsContentUtils::ShouldResistFingerprinting(docShell,
                                                 RFPTarget::SpeechSynthesis)) {
    return;
  }

  nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount);
  if (NS_WARN_IF(NS_FAILED(rv))) {
    return;
  }

  nsISupports* voiceParent = NS_ISUPPORTS_CAST(nsIObserver*, this);

  for (uint32_t i = 0; i < voiceCount; i++) {
    nsAutoString uri;
    rv = nsSynthVoiceRegistry::GetInstance()->GetVoice(i, uri);

    if (NS_FAILED(rv)) {
      NS_WARNING("Failed to retrieve voice from registry");
      continue;
    }

    // Reuse the cached object for this URI when there is one.
    SpeechSynthesisVoice* voice = mVoiceCache.GetWeak(uri);

    if (!voice) {
      voice = new SpeechSynthesisVoice(voiceParent, uri);
    }

    // aResult holds a strong reference from here on, which keeps reused
    // voices alive across the cache reset below.
    aResult.AppendElement(voice);
  }

  // Rebuild the cache from the returned voices, dropping any stale entries.
  mVoiceCache.Clear();

  for (const RefPtr<SpeechSynthesisVoice>& voice : aResult) {
    mVoiceCache.InsertOrUpdate(voice->mUri,
                               RefPtr<SpeechSynthesisVoice>(voice));
  }
}

// For testing purposes, allows us to cancel the current task that is
// misbehaving, and flush the queue.
// Does nothing when there is no current task.
void SpeechSynthesis::ForceEnd() {
  if (mCurrentTask) {
    mCurrentTask->ForceEnd();
  }
}

// nsIObserver entry point. Must be called on the main thread (asserted).
//
// Handled topics:
//  - "inner-window-destroyed": aSubject must be an nsISupportsPRUint64
//    carrying the destroyed window's ID. If it matches mInnerID, that ID is
//    reset to 0 (which disables Speak()), all speech is cancelled, and this
//    object stops observing the topic.
//  - "synth-voices-changed": unless fingerprinting resistance applies, fires
//    a trusted "voiceschanged" event and starts the queue if it was waiting
//    for voices.
//  - "synth-voices-error": re-broadcasts the failure as
//    "chrome-synth-voices-error" with the owner window as subject, fires an
//    "error" event on every queued utterance, and empties the queue.
//
// Returns NS_ERROR_FAILURE when the "inner-window-destroyed" subject is not
// an nsISupportsPRUint64, the failure code from reading its value when that
// fails, and NS_OK otherwise (including for topics not listed above).
NS_IMETHODIMP
SpeechSynthesis::Observe(nsISupports* aSubject, const char* aTopic,
                         const char16_t* aData) {
  MOZ_ASSERT(NS_IsMainThread());

  if (strcmp(aTopic, "inner-window-destroyed") == 0) {
    nsCOMPtr<nsISupportsPRUint64> wrapper = do_QueryInterface(aSubject);
    NS_ENSURE_TRUE(wrapper, NS_ERROR_FAILURE);

    uint64_t innerID = 0;
    nsresult rv = wrapper->GetData(&innerID);
    NS_ENSURE_SUCCESS(rv, rv);

    if (innerID == mInnerID) {
      mInnerID = 0;
      Cancel();

      nsCOMPtr<nsIObserverService> obs =
          mozilla::services::GetObserverService();
      if (obs) {
        obs->RemoveObserver(this, "inner-window-destroyed");
      }
    }
  } else if (strcmp(aTopic, "synth-voices-changed") == 0) {
    LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged"));
    nsCOMPtr<nsPIDOMWindowInner> window = GetOwnerWindow();
    nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr;

    if (!nsContentUtils::ShouldResistFingerprinting(
            docShell, RFPTarget::SpeechSynthesis)) {
      DispatchTrustedEvent(u"voiceschanged"_ns);
      // If we have a pending item, and voices become available, speak it.
      if (!mCurrentTask && !mHoldQueue && HasVoices()) {
        AdvanceQueue();
      }
    }
  } else if (strcmp(aTopic, "synth-voices-error") == 0) {
    NS_WARNING("SpeechSynthesis::Observe: synth-voices-error");
    LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceserror"));
    nsCOMPtr<nsPIDOMWindowInner> window = GetOwnerWindow();

    nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
    if (obs) {
      obs->NotifyObservers(window, "chrome-synth-voices-error", aData);
    }

    if (!mSpeechQueue.IsEmpty()) {
      // NOTE(review): if dispatching the event can re-enter Speak() or
      // Cancel(), mSpeechQueue would be modified while it is being iterated
      // here -- confirm whether that is possible.
      for (RefPtr<SpeechSynthesisUtterance>& utterance : mSpeechQueue) {
        utterance->DispatchSpeechSynthesisEvent(u"error"_ns, 0, nullptr, 0,
                                                u""_ns);
      }
      mSpeechQueue.Clear();
    }
  }

  return NS_OK;
}

}  // namespace mozilla::dom