- From: guest271314 <guest271314@gmail.com>
- Date: Sat, 17 Jun 2017 19:18:25 -0700
- To: public-speech-api@w3.org
- Message-ID: <CA+syWAPNoZU-+uRcf8_9EHX4_F2MhUqLyJ6VDpwQeHWVVoeqgg@mail.gmail.com>
Was able to set "RecordStream from Monitor of Built-in Audio Analog Stereo"
at Chromium on *nix and record the audio output of .speak() directly from
the speakers. Firefox 53 has issues with this approach: the "RecordStream
from Monitor of Built-in Audio Analog Stereo" setting does not remain
persistent; the browser closed abruptly on several attempts with that
source selected under Recording in the system Sound Settings GUI; and
there is reverb in the recorded audio.
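The input source the browser will actually capture can be checked from the
page itself; a minimal sketch using navigator.mediaDevices.enumerateDevices()
(note device labels are only populated after permission has been granted):

// check which audio inputs getUserMedia() can capture; labels are
// empty strings until the user grants microphone permission
navigator.mediaDevices.getUserMedia({audio: true})
  .then(stream => {
    stream.getTracks().forEach(track => track.stop());
    return navigator.mediaDevices.enumerateDevices();
  })
  .then(devices => {
    devices.filter(({kind}) => kind === "audioinput")
      .forEach(({label, deviceId}) => console.log(label, deviceId));
  })
  .catch(err => console.error(err));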
Code:
// SpeechSynthesisRecorder.js guest271314 <guest271314@gmail.com> 6-17-2017
// Version 0.0.1
// Motivation: Get audio output from `window.speechSynthesis.speak()` call
// as `ArrayBuffer`, `AudioBuffer`, `Blob`, `MediaSource`, `MediaStream`,
// `ReadableStream`, or other object or data types
// See https://lists.w3.org/Archives/Public/public-speech-api/2017Jun/0000.html
// Configuration: Analog Stereo Duplex
// Input Devices: Monitor of Built-in Audio Analog Stereo, Built-in Audio Analog Stereo
// Playback: Chromium: Playback, speech-dispatcher: playback
// Recording: Chrome input: RecordStream from Monitor of Built-in Audio Analog Stereo
// Issues: Recording: Firefox throws `Uncaught (in promise) NavigatorUserMediaError
// {name: "TrackStartError", message: "", constraintName: ""}` at
// `navigator.mediaDevices.getUserMedia({audio: true})` and closes when
// RecordStream from Monitor of Built-in Audio Analog Stereo is set, at *nix OS
// See https://bugzilla.mozilla.org/show_bug.cgi?id=1373364
// Issues: Stop MediaStream, navigator.getUserMedia() when recording is complete
// Issues: When MediaStream is returned avoid feedback;
// get accurate media duration; stop all associated MediaStream tracks when
// SpeechSynthesisUtterance ended event is dispatched
class SpeechSynthesisRecorder {
  constructor(text = "", utteranceOptions = {}, recorderOptions = {}, dataType = void 0) {
    if (text === "") throw new Error("no words to synthesize");
    if (dataType === undefined) throw new TypeError("dataType is undefined");
    this.dataType = dataType;
    this.text = text;
    this.utterance = new SpeechSynthesisUtterance(this.text);
    this.speechSynthesis = window.speechSynthesis;
    this.mediaStream_ = new MediaStream();
    this.mediaSource_ = new MediaSource();
    // merge defaults so a partial `recorderOptions` still yields a `mimeType`
    this.mediaRecorder = new MediaRecorder(this.mediaStream_, Object.assign({
      // does not set value at chromium 58
      /* audioBitsPerSecond: 128000, */
      mimeType: "audio/webm; codecs=opus"
    }, recorderOptions));
    this.audioContext = new AudioContext();
    this.audioNode = new Audio();
    this.chunks = [];
    this.mimeType = recorderOptions.mimeType || "audio/webm; codecs=opus";
    // adjust codecs set at `type` of `Blob` if necessary
    // this.blobType = this.mimeType.substring(0, this.mimeType.indexOf(";"));
    if (utteranceOptions) {
      if (utteranceOptions.voice) {
        this.speechSynthesis.onvoiceschanged = e => {
          const voice = this.speechSynthesis.getVoices().find(({
            name: _name
          }) => _name === utteranceOptions.voice);
          this.utterance.voice = voice;
          console.log(voice, this.utterance);
        };
        // trigger `voiceschanged`; the voice list is populated asynchronously
        this.speechSynthesis.getVoices();
      }
      const {
        lang, rate, pitch
      } = utteranceOptions;
      Object.assign(this.utterance, {
        lang, rate, pitch
      });
    }
    this.audioNode.controls = "controls";
    document.body.appendChild(this.audioNode);
  }
  async start(text = "") {
    if (text) this.text = text;
    if (this.text === "") throw new Error("no words to synthesize");
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: true
    });
    const media = await new Promise(resolve => {
      const track = stream.getAudioTracks()[0];
      this.mediaStream_.addTrack(track);
      // resolve with the live `MediaStream` immediately when requested
      if (this.dataType === "mediaStream") {
        resolve({tts: this, data: this.mediaStream_});
      }
      this.mediaRecorder.ondataavailable = event => {
        if (event.data.size > 0) {
          this.chunks.push(event.data);
        }
      };
      this.mediaRecorder.onstop = () => {
        // stop the `getUserMedia()` track to release the input device
        track.stop();
        this.mediaStream_.removeTrack(track);
        console.log(`Completed recording ${this.utterance.text}`, this.chunks);
        resolve(this);
      };
      this.mediaRecorder.start();
      this.utterance.onstart = () => {
        console.log(`Starting recording SpeechSynthesisUtterance ${this.utterance.text}`);
      };
      this.utterance.onend = () => {
        this.mediaRecorder.stop();
        console.log(`Ending recording SpeechSynthesisUtterance ${this.utterance.text}`);
      };
      this.speechSynthesis.speak(this.utterance);
    });
    return media;
  }
  async blob() {
    if (!this.chunks.length) throw new Error("no data to return");
    return new Blob(this.chunks, {
      type: this.mimeType
    });
  }
  async arrayBuffer(blob) {
    if (!this.chunks.length) throw new Error("no data to return");
    const arrayBuffer = await new Promise(resolve => {
      const reader = new FileReader();
      reader.onload = () => resolve(reader.result);
      // read either the passed `Blob` or the recorded chunks
      reader.readAsArrayBuffer(blob || new Blob(this.chunks, {
        type: this.mimeType
      }));
    });
    return arrayBuffer;
  }
  async audioBuffer() {
    if (!this.chunks.length) throw new Error("no data to return");
    const ab = await this.arrayBuffer();
    return this.audioContext.decodeAudioData(ab);
  }
  async mediaStream() {
    // the live `MediaStream` is resolved within `start()`
    // when `dataType` is "mediaStream"
  }
  async mediaSource() {
    if (!this.chunks.length) throw new Error("no data to return");
    const ab = await this.arrayBuffer();
    const mediaSource = await new Promise((resolve, reject) => {
      this.mediaSource_.onsourceended = () => resolve(this.mediaSource_);
      this.mediaSource_.onsourceopen = () => {
        if (MediaSource.isTypeSupported(this.mimeType)) {
          const sourceBuffer = this.mediaSource_.addSourceBuffer(this.mimeType);
          sourceBuffer.mode = "sequence";
          sourceBuffer.onupdateend = () => this.mediaSource_.endOfStream();
          sourceBuffer.appendBuffer(ab);
        } else {
          reject(`${this.mimeType} is not supported`);
        }
      };
      this.audioNode.src = URL.createObjectURL(this.mediaSource_);
    });
    return mediaSource;
  }
  async readableStream(size = 1024, rsOptions = {}) {
    if (!this.chunks.length) throw new Error("no data to return");
    const src = this.chunks.slice(0);
    const chunk = size;
    return new ReadableStream({
      start(controller) {
        console.log(src.length);
        controller.enqueue(src.splice(0, chunk));
      },
      pull(controller) {
        // close once the source chunks are exhausted
        if (src.length === 0) {
          controller.close();
          return;
        }
        controller.enqueue(src.splice(0, chunk));
      }
    }, rsOptions);
  }
}
Usage:
async function ttsRecorder(text, utteranceOptions, recorderOptions, dataType = void 0) {
  if (dataType === undefined) throw new TypeError("dataType is undefined");
  const recorder = new SpeechSynthesisRecorder(text, utteranceOptions, recorderOptions, dataType);
  const tts = await recorder.start();
  // when `dataType` is "mediaStream", `start()` already resolved
  // with `{tts, data}`; do not start a second recording
  if (dataType === "mediaStream") return tts;
  const data = await tts[dataType]();
  return {tts, data};
}
let ttsRecording = ttsRecorder("The revolution will not be televised", {
voice: "english-us espeak",
lang: "en-US",
pitch: .75,
rate: 1
}, {
mimeType: "audio/webm; codecs=opus"
}, /* `dataType` */ "mediaStream");
/*
`dataType` :
"blob" : Blob,
"arrayBuffer" : ArrayBuffer,
"audioBuffer" : AudioBuffer,
"mediaSource" : MediaSource,
"mediaStream" : MediaStream,
"readableStream" : ReadableStream
*/
ttsRecording
  .then(({tts, data}) => {
    // do stuff with `ArrayBuffer`, `AudioBuffer`, `Blob`,
    // `MediaSource`, `MediaStream`, `ReadableStream`;
    // for example, play audio, download audio
    console.log(tts, data); // `tts` : `SpeechSynthesisRecorder` instance, `data` : audio as `dataType`
    /*
    // `data` : `MediaSource`
    // note: `mediaSource()` already set `audioNode.src` to an object URL of the `MediaSource`
    tts.audioNode.src = URL.createObjectURL(data);
    tts.audioNode.title = tts.utterance.text;
    tts.audioNode.onloadedmetadata = () => {
      console.log(tts.audioNode.duration);
      tts.audioNode.play();
    }
    */
    /*
    // `data` : `ArrayBuffer`; wrap in a `Blob` to create an object URL
    tts.audioNode.src = URL.createObjectURL(new Blob([data], {type: tts.mimeType}));
    tts.audioNode.title = tts.utterance.text;
    tts.audioNode.onloadedmetadata = () => {
      console.log(tts.audioNode.duration);
      tts.audioNode.play();
    }
    */
    /*
    // `data` : `AudioBuffer`
    let source = tts.audioContext.createBufferSource();
    source.buffer = data;
    source.connect(tts.audioContext.destination);
    source.start()
    */
    /*
    // `data` : `Blob`
    tts.audioNode.src = URL.createObjectURL(data);
    tts.audioNode.title = tts.utterance.text;
    tts.audioNode.onloadedmetadata = () => {
      console.log(tts.audioNode.duration);
      tts.audioNode.play();
    }
    */
    /*
    // `data` : `ReadableStream`
    data.getReader().read().then(d => {
      // `d.value` is an array of recorded `Blob` chunks
      tts.audioNode.src = URL.createObjectURL(new Blob(d.value, {type: tts.mimeType}))
    })
    */
    /*
    // `data` : `MediaStream`
    // do stuff with the active `MediaStream`, e.g. `tts.audioNode.srcObject = data`
    */
  })
  .catch(err => console.log(err))
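For example, assuming `dataType` is set to "blob", the recording can be
saved directly; a minimal sketch using a temporary download link (the
".webm" extension assumes the default "audio/webm; codecs=opus" mimeType):

ttsRecording
  .then(({tts, data}) => {
    // `data` : `Blob`; offer the recording as a file download
    const a = document.createElement("a");
    a.href = URL.createObjectURL(data);
    a.download = `${tts.utterance.text}.webm`;
    document.body.appendChild(a);
    a.click();
    a.remove();
  });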
Possible improvements:
Avoid relying on navigator.mediaDevices.getUserMedia() and the OS-level
monitor-source configuration to capture the audio output; other
suggestions welcome.
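For reference, the getUserMedia()-free pattern would be
MediaStreamAudioDestinationNode plus MediaRecorder, sketched below with an
OscillatorNode as a stand-in source; the missing piece is that no current
API routes speechSynthesis.speak() output into an AudioContext graph:

// record the output of a Web Audio graph without getUserMedia();
// only nodes connected within the AudioContext are captured, so
// this works for the oscillator stand-in but not for .speak()
const ctx = new AudioContext();
const destination = ctx.createMediaStreamDestination();
const osc = ctx.createOscillator(); // stand-in for a TTS source
osc.connect(destination);
const recorder = new MediaRecorder(destination.stream);
recorder.ondataavailable = e => console.log("recorded", e.data);
recorder.start();
osc.start();
setTimeout(() => {
  osc.stop();
  recorder.stop();
}, 2000);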
Github https://github.com/guest271314/SpeechSynthesisRecorder
Received on Sunday, 18 June 2017 02:18:59 UTC