- From: guest271314 <guest271314@gmail.com>
- Date: Sat, 17 Jun 2017 19:18:25 -0700
- To: public-speech-api@w3.org
- Message-ID: <CA+syWAPNoZU-+uRcf8_9EHX4_F2MhUqLyJ6VDpwQeHWVVoeqgg@mail.gmail.com>
Was able to set RecordStream from Monitor of Built-in Audio Analog Stereo at Chromium on *nix to record the audio output of .speak() directly from the speakers. Firefox 53 has issues with the RecordStream from Monitor of Built-in Audio Analog Stereo setting: the setting does not remain persistent; the browser closed abruptly when tried several times with RecordStream from Monitor of Built-in Audio Analog Stereo set at Recording in the system Sound Settings GUI; and there is reverb in the recorded audio.

Code:

// SpeechSynthesisRecorder.js guest271314 <guest271314@gmail.com> 6-17-2017
// Version 0.0.1
// Motivation: Get audio output from `window.speechSynthesis.speak()` call
// as `ArrayBuffer`, `AudioBuffer`, `Blob`, `MediaSource`, `MediaStream`,
// `ReadableStream`, or other object or data types
// See https://lists.w3.org/Archives/Public/public-speech-api/2017Jun/0000.html
// Configuration: Analog Stereo Duplex
// Input Devices: Monitor of Built-in Audio Analog Stereo, Built-in Audio Analog Stereo
// Playback: Chromium: Playback, speech-dispatcher: playback
// Recording: Chrome input: RecordStream from Monitor of Built-in Audio Analog Stereo
// Issues: Recording: Firefox throws `Uncaught (in promise) NavigatorUserMediaError
// {name: "TrackStartError", message: "", constraintName: ""}`
// Issues: `navigator.getUserMedia({audio: true})` throws error `Uncaught (in promise)
// NavigatorUserMediaError {name: "TrackStartError", message: "", constraintName: ""}`
// and closes when RecordStream from Monitor of Built-in Audio Analog Stereo is set, at *nix OS
// See https://bugzilla.mozilla.org/show_bug.cgi?id=1373364
// Issues: Stop MediaStream, navigator.getUserMedia() when recording is complete
// Issues: When MediaStream is returned avoid feedback;
// get accurate media duration; stop all associated MediaStream when
// SpeechSynthesisUtterance ended event dispatched
class SpeechSynthesisRecorder {
  constructor(text = "", utteranceOptions = {}, recorderOptions = {}, dataType = void 0) {
    if (text === "") throw new Error("no words to synthesize");
    if (dataType === undefined) throw new TypeError("dataType is undefined");
    this.dataType = dataType;
    this.text = text;
    this.utterance = new SpeechSynthesisUtterance(this.text);
    this.speechSynthesis = window.speechSynthesis;
    this.mediaStream_ = new MediaStream();
    this.mediaSource_ = new MediaSource();
    // fall back to the default options only when `recorderOptions` is empty
    this.mediaRecorder = new MediaRecorder(this.mediaStream_,
      Object.keys(recorderOptions).length ? recorderOptions : {
        // does not set value at chromium 58
        /* audioBitsPerSecond: 128000, */
        mimeType: "audio/webm; codecs=opus"
      });
    this.audioContext = new AudioContext();
    this.audioNode = new Audio();
    this.chunks = [];
    this.mimeType = recorderOptions.mimeType || "audio/webm; codecs=opus";
    // adjust codecs set at `type` of `Blob` as necessary
    // this.blobType = this.mimeType.substring(0, this.mimeType.indexOf(";"));
    if (utteranceOptions) {
      if (utteranceOptions.voice) {
        this.speechSynthesis.onvoiceschanged = e => {
          const voice = this.speechSynthesis.getVoices()
            .find(({ name: _name }) => _name === utteranceOptions.voice);
          this.utterance.voice = voice;
          console.log(voice, this.utterance);
        };
        this.speechSynthesis.getVoices();
      }
      const { lang, rate, pitch } = utteranceOptions;
      Object.assign(this.utterance, { lang, rate, pitch });
    }
    this.audioNode.controls = true;
    document.body.appendChild(this.audioNode);
  }
  async start(text = "") {
    if (text) this.text = text;
    if (this.text === "") throw new Error("no words to synthesize");
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const media = await new Promise(resolve => {
      const track = stream.getAudioTracks()[0];
      this.mediaStream_.addTrack(track);
      // resolve with the active `MediaStream` immediately when requested
      if (this.dataType === "mediaStream") {
        resolve({ tts: this, data: this.mediaStream_ });
      }
      this.mediaRecorder.ondataavailable = event => {
        if (event.data.size > 0) {
          this.chunks.push(event.data);
        }
      };
      this.mediaRecorder.onstop = () => {
        track.stop();
        this.mediaStream_.getAudioTracks()[0].stop();
        this.mediaStream_.removeTrack(track);
        console.log(`Completed recording ${this.utterance.text}`, this.chunks);
        resolve(this);
      };
      this.mediaRecorder.start();
      this.utterance.onstart = () => {
        console.log(`Starting recording SpeechSynthesisUtterance ${this.utterance.text}`);
      };
      this.utterance.onend = () => {
        this.mediaRecorder.stop();
        console.log(`Ending recording SpeechSynthesisUtterance ${this.utterance.text}`);
      };
      this.speechSynthesis.speak(this.utterance);
    });
    return media;
  }
  async blob() {
    if (!this.chunks.length) throw new Error("no data to return");
    return new Blob(this.chunks, { type: this.mimeType });
  }
  async arrayBuffer(blob) {
    if (!this.chunks.length) throw new Error("no data to return");
    const arrayBuffer = await new Promise(resolve => {
      const reader = new FileReader();
      reader.onload = e => resolve(reader.result);
      // `Blob()` takes an array of parts
      reader.readAsArrayBuffer(blob
        ? new Blob([blob], { type: blob.type })
        : new Blob(this.chunks, { type: this.mimeType }));
    });
    return arrayBuffer;
  }
  async audioBuffer() {
    if (!this.chunks.length) throw new Error("no data to return");
    const ab = await this.arrayBuffer();
    return this.audioContext.decodeAudioData(ab);
  }
  async mediaStream() {
    // the active `MediaStream` is resolved within `start()` when
    // `dataType` is "mediaStream"
  }
  async mediaSource() {
    if (!this.chunks.length) throw new Error("no data to return");
    const ab = await this.arrayBuffer();
    const mediaSource = await new Promise((resolve, reject) => {
      this.mediaSource_.onsourceended = () => resolve(this.mediaSource_);
      this.mediaSource_.onsourceopen = () => {
        if (MediaSource.isTypeSupported(this.mimeType)) {
          const sourceBuffer = this.mediaSource_.addSourceBuffer(this.mimeType);
          sourceBuffer.mode = "sequence";
          sourceBuffer.onupdateend = () => this.mediaSource_.endOfStream();
          sourceBuffer.appendBuffer(ab);
        } else {
          reject(new Error(`${this.mimeType} is not supported`));
        }
      };
      this.audioNode.src = URL.createObjectURL(this.mediaSource_);
    });
    return mediaSource;
  }
  async readableStream(size = 1024, rsOptions = {}) {
    if (!this.chunks.length) throw new Error("no data to return");
    const src = this.chunks.slice(0);
    const chunk = size;
    return new ReadableStream({
      start(controller) {
        console.log(src.length);
        controller.enqueue(src.splice(0, chunk));
      },
      pull(controller) {
        // close the stream once every chunk has been enqueued
        if (src.length === 0) {
          controller.close();
          return;
        }
        controller.enqueue(src.splice(0, chunk));
      }
    }, rsOptions);
  }
}
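To surface the Firefox `TrackStartError` noted above as a value rather than an uncaught promise rejection, capture could be probed once before constructing the recorder; `probeAudioCapture()` below is a hypothetical helper, a minimal sketch, not part of the class:

// Hypothetical helper, not part of SpeechSynthesisRecorder: request
// `getUserMedia()` once and release the device, so a capture failure
// surfaces as `false` instead of an uncaught rejection inside `start()`
async function probeAudioCapture() {
  try {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    // release the capture device immediately; this is only a probe
    stream.getTracks().forEach(track => track.stop());
    return true;
  } catch (err) {
    // Firefox 53 reports `TrackStartError` here when RecordStream from
    // Monitor of Built-in Audio Analog Stereo is set
    console.log(err.name, err.message);
    return false;
  }
}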
Usage:

async function ttsRecorder(text, utteranceOptions, recorderOptions, dataType = void 0) {
  if (dataType === undefined) throw new TypeError("dataType is undefined");
  const recorder = new SpeechSynthesisRecorder(text, utteranceOptions, recorderOptions, dataType);
  const tts = await recorder.start();
  // `start()` already resolved with `{tts, data}` when `dataType` is "mediaStream"
  if (dataType === "mediaStream") return tts;
  const data = await tts[dataType]();
  return { tts, data };
}

let ttsRecording = ttsRecorder("The revolution will not be televised", {
  voice: "english-us espeak",
  lang: "en-US",
  pitch: .75,
  rate: 1
}, {
  mimeType: "audio/webm; codecs=opus"
}, /* `dataType` */ "mediaStream");

/* `dataType` :
   "blob" : Blob,
   "arrayBuffer" : ArrayBuffer,
   "audioBuffer" : AudioBuffer,
"mediaSource" : MediaSource, "mediaStream" : MediaStream, "readableStream" : ReadableStream */ ttsRecording .then(({tts, data}) => { // do stuff with `ArrayBuffer`, `AudioBuffer`, `Blob`, // `MediaSource`, `MediaStream`, `ReadableStream`; // for example, play audio, download audio console.log(tts, data); // `tts` : `SpeechSynthesisRecorder` instance, `data` : audio as `dataType` /* // `data` : `MediaSource` tts.audioNode.srcObj = data; tts.audioNode.title = tts.utterance.text; tts.audioNode.onloadedmetadata = () => { console.log(tts.audioNode.duration); tts.audioNode.play(); } */ /* // `data` : `ArrayBuffer` tts.audioNode.srcObj = data; tts.audioNode.title = tts.utterance.text; tts.audioNode.onloadedmetadata = () => { console.log(tts.audioNode.duration); tts.audioNode.play(); } */ /* // `data` : `AudioBuffer` let source = tts.audioContext.createBufferSource(); source.buffer = ab; source.connect(tts.audioContext.destination); source.start() */ /* // `data` : `Blob` tts.audioNode.src = URL.createObjectURL(blob); tts.audioNode.title = tts.utterance.text; tts.audioNode.onloadedmetadata = () => { console.log(tts.audioNode.duration); tts.audioNode.play(); } */ /* // `data` : `ReabableStream` data.getReader().read().then(d => { // do stuff with stream tts.audioNode.src = URL.createObjectURL(d.value[0]) }) */ /* // `data` : `MediaStream` // do stuff with active `MediaStream` }) .catch(err => console.log(err)) Possible improvements: Not using navigator.mediaDevices.getUserMedia() to get audio playback; other. Github https://github.com/guest271314/SpeechSynthesisRecorder
Received on Sunday, 18 June 2017 02:18:59 UTC