- From: guest271314 via GitHub <sysbot+gh@w3.org>
- Date: Sun, 13 Sep 2020 04:17:23 +0000
- To: public-webrtc-logs@w3.org
@josephrocca There is no simple way to get the direct output of a speech synthesis engine other than calling the engine itself and processing the raw audio output. Technically, a socket connection can be established to `speech-dispatcher` (see its options below, and the connection sketch that follows them).
No specification, including Media Capture and Streams, Audio Output Devices API, Web Audio API, or Web Speech API (see [MediaStream, ArrayBuffer, Blob audio result from speak() for recording?](https://lists.w3.org/Archives/Public/public-speech-api/2017Jun/0000.html) and https://github.com/WebAudio/web-audio-api-v2/issues/10#issuecomment-682259080), defines a means to access or capture speech synthesis engine output directly.
```
$ speech-dispatcher -h
Speech Dispatcher -- Common interface for Speech Synthesis (GNU GPL)
Usage: speech-dispatcher [-{d|s}] [-l {1|2|3|4|5}] [-c com_method] [-S socket_path] [-p port] [-t timeout] | [-v] | [-h]
Options:
  -d, --run-daemon      Run as a daemon
  -s, --run-single      Run as single application
  -a, --spawn           Start only if autospawn is not disabled
  -l, --log-level       Set log level (between 1 and 5)
  -L, --log-dir         Set path to logging
  -c, --communication-method
                        Communication method to use ('unix_socket'
                        or 'inet_socket')
  -S, --socket-path     Socket path to use for 'unix_socket' method
                        (filesystem path or 'default')
  -p, --port            Specify a port number for 'inet_socket' method
  -t, --timeout         Set time in seconds for the server to wait before it
                        shuts down, if it has no clients connected
  -P, --pid-file        Set path to pid file
  -C, --config-dir      Set path to configuration
  -m, --module-dir      Set path to modules
  -v, --version         Report version of this program
  -D, --debug           Output debugging information into $TMPDIR/speechd-debug
                        if TMPDIR is exported, otherwise to /tmp/speechd-debug
  -h, --help            Print this info
Please report bugs to speechd-discuss@nongnu.org
```
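As a minimal sketch of that socket approach, assuming Node.js, the default `unix_socket` communication method, and a socket at `$XDG_RUNTIME_DIR/speech-dispatcher/speechd.sock` (the path varies by version and distribution), an SSIP `SPEAK` command can be sent directly. Note this only queues speech for playback through the server; it still does not hand the raw audio back to the caller, which is the crux of the issue.
```
// Minimal SSIP client sketch; assumes Node.js and the default
// unix_socket path, which varies by version and distribution
const net = require('net');
const socketPath = `${process.env.XDG_RUNTIME_DIR}/speech-dispatcher/speechd.sock`;
const client = net.createConnection(socketPath, () => {
  // SSIP commands are CRLF-terminated; a message body ends with a lone "."
  client.write('SET SELF CLIENT_NAME user:test:main\r\n');
  client.write('SPEAK\r\n');
  client.write('Hello from SSIP.\r\n.\r\n');
});
client.on('data', (data) => console.log(data.toString())); // SSIP status replies
```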
Aside from more elaborate solutions that involve growing `WebAssembly.Memory` (https://github.com/WebAudio/web-audio-api-v2/issues/97) or streaming a monitor device from Nightly to Chromium (https://gist.github.com/guest271314/04a539c00926e15905b86d05138c113c), one solution is to use a local server. There are then ways to get the resulting `MediaStreamTrack` from localhost to any origin. Note that capturing a monitor device captures all system audio output, not only the output of the `speech-dispatcher` speech synthesis module; a sketch of selecting such a device follows.
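A rough sketch of the monitor-device selection, assuming a browser and OS that expose monitor sources as audio input devices (e.g. Firefox on Linux with PulseAudio) and that microphone permission was already granted so device labels are populated:
```
// Sketch: select a PulseAudio "Monitor of ..." source as a capture device.
// Assumes the browser lists monitor sources as 'audioinput' devices and
// that permission was granted so labels are readable.
(async () => {
  const devices = await navigator.mediaDevices.enumerateDevices();
  const monitor = devices.find(
    (d) => d.kind === 'audioinput' && /monitor/i.test(d.label)
  );
  if (!monitor) return console.warn('No monitor device exposed');
  const stream = await navigator.mediaDevices.getUserMedia({
    audio: { deviceId: { exact: monitor.deviceId } },
  });
  const [track] = stream.getAudioTracks();
  console.log(track.label); // carries all system audio, not only speech
})();
```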
You can use any language for the server. Here we use `php` with the `espeak-ng` speech synthesis engine.
speak.php
```
<?php
if (isset($_POST["speak"])) {
  header("Access-Control-Allow-Origin: http://localhost:8000");
  header("Content-Type: application/octet-stream");
  $input = urldecode($_POST["speak"]);
  $options = urldecode($_POST["options"]);
  // passthru() writes the WAV output of espeak-ng straight to the response;
  // escapeshellarg() guards the user-supplied text against shell injection
  passthru("espeak-ng --stdout " . $options . " " . escapeshellarg($input));
  exit();
}
```
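Serving both the page and `speak.php` with PHP's built-in web server, e.g. `php -S localhost:8000`, keeps the relative `fetch('')` calls below pointed at the script and matches the `Access-Control-Allow-Origin` value above.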
Using `MediaStreamAudioSourceNode`
```
// https://stackoverflow.com/a/35248852
// Convert 16-bit PCM samples (read as unsigned) to 32-bit floats in [-1, 1]
function int16ToFloat32(inputArray) {
  const output = new Float32Array(inputArray.length);
  for (let i = 0; i < output.length; i++) {
    const int = inputArray[i];
    // If the high bit is on, then it is a negative number, and actually counts backwards.
    const float = int >= 0x8000 ? -(0x10000 - int) / 0x8000 : int / 0x7fff;
    output[i] = float;
  }
  return output;
}
var fd = new FormData();
fd.append('options', '-v Storm');
fd.append('speak', `Now watch. Um, this is how science works.
One researcher comes up with a result.
And that is not the truth. No, no.
A scientific emergent truth is not the
result of one experiment. What has to
happen is somebody else has to verify
it. Preferably a competitor. Preferably
someone who doesn't want you to be correct.
- Neil deGrasse Tyson, May 3, 2017 at 92nd Street Y`);
fetch('', { method: 'post', body: fd })
  .then((r) => r.arrayBuffer())
  .then(async (arrayBuffer) => {
    // Skip the 44-byte WAV header; the remainder is 16-bit PCM
    const uint16 = new Uint16Array(arrayBuffer.slice(44));
    const floats = int16ToFloat32(uint16);
    // espeak-ng outputs mono audio at 22050 Hz by default
    const ac = new AudioContext({ sampleRate: 22050 });
    const buffer = new AudioBuffer({
      numberOfChannels: 1,
      length: floats.length, // length is in sample frames, not bytes
      sampleRate: ac.sampleRate,
    });
    console.log(floats);
    buffer.getChannelData(0).set(floats);
    const absn = new AudioBufferSourceNode(ac, { buffer });
    // cannot be connected directly to AudioContext.destination
    const msd = new MediaStreamAudioDestinationNode(ac);
    const { stream: mediaStream } = msd;
    const source = new MediaStreamAudioSourceNode(ac, { mediaStream });
    absn.connect(msd);
    absn.start();
    source.connect(ac.destination);
  });
```
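Instead of hard-coding the 44-byte offset and the 22050 Hz rate, the RIFF/WAVE header can be parsed from the response itself; a minimal sketch, assuming the canonical 44-byte PCM header layout (the `parseWavHeader` helper is hypothetical):
```
// Sketch: read format fields from a canonical 44-byte RIFF/WAVE PCM
// header instead of hard-coding them; all values are little-endian
function parseWavHeader(arrayBuffer) {
  const view = new DataView(arrayBuffer);
  return {
    numberOfChannels: view.getUint16(22, true),
    sampleRate: view.getUint32(24, true),
    bitsPerSample: view.getUint16(34, true),
    dataOffset: 44, // assumes no extra chunks before "data"
  };
}
```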
Using `AudioWorkletNode`, with the whole `Float32Array` passed once at construction. Alternatively, we could write the streamed `STDOUT` into a single `ArrayBuffer` or `SharedArrayBuffer` using `Response.body.getReader()` and read from that memory in `process()`; a sketch of that variant follows the example below.
```
// https://stackoverflow.com/a/35248852
// Convert 16-bit PCM samples (read as unsigned) to 32-bit floats in [-1, 1]
function int16ToFloat32(inputArray) {
  const output = new Float32Array(inputArray.length);
  for (let i = 0; i < output.length; i++) {
    const int = inputArray[i];
    // If the high bit is on, then it is a negative number, and actually counts backwards.
    const float = int >= 0x8000 ? -(0x10000 - int) / 0x8000 : int / 0x7fff;
    output[i] = float;
  }
  return output;
}
var fd = new FormData();
fd.append('options', '-v Storm');
fd.append('speak', `Now watch. Um, this is how science works.
One researcher comes up with a result.
And that is not the truth. No, no.
A scientific emergent truth is not the
result of one experiment. What has to
happen is somebody else has to verify
it. Preferably a competitor. Preferably
someone who doesn't want you to be correct.
- Neil deGrasse Tyson, May 3, 2017 at 92nd Street Y`);
fetch('', { method: 'post', body: fd })
  .then((r) => r.arrayBuffer())
  .then(async (arrayBuffer) => {
    // Skip the 44-byte WAV header; the remainder is 16-bit PCM
    const uint16 = new Uint16Array(arrayBuffer.slice(44));
    const floats = int16ToFloat32(uint16);
    const ac = new AudioContext({ sampleRate: 22050 });
    console.log(ac.state);
    // Stub so the class below parses in this scope; the real
    // AudioWorkletProcessor exists in the AudioWorkletGlobalScope
    // where the stringified class is eventually evaluated
    class AudioWorkletProcessor {}
    class SpeechSynthesisStream extends AudioWorkletProcessor {
      constructor(options) {
        super(options);
        Object.assign(this, options.processorOptions);
        globalThis.console.log(this.floats);
        this.port.postMessage({ start: (this.start = !this.start) });
      }
      endOfStream() {
        this.port.postMessage({
          ended: true,
          currentTime, // globals in AudioWorkletGlobalScope
          currentFrame,
          readOffset: this.readOffset,
        });
      }
      process(inputs, outputs) {
        const [channel] = outputs.flat();
        if (this.readOffset >= this.floats.length) {
          console.log(this);
          this.endOfStream();
          return false; // release the processor
        }
        // Copy the next render quantum (128 samples), zero-padding the tail
        const data = Float32Array.from({ length: 128 }, () => {
          const index = this.readOffset;
          if (index >= this.floats.length) return 0;
          return this.floats[this.readOffset++];
        });
        channel.set(data);
        return true;
      }
    }
    // register processor in AudioWorkletGlobalScope: stringify the class
    // into module source code loaded below via a Blob URL
    function registerProcessor(name, processorCtor) {
      return `${processorCtor};\nregisterProcessor('${name}', ${processorCtor.name});`;
    }
    const worklet = URL.createObjectURL(
      new Blob(
        [registerProcessor('speech-synthesis-stream', SpeechSynthesisStream)],
        { type: 'text/javascript' }
      )
    );
    ac.onstatechange = (e) => console.log(ac.state);
    await ac.audioWorklet.addModule(worklet);
    const aw = new AudioWorkletNode(ac, 'speech-synthesis-stream', {
      numberOfInputs: 1,
      numberOfOutputs: 1,
      channelCount: 1,
      processorOptions: {
        readOffset: 0,
        ended: false,
        start: false,
        floats,
      },
    });
    aw.onprocessorerror = (e) => {
      console.error(e);
      console.trace();
    };
    const msd = new MediaStreamAudioDestinationNode(ac);
    const { stream } = msd;
    const [track] = stream.getAudioTracks();
    aw.connect(msd);
    aw.connect(ac.destination);
    // const recorder = new MediaRecorder(stream);
    // recorder.ondataavailable = e => console.log(URL.createObjectURL(e.data));
    if (ac.state === 'running') {
      await ac.suspend();
    }
    aw.port.onmessage = async (e) => {
      console.log(e.data, ac.state);
      if (e.data.start && ac.state === 'suspended') {
        await ac.resume();
        // recorder.start();
      } else {
        // if (recorder.state === 'recording') recorder.stop();
        track.stop();
        aw.disconnect();
        msd.disconnect();
        await ac.close();
        console.log(track);
      }
    };
  });
```
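A rough sketch of the streaming variant mentioned above, reusing the `fd` `FormData` from the example: the main thread appends decoded samples to a `SharedArrayBuffer` as chunks arrive, while `process()` reads behind the write offset. It assumes cross-origin isolation (required for `SharedArrayBuffer`) and a little-endian platform; `MAX_SAMPLES` and the `writeIndex` slot are illustrative choices, not part of any API.
```
// Sketch: stream STDOUT into shared memory instead of passing one array.
// MAX_SAMPLES is an illustrative upper bound, not derived from the response.
(async () => {
  const MAX_SAMPLES = 22050 * 60; // up to one minute at 22050 Hz
  const sab = new SharedArrayBuffer(4 + MAX_SAMPLES * 4);
  const writeIndex = new Int32Array(sab, 0, 1); // samples written so far
  const samples = new Float32Array(sab, 4);
  const response = await fetch('', { method: 'post', body: fd });
  const reader = response.body.getReader();
  let leftover = new Uint8Array(0);
  let headerSkipped = false;
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    // Rejoin chunk boundaries so no 16-bit sample is split in half
    let bytes = new Uint8Array(leftover.length + value.length);
    bytes.set(leftover);
    bytes.set(value, leftover.length);
    if (!headerSkipped) {
      if (bytes.length < 44) { leftover = bytes; continue; }
      bytes = bytes.subarray(44); // skip the 44-byte WAV header
      headerSkipped = true;
    }
    const whole = bytes.length - (bytes.length % 2);
    const pcm = new Int16Array(
      bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + whole)
    );
    let w = Atomics.load(writeIndex, 0);
    for (let i = 0; i < pcm.length && w < MAX_SAMPLES; i++) {
      samples[w++] = pcm[i] / 0x8000; // 16-bit PCM to [-1, 1)
    }
    Atomics.store(writeIndex, 0, w);
    leftover = bytes.subarray(whole);
  }
  // Pass {sab} via processorOptions; in process(), copy from samples
  // between this.readOffset and Atomics.load(writeIndex, 0), returning
  // true until the stream has ended and the read offset catches up.
})();
```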
--
GitHub Notification of comment by guest271314
Please view or discuss this issue at https://github.com/w3c/mediacapture-output/issues/102#issuecomment-691608499 using your GitHub account
--
Sent via github-notify-ml as configured in https://github.com/w3c/github-notify-ml-config
Received on Sunday, 13 September 2020 04:17:26 UTC