Temat: nagrywanie WAV i wysyłanie do IBM Watson Speech-to-Text. Próbuję nagrać dźwięk i natychmiast wysłać go do IBM Watson Speech-to-Text w celu transkrypcji. Przetestowałem Watsona z plikiem WAV załadowanym z dysku i to zadziałało. Przetestowałem też nagrywanie z mikrofonu z zapisem na dysk — to również działa dobrze.
Ale kiedy próbuję nagrywać dźwięk za pomocą NAudio WaveIn i wysyłać go na bieżąco, wynik z Watsona jest pusty, tak jakby nie było żadnego dźwięku.
Czy ktoś może rzucić na to trochę światła albo ma jakieś pomysły?
/// <summary>
/// Connects a WebSocket to the speech-to-text endpoint, sends the opening message,
/// waits until <c>HandleResults</c> returns, and then starts recording.
/// </summary>
/// <remarks>
/// This method is <c>async void</c>, so the caller cannot await it or observe its exceptions.
/// </remarks>
private async void StartHere()
{
    // NOTE(review): this local shadows the `ws` that Stop, Close and WaveIn_DataAvailable
    // reference; confirm that member is assigned somewhere, otherwise those methods
    // operate on a different (possibly null) socket.
    var ws = new ClientWebSocket();
    ws.Options.Credentials = new NetworkCredential("*****", "*****");
    await ws.ConnectAsync(new Uri("wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize?model=en-US_NarrowbandModel"), CancellationToken.None);

    // Await both operations instead of blocking the calling thread with Task.WaitAll:
    // blocking inside an async method can deadlock when a synchronization context is present.
    await Task.WhenAll(
        ws.SendAsync(openingMessage, WebSocketMessageType.Text, true, CancellationToken.None),
        HandleResults(ws));

    Record();
}
/// <summary>
/// Creates a <c>WaveInEvent</c> on device 0 with 50 ms buffers using the shared
/// <c>format</c>, wires up the data/stop handlers, and starts recording.
/// </summary>
public void Record()
{
    var waveIn = new WaveInEvent
    {
        BufferMilliseconds = 50,
        DeviceNumber = 0,
        WaveFormat = format
    };

    // Subscribe via method groups so the compiler infers each event's delegate type.
    // The non-generic EventHandler cannot wrap a handler whose second parameter is
    // WaveInEventArgs, so `new EventHandler(WaveIn_DataAvailable)` does not compile.
    waveIn.DataAvailable += WaveIn_DataAvailable;
    waveIn.RecordingStopped += WaveIn_RecordingStopped;

    waveIn.StartRecording();
}
/// <summary>
/// Sends the closing message to the service as a single, final text frame.
/// </summary>
/// <remarks>
/// The method is marked <c>async</c> because <c>await</c> is only legal inside an async
/// method. The return type stays <c>void</c> so existing call sites keep compiling; as a
/// consequence the call returns before the send has completed and the caller cannot
/// observe a send failure.
/// </remarks>
public async void Stop()
{
    await ws.SendAsync(closingMessage, WebSocketMessageType.Text, true, CancellationToken.None);
}
/// <summary>
/// Starts a normal-closure handshake on the WebSocket (status description "Close")
/// and blocks the calling thread until that handshake task finishes.
/// </summary>
public void Close()
{
    var pendingClose = ws.CloseAsync(
        WebSocketCloseStatus.NormalClosure,
        "Close",
        CancellationToken.None);

    // Synchronous wait: this method does not return until the close task has completed.
    pendingClose.Wait();
}
/// <summary>
/// Forwards each captured audio buffer to the service as a binary WebSocket message.
/// </summary>
/// <param name="sender">The recorder that raised the event.</param>
/// <param name="e">Event data carrying the capture buffer and the number of valid bytes in it.</param>
/// <remarks>
/// Declared <c>async void</c> (acceptable for an event handler) because <c>await</c> is
/// only legal inside an async method.
/// </remarks>
private async void WaveIn_DataAvailable(object sender, WaveInEventArgs e)
{
    // Nothing was captured in this callback; do not send an empty frame.
    if (e.BytesRecorded <= 0)
    {
        return;
    }

    // Send only the bytes that were actually recorded. e.Buffer may be larger than
    // e.BytesRecorded, and the tail would otherwise be transmitted as bogus audio.
    var chunk = new ArraySegment<byte>(e.Buffer, 0, e.BytesRecorded);
    await ws.SendAsync(chunk, WebSocketMessageType.Binary, true, CancellationToken.None);
}
/// <summary>
/// Reads messages from the socket, prints each one to the console, and returns when
/// either a close frame arrives or a message satisfies <c>IsDelimeter</c>.
/// </summary>
/// <param name="ws">The connected WebSocket to read from.</param>
/// <returns>A task that completes when the receive loop ends.</returns>
/// <remarks>
/// Messages are assembled in a fixed 1024-byte buffer. If a message fills the buffer
/// before its final fragment arrives, the socket is closed with
/// <c>InvalidPayloadData</c> and the method returns.
/// </remarks>
private async Task HandleResults(ClientWebSocket ws)
{
    var buffer = new byte[1024];
    while (true)
    {
        // ArraySegment is generic; the element type must be spelled out for this to compile.
        var segment = new ArraySegment<byte>(buffer);
        var result = await ws.ReceiveAsync(segment, CancellationToken.None);
        if (result.MessageType == WebSocketMessageType.Close)
        {
            return;
        }

        int count = result.Count;

        // A message may arrive in several fragments; keep appending until the last one.
        while (!result.EndOfMessage)
        {
            if (count >= buffer.Length)
            {
                await ws.CloseAsync(WebSocketCloseStatus.InvalidPayloadData, "That's too long", CancellationToken.None);
                return;
            }

            // Receive the next fragment into the unused tail of the buffer.
            segment = new ArraySegment<byte>(buffer, count, buffer.Length - count);
            result = await ws.ReceiveAsync(segment, CancellationToken.None);
            count += result.Count;
        }

        var message = Encoding.UTF8.GetString(buffer, 0, count);
        // you'll probably want to parse the JSON into a useful object here,
        // see ServiceState and IsDelimeter for a light-weight example of that.
        Console.WriteLine(message);
        if (IsDelimeter(message))
        {
            return;
        }
    }
}
/// <summary>
/// Deserializes <paramref name="json"/> as a <c>ServiceState</c> and reports whether
/// its <c>state</c> member equals "listening".
/// </summary>
/// <param name="json">The JSON text of one received message.</param>
/// <returns><c>true</c> when the deserialized state is "listening"; otherwise <c>false</c>.</returns>
private bool IsDelimeter(String json)
{
    DataContractJsonSerializer ser = new DataContractJsonSerializer(typeof(ServiceState));

    // Dispose the stream deterministically once deserialization is done.
    using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(json)))
    {
        ServiceState obj = (ServiceState) ser.ReadObject(stream);

        // A payload that deserializes to null is simply not a delimiter.
        return obj != null && obj.state == "listening";
    }
}
/// <summary>
/// Minimal data contract used to read the <c>state</c> member out of a JSON message.
/// </summary>
[DataContract]
internal class ServiceState
{
    /// <summary>State value carried by the message; an empty string on a newly constructed instance.</summary>
    [DataMember] public string state = string.Empty;
}
Edit: Próbowałem też wysłać „nagłówek” WAV przed wywołaniem StartRecording, w ten sposób:
// Subscribe via method groups so the compiler infers each event's delegate type; the
// non-generic EventHandler cannot wrap a handler that takes WaveInEventArgs.
waveIn.DataAvailable += WaveIn_DataAvailable;
waveIn.RecordingStopped += WaveIn_RecordingStopped;
/* Send WAV "header" first */
// NOTE(review): the await below requires the enclosing method to be async — confirm.
using (var stream = new MemoryStream())
{
    using (var writer = new BinaryWriter(stream, Encoding.UTF8))
    {
        writer.Write(Encoding.UTF8.GetBytes("RIFF"));
        writer.Write(0); // placeholder
        writer.Write(Encoding.UTF8.GetBytes("WAVE"));
        writer.Write(Encoding.UTF8.GetBytes("fmt "));
        format.Serialize(writer);
        // A "fact" chunk (4-byte payload, written as zero) is emitted only for non-PCM formats.
        if (format.Encoding != WaveFormatEncoding.Pcm && format.BitsPerSample != 0)
        {
            writer.Write(Encoding.UTF8.GetBytes("fact"));
            writer.Write(4);
            writer.Write(0);
        }
        writer.Write(Encoding.UTF8.GetBytes("data"));
        // NOTE(review): the data chunk length is written as 0; a consumer that honours
        // this length may treat the stream as containing no audio — confirm what the
        // service expects for streamed input.
        writer.Write(0);
        writer.Flush();
    }
    // MemoryStream.ToArray still works after the writer has closed the stream.
    byte[] header = stream.ToArray();
    // ArraySegment is generic; the element type must be spelled out for this to compile.
    await ws.SendAsync(new ArraySegment<byte>(header), WebSocketMessageType.Binary, true, CancellationToken.None);
}
/* End WAV header */
waveIn.StartRecording();