This article shows how to use C# in Unity3D to call XunFei (iFlytek) text-to-speech and speech-to-text. The two features are very similar, so once you have learned one, the other is straightforward. It is best to create a fresh Unity3D project for testing; otherwise you are very likely to run into all kinds of unrelated problems.
Preparation
First, obtain the following three values from the XunFei open platform (讯飞开放平台) console: APPID, APISecret, and APIKey.
Unity3D WebSocket interface
Create a new Unity3D project.
In Unity's top menu bar, open Window > Package Manager.
Under Packages: Unity Registry, select the Newtonsoft Json package and click Install, then wait for the installation to finish.
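If you prefer to declare the dependency by hand instead of using the Package Manager window, you can add it to Packages/manifest.json. This is only a minimal sketch; the version number shown here is an example, so use whatever version the Package Manager offers you:
{
  "dependencies": {
    "com.unity.nuget.newtonsoft-json": "3.2.1"
  }
}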
Create a new C# class: XunFeiManager (XunFeiManager.cs).
Open the script and replace its contents with the code below.
Three values in it must be replaced with the ones from your XunFei console: APPID, APISecret, and APIKey.
using System;
using System.Collections.Generic;
using System.Net.WebSockets;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;
using System.Threading;
using UnityEngine;
using Newtonsoft.Json.Linq;
using System.Linq;
public class XunFeiManager : MonoBehaviour
{
/// <summary>Singleton instance</summary>
public static XunFeiManager Instance;
/// <summary>Fixed value, do not change</summary>
private const string STTURL = "wss://iat-api.xfyun.cn/v2/iat";
/// <summary>Fixed value, do not change</summary>
private const string TTSURL = "wss://tts-api.xfyun.cn/v2/tts";
/// <summary>Replace with your own APPID!</summary>
private const string APPID = "********";
/// <summary>Replace with your own APISecret!</summary>
private const string APISecret = "********************************";
/// <summary>Replace with your own APIKey!</summary>
private const string APIKey = "********************************";
/// <summary>WebSocket client</summary>
private ClientWebSocket webSocket;
private void Awake()
{
//Initialize the singleton
Instance = this;
}
public async Task<string> SpeechToText(JObject request)
{
//Read the request data
byte[] bytes = Convert.FromBase64String(request["data"].ToString());
//Open the connection
await Connect(STTURL);
//Send the audio
await STTSendMessage(bytes);
//Receive the recognition result
string text = await STTReceiveMessage();
//Close the connection
await webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, string.Empty, CancellationToken.None);
//Build the response JSON
JObject response = new JObject();
response["text"] = text;
//Return the response
return response.ToString();
}
public async Task<string> TextToSpeech(JObject request)
{
//Read the request data
string text = request["text"].ToString();
string voice = request["voice"].ToString();
//Open the connection
await Connect(TTSURL);
//Send the text
await TTSSendMessage(text, voice);
//Receive the synthesized audio
string base64String = await TTSReceiveMessage();
//Close the connection
await webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, string.Empty, CancellationToken.None);
//Build the response JSON
JObject response = new JObject();
response["data"] = base64String;
//Return the response
return response.ToString();
}
/// <summary>
/// Connect to the XunFei API
/// </summary>
/// <returns></returns>
private async Task Connect(string url)
{
//Create a new ClientWebSocket
webSocket = new ClientWebSocket();
//Connect to the XunFei service over WebSocket
await webSocket.ConnectAsync(new Uri(GetUrl(url)), CancellationToken.None);
//await Console.Out.WriteLineAsync("XunFei WebSocket connected");
}
/// <summary>
/// XunFei speech-to-text: send the audio data
/// </summary>
private async Task STTSendMessage(byte[] bytes)
{
//Frame status: 0 = first frame, 1 = intermediate frame, 2 = last frame
int status;
int remainLength = bytes.Length;
//Send the audio in chunks
while (remainLength > 0)
{
byte[] currBytes;
if (remainLength > 1280)
{
status = remainLength == bytes.Length ? 0 : 1;
currBytes = new byte[1280];
Array.Copy(bytes, bytes.Length - remainLength, currBytes, 0, 1280);
remainLength -= 1280;
}
else
{
status = 2;
currBytes = new byte[remainLength];
Array.Copy(bytes, bytes.Length - remainLength, currBytes, 0, remainLength);
remainLength = 0;
}
JObject jsonData = STTCreateJsonData(status, currBytes);
byte[] messageBytes = Encoding.UTF8.GetBytes(jsonData.ToString());
await webSocket.SendAsync(messageBytes, WebSocketMessageType.Text, true, CancellationToken.None);
//Wait 20 ms between frames
await Task.Delay(20);
//Console.WriteLine("Sent frame: " + jsonData.ToString());
}
//Console.WriteLine("All audio data sent");
}
/// <summary>
/// XunFei speech-to-text: build the JSON payload to send
/// </summary>
/// <param name="status"></param>
/// <param name="bytes"></param>
/// <returns></returns>
private JObject STTCreateJsonData(int status, byte[] bytes)
{
JObject requestObj = new JObject();
JObject commonJson = new JObject();
commonJson["app_id"] = APPID;
requestObj["common"] = commonJson;
JObject bussinessJson = new JObject();
bussinessJson["language"] = "zh_cn";
bussinessJson["domain"] = "iat";
bussinessJson["accent"] = "mandarin";
bussinessJson["dwa"] = "wpgs";
requestObj["business"] = bussinessJson;
JObject dataJson = new JObject();
dataJson["status"] = status;
dataJson["format"] = "audio/L16;rate=16000";
dataJson["encoding"] = "raw";
dataJson["audio"] = Convert.ToBase64String(bytes);
requestObj["data"] = dataJson;
return requestObj;
}
/// <summary>
/// XunFei speech-to-text: receive the recognition result
/// </summary>
/// <returns></returns>
private async Task<string> STTReceiveMessage()
{
//Result status
int status = 0;
string finalText = string.Empty;
while (status != 2)
{
byte[] buffer = new byte[8 * 1024];
WebSocketReceiveResult webSocketReceiveResult = await webSocket.ReceiveAsync(buffer, CancellationToken.None);
string receivedMessage = Encoding.UTF8.GetString(buffer, 0, webSocketReceiveResult.Count);
//await Console.Out.WriteLineAsync("receivedMessage:" + receivedMessage);
finalText += STTParseMessage(receivedMessage, out status);
}
Debug.Log("讯飞语音转文本:" + finalText);
return finalText;
}
/// <summary>
/// XunFei speech-to-text: parse a received JSON message
/// </summary>
/// <param name="message"></param>
/// <param name="status"></param>
/// <returns></returns>
private string STTParseMessage(string message, out int status)
{
JObject jObject = JObject.Parse(message);
int code = (int)jObject["code"];
if (code != 0)//Failure
{
string errMsg = jObject["message"].ToString();
Debug.LogError("XunFei speech-to-text: failed to parse the JSON message, error: " + errMsg);
}
else//Success
{
string result = string.Empty;
foreach (JObject i in jObject["data"]["result"]["ws"])
{
foreach (JObject w in i["cw"])
{
result += w["w"].ToString();
}
}
//Result status flag:
//0: first block of the recognition result
//1: intermediate block
//2: last block
status = (int)jObject["data"]["status"];
return result;
}
//On error, report status 2 so the receive loop terminates instead of waiting forever
status = 2;
return string.Empty;
}
/// <summary>
/// XunFei text-to-speech: send the request
/// </summary>
private async Task TTSSendMessage(string text, string voice)
{
//Build the request JSON
JObject jsonData = TTSCreateJsonData(text, voice);
byte[] messageBytes = Encoding.UTF8.GetBytes(jsonData.ToString());
//Send the request
await webSocket.SendAsync(messageBytes, WebSocketMessageType.Text, true, CancellationToken.None);
}
/// <summary>
/// XunFei text-to-speech: build the JSON payload to send
/// </summary>
/// <param name="text"></param>
/// <param name="voice"></param>
/// <returns></returns>
private JObject TTSCreateJsonData(string text, string voice)
{
JObject requestObj = new JObject();
JObject commonJson = new JObject();
commonJson["app_id"] = APPID;
requestObj["common"] = commonJson;
JObject bussinessJson = new JObject();
bussinessJson["aue"] = "raw";
bussinessJson["vcn"] = voice;
bussinessJson["speed"] = 50;
bussinessJson["volume"] = 50;
bussinessJson["pitch"] = 50;
bussinessJson["tte"] = "UTF8";
requestObj["business"] = bussinessJson;
JObject dataJson = new JObject();
dataJson["status"] = 2;
dataJson["text"] = Convert.ToBase64String(Encoding.UTF8.GetBytes(text));
requestObj["data"] = dataJson;
return requestObj;
}
/// <summary>
/// XunFei text-to-speech: receive the synthesized audio
/// </summary>
/// <returns>The synthesized audio as a base64 string</returns>
private async Task<string> TTSReceiveMessage()
{
//Result status
int status = 0;
List<byte> bytes = new List<byte>();
while (status != 2)
{
bool receivedCompleted = false;
string receivedMessage = string.Empty;
while (!receivedCompleted)
{
byte[] buffer = new byte[8 * 1024];
WebSocketReceiveResult webSocketReceiveResult = await webSocket.ReceiveAsync(buffer, CancellationToken.None);
receivedMessage += Encoding.UTF8.GetString(buffer, 0, webSocketReceiveResult.Count);
//EndOfMessage reliably marks the end of a WebSocket message (the buffer-size check can fail when a message is an exact multiple of the buffer)
receivedCompleted = webSocketReceiveResult.EndOfMessage;
}
//await Console.Out.WriteLineAsync("receivedMessage:" + receivedMessage);
bytes.AddRange(Convert.FromBase64String(TTSParseMessage(receivedMessage, out status)).ToList());
//finalAudioBase64String += TTSParseMessage(receivedMessage, out status).TrimEnd('=');
}
string finalAudioBase64String = Convert.ToBase64String(bytes.ToArray());
//await Console.Out.WriteLineAsync("讯飞语音转文本:" + finalAudioBase64String);
return finalAudioBase64String;
}
/// <summary>
/// XunFei text-to-speech: parse a received JSON message
/// </summary>
/// <param name="message"></param>
/// <param name="status"></param>
/// <returns></returns>
private string TTSParseMessage(string message, out int status)
{
JObject jObject = JObject.Parse(message);
if (jObject["message"].ToString() == "success")
{
if (jObject["data"] != null)
{
if (jObject["data"]["audio"] != null)
{
if ((int)jObject["data"]["status"] == 2)
{
status = 2;
}
else
{
status = 1;
}
return jObject["data"]["audio"].ToString();
}
}
Debug.LogError("ERROR:TTSParseMessage失败,data为空");
status = 0;
return string.Empty;
}
else
{
Debug.LogError("ERROR:TTSParseMessage失败,错误消息:" + jObject["message"].ToString());
status = 0;
return string.Empty;
}
}
#region URL generation
private string GetUrl(string url)
{
Uri uri = new Uri(url);
//The official docs require the time in the UTC+0/GMT time zone, RFC1123 format (Thu, 01 Aug 2019 01:53:21 GMT).
string date = DateTime.UtcNow.ToString("r");
//Build the authorization string
string authorization = ComposeAuthUrl(uri, date);
//Assemble the final authenticated URL
string uriStr = $"{uri}?authorization={authorization}&date={date}&host={uri.Host}";
//Return the generated URL
return uriStr;
}
/// <summary>
/// Assemble the authorization string
/// </summary>
/// <param name="uri"></param>
/// <param name="date"></param>
/// <returns>The base64-encoded authorization string</returns>
private string ComposeAuthUrl(Uri uri, string date)
{
string signature; //The final encoded signature
string authorization_origin; //The raw authorization string
//The raw signature text
string signature_origin = "host: " + uri.Host + "\ndate: " + date + "\nGET " + uri.AbsolutePath + " HTTP/1.1";
//Sign signature_origin with HMAC-SHA256 using the APISecret
string signature_sha = HmacSHA256(signature_origin, APISecret);
signature = signature_sha;
string auth = "api_key=\"{0}\", algorithm=\"{1}\", headers=\"{2}\", signature=\"{3}\"";
//Parameters: APIKey, algorithm name, the signed headers (fixed to "host date request-line"), and the signature
authorization_origin = string.Format(auth, APIKey, "hmac-sha256", "host date request-line", signature);
return ToBase64String(authorization_origin);
}
/// <summary>
/// HMAC-SHA256 signing
/// </summary>
/// <param name="secret">The text to sign</param>
/// <param name="signKey">The signing key</param>
/// <returns></returns>
private static string HmacSHA256(string secret, string signKey)
{
string signRet = string.Empty;
using (HMACSHA256 mac = new HMACSHA256(Encoding.UTF8.GetBytes(signKey)))
{
byte[] hash = mac.ComputeHash(Encoding.UTF8.GetBytes(secret));
signRet = Convert.ToBase64String(hash);
}
return signRet;
}
/// <summary>
/// Convert a UTF-8 string to a base64 string
/// </summary>
/// <param name="value"></param>
/// <returns></returns>
private static string ToBase64String(string value)
{
if (value == null || value == "")
{
return "";
}
byte[] bytes = Encoding.UTF8.GetBytes(value);
return Convert.ToBase64String(bytes);
}
#endregion
}
Back in the Unity editor, create an empty GameObject in the scene, name it XunFeiManager, and attach the script you just created (XunFeiManager.cs).
Text to speech
Next, create another script, SpeechTest.cs, create an empty GameObject named SpeechTest in the scene, and attach the script (SpeechTest.cs) to it.
Then add an Audio Source component to that same GameObject (SpeechTest).
Open the script (SpeechTest.cs) and replace its contents with the following code:
using Newtonsoft.Json.Linq;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using UnityEngine;
using UnityEngine.Networking;
[RequireComponent(typeof(AudioSource))]
public class SpeechTest : MonoBehaviour
{
/// <summary>The AudioSource component on this GameObject</summary>
[HideInInspector]
public AudioSource audioSource;
/// <summary>The audio recorded from the user</summary>
private AudioClip recordedAudioClip;
private void Start()
{
//Get the AudioSource component
audioSource = GetComponent<AudioSource>();
//Test call [important code]. This runs asynchronously; the code inside => { } executes only after the result comes back.
SendTextToSpeechMsg("你好啊,我是讯飞语音助手!", audioClip =>
{
audioSource.clip = audioClip;
audioSource.Play();
});
}
#region Speech-to-text test helpers
private bool recording = false;
/// <summary>
/// Called when the record button is clicked
/// </summary>
public void OnButtonClick()
{
if (recording == false)
{
recording = true;
//Start recording [important code]
StartRecord();
}
else
{
recording = false;
//Stop recording [important code]
EndRecord((text, _) =>
{
Debug.Log($"XunFei speech-to-text succeeded! Text: {text}");
});
}
}
#endregion
#region XunFei text-to-speech
/// <summary>
/// Send a request to XunFei and wait for the result
/// </summary>
/// <param name="text">The text to synthesize</param>
/// <param name="callback">Callback invoked with the synthesized AudioClip</param>
public void SendTextToSpeechMsg(string text, Action<AudioClip> callback)
{
//Build the JSON request
JObject jObject = new JObject();
jObject["text"] = text;
//Change this to the voice you want; the available voices are listed in the XunFei console
jObject["voice"] = "xiaoyan";
//Send the request
StartCoroutine(SendTextToSpeechMsgCoroutine(jObject, callback));
}
/// <summary>
/// Coroutine that sends the request to XunFei
/// </summary>
/// <param name="message"></param>
/// <param name="callback">Callback invoked once the response is received</param>
/// <returns></returns>
private IEnumerator SendTextToSpeechMsgCoroutine(JObject message, Action<AudioClip> callback)
{
//Start the request
Task<string> resultJson = XunFeiManager.Instance.TextToSpeech(message);
//Wait for the response
yield return new WaitUntil(() => resultJson.IsCompleted);
//Response received successfully
if (resultJson.IsCompletedSuccessfully == true)
{
//Parse the JSON response
JObject obj = JObject.Parse(resultJson.Result);
//Get the audio data (base64 string)
string text = obj["data"].ToString();
//Decode the audio data
float[] audioData = BytesToFloat(Convert.FromBase64String(text));
if (audioData.Length == 0)//Text-to-speech failed
{
Debug.Log($"XunFei text-to-speech failed; the input text may be empty or invalid, so the synthesized audio length is 0. Response: {resultJson.Result}");
//Failure callback
callback.Invoke(null);
//Stop here instead of building a zero-length AudioClip
yield break;
}
//Build the AudioClip
AudioClip audioClip = AudioClip.Create("SynthesizedAudio", audioData.Length, 1, 16000, false);
audioClip.SetData(audioData, 0);
//Debug.Log("XunFei text-to-speech succeeded");
//Success callback
callback.Invoke(audioClip);
}
else
{
Debug.Log($"讯飞文本转语音消息发送失败,错误信息:{resultJson.Result}");
//失败回调
callback.Invoke(null);
}
}
/// <summary>
/// Convert a byte[] array into the float[] format an AudioClip can read
/// </summary>
/// <param name="byteArray"></param>
/// <returns></returns>
private static float[] BytesToFloat(byte[] byteArray)
{
float[] sounddata = new float[byteArray.Length / 2];
for (int i = 0; i < sounddata.Length; i++)
{
sounddata[i] = bytesToFloat(byteArray[i * 2], byteArray[i * 2 + 1]);
}
return sounddata;
}
private static float bytesToFloat(byte firstByte, byte secondByte)
{
// convert two bytes to one short (little endian)
//Adjust the byte order for little-endian vs big-endian platforms
short s;
if (BitConverter.IsLittleEndian)
s = (short)((secondByte << 8) | firstByte);
else
s = (short)((firstByte << 8) | secondByte);
// convert to range from -1 to (just below) 1
return s / 32768.0F;
}
#endregion
#region XunFei speech-to-text
/// <summary>
/// Start recording
/// </summary>
public void StartRecord()
{
//Start recording audio (40 seconds at most), at 16 kHz to match the format sent to the XunFei API
recordedAudioClip = Microphone.Start(null, true, 40, 16000);
}
/// <summary>
/// Stop recording
/// </summary>
/// <param name="speechToTextCallback">Callback invoked after the speech has been converted to text</param>
public void EndRecord(Action<string, AudioClip> speechToTextCallback)
{
//Recording was cancelled
if (speechToTextCallback == null) return;
//Stop recording
Microphone.End(null);
//Trim the silent parts
recordedAudioClip = TrimSilence(recordedAudioClip, 0.01f);
//Send the audio
SendSpeechToTextMsg(recordedAudioClip, text =>
{
//Callback with the recognized text and the recorded clip
speechToTextCallback.Invoke(text, recordedAudioClip);
});
}
/// <summary>
/// Send a request to XunFei and wait for the result
/// </summary>
/// <param name="audioClip">The recorded audio</param>
/// <param name="callback">Callback invoked with the recognized text</param>
public void SendSpeechToTextMsg(AudioClip audioClip, Action<string> callback)
{
byte[] bytes = AudioClipToBytes(audioClip);
//Build the JSON request
JObject jObject = new JObject();
jObject["data"] = Convert.ToBase64String(bytes);
//Send the request
StartCoroutine(SendSpeechToTextMsgCoroutine(jObject, callback));
}
/// <summary>
/// Coroutine that sends the request to XunFei
/// </summary>
/// <param name="message"></param>
/// <param name="callback">Callback invoked once the response is received</param>
/// <returns></returns>
private IEnumerator SendSpeechToTextMsgCoroutine(JObject message, Action<string> callback)
{
//Start the request
Task<string> resultJson = XunFeiManager.Instance.SpeechToText(message);
//Wait for the response
yield return new WaitUntil(() => resultJson.IsCompleted);
//Response received successfully
if (resultJson.IsCompletedSuccessfully == true)
{
//Parse the JSON response
JObject obj = JObject.Parse(resultJson.Result);
//Get the recognized text
string text = obj["text"].ToString();
//Debug.Log("XunFei speech-to-text: " + text);
//Callback
callback.Invoke(text);
}
else
{
Debug.Log("讯飞语音转文本消息发送失败");
//失败回调
callback.Invoke(string.Empty);
}
}
/// <summary>
/// Convert an AudioClip into byte[] data (16-bit PCM)
/// </summary>
/// <param name="audioClip">The Unity audio data</param>
/// <returns>byte[] data</returns>
private static byte[] AudioClipToBytes(AudioClip audioClip)
{
float[] data = new float[audioClip.samples];
audioClip.GetData(data, 0);
int rescaleFactor = 32767; //to convert float to Int16
byte[] outData = new byte[data.Length * 2];
for (int i = 0; i < data.Length; i++)
{
short temshort = (short)(data[i] * rescaleFactor);
byte[] temdata = BitConverter.GetBytes(temshort);
outData[i * 2] = temdata[0];
outData[i * 2 + 1] = temdata[1];
}
return outData;
}
/// <summary>
/// Trim silent sections from the clip
/// </summary>
/// <param name="clip"></param>
/// <param name="min">Amplitude threshold below which a sample counts as silence</param>
/// <returns></returns>
private static AudioClip TrimSilence(AudioClip clip, float min)
{
var samples = new float[clip.samples];
clip.GetData(samples, 0);
return TrimSilence(new List<float>(samples), min, clip.channels, clip.frequency);
}
private static AudioClip TrimSilence(List<float> samples, float min, int channels, int hz, bool _3D = false)
{
int origSamples = samples.Count;
int i;
for (i = 0; i < samples.Count; i++)
{
if (Mathf.Abs(samples[i]) > min)
{
break;
}
}
i -= (int)(hz * .1f);
i = Mathf.Max(i, 0);
// Remove start silence
samples.RemoveRange(0, i);
for (i = samples.Count - 1; i > 0; i--)
{
if (Mathf.Abs(samples[i]) > min)
{
break;
}
}
// Add some tail onto it
i += (int)(hz * .1f);
i = Mathf.Min(i, samples.Count - 1);
samples.RemoveRange(i, samples.Count - i);
if (samples.Count == 0)
{
Debug.Log("剔除后的AudioClip长度为0");
return null;
}
var clip = AudioClip.Create("TempClip", samples.Count, channels, hz, _3D);
clip.SetData(samples.ToArray(), 0);
return clip;
}
#endregion
}
Run the game and you should hear the synthesized speech.
Find the following snippet and change it to whatever text you need.
SendTextToSpeechMsg("你好啊,我是讯飞语音助手!", auidoClip =>
{
audioSource.clip = auidoClip;
audioSource.Play();
});
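The speaker is currently hard-coded to "xiaoyan" inside SendTextToSpeechMsg. If you would rather choose the voice per call, a minimal sketch of a variant is shown below; the extra voice parameter is my own addition rather than part of the original script, and the speaker names you can actually use depend on which voices are enabled for your XunFei account:
//Hypothetical variant of SendTextToSpeechMsg with the voice passed in by the caller.
public void SendTextToSpeechMsg(string text, string voice, Action<AudioClip> callback)
{
    //Build the JSON request
    JObject jObject = new JObject();
    jObject["text"] = text;
    //The speaker name must be a voice enabled in your XunFei console, e.g. "xiaoyan"
    jObject["voice"] = voice;
    //Send the request
    StartCoroutine(SendTextToSpeechMsgCoroutine(jObject, callback));
}
//Example call:
//SendTextToSpeechMsg("你好啊,我是讯飞语音助手!", "xiaoyan", clip => { audioSource.clip = clip; audioSource.Play(); });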
Speech to text
In the Unity editor, create a Button and give it some simple settings.
First add a click event (OnClick) to the button by clicking the plus sign; then drag the SpeechTest object from the Hierarchy into the object field under Runtime Only; finally select the SpeechTest.OnButtonClick method from the function dropdown. A code-based alternative is sketched below.
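If you prefer wiring the button from code instead of through the Inspector, a minimal sketch might look like the following; the class name and the way the references are assigned are assumptions for illustration only:
using UnityEngine;
using UnityEngine.UI;
//Hypothetical helper: attach to any object in the scene and assign the references in the Inspector.
public class RecordButtonBinder : MonoBehaviour
{
    public Button recordButton;   //The UI button that toggles recording
    public SpeechTest speechTest; //The SpeechTest component created earlier
    private void Start()
    {
        //Equivalent to adding SpeechTest.OnButtonClick under the button's OnClick event in the Inspector
        recordButton.onClick.AddListener(speechTest.OnButtonClick);
    }
}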
Now you can run the game: click the button to start recording, click it again to stop, and the recognized text will appear in the Console.
That wraps up this walkthrough of XunFei text-to-speech and speech-to-text in Unity with C#.