原文:
与众不同 windows phone (45) - 8.0 语音: TTS, 语音识别, 语音命令
[源码下载]
与众不同 windows phone (45) - 8.0 语音: TTS, 语音识别, 语音命令
作者:webabcd
介绍
与众不同 windows phone 8.0 之 语音
- TTS(Text To Speech)
- 语音识别
- 语音命令
示例
1、演示 TTS(Text To Speech)的应用
Speech/TTS.xaml
<!-- TTS demo page: one status label and three buttons, each bound to a click handler in the code-behind (TTS.xaml.cs). -->
<phone:PhoneApplicationPage
x:Class="Demo.Speech.TTS"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:phone="clr-namespace:Microsoft.Phone.Controls;assembly=Microsoft.Phone"
xmlns:shell="clr-namespace:Microsoft.Phone.Shell;assembly=Microsoft.Phone"
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
FontFamily="{StaticResource PhoneFontFamilyNormal}"
FontSize="{StaticResource PhoneFontSizeNormal}"
Foreground="{StaticResource PhoneForegroundBrush}"
SupportedOrientations="Portrait" Orientation="Portrait"
mc:Ignorable="d"
shell:SystemTray.IsVisible="True">
<Grid Background="Transparent">
<StackPanel Orientation="Vertical">
<!-- Status label: the code-behind writes the reached SSML bookmark name and audio position here. -->
<TextBlock Name="lblMsg" />
<!-- Reads the sample text with a newly created synthesizer (no voice is set explicitly). -->
<Button x:Name="btnTTS_Basic" Content="TTS 基础" Click="btnTTS_Basic_Click" />
<!-- Reads the sample text with an explicitly selected zh-CN voice. -->
<Button x:Name="btnTTS_Select" Content="用指定的语音 TTS" Click="btnTTS_Select_Click" />
<!-- Reads an SSML document that the code-behind assembles as a string. -->
<Button x:Name="btnTTS_SSML" Content="朗读 SSML 文档" Click="btnTTS_SSML_Click" />
</StackPanel>
</Grid>
</phone:PhoneApplicationPage>
Speech/TTS.xaml.cs
/*
* 演示 TTS(Text To Speech)的应用
*
*
* InstalledVoices - 管理已安装的语音
* All - 已安装的全部语音,返回 VoiceInformation 对象列表
* Default - 默认语音,返回 VoiceInformation 对象
*
* VoiceInformation - 语音信息
* Id - 标识
* Language - 语言
* DisplayName - 名称
* Description - 描述
* Gender - 性别(VoiceGender.Male 或 VoiceGender.Female)
*
* SpeechSynthesizer - TTS 的类
* SetVoice(VoiceInformation voiceInformation) - 设置语音
* GetVoice() - 获取语音信息
* SpeakTextAsync(string content, object userState) - 朗读指定的文本。可以设置一个上下文对象,在 SpeechStarted 时取出
* SpeakSsmlAsync(string content, object userState) - 朗读指定的 SSML 文档。可以设置一个上下文对象,在 SpeechStarted 时取出
* SpeakSsmlFromUriAsync(Uri content, object userState) - 朗读指定地址的 SSML 文档。可以设置一个上下文对象,在 SpeechStarted 时取出
* CancelAll() - 取消全部朗读
* SpeechStarted - 开始朗读时触发的事件
* BookmarkReached - 朗读到 <mark /> 标记时触发的事件(仅针对 SSML 协议)
*
*
* 注:
* 1、需要在 manifest 中增加配置 <Capability Name="ID_CAP_SPEECH_RECOGNITION" />
* 2、SSML - Speech Synthesis Markup Language
* 3、微软关于 ssml 的说明:http://msdn.microsoft.com/en-us/library/hh361578
* 4、W3C 关于 ssml 的说明:http://www.w3.org/TR/speech-synthesis/
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows;
using Microsoft.Phone.Controls;
using Windows.Phone.Speech.Synthesis;
namespace Demo.Speech
{
    /// <summary>
    /// Code-behind for the TTS demo page. Demonstrates three ways of speaking text:
    /// with a newly created synthesizer, with an explicitly selected zh-CN voice,
    /// and from an SSML document that contains a bookmark.
    /// </summary>
    public partial class TTS : PhoneApplicationPage
    {
        // Language tag used to pick an installed Chinese voice.
        private const string ChineseLanguageTag = "zh-CN";

        // Name of the SSML <mark /> placed between the two halves of the sentence.
        private const string BookmarkName = "xxx";

        // The sentence is stored in two halves so that the <mark /> element is only
        // inserted when building SSML; the plain-text buttons never hand markup
        // to SpeakTextAsync.
        private const string TextBeforeBookmark = "TTS 是 Text To Speech 的缩写";
        private const string TextAfterBookmark = ",即“从文本到语音”,是人机对话的一部分,让机器能够说话。";

        // Plain text (no markup) spoken by the "basic" and "select voice" buttons.
        private readonly string _text = TextBeforeBookmark + TextAfterBookmark;

        public TTS()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Speaks the sample text with a newly created synthesizer.
        /// </summary>
        private async void btnTTS_Basic_Click(object sender, RoutedEventArgs e)
        {
            try
            {
                SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();
                await speechSynthesizer.SpeakTextAsync(_text);
            }
            catch (Exception ex)
            {
                // async void handlers cannot propagate exceptions to a caller,
                // so report the failure on the page instead of crashing the app.
                ShowError(ex);
            }
        }

        /// <summary>
        /// Speaks the sample text with the first installed zh-CN voice.
        /// </summary>
        private async void btnTTS_Select_Click(object sender, RoutedEventArgs e)
        {
            // Author's note: the zh-CN list is expected to hold two voices (female first,
            // male second); details are available from each VoiceInformation object.
            VoiceInformation zhVoice = InstalledVoices.All
                .FirstOrDefault(voice => voice.Language == ChineseLanguageTag);

            if (zhVoice == null)
            {
                // No Chinese voice installed: indexing into the empty sequence would throw.
                lblMsg.Text = "未安装中文语音";
                return;
            }

            try
            {
                SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();
                speechSynthesizer.SetVoice(zhVoice);
                await speechSynthesizer.SpeakTextAsync(_text);
            }
            catch (Exception ex)
            {
                ShowError(ex);
            }
        }

        /// <summary>
        /// Builds an SSML document (zh-CN, male voice, rate -50%) around the sample
        /// sentence with a bookmark in the middle, and speaks it.
        /// </summary>
        private async void btnTTS_SSML_Click(object sender, RoutedEventArgs e)
        {
            // Microsoft SSML reference: http://msdn.microsoft.com/en-us/library/hh361578
            // W3C SSML specification:   http://www.w3.org/TR/speech-synthesis/
            string ssml =
                "<speak version=\"1.0\" xmlns=\"http://www.w3.org/2001/10/synthesis\" xml:lang=\"zh-CN\">" // Chinese
                + "<voice gender=\"male\">"      // male voice
                + "<prosody rate=\"-50%\">"      // speaking rate -50%
                + TextBeforeBookmark
                + "<mark name=\"" + BookmarkName + "\" />"
                + TextAfterBookmark
                + "</prosody>"
                + "</voice>"
                + "</speak>";

            try
            {
                SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer();

                // Raised when speaking starts.
                speechSynthesizer.SpeechStarted += speechSynthesizer_SpeechStarted;
                // Raised when a <mark /> element is reached (SSML only).
                speechSynthesizer.BookmarkReached += speechSynthesizer_BookmarkReached;

                await speechSynthesizer.SpeakSsmlAsync(ssml);
            }
            catch (Exception ex)
            {
                ShowError(ex);
            }
        }

        /// <summary>
        /// Demonstrates where the user-state object of a speak call can be retrieved.
        /// </summary>
        void speechSynthesizer_SpeechStarted(SpeechSynthesizer sender, SpeechStartedEventArgs args)
        {
            // Context object of the speak call; intentionally unused in this demo.
            object userState = args.UserState;
        }

        /// <summary>
        /// Shows the name of the reached bookmark and the audio position at which it was reached.
        /// </summary>
        void speechSynthesizer_BookmarkReached(SpeechSynthesizer sender, SpeechBookmarkReachedEventArgs args)
        {
            // The label is updated through the dispatcher, as in the original handler.
            this.Dispatcher.BeginInvoke(() =>
            {
                lblMsg.Text = "mark name: " + args.Bookmark
                    + Environment.NewLine
                    + "audio position: " + args.AudioPosition.TotalSeconds;
            });
        }

        // Displays a speech failure on the page.
        private void ShowError(Exception ex)
        {
            lblMsg.Text = ex.Message;
        }
    }
}
2、演示如何通过自定义语法列表做语音识别,以及如何通过 SRGS 自定义语法做语音识别
Speech/SRGSGrammar.xml
<?xml version="1.0" encoding="utf-8"?>
<!-- SRGS grammar (zh-cn) whose root rule "Main" matches an optional "我想去" followed by one city name. -->
<grammar version="1.0" xml:lang="zh-cn" root="Main" tag-format="semantics/1.0"
xmlns="http://www.w3.org/2001/06/grammar"
xmlns:sapi="http://schemas.microsoft.com/Speech/2002/06/SRGSExtensions">
<!-- Root rule: the leading phrase may occur zero or one time, then the Cities rule must match. -->
<rule id="Main">
<item repeat="0-1">我想去</item>
<ruleref uri="#Cities" />
</rule>
<!-- Exactly one of the listed city names. -->
<rule id="Cities" scope="public">
<one-of>
<item>北京</item>
<item>深圳</item>
<item>上海</item>
<item>广州</item>
</one-of>
</rule>
</grammar>
<!--
Phrases this grammar recognizes: 我想去北京; 我想去深圳; 我想去上海; 我想去广州; 北京; 深圳; 上海; 广州
Visual Studio ships an item template for creating SRGSGrammar (SRGS grammar) files
Microsoft SRGS reference: http://msdn.microsoft.com/en-us/library/hh361653
W3C SRGS specification: http://www.w3.org/TR/speech-grammar/
-->
Speech/SpeechRecognition.xaml
<!-- Speech recognition demo page: one status label and two buttons bound to handlers in SpeechRecognition.xaml.cs. -->
<phone:PhoneApplicationPage
    x:Class="Demo.Speech.SpeechRecognition"
    xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
    xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
    xmlns:phone="clr-namespace:Microsoft.Phone.Controls;assembly=Microsoft.Phone"
    xmlns:shell="clr-namespace:Microsoft.Phone.Shell;assembly=Microsoft.Phone"
    xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
    xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
    FontFamily="{StaticResource PhoneFontFamilyNormal}"
    FontSize="{StaticResource PhoneFontSizeNormal}"
    Foreground="{StaticResource PhoneForegroundBrush}"
    SupportedOrientations="Portrait" Orientation="Portrait"
    mc:Ignorable="d"
    shell:SystemTray.IsVisible="True">
    <Grid Background="Transparent">
        <StackPanel Orientation="Vertical">
            <!--
                Status label. The code-behind writes multi-line results and full exception
                text (ex.ToString()) here, so wrapping is enabled to keep long lines from
                being clipped at the page edge.
            -->
            <TextBlock Name="lblMsg" TextWrapping="Wrap" />
            <!-- Recognition against an in-code phrase list, using the built-in recognition UI. -->
            <Button x:Name="btnDemo" Content="通过自定义语法列表做语音识别" Click="btnDemo_Click" />
            <!-- Recognition against the SRGS grammar file, without the built-in UI. -->
            <Button x:Name="btnSRGS" Content="通过 SRGS 自定义语法做语音识别" Click="btnSRGS_Click" />
        </StackPanel>
    </Grid>
</phone:PhoneApplicationPage>
Speech/SpeechRecognition.xaml.cs
/*
* 演示如何通过自定义语法列表做语音识别,以及如何通过 SRGS 自定义语法做语音识别
*
*
* 语音识别:用于在 app 内识别语音
* 语音命令:用于在 app 外通过语音命令启动 app
*
*
* 注:
* 1、需要在 manifest 中增加配置 <Capability Name="ID_CAP_SPEECH_RECOGNITION" /> <Capability Name="ID_CAP_MICROPHONE" />
* 2、安装语音识别器:设置 -> 语音 -> 在“语音语言”列表中安装指定的语音识别器,并启用语音识别服务
* 3、SRGS - Speech Recognition Grammar Specification
* 4、微软关于 SRGS 的说明:http://msdn.microsoft.com/en-us/library/hh361653
* 5、W3C 关于 SRGS 的说明:http://www.w3.org/TR/speech-grammar/
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows;
using Microsoft.Phone.Controls;
using Windows.Phone.Speech.Recognition;
namespace Demo.Speech
{
    /// <summary>
    /// Code-behind for the speech recognition demo page. Shows recognition against a
    /// phrase list (with the built-in UI) and against an SRGS grammar file (without UI).
    /// </summary>
    public partial class SpeechRecognition : PhoneApplicationPage
    {
        // Language tag of the recognizer both demos want to use.
        private const string ChineseLanguageTag = "zh-CN";

        // HRESULT this page maps to its "recognizer does not support the requested language" message.
        private const uint UnsupportedLanguageHResult = 0x800455BC;

        public SpeechRecognition()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Recognizes one of a fixed list of phrases using the recognizer that ships its own UI.
        /// </summary>
        private async void btnDemo_Click(object sender, RoutedEventArgs e)
        {
            SpeechRecognizerInformation zhRecognizer = FindChineseRecognizer();
            if (zhRecognizer == null)
            {
                lblMsg.Text = "未安装中文语音识别器";
                return;
            }

            // Recognizer with built-in UI.
            SpeechRecognizerUI speechRecognizerUI = new SpeechRecognizerUI();
            SpeechRecognizer recognizer = speechRecognizerUI.Recognizer;

            // Raised when an audio problem occurs during recognition.
            recognizer.AudioProblemOccurred += Recognizer_AudioProblemOccurred;
            // Raised when the audio capture state changes.
            recognizer.AudioCaptureStateChanged += Recognizer_AudioCaptureStateChanged;

            try
            {
                // InitialSilenceTimeout - abort if only silence is received for this long.
                recognizer.Settings.InitialSilenceTimeout = TimeSpan.FromSeconds(5.0);
                // EndSilenceTimeout - once speech has started, this much silence ends recognition.
                recognizer.Settings.EndSilenceTimeout = TimeSpan.FromSeconds(0.15);
                // BabbleTimeout - abort if only noise is received for this long (0 disables it).
                recognizer.Settings.BabbleTimeout = TimeSpan.FromSeconds(0.0);

                recognizer.SetRecognizer(zhRecognizer);

                // Phrase list to recognize. Predefined grammars could be added instead through
                // Grammars.AddGrammarFromPredefinedType (SpeechPredefinedGrammar.Dictation / WebSearch).
                string[] phrases = { "xbox", "海贼王", "王磊" };
                recognizer.Grammars.AddGrammarFromList("myWord", phrases);

                // Preload all grammars. Done inside the try block so a failure here is
                // reported on the page like any other recognition error.
                await recognizer.PreloadGrammarsAsync();

                // Title shown on the listening screen.
                speechRecognizerUI.Settings.ListenText = "监听中。。。";
                // Example text shown on the listening screen.
                speechRecognizerUI.Settings.ExampleText = "精确识别:xbox, 海贼王, 王磊";
                // Whether the "did you say" / "heard you say" screens read the text aloud
                // (only effective when audio confirmation is enabled in the speech settings).
                speechRecognizerUI.Settings.ReadoutEnabled = true;
                // Whether to show the "heard you say" confirmation screen.
                speechRecognizerUI.Settings.ShowConfirmation = false;

                SpeechRecognitionUIResult result = await speechRecognizerUI.RecognizeWithUIAsync();

                string message = "识别状态: " + result.ResultStatus.ToString();

                // Only read the recognition result when recognition succeeded; the explicit
                // null check keeps a cancelled session from surfacing as an exception dump.
                if (result.ResultStatus == SpeechRecognitionUIStatus.Succeeded && result.RecognitionResult != null)
                {
                    message += Environment.NewLine;
                    message += "识别结果:" + result.RecognitionResult.Text;
                    message += Environment.NewLine;
                    // Rejected, Low, Medium, High
                    message += "可信度级别: " + result.RecognitionResult.TextConfidence.ToString();
                }

                lblMsg.Text = message;
            }
            catch (Exception ex)
            {
                lblMsg.Text = DescribeError(ex, recognizer);
            }
            finally
            {
                // The recognizer is not reused, so detach the page's handlers from it.
                recognizer.AudioProblemOccurred -= Recognizer_AudioProblemOccurred;
                recognizer.AudioCaptureStateChanged -= Recognizer_AudioCaptureStateChanged;
            }
        }

        /// <summary>
        /// Shows the new audio capture state (Capturing or Inactive).
        /// </summary>
        void Recognizer_AudioCaptureStateChanged(SpeechRecognizer sender, SpeechRecognizerAudioCaptureStateChangedEventArgs args)
        {
            ShowMessage("AudioCaptureStateChanged: " + args.State.ToString());
        }

        /// <summary>
        /// Shows the audio problem that occurred
        /// (TooLoud, TooQuiet, TooFast, TooSlow, TooNoisy, NoSignal, None).
        /// </summary>
        void Recognizer_AudioProblemOccurred(SpeechRecognizer sender, SpeechAudioProblemOccurredEventArgs args)
        {
            ShowMessage("AudioProblemOccurred: " + args.Problem.ToString());
        }

        /// <summary>
        /// Recognizes speech against the SRGS grammar file using the recognizer without UI.
        /// Microsoft SRGS reference: http://msdn.microsoft.com/en-us/library/hh361653
        /// W3C SRGS specification:   http://www.w3.org/TR/speech-grammar/
        /// </summary>
        private async void btnSRGS_Click(object sender, RoutedEventArgs e)
        {
            // Recognizer without UI.
            SpeechRecognizer speechRecognizer = new SpeechRecognizer();

            try
            {
                // The grammar file is declared as zh-cn, so prefer the zh-CN recognizer when
                // it is installed; otherwise keep the default one and let the error path
                // below report a language mismatch.
                SpeechRecognizerInformation zhRecognizer = FindChineseRecognizer();
                if (zhRecognizer != null)
                {
                    speechRecognizer.SetRecognizer(zhRecognizer);
                }

                Uri mySRGS = new Uri("ms-appx:///Speech/SRGSGrammar.xml", UriKind.Absolute);
                speechRecognizer.Grammars.AddGrammarFromUri("srgs", mySRGS);

                lblMsg.Text = "监听中。。。" + Environment.NewLine;

                SpeechRecognitionResult result = await speechRecognizer.RecognizeAsync();

                lblMsg.Text += "识别结果:" + result.Text;
                lblMsg.Text += Environment.NewLine;
                // Rejected, Low, Medium, High
                lblMsg.Text += "可信度级别: " + result.TextConfidence.ToString();
            }
            catch (Exception ex)
            {
                lblMsg.Text = DescribeError(ex, speechRecognizer);
            }
        }

        // Returns the first installed zh-CN recognizer, or null when none is installed.
        private static SpeechRecognizerInformation FindChineseRecognizer()
        {
            return InstalledSpeechRecognizers.All
                .FirstOrDefault(info => info.Language == ChineseLanguageTag);
        }

        // Maps a recognition failure to the text shown on the page.
        private static string DescribeError(Exception ex, SpeechRecognizer recognizer)
        {
            // unchecked: HResult is a negative int for failure codes, and the cast must
            // not throw if the project is compiled with overflow checking enabled.
            if (unchecked((uint)ex.HResult) == UnsupportedLanguageHResult)
            {
                return "当前语音识别器不支持所请求的语言: " + recognizer.GetRecognizer().Language;
            }

            return ex.ToString();
        }

        // Writes to the status label, marshalling to the UI thread when the caller is not on it.
        private void ShowMessage(string message)
        {
            if (Dispatcher.CheckAccess())
            {
                lblMsg.Text = message;
                return;
            }

            Dispatcher.BeginInvoke(() => { lblMsg.Text = message; });
        }
    }
}
3、演示如何通过语音命令启动 app,以及 app 启动后如何获取启动此 app 的语音命令的标识和内容
Speech/VoiceCommandDefinition.xml
<?xml version="1.0" encoding="utf-8"?>
<VoiceCommands xmlns="http://schemas.microsoft.com/voicecommands/1.0">
<CommandSet xml:lang="zh-cn">
<!-- Command prefix; when omitted, the app name is used as the prefix. -->
<CommandPrefix>贪吃蛇</CommandPrefix>
<!-- The listening window shows example phrases from different apps at random (here "贪吃蛇 开始"); this one may be shown when it is this app's turn. -->
<Example>开始</Example>
<Command Name="PlayGame">
<!-- The listening window shows example phrases from different apps at random (here "贪吃蛇 开始"); this one may be shown when it is this app's turn. -->
<Example>开始</Example>
<!-- Phrase to listen for; the bracketed word is optional (see the phrase list in the trailing comment). -->
<ListenFor>[马上] 开始</ListenFor>
<!-- Phrase to listen for; the bracketed word is optional. -->
<ListenFor>[马上] 启动</ListenFor>
<!-- Text shown in the listening window when the target app is about to launch (also used as the TTS text when audio confirmation is enabled in the speech settings). -->
<Feedback>准备启动</Feedback>
<!-- Page to launch. -->
<Navigate Target="/Speech/VoiceCommands.xaml" />
</Command>
<Command Name="PlayLevel">
<!-- The listening window shows example phrases from different apps at random (here "贪吃蛇 从等级 2 开始"); this one may be shown when it is this app's turn. -->
<Example>从等级 2 开始</Example>
<!-- Phrase to listen for; {number} refers to the PhraseList labelled "number" below. -->
<ListenFor>从等级 {number} 开始</ListenFor>
<!-- Text shown in the listening window when the target app is about to launch (also used as the TTS text when audio confirmation is enabled in the speech settings). -->
<Feedback>正转到等级 {number}... </Feedback>
<!-- Page to launch. -->
<Navigate Target="/Speech/VoiceCommands.xaml" />
</Command>
<!-- ListenFor and Feedback can reference this list through {number}. -->
<PhraseList Label="number">
<Item>1</Item>
<Item>2</Item>
<Item>3</Item>
</PhraseList>
</CommandSet>
</VoiceCommands>
<!--
Phrases this definition recognizes: 贪吃蛇开始, 贪吃蛇马上开始, 贪吃蛇启动, 贪吃蛇马上启动, 贪吃蛇从等级 1 开始, 从等级 2 开始, 从等级 3 开始
Visual Studio ships an item template for creating VoiceCommandDefinition files
Detailed VoiceCommands documentation: http://msdn.microsoft.com/en-us/library/windowsphone/develop/jj207041
-->
Speech/VoiceCommands.xaml
<!-- Voice command demo page: a single wrapped label whose text the code-behind replaces when the page is opened by a voice command. -->
<phone:PhoneApplicationPage
x:Class="Demo.Speech.VoiceCommands"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:phone="clr-namespace:Microsoft.Phone.Controls;assembly=Microsoft.Phone"
xmlns:shell="clr-namespace:Microsoft.Phone.Shell;assembly=Microsoft.Phone"
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
FontFamily="{StaticResource PhoneFontFamilyNormal}"
FontSize="{StaticResource PhoneFontSizeNormal}"
Foreground="{StaticResource PhoneForegroundBrush}"
SupportedOrientations="Portrait" Orientation="Portrait"
mc:Ignorable="d"
shell:SystemTray.IsVisible="True">
<Grid Background="Transparent">
<StackPanel Orientation="Vertical">
<!-- Initial text is a usage hint; it is overwritten with the command name and recognized text when the query string carries voiceCommandName. -->
<TextBlock Name="lblMsg" TextWrapping="Wrap" Text="返回到开始屏幕,长按 windows 键,说出你的语音命令(语音命令的定义参见 VoiceCommandDefinition.xml)" />
</StackPanel>
</Grid>
</phone:PhoneApplicationPage>
Speech/VoiceCommands.xaml.cs
/*
* 演示如何通过语音命令启动 app,以及 app 启动后如何获取启动此 app 的语音命令的标识和内容
*
*
* 语音识别:用于在 app 内识别语音
* 语音命令:用于在 app 外通过语音命令启动 app
*
*
* 注:
* 1、需要在 manifest 中增加配置 <Capability Name="ID_CAP_SPEECH_RECOGNITION" /> <Capability Name="ID_CAP_MICROPHONE" />
* 2、关于 VoiceCommands 的详细说明参见:http://msdn.microsoft.com/en-us/library/windowsphone/develop/jj207041
*/
using System;
using System.Windows;
using Microsoft.Phone.Controls;
using Windows.Phone.Speech.VoiceCommands;
using System.Windows.Navigation;
namespace Demo.Speech
{
    /// <summary>
    /// Code-behind for the voice command demo page. Registers the app's voice command
    /// definition file when the page loads and, when the page was opened by a voice
    /// command, shows the command name and the recognized text.
    /// </summary>
    public partial class VoiceCommands : PhoneApplicationPage
    {
        // Query-string keys read when the page is launched by a voice command.
        private const string CommandNameKey = "voiceCommandName";
        private const string RecognizedTextKey = "reco";

        public VoiceCommands()
        {
            InitializeComponent();
            this.Loaded += VoiceCommands_Loaded;
        }

        /// <summary>
        /// Registers this app's voice command definition file with the system.
        /// </summary>
        private async void VoiceCommands_Loaded(object sender, RoutedEventArgs e)
        {
            try
            {
                await VoiceCommandService.InstallCommandSetsFromFileAsync(
                    new Uri("ms-appx:///Speech/VoiceCommandDefinition.xml", UriKind.Absolute));

                // The installed command sets can afterwards be inspected and modified
                // through VoiceCommandService.InstalledCommandSets.
            }
            catch (Exception ex)
            {
                // async void handlers cannot propagate exceptions to a caller, so a failed
                // registration is appended to the page text instead of crashing the app.
                lblMsg.Text += Environment.NewLine + "语音命令注册失败: " + ex.Message;
            }
        }

        /// <summary>
        /// Shows the voice command name and recognized text when present in the query string.
        /// </summary>
        protected override void OnNavigatedTo(NavigationEventArgs e)
        {
            // When launched by a voice command the URL looks like:
            // /Speech/VoiceCommands.xaml?voiceCommandName=PlayGame&reco=%E8%B4%AA%E5%90%83%E8%9B%87%20%E5%BC%80%E5%A7%8B
            var query = NavigationContext.QueryString;

            string commandName;
            if (query.TryGetValue(CommandNameKey, out commandName))
            {
                // "reco" is looked up defensively: a missing key shows an empty value
                // instead of throwing KeyNotFoundException.
                string recognizedText;
                if (!query.TryGetValue(RecognizedTextKey, out recognizedText))
                {
                    recognizedText = string.Empty;
                }

                lblMsg.Text = "语音命令的标识: " + commandName
                    + Environment.NewLine
                    + "语音命令的内容: " + recognizedText;
            }

            base.OnNavigatedTo(e);
        }
    }
}
OK
[源码下载]