一 . 语音识别
1.1 下载FFmpeg,然后配置环境变量(把FFmpeg的bin目录加入PATH,保证在命令行里可以直接运行ffmpeg)
1.2 pip install baidu-aip(技术信息来自ai.baidu.com)
1.3 建一个py文件
# audio2text.py文件
import os
import subprocess

from aip import AipSpeech

# Your APP_ID / API key / secret key, generated when the application is
# created on ai.baidu.com.
APP_ID = '16027552'
API_KEY = 'ydZfy8GRB7Bz02UGeaXh4hGE'
SECRET_KEY = 'zUbGAD21x4I6abGYhwo9jAfERCZzeGpA'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Convert an audio file to raw PCM with ffmpeg and return the PCM bytes.

    The converted data is written next to the input as ``<filePath>.pcm``
    (overwritten if it already exists) as 16 kHz, mono, signed 16-bit
    little-endian samples.

    Args:
        filePath: Path of the source audio file (any format ffmpeg can read).

    Returns:
        The raw PCM content as ``bytes``.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable is not on PATH.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    pcm_path = f'{filePath}.pcm'
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact; check=True stops us from reading a stale .pcm
    # left over from a previous run when the conversion fails.
    subprocess.run(
        [
            'ffmpeg', '-y', '-i', filePath,
            '-acodec', 'pcm_s16le', '-f', 's16le',
            '-ac', '1', '-ar', '16000',
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


# Recognize a local file. hot.m4a is a local recording; the request
# parameters below are explained in detail on ai.baidu.com.
reg = client.asr(get_file_content('hot.m4a'), 'pcm', 16000, {
    'dev_pid': 1536,
})
if reg.get('result'):
    # Prints "今天真热", which is what the recording says.
    print(reg['result'][0])
else:
    # No 'result' entry: show the whole response instead of failing with a
    # bare KeyError.
    print(f'Speech recognition failed: {reg}')
二 . 语音合成
# text2audio.py文件
from aip import AipSpeech

# Your APP_ID / API key / secret key.
APP_ID = '16027552'
API_KEY = 'ydZfy8GRB7Bz02UGeaXh4hGE'
SECRET_KEY = 'zUbGAD21x4I6abGYhwo9jAfERCZzeGpA'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# The parameters below are all explained in detail on ai.baidu.com.
result = client.synthesis('唧唧复唧唧,木兰当户织', 'zh', 1, {
    'vol': 5,
    'spd': 6,
    'pit': 7,
    'per': 4,
})

# On success the call returns the audio as binary data; on failure it
# returns a dict describing the error.
if isinstance(result, dict):
    # Report the failure instead of exiting silently with no output file.
    print(f'Speech synthesis failed: {result}')
else:
    # Writes the synthesized speech to auido.mp3 (file name kept as in the
    # original script).
    with open('auido.mp3', 'wb') as f:
        f.write(result)
三 . 自然语言处理(low到爆)
import os
import subprocess

from aip import AipNlp, AipSpeech

# Your APP_ID / API key / secret key.
APP_ID = '16027552'
API_KEY = 'ydZfy8GRB7Bz02UGeaXh4hGE'
SECRET_KEY = 'zUbGAD21x4I6abGYhwo9jAfERCZzeGpA'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
client_nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)


def textToaudio(text):
    """Synthesize ``text`` to speech and save it as ``audio.mp3``.

    Args:
        text: The text to turn into speech.

    Returns:
        The name of the file that was written (``'audio.mp3'``).

    Raises:
        RuntimeError: If the synthesis call returns an error dict instead of
            audio data.
    """
    # Single source of truth for the output path: the original wrote
    # 'auido.mp3' but returned 'audio.mp3', so playback targeted a file
    # that was never written.
    filename = 'audio.mp3'
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
        'spd': 6,
        'pit': 7,
        'per': 4,
    })
    # On success the call returns the audio as binary data; on failure it
    # returns a dict describing the error.
    if isinstance(result, dict):
        raise RuntimeError(f'speech synthesis failed: {result}')
    with open(filename, 'wb') as f:
        f.write(result)
    return filename


def audioTotext(filepath):
    """Recognize the speech in an audio file and return the top transcript.

    Args:
        filepath: Path of the audio file; it is converted to PCM first.

    Returns:
        The first entry of the ``result`` list in the recognition response.

    Raises:
        RuntimeError: If the response carries no ``result`` entry.
    """
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    if not res.get('result'):
        raise RuntimeError(f'speech recognition failed: {res}')
    return res['result'][0]


def get_file_content(filePath):
    """Convert an audio file to raw PCM with ffmpeg and return the PCM bytes.

    The converted data is written to ``<filePath>.pcm`` (overwritten if it
    exists) as 16 kHz, mono, signed 16-bit little-endian samples.

    Args:
        filePath: Path of the source audio file.

    Returns:
        The raw PCM content as ``bytes``.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable is not on PATH.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    pcm_path = f'{filePath}.pcm'
    # Argument list instead of a shell string: safe for paths with spaces,
    # and check=True prevents reading a stale .pcm after a failed conversion.
    subprocess.run(
        [
            'ffmpeg', '-y', '-i', filePath,
            '-acodec', 'pcm_s16le', '-f', 's16le',
            '-ac', '1', '-ar', '16000',
            pcm_path,
        ],
        check=True,
    )
    print(filePath)
    with open(pcm_path, 'rb') as fp:
        return fp.read()


# The hot.m4a recording says "今天真热".
text = audioTotext('hot.m4a')
# Simple natural language processing: similarity of the two sentences.
score = client_nlp.simnet('今天怎么这么热', text).get('score')
# A score >= 0.58 is treated as "both sentences mean the same thing".
# A missing score (no 'score' key in the response) counts as no match.
if score is not None and score >= 0.58:
    filename = textToaudio('萧峰降龙十八掌天下第一!')
    # Plays the audio; per the original author this is shorthand for
    # os.system(f"ffplay {filename}"). NOTE(review): presumably relies on
    # the shell opening the file with its associated player (Windows).
    os.system(filename)
四 . 连接图灵机器人完成自然语言处理
import os
import subprocess

import requests
from aip import AipNlp, AipSpeech

# Your APP_ID / API key / secret key.
APP_ID = '16027552'
API_KEY = 'ydZfy8GRB7Bz02UGeaXh4hGE'
SECRET_KEY = 'zUbGAD21x4I6abGYhwo9jAfERCZzeGpA'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
client_nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)


def textToaudio(text):
    """Synthesize ``text`` to speech and save it as ``audio.mp3``.

    Args:
        text: The text to turn into speech.

    Returns:
        The name of the file that was written (``'audio.mp3'``).

    Raises:
        RuntimeError: If the synthesis call returns an error dict instead of
            audio data.
    """
    # Write to the same path that is returned: the original wrote
    # 'auido.mp3' but returned 'audio.mp3'.
    filename = 'audio.mp3'
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
        'spd': 6,
        'pit': 7,
        'per': 4,
    })
    # On success the call returns the audio as binary data; on failure it
    # returns a dict describing the error.
    if isinstance(result, dict):
        raise RuntimeError(f'speech synthesis failed: {result}')
    with open(filename, 'wb') as f:
        f.write(result)
    return filename


def audioTotext(filepath):
    """Recognize the speech in an audio file, print and return the transcript.

    Args:
        filepath: Path of the audio file; it is converted to PCM first.

    Returns:
        The first entry of the ``result`` list in the recognition response.

    Raises:
        RuntimeError: If the response carries no ``result`` entry.
    """
    reg = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    if not reg.get('result'):
        raise RuntimeError(f'speech recognition failed: {reg}')
    transcript = reg['result'][0]
    print(transcript)
    return transcript


def get_file_content(filePath):
    """Convert an audio file to raw PCM with ffmpeg and return the PCM bytes.

    The converted data is written to ``<filePath>.pcm`` (overwritten if it
    exists) as 16 kHz, mono, signed 16-bit little-endian samples.

    Args:
        filePath: Path of the source audio file.

    Returns:
        The raw PCM content as ``bytes``.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable is not on PATH.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    pcm_path = f'{filePath}.pcm'
    # Argument list instead of a shell string: safe for paths with spaces,
    # and check=True prevents reading a stale .pcm after a failed conversion.
    subprocess.run(
        [
            'ffmpeg', '-y', '-i', filePath,
            '-acodec', 'pcm_s16le', '-f', 's16le',
            '-ac', '1', '-ar', '16000',
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


def go_tuling(text, uid):
    """Send ``text`` to the Tuling chatbot and return its text reply.

    Args:
        text: The user's utterance.
        uid: Identifier sent as ``userInfo.userId`` in the request.

    Returns:
        The ``text`` value of the first entry in the response's ``results``.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx HTTP status.
    """
    # Tuling API endpoint; the integration guide is on tuling123.com.
    url = 'http://openapi.tuling123.com/openapi/api/v2'
    data = {
        "perception": {
            "inputText": {
                "text": text,
            },
        },
        "userInfo": {
            "apiKey": "34b2c01332074b2b9d9293f72547df39",
            "userId": uid,
        },
    }
    # The request must be an HTTP POST with a JSON body. The timeout keeps
    # the script from hanging forever if the service does not answer.
    ret = requests.post(url, json=data, timeout=10)
    ret.raise_for_status()
    return ret.json().get('results')[0].get('values').get('text')


# libai.wma is the input recording.
text = audioTotext('libai.wma')
# Simple natural language processing: similarity of the two sentences.
score = client_nlp.simnet('今天天气怎么样', text).get('score')
if score is not None and score >= 0.58:
    filename = textToaudio('萧峰降龙十八掌天下第一')
else:
    # Score below 0.58 (or missing): hand the sentence to the Tuling chatbot
    # and speak its answer.
    answer = go_tuling(text, 'attila')
    filename = textToaudio(answer)
# Plays the audio file. NOTE(review): presumably relies on the shell opening
# the file with its associated player (Windows).
os.system(filename)