【人工智障】之一 语音识别

时间:2024-03-09 15:58:33

首先,配置 APP_ID、API_KEY(AK)和 SECRET_KEY(SK):

from aip import AipSpeech

# Your APP ID, API key (AK) and secret key (SK) for the speech service.
# NOTE(review): the credentials are hard-coded; consider loading them from the
# environment before sharing this file.
APP_ID = '11312730'
API_KEY = 'I7rDa8SGYVM4yFHGjuzdgOrO'
SECRET_KEY = 'BguQbuYvyeqhFsVFfNNe2hieYOsvp5yL'

 

语音合成


# Build the speech client from the credentials configured above.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# Ask the service to synthesize a tongue twister; the adjacent string literals
# are concatenated into a single text argument.
result = client.synthesis(
    '我是李文强,给大家说句绕口令,'
    '八百标兵奔北坡, 炮兵并排北边跑, 炮兵怕把标兵碰, '
    '标兵怕碰炮兵炮. 八百标兵奔北坡, 北坡八百炮兵炮. '
    '标兵怕碰炮兵炮, 炮兵怕把标兵碰.,',
    'zh',
    1,
    {
        'vol': 5,
        'per': 4,
    },
)

# On success the call returns the audio as binary data; on failure it returns
# a dict (see the service's error-code table).
if not isinstance(result, dict):
    with open('audio.mp3', 'wb') as f:
        f.write(result)
else:
    # Surface the error payload instead of silently writing nothing.
    print(result)

 

语音识别

def get_file_content(filePath):
    """Convert an audio file to PCM with ffmpeg and return the PCM bytes.

    The converted audio is written next to the input as ``<filePath>.pcm``
    (signed 16-bit little-endian, 1 channel, 16000 Hz, per the ffmpeg flags
    below) and then read back.

    Args:
        filePath: Path of the audio file to convert.

    Returns:
        The raw bytes of the generated ``.pcm`` file.

    Raises:
        FileNotFoundError: If ``filePath`` does not exist, or the ``ffmpeg``
            executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Local imports: this snippet's import section does not provide them.
    import os
    import subprocess

    # Fail early with a clear error rather than letting ffmpeg fail and then
    # reading a missing (or stale) .pcm file.
    if not os.path.isfile(filePath):
        raise FileNotFoundError(filePath)

    pcm_path = f"{filePath}.pcm"
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact; check=True surfaces conversion failures.
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", filePath,
            "-acodec", "pcm_s16le",
            "-f", "s16le",
            "-ac", "1",
            "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()

# Recognize a local file: convert audio.mp3 to PCM and submit it to the
# recognition service at a 16000 Hz sample rate.
# NOTE(review): dev_pid presumably selects the recognition language model
# (1536) — confirm against the service documentation.
res = client.asr(get_file_content('audio.mp3'), 'pcm', 16000, {
    'dev_pid': 1536,
})

# A failed request carries no "result" list, so guard before indexing and show
# the raw response instead of crashing with a TypeError.
candidates = res.get("result")
if candidates:
    print(candidates[0])
else:
    print(res)

给我说(运行前必须先提供一个名为 audio.mp3 的 mp3 文件)

from aip import AipSpeech
import os

# APP ID, API key and secret key used to construct the speech client.
# NOTE(review): the credentials are hard-coded; consider loading them from the
# environment before sharing this file.
APP_ID = '11312730'
API_KEY = 'I7rDa8SGYVM4yFHGjuzdgOrO'
SECRET_KEY = 'BguQbuYvyeqhFsVFfNNe2hieYOsvp5yL'

# Module-level speech client built from the credentials above; audio2text and
# text2audio below both call it.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

def audio2text(filename):
    """Transcribe an audio file and return the recognized text.

    The file is first converted with ffmpeg to ``<filename>.pcm`` (signed
    16-bit little-endian, 1 channel, 16000 Hz, per the flags below); the PCM
    bytes are then sent to the module-level ``client`` for recognition.

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        The first entry of the ``result`` list in the recognition response.
        The text is also printed.

    Raises:
        FileNotFoundError: If ``filename`` does not exist, or the ``ffmpeg``
            executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        RuntimeError: If the response carries no ``result`` entry.
    """
    import subprocess  # local import: not part of the file's import block

    # Fail early with a clear error rather than letting ffmpeg fail and then
    # reading a missing (or stale) .pcm file.
    if not os.path.isfile(filename):
        raise FileNotFoundError(filename)

    pcm_path = f"{filename}.pcm"
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact; check=True surfaces conversion failures.
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", filename,
            "-acodec", "pcm_s16le",
            "-f", "s16le",
            "-ac", "1",
            "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )

    # Read the audio and close the file before making the network call.
    with open(pcm_path, "rb") as fp:
        pcm_bytes = fp.read()

    res = client.asr(pcm_bytes, "pcm", 16000, {
        'dev_pid': 1536,
    })

    # A failed request has no "result" list; report it explicitly instead of
    # failing with a TypeError on None[0].
    candidates = res.get("result")
    if not candidates:
        raise RuntimeError(f"speech recognition failed: {res}")

    recognized = candidates[0]
    print(recognized)
    return recognized

def text2audio(text):
    """Synthesize *text* to ``audio.mp3`` and open the file for playback.

    Args:
        text: The text to send to the module-level ``client`` for synthesis.

    Returns:
        None. On success ``audio.mp3`` is (over)written in the current
        directory and handed to the shell; on failure the error payload is
        printed and nothing is played.
    """
    # NOTE(review): spd/vol/pit/per are presumably the speed, volume, pitch
    # and voice options of the synthesis API — confirm against its docs.
    result = client.synthesis(text, 'zh', 1, {
        "spd": 4,
        "vol": 5,
        "pit": 8,
        "per": 4,
    })

    # A dict return is not audio data; report it and skip playback so a stale
    # or missing audio.mp3 is never played.
    if isinstance(result, dict):
        print(result)
        return

    with open("audio.mp3", "wb") as f:
        f.write(result)

    # Passes the bare file name to the shell.
    # NOTE(review): this presumably relies on Windows file associations to
    # launch a player — confirm before running on other platforms.
    os.system("audio.mp3")

# Transcribe the recording, then answer out loud: a fixed reply when the
# speaker asked for a name, otherwise an echo of what was heard.
text = audio2text("audio.mp3")
reply = "我的名字叫**" if "你叫什么名字" in text else f"你刚刚是说,{text}"
text2audio(reply)