"""
实时语音控制电脑音量
"""
import time
import speech_recognition as sr
import logging
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
logging.basicConfig(level=logging.DEBUG)
from aip import AipSpeech
# Baidu speech-recognition credentials.
# Each value may be supplied through an environment variable of the same name
# so that real keys do not have to be written into the source file; when the
# variable is unset the original placeholder literal is used unchanged.
import os

BAIDU_APP_ID = os.environ.get('BAIDU_APP_ID', '你的 App ID')
BAIDU_API_KEY = os.environ.get('BAIDU_API_KEY', '你的 Api Key')
BAIDU_SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', '你的 Secret Key')
# Client object whose asr() method is called for every captured utterance.
aip_speech = AipSpeech(BAIDU_APP_ID, BAIDU_API_KEY, BAIDU_SECRET_KEY)
# Chinese numeral to integer conversion.
# Maps every supported numeral character to its value: digits are below 10,
# the remaining entries are the units ten, hundred, thousand, 10**4 and 10**8.
common_used_numerals_tmp = {
    '零': 0, '一': 1, '二': 2, '两': 2, '三': 3, '四': 4, '五': 5, '六': 6,
    '七': 7, '八': 8, '九': 9, '十': 10, '百': 100, '千': 1000,
    '万': 10000, '亿': 100000000,
}


def chinese2digits(uchars_chinese: str) -> int:
    """Convert a Chinese numeral string such as '一百零五' to an integer.

    The string is scanned from its last character to its first. Units set the
    multiplier that applies to the next digit found; digits are multiplied by
    the current multiplier and accumulated. Digits that are not followed by a
    unit all use multiplier 1, so a bare digit sequence is summed.

    Args:
        uchars_chinese: Text made up only of the characters found in
            ``common_used_numerals_tmp``. An empty string yields 0.

    Returns:
        The integer value of the numeral.

    Raises:
        ValueError: If the text contains a character that is not a supported
            numeral (previously this surfaced as an opaque TypeError).
    """
    total = 0
    section = 1  # magnitude of the enclosing 万/亿 section (1 when none seen yet)
    unit = 1  # multiplier applied to the next digit to the left
    implied_one = False  # True while the leftmost unit seen has no digit yet

    for char in reversed(uchars_chinese):
        val = common_used_numerals_tmp.get(char)
        if val is None:
            raise ValueError(
                'unsupported character {!r} in Chinese numeral {!r}'.format(
                    char, uchars_chinese))

        if val >= 10000:
            # A larger section unit starts a new section (…亿…万); a smaller
            # one found to the left of it nests inside it (万亿 == 10**12).
            section = val if val > section else section * val
            unit = section
            implied_one = True
        elif val >= 10:
            # 十/百/千 scale the current section only, so several of them in
            # the same section (一千二百万) do not compound each other.
            unit = section * val
            implied_one = True
        else:
            total += unit * val
            implied_one = False

    # A numeral that starts with a unit carries an implicit leading "one":
    # 十三 -> 13, 十万 -> 100000.
    if implied_one:
        total += unit
    return total
# Computer volume control.
# Get the speaker device through pycaw, activate its IAudioEndpointVolume
# interface and cast the result to a usable interface pointer.
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
# Read the current master volume level; per the original note, 0.0 is the
# maximum and -65.25 is the minimum.
# NOTE(review): presumably these are dB values for the author's own device --
# confirm the range before relying on it for other hardware.
vl = volume.GetMasterVolumeLevel()
print(vl)
# Lookup table from an integer 0-100 to the level value that the recognition
# loop below passes to volume.SetMasterVolumeLevel().
# NOTE(review): the name `dict` shadows the builtin type for the rest of the module.
dict = {0: -65.25, 1: -56.99, 2: -51.67, 3: -47.74, 4: -44.62, 5: -42.03, 6: -39.82, 7: -37.89, 8: -36.17,
9: -34.63, 10: -33.24,
11: -31.96, 12: -30.78, 13: -29.68, 14: -28.66, 15: -27.7, 16: -26.8, 17: -25.95, 18: -25.15, 19: -24.38,
20: -23.65,
21: -22.96, 22: -22.3, 23: -21.66, 24: -21.05, 25: -20.46, 26: -19.9, 27: -19.35, 28: -18.82, 29: -18.32,
30: -17.82,
31: -17.35, 32: -16.88, 33: -16.44, 34: -16.0, 35: -15.58, 36: -15.16, 37: -14.76, 38: -14.37, 39: -13.99,
40: -13.62,
41: -13.26, 42: -12.9, 43: -12.56, 44: -12.22, 45: -11.89, 46: -11.56, 47: -11.24, 48: -10.93, 49: -10.63,
50: -10.33,
51: -10.04, 52: -9.75, 53: -9.47, 54: -9.19, 55: -8.92, 56: -8.65, 57: -8.39, 58: -8.13, 59: -7.88,
60: -7.63,
61: -7.38, 62: -7.14, 63: -6.9, 64: -6.67, 65: -6.44, 66: -6.21, 67: -5.99, 68: -5.76, 69: -5.55, 70: -5.33,
71: -5.12, 72: -4.91, 73: -4.71, 74: -4.5, 75: -4.3, 76: -4.11, 77: -3.91, 78: -3.72, 79: -3.53, 80: -3.34,
81: -3.15, 82: -2.97, 83: -2.79, 84: -2.61, 85: -2.43, 86: -2.26, 87: -2.09, 88: -1.91, 89: -1.75,
90: -1.58,
91: -1.41, 92: -1.25, 93: -1.09, 94: -0.93, 95: -0.77, 96: -0.61, 97: -0.46, 98: -0.3, 99: -0.15, 100: 0.0}
r = sr.Recognizer()
# Microphone input; the 16000 Hz sample rate matches the rate passed to asr().
mic = sr.Microphone(sample_rate=16000)


def _parse_volume_percent(text):
    """Return the number spoken after the command prefix, or None if absent.

    The first four characters of *text* are skipped.
    NOTE(review): presumably they are a fixed command phrase such as
    "音量调到" -- confirm against the phrases users actually speak.

    Args:
        text: Transcript returned by the speech recogniser.

    Returns:
        The parsed integer, or None when nothing follows the prefix or the
        remainder is not a number.
    """
    spoken = text[4:]
    if not spoken:
        # Nothing after the prefix: do not treat this as "set volume to 0".
        return None
    try:
        if spoken.isdigit():
            return int(spoken)
        return chinese2digits(spoken)
    except (TypeError, ValueError):
        # Raised when the remainder contains characters that are neither
        # convertible digits nor supported Chinese numerals.
        return None


while True:
    logging.info('录音中...')
    with mic as source:
        # Re-calibrate the energy threshold before every utterance.
        r.adjust_for_ambient_noise(source)
        audio = r.listen(source)
    logging.info('录音结束,识别中...')
    start_time = time.time()
    print(type(audio))
    audio_data = audio.get_wav_data()
    print(type(audio_data))
    # Send the captured WAV bytes to the Baidu ASR client.
    # NOTE(review): dev_pid 1536 presumably selects the Mandarin model --
    # confirm against the Baidu documentation.
    ret = aip_speech.asr(audio_data, 'wav', 16000, {'dev_pid': 1536, })
    print(ret)
    if ret and ret.get('err_no') == 0:
        candidates = ret.get('result') or []
        result = candidates[0] if candidates else ''
        print(result)
        myvol = _parse_volume_percent(result)
        if myvol is None:
            logging.warning('no volume number recognised in %r', result)
        elif myvol not in dict:
            # Only the levels present in the lookup table can be applied.
            logging.warning('requested volume %s is outside the supported range', myvol)
        else:
            print(myvol)
            # Apply the requested volume.
            volume.SetMasterVolumeLevel(dict[myvol], None)
        end_time = time.time()
        print(end_time - start_time)
    else:
        # The response may be empty or lack 'err_msg'; never index it blindly.
        print(ret.get('err_msg', ret) if ret else 'empty response from speech API')
    logging.info('end')