JAVA调用腾讯云API-实现语音识别(ASR)(二)
本章分享如何参照腾讯云API文档,调用其接口实现语音识别(ASR)。
package com.example.combat.controller; import com.example.combat.service.ASRService; import com.example.combat.asrutils.R; import com.example.combat.asrutils.param.CreateRecTask; import com.example.combat.asrutils.param.SentenceRecognition; import com.example.combat.afsutils.Base64ConvertUtils; import io.swagger.annotations.ApiImplicitParam; import io.swagger.annotations.ApiImplicitParams; import io.swagger.annotations.ApiOperation; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.web.bind.annotation.*; import org.springframework.web.multipart.MultipartFile; /** * @description: * @author: zhucj * @date: 2019-11-25 9:30 */ @RestController @RequestMapping("/afs") public class ASRControllerl { @Autowired private ASRService asrService; @ApiOperation(value = "一句话语音识别,60秒时间") @ApiImplicitParams({ @ApiImplicitParam(name = "sourceType",value = "语音数据来源。0:语音 URL;1:语音数据(post body)",required = true,dataType = "Integer"), @ApiImplicitParam(name = "url",value = "语音 URL,公网可下载。当 SourceType 值为 0(语音 URL上传) 时须填写该字段",dataType = "String"), @ApiImplicitParam(name = "file",value ="语音数据,当SourceType 值为1(本地语音数据上传)时必须填写",dataType = "MultipartFile"), }) @PostMapping(value = "sentence") public R sentenceRecognition(@RequestParam(value = "sourceType") Integer sourceType, @RequestParam(value = "file",required = false)MultipartFile file, @RequestParam(value = "url",required = false) String url){ return asrService.sentenceRecognition(SentenceRecognition .builder() .sourceType(sourceType) .voiceFormat(file==null?url.substring(url.length()-3,url.length()):file.getOriginalFilename().substring(file.getOriginalFilename().length()-3,file.getOriginalFilename().length())) .dataLen(file==null?null:Integer.valueOf(String.valueOf(file.getSize()))) .data(file==null?null: Base64ConvertUtils.getImageStr(file)) .url(url==null?null:url) .build()); } @ApiOperation(value = "录音文件识别") @ApiImplicitParams({ @ApiImplicitParam(name = "sourceType",value = "语音数据来源。0:语音 
URL;1:语音数据(post body)",required = true,dataType = "Integer"), @ApiImplicitParam(name = "url",value = "语音 URL,公网可下载。当 SourceType 值为 0(语音 URL上传) 时须填写该字段",dataType = "String"), @ApiImplicitParam(name = "file",value ="语音数据,当SourceType 值为1(本地语音数据上传)时必须填写",dataType = "MultipartFile"), }) @PostMapping(value = "createRecTask") public R createRecTask(@RequestParam(value = "sourceType") Integer sourceType, @RequestParam(value = "file",required = false)MultipartFile file, @RequestParam(value = "url",required = false) String url){ return asrService.createRecTask(CreateRecTask .builder() .sourceType(sourceType) .dataLen(file==null?null:Integer.valueOf(String.valueOf(file.getSize()))) .data(file==null?null: Base64ConvertUtils.getImageStr(file)) .url(url==null?null:url) .build()); } }
package com.example.combat.service;

import com.example.combat.asrutils.R;
import com.example.combat.asrutils.param.CreateRecTask;
import com.example.combat.asrutils.param.SentenceRecognition;

/**
 * Speech-recognition service contract.
 *
 * @author zhucj
 * @date 2019-11-25 9:23
 */
public interface ASRService {

    /**
     * Short-utterance (single sentence) recognition.
     *
     * @param param request describing the audio source
     * @return the recognition outcome wrapped in {@link R}
     */
    R sentenceRecognition(SentenceRecognition param);

    /**
     * Recording-file recognition.
     *
     * @param createRecTask request describing the recording to recognize
     * @return the recognition outcome wrapped in {@link R}
     */
    R createRecTask(CreateRecTask createRecTask);
}
package com.example.combat.service.Impl; import com.example.combat.service.ASRService; import com.example.combat.asrutils.ASRUtil; import com.example.combat.asrutils.R; import com.example.combat.asrutils.param.CreateRecTask; import com.example.combat.asrutils.param.SentenceRecognition; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; /** * @description: 语音识别实现类 * @author: zhucj * @date: 2019-11-25 9:28 */ @Service public class ASRServiceImpl implements ASRService { @Autowired private ASRUtil asrUtil; @Override public R sentenceRecognition(SentenceRecognition param) { return asrUtil.sentenceRecognition(param); } @Override public R createRecTask(CreateRecTask createRecTask) { return asrUtil.createRecTask(createRecTask); } }
package com.example.combat.asrutils; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONObject; import com.example.combat.config.constant.ContentTypeEnum; import com.example.combat.config.constant.HttpMethodEnum; import com.example.combat.config.constant.SignMenodEnum; import com.example.combat.asrutils.param.*; import com.example.combat.afsutils.HttpUtil; import com.example.combat.afsutils.SignUtils; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; import org.springframework.util.StringUtils; import java.util.*; /** * @description: 语音识别工具类 * @author: zhucj * @date: 2019-11-23 15:30 */ @Component @Slf4j public class ASRUtil { @Value("${tencent.secretId}") private String sercretId; @Value("${tencent.secretKey}") private String sercretKey; /** * 一句话识别 * @param param * @return */ public R sentenceRecognition(SentenceRecognition param){ //获取公共请求参数 TreeMap treeMap = createPublicMap("SentenceRecognition", "2019-06-14"); HashMap<String,Object> hashMap = new HashMap<>(); hashMap.put("ProjectId",0); hashMap.put("SubServiceType",2); hashMap.put("EngSerViceType","8k"); hashMap.put("SourceType",param.getSourceType()); verifyVoiceFormat(param.getVoiceFormat()); hashMap.put("VoiceFormat",param.getVoiceFormat()); hashMap.put("UsrAudioKey",IDUtil.createIdbyUUID()); if (Objects.equals(0,param.getSourceType())){ if (StringUtils.isEmpty(param.getUrl())){ throw new ASRRuntimeException(SystemConstants.PARAM_INCORRECT_CODE,"传入语音Url类型时,传入的url不能为空"); } //如果是语音Url,只需要传Url hashMap.put("Url",param.getUrl()); }else if (Objects.equals(1,param.getSourceType())){ if (StringUtils.isEmpty(param.getData()) ||StringUtils.isEmpty(param.getDataLen()) ){ throw new ASRRuntimeException(SystemConstants.PARAM_INCORRECT_CODE,"传入语音数据类型时,传入的语音数据和长度不能为空"); } if(param.getDataLen()>614400){ throw new ASRRuntimeException(SystemConstants.PARAM_INCORRECT_CODE,"传入的音频文件不能超过600kb");} 
hashMap.put("Data",param.getData()); hashMap.put("DataLen",param.getDataLen()); }else { throw new ASRRuntimeException(SystemConstants.PARAM_INCORRECT_CODE,"语音数据来源,传入的类型错误"); } //签名,公共参数不需要放到body中 String sign = null; try { sign = SignUtils.sign(treeMap, HttpMethodEnum.POST, SignMenodEnum.TC3_HMAC_SHA256, JSON.toJSONString(hashMap) , SentenceRecognitionApi.SENTENCE_RECOGNITION, sercretKey, ContentTypeEnum.JSON); } catch (Exception e) { log.error("签名异常:{}",e.getMessage()); return R.error("签名异常").setCode(SystemConstants.SERVER_ERROR_CODE); } try { String respJson = HttpUtil.httpPost(SentenceRecognitionApi.SENTENCE_RECOGNITION, JSON.parseObject(sign,Map.class),hashMap); JSONObject jsonObject = JSON.parseObject(respJson); String response = jsonObject.getString("Response"); JSONObject error =(JSONObject) JSON.parseObject(response).get("Error"); if (Objects.nonNull(error)){ return R.error(String.valueOf(error.get("Message"))).setCode(SystemConstants.SERVER_ERROR_CODE); }else { SentenceResponse sentenceResponse = JSON.parseObject(response, SentenceResponse.class); return R.ok(sentenceResponse.getResult()).setCode(SystemConstants.SUCCESS_CODE); } } catch (Exception e) { log.error("语音识别失败:{}",e.getMessage()); return R.error("语音识别失败").setCode(SystemConstants.SERVER_ERROR_CODE); } } /** * 录音文件识别 * @param createRecTask * @return */ public R createRecTask(CreateRecTask createRecTask){ TreeMap treeMap = createPublicMap("CreateRecTask", "2019-06-14"); HashMap<String,Object> hashMap = new HashMap(); hashMap.put("EngineModelType","8k_0"); hashMap.put("ChannelNum",1); hashMap.put("ResTextFormat",0); hashMap.put("SourceType",createRecTask.getSourceType()); if (Objects.equals(createRecTask.getSourceType(),0)){ hashMap.put("Url",createRecTask.getUrl()); }else if (Objects.equals(createRecTask.getSourceType(),1)){ hashMap.put("Data",createRecTask.getData()); if (createRecTask.getDataLen()>5*1024*1024){ throw new ASRRuntimeException(SystemConstants.PARAM_INCORRECT_CODE,"录音文件不能超过5MB"); } 
hashMap.put("DataLen",createRecTask.getDataLen()); } //签名,公共参数不需要放到body中 String sign = null; try { sign = SignUtils.sign(treeMap, HttpMethodEnum.POST, SignMenodEnum.TC3_HMAC_SHA256, JSON.toJSONString(hashMap) , SentenceRecognitionApi.SENTENCE_RECOGNITION, sercretKey, ContentTypeEnum.JSON); } catch (Exception e) { log.error("签名异常:{}",e.getMessage()); return R.error("签名异常").setCode(SystemConstants.SERVER_ERROR_CODE); } try { String respJson = HttpUtil.httpPost(SentenceRecognitionApi.SENTENCE_RECOGNITION, JSON.parseObject(sign,Map.class),hashMap); JSONObject jsonObject = JSON.parseObject(respJson); String response = jsonObject.getString("Response"); JSONObject error =(JSONObject) JSON.parseObject(response).get("Error"); if (Objects.nonNull(error)){ return R.error(String.valueOf(error.get("Message"))).setCode(SystemConstants.SERVER_ERROR_CODE); }else { JSONObject data =(JSONObject) JSON.parseObject(response).get("Data"); String taskId =String.valueOf(data.get("TaskId")); //通过TaskId查询识别内容 return describeTaskStatus(taskId); } } catch (Exception e) { log.error("语音识别失败:{}",e.getMessage()); return R.error("语音识别失败").setCode(SystemConstants.SERVER_ERROR_CODE); } } /** * 通过任务ID查询识别结果 * @param taskId * @return */ public R describeTaskStatus(String taskId){ TreeMap treeMap = createPublicMap("DescribeTaskStatus", "2019-06-14"); HashMap<String,Object> hashMap = new HashMap<>(); hashMap.put("TaskId",Integer.valueOf(taskId)); //签名,公共参数不需要放到body中 String sign = null; try { sign = SignUtils.sign(treeMap, HttpMethodEnum.POST, SignMenodEnum.TC3_HMAC_SHA256, JSON.toJSONString(hashMap) , SentenceRecognitionApi.SENTENCE_RECOGNITION, sercretKey, ContentTypeEnum.JSON); } catch (Exception e) { log.error("签名异常:{}",e.getMessage()); return R.error("签名异常").setCode(SystemConstants.SERVER_ERROR_CODE); } try { String respJson = HttpUtil.httpPost(SentenceRecognitionApi.SENTENCE_RECOGNITION, JSON.parseObject(sign,Map.class),hashMap); JSONObject jsonObject = JSON.parseObject(respJson); String response = 
jsonObject.getString("Response"); CreateRecTaskResponse createRecTaskResponse= JSON.parseObject(response, CreateRecTaskResponse.class); Data data = createRecTaskResponse.getData(); if (Objects.equals(data.getStatus(),0)){ return describeTaskStatus(taskId); }else if (Objects.equals(data.getStatus(),1)){ return describeTaskStatus(taskId); }else if (Objects.equals(data.getStatus(),2)){ return R.ok(data.getResult()).setCode(SystemConstants.SUCCESS_CODE); }else { return R.error(data.getErrorMsg()).setCode(SystemConstants.SERVER_ERROR_CODE); } } catch (Exception e) { log.error("任务ID查询识别失败:{}",e.getMessage()); return R.error("任务ID查询识别失败").setCode(SystemConstants.SERVER_ERROR_CODE); } } /** * 封装请求公共参数 * @param action * @param version * @return */ public TreeMap createPublicMap(String action, String version){ TreeMap<String,Object> treeMap = new TreeMap<>(); treeMap.put("Action",action); treeMap.put("Version",version); treeMap.put("Timestamp",getCurrentTimestamp()); treeMap.put("Nonce",new Random().nextInt(Integer.MAX_VALUE)); treeMap.put("SecretId",sercretId); return treeMap; } /** * 获取当前时间戳,单位秒 * @return */ public static long getCurrentTimestamp() { return System.currentTimeMillis()/1000; } public void verifyVoiceFormat(String type){ if (Objects.equals("mp3",type) || Objects.equals("wav",type)){ return; }else { throw new ASRRuntimeException(SystemConstants.PARAM_INCORRECT_CODE,"传入识别音频的音频格式错误"); } } }
package com.example.combat.asrutils; import java.text.SimpleDateFormat; import java.util.Date; import java.util.UUID; /** * 名称:IDUtil <br> * 描述:ID 生成工具类<br> * * @author zhucj * @version 1.0 * @since 1.0.0 */ public class IDUtil { /** * 主要功能:生成流水号 yyyyMMddHHmmssSSS + 3位随机数 * 注意事项:无 * * @return 流水号 */ public static String createIdByDate() { // 精确到毫秒 SimpleDateFormat fmt = new SimpleDateFormat("(yyyyMMddHHmmssSSS)"); String suffix = fmt.format(new Date()); suffix = suffix + "-" + Math.round((Math.random() * 100000)); return suffix; } /** * 主要功能:生成uuid * 注意事项:无 * * @return uuid 32 位 */ public static String createIdbyUUID() { return UUID.randomUUID().toString().replaceAll("-", ""); } }
package com.example.combat.asrutils;

import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
import lombok.ToString;

import java.io.Serializable;

/**
 * Generic response envelope: status code, success flag, message and payload.
 * The setters return {@code this} so calls can be chained.
 *
 * @author choleece
 * @date 2018/9/27
 */
@ApiModel
@ToString
public class R<T> implements Serializable {

    private static final long serialVersionUID = -6287952131441663819L;

    /** Status code; 200 unless changed. */
    @ApiModelProperty(value = "响应码", example = "200")
    private int code = 200;

    /** Success flag; stays {@code null} until set. */
    @ApiModelProperty(value = "成功标志", example = "true")
    private Boolean success;

    /** Human-readable message. */
    @ApiModelProperty(value = "返回消息说明", example = "操作成功")
    private String msg = "操作成功";

    /** Payload. */
    @ApiModelProperty(value = "返回数据")
    private T data;

    /** Creates an instance holding only the field defaults. */
    public R() {
    }

    /**
     * @param code status code
     * @param msg  message
     */
    public R(int code, String msg) {
        this.code = code;
        this.msg = msg;
    }

    /**
     * @param code status code
     * @param data payload
     */
    public R(int code, T data) {
        this.code = code;
        this.data = data;
    }

    /**
     * @param code status code
     * @param msg  message
     * @param data payload
     */
    public R(int code, String msg, T data) {
        this.code = code;
        this.msg = msg;
        this.data = data;
    }

    /**
     * @param code    status code
     * @param msg     message
     * @param data    payload
     * @param success success flag
     */
    public R(int code, String msg, T data, Boolean success) {
        this(code, msg, data);
        this.success = success;
    }

    /**
     * Creates an instance holding only the field defaults.
     *
     * @return a new instance
     */
    public static R instance() {
        return new R();
    }

    /** @return a new instance with the success flag set to {@code true} */
    public static R ok() {
        R result = R.instance();
        result.setSuccess(true);
        return result;
    }

    /** @return a successful instance carrying {@code data} */
    public static R ok(Object data) {
        R result = ok();
        result.setData(data);
        return result;
    }

    /** @return a successful instance carrying {@code data} and {@code msg} */
    public static R ok(Object data, String msg) {
        R result = ok(data);
        result.setMsg(msg);
        return result;
    }

    /** @return a new instance with the success flag set to {@code false} */
    public static R error() {
        R result = R.instance();
        result.setSuccess(false);
        return result;
    }

    /** @return a failed instance carrying {@code msg} */
    public static R error(String msg) {
        R result = error();
        result.setMsg(msg);
        return result;
    }

    public int getCode() {
        return code;
    }

    public R setCode(int code) {
        this.code = code;
        return this;
    }

    public Boolean getSuccess() {
        return success;
    }

    public R setSuccess(Boolean success) {
        this.success = success;
        return this;
    }

    public String getMsg() {
        return msg;
    }

    public R setMsg(String msg) {
        this.msg = msg;
        return this;
    }

    public T getData() {
        return data;
    }

    public R setData(T data) {
        this.data = data;
        return this;
    }
}
package com.example.combat.asrutils.param; /** * @description: 语音识别异常类 * @author: 朱传捷 80004071 * @date: 2019-11-23 15:46 */ public class ASRRuntimeException extends RuntimeException{ /** * 错误码 */ private Integer errorCode; /** * 错误描述 */ private String errorMsg; public ASRRuntimeException(Integer errorCode, String errorMsg){ super(errorMsg); this.errorCode = errorCode; this.errorMsg = errorMsg; } public Integer getErrorCode() { return errorCode; } public String getErrorMsg() { return errorMsg; } }
package com.example.combat.asrutils.param;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Request parameters for recording-file recognition.
 *
 * @author 朱传捷 80004071
 * @date 2019-11-25 12:22
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class CreateRecTask {

    /** Audio source: 0 = audio URL, 1 = audio data (post body). */
    private Integer sourceType;

    /**
     * Callback URL: address of a server set up by the user to receive the
     * recognition result; shorter than 2048 bytes.
     */
    private String CallbackUrl;

    /**
     * Audio URL, which must be publicly downloadable. Required when the source
     * type is 0, not needed when it is 1.
     * Note: keep the recording under one hour, otherwise recognition may fail,
     * and make sure the file downloads fast enough, otherwise the download may fail.
     */
    private String url;

    /** Audio data. Required when the source type is 1, optional when it is 0. */
    private String data;

    /**
     * Data length. Required when the source type is 1, optional when it is 0.
     * This is the length of the data before base64 encoding.
     */
    private Integer dataLen;
}
package com.example.combat.asrutils.param;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Response of the recording-recognition calls.
 *
 * @author 朱传捷 80004071
 * @date 2019-11-25 14:05
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class CreateRecTaskResponse extends Response {

    /**
     * Task payload. The type is fully qualified because the simple name
     * {@code Data} is already taken by the imported Lombok annotation.
     */
    private com.example.combat.asrutils.param.Data Data;
}
package com.example.combat.asrutils.param;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.NoArgsConstructor;

/**
 * Task details carried in the {@code Data} element of a recording-recognition
 * response. The Lombok annotation is written as {@code @lombok.Data} because
 * this class itself is named {@code Data}.
 *
 * @author 朱传捷 80004071
 * @date 2019-11-25 14:18
 */
@lombok.Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class Data {

    /** Task id. */
    private Integer TaskId;

    /** Numeric task status. */
    private Integer Status;

    /** Task status as text. */
    private String StatusStr;

    /** Recognition result. */
    private String Result;

    /** Error message. */
    private String ErrorMsg;
}
package com.example.combat.asrutils.param;

import lombok.Data;

/**
 * Error element of an API response.
 *
 * <p>The message field used to be spelled {@code Messag}. Elsewhere in this
 * project the error text is read from the JSON key {@code "Message"}, so the
 * misspelled property could not be populated from it. The field is now named
 * {@code Message}; the old accessor names are kept as deprecated aliases so
 * existing callers still compile.
 *
 * @author 朱传捷 80004071
 * @date 2019-10-18 15:58
 */
@Data
public class Error {

    /** Error code. */
    private String Code;

    /** Error message. */
    private String Message;

    /**
     * Alias for {@code getMessage()}, kept for backward compatibility.
     *
     * @return the error message
     * @deprecated misspelled name; use {@code getMessage()} instead
     */
    @Deprecated
    public String getMessag() {
        return this.Message;
    }

    /**
     * Alias for {@code setMessage(String)}, kept for backward compatibility.
     *
     * @param messag the error message
     * @deprecated misspelled name; use {@code setMessage(String)} instead
     */
    @Deprecated
    public void setMessag(String messag) {
        this.Message = messag;
    }
}
package com.example.combat.asrutils.param;

import lombok.Data;

/**
 * Fields common to every API response.
 *
 * @author 朱传捷 80004071
 * @date 2019-10-18 15:56
 */
@Data
public class Response {

    /** Request id. */
    private String RequestId;

    /** Error element. */
    private Error Error;
}
package com.example.combat.asrutils.param;

import lombok.Builder;
import lombok.Data;

import javax.validation.constraints.NotNull;

/**
 * Request parameters for short-utterance recognition.
 *
 * @author 朱传捷 80004071
 * @date 2019-11-23 15:21
 */
@Builder
@Data
public class SentenceRecognition {

    /** Audio source: 0 = audio URL, 1 = audio data (post body). */
    @NotNull(message = "语音数据来源不能为空")
    private Integer sourceType;

    /** Audio format of the input: mp3 or wav. */
    @NotNull(message = "识别音频的音频格式不为空")
    private String voiceFormat;

    /**
     * Audio URL, publicly downloadable. Required when the source type is 0
     * (URL upload), left empty when it is 1. The URL length must be greater than
     * 0 and less than 2048, and the URL must be url-encoded. The audio must be
     * shorter than 60 s.
     */
    private String url;

    /**
     * Audio data. Required when the source type is 1 (local upload), optional
     * when it is 0 (URL upload). Must be base64 encoded, and the encoded data
     * must not contain carriage returns or line feeds. The audio data must be
     * smaller than 600 KB.
     */
    private String data;

    /**
     * Data length in bytes. Required when the source type is 1 (local upload),
     * optional when it is 0 (URL upload). This is the length of the data before
     * base64 encoding.
     */
    private Integer dataLen;
}
package com.example.combat.asrutils.param;

/**
 * Endpoint constants for the speech-recognition API.
 *
 * @author 朱传捷 80004071
 * @date 2019-11-23 16:07
 */
public class SentenceRecognitionApi {

    /** Speech-recognition endpoint. */
    public static final String SENTENCE_RECOGNITION =
            "https://asr.ap-shenzhen-fsi.tencentcloudapi.com";
}
package com.example.combat.asrutils.param;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Response of the short-utterance recognition call.
 *
 * @author 朱传捷 80004071
 * @date 2019-11-23 15:32
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class SentenceResponse extends Response {

    /** Recognition result. */
    private String Result;
}
package com.example.combat.asrutils.param; /** * @ClassName SystemConstants * @Description 常量字段 * @Author YangLei * @Date 2019/5/7 11:13 * @Version 1.0 **/ public class SystemConstants { /** 传参不规范,code:400*/ public static final Integer PARAM_INCORRECT_CODE = 400; /** 成功,code:200*/ public static final Integer SUCCESS_CODE = 200; /** 服务内部调用失败,code:500*/ public static final Integer SERVER_ERROR_CODE = 500; /** 登录失效,code:401*/ public static final Integer AUTH_FAIL_CODE = 401; /** 无对应接口权限,code:402*/ public static final Integer HAVE_NOT_PERMISSION_CODE = 402; /** 操作无记录,code:403*/ public static final Integer NO_RECORD_OPERATION = 403; //http请求方式 public static final String HTTP_METHOD_POST = "POST"; public static final String HTTP_METHOD_GET = "GET"; public static final String HTTP_METHOD_PUT = "PUT"; public static final String HTTP_METHOD_DELETE = "DELETE"; }
# 腾讯云服务配置
tencent:
  secretId: *********
  secretKey: ********
<dependency>
<groupId>io.springfox</groupId>
<artifactId>springfox-swagger2</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>io.springfox</groupId>
<artifactId>springfox-swagger-ui</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.55</version>
</dependency>
<!--httpclient -->
<dependency> <groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.6</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>