Java 使用腾讯翻译 API 实现含 HTML 标签文本、JSON 值的精准翻译工具(一):翻译标签文本工具

时间:2025-02-13 08:34:27

package org.springblade.common.utils;


import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Translates the text content of an HTML fragment while leaving the markup untouched.
 *
 * <p>Tags are located with a simple regular expression ({@code <[^>]+>}); everything
 * between two consecutive tags is treated as text and passed to
 * {@code TencentTranslationUtil.getText}. Tags are copied to the output verbatim.
 *
 * <p>NOTE(review): because tags are found with a regex rather than an HTML parser,
 * a literal {@code >} inside an attribute value ends the "tag" early, and the content
 * of {@code <script>}/{@code <style>} elements is treated as translatable text.
 * Confirm the callers only pass simple rich-text fragments.
 */
public class TencentTranslationForHTML {

    /** Matches one HTML tag: a {@code <}, at least one non-{@code >} character, then {@code >}. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<[^>]+>");

    /** Language codes used by the single-argument {@link #translateHTML(String)}. */
    private static final String DEFAULT_SOURCE_LANG = "zh";
    private static final String DEFAULT_TARGET_LANG = "en";

    /**
     * Small manual demo: translates a sample fragment and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String htmlContent = "<div style=\"text-align:center\"><img src=\"http://192.168.0.137:8999/machine/upload/image/20250208/32551087576762.jpg\" alt=\"图片 alt\" width=\"350\" height=\"auto\" data-align=\"center\"></div><p style=\"text-align: center\">下料工序后的加工件</p>";
        try {
            String translatedHtml = translateHTML(htmlContent);
            System.out.println(translatedHtml);
        } catch (Exception e) {
            // Demo entry point only: report the failure on stderr and exit normally.
            e.printStackTrace();
        }
    }

    /**
     * Translates the text between HTML tags using the default language pair
     * ({@code "zh"} and {@code "en"}, in that argument order).
     *
     * @param html the HTML fragment; may be {@code null} or empty
     * @return the fragment with its text segments translated and all tags unchanged;
     *         {@code html} itself when it is {@code null} or empty
     * @throws com.tencentcloudapi.common.exception.TencentCloudSDKException
     *         propagated from the translation call
     */
    public static String translateHTML(String html) throws com.tencentcloudapi.common.exception.TencentCloudSDKException {
        return translateHTML(html, DEFAULT_SOURCE_LANG, DEFAULT_TARGET_LANG);
    }

    /**
     * Translates the text between HTML tags, copying the tags themselves verbatim.
     *
     * <p>Whitespace-only segments are copied as-is without calling the translation
     * service, and the leading/trailing whitespace of every other segment is preserved
     * around the translated text.
     *
     * @param html       the HTML fragment; may be {@code null} or empty
     * @param sourceLang passed as the second argument of {@code TencentTranslationUtil.getText}
     *                   (presumably the source language code — confirm against that utility)
     * @param targetLang passed as the third argument of {@code TencentTranslationUtil.getText}
     *                   (presumably the target language code — confirm against that utility)
     * @return the fragment with its text segments translated and all tags unchanged;
     *         {@code html} itself when it is {@code null} or empty
     * @throws com.tencentcloudapi.common.exception.TencentCloudSDKException
     *         propagated from the translation call
     */
    public static String translateHTML(String html, String sourceLang, String targetLang)
            throws com.tencentcloudapi.common.exception.TencentCloudSDKException {
        if (html == null || html.isEmpty()) {
            return html;
        }

        StringBuilder result = new StringBuilder(html.length());
        Matcher tagMatcher = TAG_PATTERN.matcher(html);
        int lastEnd = 0;

        // Single pass: emit the (translated) text preceding each tag, then the tag itself.
        while (tagMatcher.find()) {
            appendTranslated(result, html.substring(lastEnd, tagMatcher.start()), sourceLang, targetLang);
            result.append(html, tagMatcher.start(), tagMatcher.end());
            lastEnd = tagMatcher.end();
        }

        // Text after the last tag (or the whole input when it contains no tags).
        appendTranslated(result, html.substring(lastEnd), sourceLang, targetLang);

        return result.toString();
    }

    /**
     * Appends {@code text} to {@code out}, translating its non-whitespace core.
     * Surrounding whitespace is kept verbatim; blank or empty text is appended unchanged.
     */
    private static void appendTranslated(StringBuilder out, String text, String sourceLang, String targetLang)
            throws com.tencentcloudapi.common.exception.TencentCloudSDKException {
        int start = 0;
        int end = text.length();
        while (start < end && Character.isWhitespace(text.charAt(start))) {
            start++;
        }
        while (end > start && Character.isWhitespace(text.charAt(end - 1))) {
            end--;
        }

        if (start == end) {
            // Nothing to translate: avoid a needless remote call and keep layout whitespace intact.
            out.append(text);
            return;
        }

        String core = text.substring(start, end);
        String translated = TencentTranslationUtil.getText(core, sourceLang, targetLang);

        out.append(text, 0, start);
        // Fall back to the original text rather than emitting the literal "null".
        out.append(translated != null ? translated : core);
        out.append(text, end, text.length());
    }

}