词法分析

package com.mahuan.json;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

/**
 * Lexical analysis: splits a JSON text into a list of {@link Token}s.
 *
 * <p>The whole input is tokenized eagerly by the constructor. The resulting tokens are
 * then walked with {@link #get()} (current token) and {@link #next()} (advance).
 *
 * <p>Limitations: escape sequences inside strings are kept verbatim (backslash included)
 * rather than decoded, and number text is collected without validating the full JSON
 * number grammar.
 */
public class Tokenizer {

    /** The text being analyzed. */
    private final String json;

    /** Index of the next character to read from {@link #json}. */
    private int index = 0;

    /** Tokens produced from the input; array-backed because it is accessed by index. */
    private final List<Token> tokens = new ArrayList<>();

    /** Cursor into {@link #tokens}; equals {@code tokens.size()} once all tokens are consumed. */
    private int tokenIndex = 0;

    /**
     * Creates a tokenizer and immediately tokenizes the given text.
     *
     * @param json the JSON text to analyze
     * @throws Exception if the text is {@code null} or contains something that is not a valid token
     */
    public Tokenizer(String json) throws Exception {
        if (json == null) {
            throw new Exception("Invalid JSON input: input is null.");
        }
        this.json = json;
        this.init();
    }

    /**
     * Reads the next character and advances the read index by one.
     *
     * @return the character read, or {@code null} when the input is exhausted
     */
    private Character read() {
        if (index < json.length()) {
            return json.charAt(index++);
        }
        return null;
    }

    /**
     * Moves the read index back by one so the last character is read again.
     */
    private void unread() {
        index--;
    }

    /**
     * Tokenizes the whole input into {@link #tokens}.
     *
     * @throws Exception if an invalid token is encountered
     */
    private void init() throws Exception {
        Token token;
        while ((token = token()) != null) {
            tokens.add(token);
        }
    }

    /**
     * Reads the next token from the input.
     *
     * @return the next token, or {@code null} when only whitespace (or nothing) is left
     * @throws Exception if the next characters do not form a valid token
     */
    private Token token() throws Exception {
        Character c = read();
        // Skip whitespace. The null check matters: trailing whitespace runs into the end
        // of the input, and unboxing null for isSpace() would throw a NullPointerException.
        while (c != null && isSpace(c)) {
            c = read();
        }
        if (c == null) {
            return null;
        }
        char ch = c;
        switch (ch) {
            case '{':
                return new Token(TokenType.ObjectStart, "{");
            case '}':
                return new Token(TokenType.ObjectEnd, "}");
            case '[':
                return new Token(TokenType.ArrayStart, "[");
            case ']':
                return new Token(TokenType.ArrayEnd, "]");
            case ',':
                return new Token(TokenType.Comma, ",");
            case ':':
                return new Token(TokenType.Colon, ":");
            case '"':
                return new Token(TokenType.String, readString());
            case 't':
                expectLiteral("true");
                return new Token(TokenType.Boolean, "true");
            case 'f':
                expectLiteral("false");
                return new Token(TokenType.Boolean, "false");
            case 'n':
                expectLiteral("null");
                return new Token(TokenType.Null, null);
            default:
                break;
        }
        if (isNum(ch)) {
            // Put the first character back so readNum() sees the whole number.
            unread();
            return new Token(TokenType.Number, readNum());
        }
        throw new Exception(
                "Invalid JSON input: unexpected character '" + ch + "' at index " + (index - 1) + ".");
    }

    /**
     * Checks that the input contains the given literal, starting at the character that was
     * just read, and moves the read index past it.
     *
     * @param literal the expected keyword ({@code true}, {@code false} or {@code null})
     * @throws Exception if the input does not contain the literal at this position
     */
    private void expectLiteral(String literal) throws Exception {
        // The literal's first character has already been consumed by token().
        int start = index - 1;
        if (!json.startsWith(literal, start)) {
            throw new Exception(
                    "Invalid JSON input: expected '" + literal + "' at index " + start + ".");
        }
        index = start + literal.length();
    }

    /**
     * Reads the body of a string; the opening quote has already been consumed.
     * A backslash and the character following it are copied verbatim, so an escaped
     * quote does not terminate the string.
     *
     * @return the raw text between the quotes
     * @throws Exception if the input ends before the closing quote
     */
    private String readString() throws Exception {
        StringBuilder sb = new StringBuilder();
        while (true) {
            Character c = read();
            if (c == null) {
                throw new Exception("Invalid JSON input: unterminated string.");
            }
            char ch = c;
            if (ch == '"') {
                return sb.toString();
            }
            sb.append(ch);
            if (isEscape(ch)) {
                Character escaped = read();
                if (escaped == null) {
                    throw new Exception("Invalid JSON input: unterminated string.");
                }
                sb.append(escaped.charValue());
            }
        }
    }

    /**
     * Reads the text of a number starting at the current read index.
     * Consumes the longest run of characters that may appear in a JSON number and stops
     * at anything else (whitespace, delimiters, end of input) without consuming it.
     * The number grammar itself is not validated.
     *
     * @return the number text
     */
    private String readNum() {
        int start = index;
        while (index < json.length() && isNumPart(json.charAt(index))) {
            index++;
        }
        return json.substring(start, index);
    }

    /**
     * Tells whether the character can start a number.
     *
     * @param c the character to test
     * @return {@code true} for a minus sign or a decimal digit
     */
    private boolean isNum(char c) {
        return c == '-' || ('0' <= c && c <= '9');
    }

    /**
     * Tells whether the character can appear inside a number.
     *
     * @param c the character to test
     * @return {@code true} for digits, signs, the decimal point and the exponent marker
     */
    private boolean isNumPart(char c) {
        return isNum(c) || c == '+' || c == '.' || c == 'e' || c == 'E';
    }

    /**
     * Tells whether the character is the escape character.
     *
     * @param c the character to test
     * @return {@code true} for a backslash
     */
    private boolean isEscape(char c) {
        return c == '\\';
    }

    /**
     * Tells whether the character is skipped between tokens.
     *
     * @param c the character to test
     * @return {@code true} for tab, line feed, carriage return, NUL and space
     */
    private boolean isSpace(char c) {
        return c == '\t' || c == '\n' || c == '\r' || c == '\0' || c == ' ';
    }

    /**
     * Advances to the next token.
     * The cursor also moves when the last token is left behind, so that {@link #get()}
     * reports the end of the stream instead of returning the last token forever.
     *
     * @return the new current token, or {@code null} if there are no more tokens
     */
    public Token next() {
        if (tokenIndex < tokens.size()) {
            tokenIndex++;
        }
        return get();
    }

    /**
     * Returns the token at the current position without advancing.
     *
     * @return the current token, or {@code null} if all tokens have been consumed
     */
    public Token get() {
        if (tokenIndex < tokens.size()) {
            return tokens.get(tokenIndex);
        }
        return null;
    }

}

/**
 * Kinds of tokens produced by lexical analysis.
 */
enum TokenType {

    /** Start of an object: <code>{</code>. */
    ObjectStart,

    /** End of an object: <code>}</code>. */
    ObjectEnd,

    /** Start of an array: {@code [}. */
    ArrayStart,

    /** End of an array: {@code ]}. */
    ArrayEnd,

    /** A string. */
    String,

    /** A number. */
    Number,

    /** A boolean. */
    Boolean,

    /** The null value. */
    Null,

    /** The comma separator: {@code ,}. */
    Comma,

    /** The colon separator: {@code :}. */
    Colon

}

/**
 * A single unit produced by lexical analysis: a token type plus its text.
 *
 * @author mahuan
 * @version 2017-12-13
 */
class Token {

    /** The kind of token. */
    public TokenType type;

    /** The text associated with the token; may be {@code null}. */
    public String value;

    /**
     * Creates a token.
     *
     * @param type  the kind of token
     * @param value the text associated with the token
     */
    public Token(final TokenType type, final String value) {
        this.value = value;
        this.type = type;
    }

}

语法分析

package com.mahuan.json;

import java.util.HashMap;

import java.util.LinkedList;

import java.util.List;

import java.util.Map;

/**
 * Syntax analysis: builds Java values from the tokens of a {@link Tokenizer}.
 *
 * <p>Objects become {@link Map}s, arrays become {@link List}s, booleans become
 * {@link Boolean}s and null becomes {@code null}. Strings and numbers are returned as
 * the token text. Commas are skipped wherever they appear inside an object or array;
 * separators are not validated.
 */
public class Parser {

    /**
     * Parses an object into a map.
     * The tokenizer is expected to be positioned on the opening brace, which is consumed
     * once. On return the tokenizer is positioned on the closing brace.
     *
     * @param tokenizer the token source
     * @return the members of the object
     * @throws Exception if the tokens end early, a key is not a string, or a key is not
     *                   followed by a colon and a value
     */
    public static Map<String, Object> parserObject(Tokenizer tokenizer) throws Exception {
        Map<String, Object> map = new HashMap<>();
        Token token = requireToken(tokenizer.get(), "object");
        if (token.type == TokenType.ObjectStart) {
            tokenizer.next();
        }
        while (true) {
            token = requireToken(tokenizer.get(), "object");
            if (token.type == TokenType.ObjectEnd) {
                break;
            }
            if (token.type == TokenType.Comma) {
                tokenizer.next();
                continue; // skip ','
            }
            if (token.type != TokenType.String) {
                throw new Exception("Invalid JSON input: object key must be a string.");
            }
            String key = token.value;
            Token colon = tokenizer.next();
            if (colon == null || colon.type != TokenType.Colon) {
                throw new Exception(
                        "Invalid JSON input: expected ':' after object key \"" + key + "\".");
            }
            tokenizer.next();
            map.put(key, parserValue(tokenizer));
        }
        return map;
    }

    /**
     * Parses an array into a list.
     * The tokenizer is expected to be positioned on the opening bracket, which is consumed
     * once; any further opening bracket starts a nested array element. On return the
     * tokenizer is positioned on the closing bracket.
     *
     * @param tokenizer the token source
     * @return the elements of the array
     * @throws Exception if the tokens end before the closing bracket or an element is invalid
     */
    public static List<Object> parserArray(Tokenizer tokenizer) throws Exception {
        List<Object> list = new LinkedList<>();
        Token token = requireToken(tokenizer.get(), "array");
        // Only the array's own '[' is skipped. Skipping every '[' inside the loop would
        // flatten nested arrays into their parent.
        if (token.type == TokenType.ArrayStart) {
            tokenizer.next();
        }
        while (true) {
            token = requireToken(tokenizer.get(), "array");
            if (token.type == TokenType.ArrayEnd) {
                break;
            }
            if (token.type == TokenType.Comma) {
                tokenizer.next();
                continue; // skip ','
            }
            list.add(parserValue(tokenizer));
            // If the cursor is still on the element just parsed, the tokenizer could not
            // advance (the tokens ended without a closing ']'); fail instead of reading
            // the same token forever.
            if (tokenizer.get() == token) {
                throw new Exception("Invalid JSON input: unterminated array.");
            }
        }
        return list;
    }

    /**
     * Parses the value at the tokenizer's current position, dispatching on the token type.
     * After the value has been read (or has failed to parse) the tokenizer is advanced by
     * one token: past the closing token for objects and arrays, past the value itself for
     * everything else.
     *
     * @param tokenizer the token source
     * @return a {@link Map} for an object, a {@link List} for an array, a {@link Boolean}
     *         for a boolean, the token text for a string or number, {@code null} for null
     * @throws Exception if there is no current token or it cannot start a value
     */
    public static Object parserValue(Tokenizer tokenizer) throws Exception {
        Token token = tokenizer.get();
        if (token == null || token.type == null) {
            throw new Exception("Invalid JSON input: unexpected end of input, expected a value.");
        }
        try {
            switch (token.type) {
                case ObjectStart:
                    return parserObject(tokenizer);
                case ArrayStart:
                    return parserArray(tokenizer);
                case Boolean:
                    return Boolean.valueOf(token.value);
                case String:
                case Number:
                    return token.value;
                case Null:
                    return null;
                default:
                    throw new Exception(
                            "Invalid JSON input: unexpected token '" + token.value
                                    + "', expected a value.");
            }
        } finally {
            // Skip the end token of an object/array, or the scalar token itself.
            tokenizer.next();
        }
    }

    /**
     * Returns the given token, or fails if the token stream has ended.
     *
     * @param token   the token to check, possibly {@code null}
     * @param context what was being parsed, used in the error message
     * @return the token, never {@code null}
     * @throws Exception if {@code token} is {@code null}
     */
    private static Token requireToken(Token token, String context) throws Exception {
        if (token == null) {
            throw new Exception(
                    "Invalid JSON input: unexpected end of input while parsing " + context + ".");
        }
        return token;
    }

}

测试代码

package com.mahuan.json;

public class Test {

    /**
     * Tokenizes and parses a sample JSON document and prints the parsed value.
     *
     * @param args unused
     * @throws Exception if tokenizing or parsing fails
     */
    public static void main(String[] args) throws Exception {
        Tokenizer tokenizer = new Tokenizer(
                "{  \"success\": true,  \"message\": \"123\",  \"result\": [    -2146464718]}");
        Object parsed = Parser.parserValue(tokenizer);
        System.out.println(parsed);
    }

}

秒客网

一个简单的json解析器

词法分析

语法分析

测试代码

相关文章