1. 编写Schema
1.1 student.proto
// Schema of the Student message used by the protobuf benchmark.
// `required` fields only exist in proto2, so the syntax is declared explicitly;
// protoc 3.x otherwise prints a "No syntax specified" warning and defaults to proto2.
syntax = "proto2";

package protobuf;

// The generated Java code lands in com.topsec.trd.StudentProto.
option java_package = "com.topsec.trd";
option java_outer_classname = "StudentProto";

message Student {
  required string name = 1;
  required int32 age = 2;
  required int32 sex = 3;
  optional string alias = 4;
  repeated string interest = 5;
}
1.2 student.thrift
- // Thrift IDL for the StudentThrift struct; Java code is generated into com.topsec.trd.
- namespace java com.topsec.trd
- struct StudentThrift {
- 1: string name,
- 2: i32 age,
- 3: i32 sex,
- // NOTE(review): `aliass` looks like a typo for `alias` (the spelling used by the
- // protobuf and Avro schemas). Renaming it would also rename the generated
- // accessors, so confirm no caller depends on the current name before changing it.
- 4: optional string aliass,
- 5: list<string> interest
- }
1.3 student.avsc
{
  "namespace": "com.topsec.trd",
  "type": "record",
  "name": "StudentAvro",
  "fields": [
    {
      "name": "name",
      "type": "string"
    },
    {
      "name": "alias",
      "type": ["string", "null"]
    },
    {
      "name": "age",
      "type": ["int", "null"]
    },
    {
      "name": "sex",
      "type": ["string", "null"]
    },
    {
      "name": "interet",
      "type": {
        "type": "array",
        "items": "string"
      }
    }
  ]
}
2. 生成bean
生成protobuf bean
protoc --java_out=. student.proto
生成thrift bean
thrift-0.10.0.exe -gen java student.thrift
生成avro bean
java -jar lib/avro-tools-1.7.7.jar compile schema src/main/resource/student.avsc src/main/java
3. 编写测试代码
- public class ProtoBuf {
- private final static int TIMES = 10000000;
- public static void main(String[] args) throws IOException {
- long start = System.currentTimeMillis();
- for(int i = 0; i < TIMES; i++) {
- deserialize();
- }
- long end = System.currentTimeMillis();
- System.out.println("ProtoBuf total time \t" + (end -start));
- }
- public static byte [] serializeAsBytes() {
- return makeStudent().build().toByteArray();
- }
- public static InputStream serializeAsStream() throws IOException {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- makeStudent().build().writeDelimitedTo(baos);
- return new ByteArrayInputStream(baos.toByteArray());
- }
- private static Builder makeStudent() {
- StudentProto.Student.Builder student = StudentProto.Student.newBuilder();
- student.setName("小明");
- student.setSex(0);
- student.setAge(18);
- List<String> interests = new ArrayList<String>();
- interests.add("吃饭");
- interests.add("睡觉");
- interests.add("打豆豆");
- student.addAllInterest(interests);
- return student;
- }
- public static void deserialize() throws IOException {
- byte [] bytes = serializeAsBytes();
- StudentProto.Student student = StudentProto.Student.parseFrom(bytes);
- System.out.println(student.getName());
- }
- }
- public class Thrift {
- private final static int TIMES = 10000000;
- private final static TSerializer SERIALIZER = new TSerializer(new TBinaryProtocol.Factory());
- private final static TDeserializer DESERIALIZER = new TDeserializer(new TBinaryProtocol.Factory());
- public static void main(String[] args) throws TException {
- long start = System.currentTimeMillis();
- for(int i = 0; i < TIMES; i++) {
- deserialize();
- }
- long end = System.currentTimeMillis();
- System.out.println("Thrift total time \t" + (end -start));
- }
- public static byte [] serialize() throws TException {
- StudentThrift stu = new StudentThrift();
- stu.setName("小明");
- stu.setSex(0);
- stu.setAge(18);
- List<String> interests = new ArrayList<String>();
- interests.add("吃饭");
- interests.add("睡觉");
- interests.add("打豆豆");
- stu.setInterest(interests);
- return SERIALIZER.serialize(stu);
- }
- public static void deserialize() throws TException {
- byte [] bytes = serialize();
- StudentThrift stu = new StudentThrift();
- DESERIALIZER.deserialize(stu, bytes);
- // System.out.println(stu);
- }
- }
- public class AVRO {
- private final static int TIMES = 10000000;
- public static void main(String[] args) throws IOException {
- long start = System.currentTimeMillis();
- deserializeAsBytes();
- long end = System.currentTimeMillis();
- System.out.println("Avro total time \t" + (end -start));
- }
- public static byte [] serializeAsBytes() throws IOException {
- StudentAvro student = new StudentAvro();
- student.setName("小明");
- student.setSex("女");
- student.setAge(18);
- List<CharSequence> interests = new ArrayList<CharSequence>();
- interests.add("吃饭");
- interests.add("睡觉");
- interests.add("打豆豆");
- student.setInteret(interests);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DatumWriter<StudentAvro> userDatumWriter = new SpecificDatumWriter<StudentAvro>(StudentAvro.class);
- DataFileWriter<StudentAvro> dataFileWriter = new DataFileWriter<StudentAvro>(userDatumWriter);
- dataFileWriter.create(student.getSchema(), baos);
- dataFileWriter.append(student);
- dataFileWriter.close();
- return baos.toByteArray();
- }
- public static byte [] serializeAsBytes(int times) throws IOException {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DatumWriter<StudentAvro> userDatumWriter = new SpecificDatumWriter<StudentAvro>(StudentAvro.class);
- DataFileWriter<StudentAvro> dataFileWriter = new DataFileWriter<StudentAvro>(userDatumWriter);
- dataFileWriter.create(new StudentAvro().getSchema(), baos);
- for(int i = 0; i < times; i++) {
- StudentAvro student = new StudentAvro();
- student.setName("小明");
- student.setSex("女");
- student.setAge(18);
- List<CharSequence> interests = new ArrayList<CharSequence>();
- interests.add("吃饭");
- interests.add("睡觉");
- interests.add("打豆豆");
- student.setInteret(interests);
- dataFileWriter.append(student);
- }
- dataFileWriter.close();
- return baos.toByteArray();
- }
- public static void deserializeAsBytes() throws IOException {
- SeekableByteArrayInput sbai = new SeekableByteArrayInput(serializeAsBytes(TIMES));
- DatumReader<StudentAvro> datumReader = new SpecificDatumReader<StudentAvro>(StudentAvro.class);
- DataFileReader<StudentAvro> dataFileReader = new DataFileReader<StudentAvro>(sbai, datumReader);
- StudentAvro user = null;
- while (dataFileReader.hasNext()) {
- user = dataFileReader.next(user);
- System.out.println(user.getName());
- }
- dataFileReader.close();
- }
- }
4. 测试结果
private final static int TIMES = 100000;
ProtoBuf total time 282
Thrift total time 229
Avro total time 694
private final static int TIMES = 1000000;
ProtoBuf total time 988
Thrift total time 1248
Avro total time 2079
private final static int TIMES = 10000000;
ProtoBuf total time 7368
Thrift total time 10675
Avro total time 15025
4.1 小结
(包含测试代码及三种schema和生成bean的工具等)
6.protobuf分析
6.1 protobuf特点
(a)占用空间小
一条消息数据,用protobuf序列化后的大小是json的10分之一,xml格式的20分之一,是二进制序列化的10分之一(极端情况下,会大于等于直接序列化),总体看来ProtoBuf的优势还是很明显的。
(b)解析速度快
解析速度快,主要归功于protobuf对message 没有动态解析,没有了动态解析的处理序列化速度自然快了。就比如xml ,获取文件之后,还需要解析标签、节点、字段,每一个都需要遍历,而protobuf不需要,直接将field装入流。
(c)兼容性好
fieldNumber 为每个field定义一个编号,其一保证不重复,其二保证其在流中的位置。如若当前数据流中有某个字段,而解析方没有相关的解析代码,解析方会直接 skip 掉这个field,而且读数据的position也会后移,保证后续读取不出问题。
- private StudentProto.Student request(int age) {
- StudentProto.Student.Builder builder = StudentProto.Student.newBuilder();
- builder.setName("小明");
- builder.setSex(0);
- builder.setAge(age);
- return builder.build();
- }
- static final int TAG_TYPE_BITS = 3;
- /** Makes a tag value given a field number and wire type. */
- static int makeTag(final int fieldNumber, final int wireType) {
- return (fieldNumber << TAG_TYPE_BITS) | wireType;
- }
字符串存储:len+value,len是字符串的长度
分析字节流: