jvm开发笔记---class文件解析器(转)

时间:2021-03-26 17:19:51

笔者最近对java虚拟机产生了浓厚的兴趣, 想了解最简单的jvm是如何写出来的, 于是看起了《java虚拟机规范》。这个规范如同intel开发手册一样, 是每个jvm开发人员必须掌握的。要想翻译执行java byte code, 首先得从java class文件中把Code属性解析出来才行。在笔者看来, java的class文件结构着实比elf文件结构复杂很多, 不过再复杂的结构, 只要耐心对照着手册中的结构一一解析即可。经过几天的努力, 用c实现了一个class文件解析器, 目前它只能解析手册中规定的jvm最基本的、必须解析出来的一些属性: Code, StackMapTable, LineNumberTable。当然, 随着开发的深入, 它会不断地健壮起来。

下面说说我在解析java class文件格式中碰到的几个问题, 帮助后面也要自己动手写解析器的朋友少走一点弯路:

1、为了提高解析性能, 使用了mmap将class文件全部映射到内存中, 而不是每次解析都使用read读磁盘文件。

/*
 * Map the whole class file read-only into memory.
 *
 * On success the descriptor is left in class_fd, the file size in
 * class_file_len and the mapping address in class_start_mem.
 *
 * Returns 0 on success, -1 on failure (the descriptor is closed again).
 */
int mmap_class_file(const char *class_file)
{
	struct stat f_stat;

	class_fd = open(class_file, O_RDONLY);
	if (class_fd == -1) {
		perror("open");
		return -1;
	}

	/* Query the descriptor we just opened instead of resolving the path twice. */
	if (fstat(class_fd, &f_stat) == -1) {
		perror("fstat");
		close(class_fd);
		return -1;
	}

	/* A zero-length mapping is rejected by mmap; report it explicitly. */
	if (f_stat.st_size <= 0) {
		fprintf(stderr, "%s: empty file\n", class_file);
		close(class_fd);
		return -1;
	}

	class_file_len = f_stat.st_size;
	printf("%s file len: %d\n", class_file, class_file_len);

	class_start_mem = mmap(NULL, class_file_len, PROT_READ, MAP_PRIVATE, class_fd, 0);
	/* mmap reports failure with MAP_FAILED, never with NULL. */
	if (class_start_mem == MAP_FAILED) {
		perror("mmap");
		class_start_mem = NULL;
		close(class_fd);
		return -1;
	}
	printf("mmap %s at %p\n", class_file, class_start_mem);

	return 0;
}

 

2、java class文件使用的是big-endian字节序, 而x86使用的是little-endian字节序, 所以读取多字节数值时要转换一下, 就是移位操作而已。

/*
 * Byte-swapping readers: store into `s` the value `p` with its byte order
 * reversed (4 bytes, 2 bytes, or a plain copy for 1 byte).
 *
 * `p` is evaluated exactly once and may be any expression.
 *
 * NOTE: the trailing ';' after `while (0)` is kept on purpose: the example
 * call below invokes the macro without a terminating semicolon of its own.
 */
#define CLASS_READ_U4(s, p) \
do { \
	unsigned int class_read_tmp_ = (unsigned int)(p); \
	(s) = (((class_read_tmp_ >> 24) & 0x000000ffu) | \
	       ((class_read_tmp_ >> 8) & 0x0000ff00u) | \
	       ((class_read_tmp_ << 24) & 0xff000000u) | \
	       ((class_read_tmp_ << 8) & 0x00ff0000u)); \
} while (0);

#define CLASS_READ_U2(s, p) \
do { \
	unsigned int class_read_tmp_ = (unsigned int)(p); \
	(s) = (((class_read_tmp_ >> 8) & 0x00ffu) | \
	       ((class_read_tmp_ << 8) & 0xff00u)); \
} while (0);

#define CLASS_READ_U1(s, p) \
do { \
	(s) = (p); \
} while (0);

 

例如读一个4字节内容:

 

u4 class_magic;

/* read class magic number. */
CLASS_READ_U4(class_magic, (*(u4 *)p_mem))
p_mem = 4;

printf("magic: 0x%x\n", class_magic);

 



////////////////////////////////////////////////////////////////////////////////////////////

下面是全部的源码:

 

jvm.h

#ifndef JVM_H
#define JVM_H

/* Magic number parse_class_magic() expects at the start of a class file. */
#define JVM_CLASS_MAGIC 0xcafebabe

/*
 * Byte-swapping readers: store into `s` the value `p` with its byte order
 * reversed (4 bytes, 2 bytes, or a plain copy for 1 byte).
 *
 * `p` is evaluated exactly once and may be any expression.
 *
 * NOTE: the trailing ';' after `while (0)` is kept on purpose: call sites
 * in classreader.c invoke these macros without a terminating semicolon.
 */
#define CLASS_READ_U4(s, p) \
do { \
	unsigned int class_read_tmp_ = (unsigned int)(p); \
	(s) = (((class_read_tmp_ >> 24) & 0x000000ffu) | \
	       ((class_read_tmp_ >> 8) & 0x0000ff00u) | \
	       ((class_read_tmp_ << 24) & 0xff000000u) | \
	       ((class_read_tmp_ << 8) & 0x00ff0000u)); \
} while (0);

#define CLASS_READ_U2(s, p) \
do { \
	unsigned int class_read_tmp_ = (unsigned int)(p); \
	(s) = (((class_read_tmp_ >> 8) & 0x00ffu) | \
	       ((class_read_tmp_ << 8) & 0xff00u)); \
} while (0);

#define CLASS_READ_U1(s, p) \
do { \
	(s) = (p); \
} while (0);

/* Copy `len` raw bytes from `p` to `s`; no terminator is appended. */
#define CLASS_READ_STRING(s, p, len) \
do { \
	memcpy((s), (p), (len)); \
} while (0);

/*
 * Unsigned scalar types used for class-file fields.
 * NOTE(review): the names suggest 4-, 2- and 1-byte widths; this assumes
 * unsigned int is 32 bits and unsigned short is 16 bits on the target.
 */
typedef unsigned int u4;
typedef unsigned short u2;
typedef unsigned char u1;

/* Constant pool tag values; parse_class_constant() switches on these. */
#define CONSTANT_Class 7
#define CONSTANT_Fieldref 9
#define CONSTANT_Methodref 10
#define CONSTANT_InterfaceMethodref 11
#define CONSTANT_String 8
#define CONSTANT_Integer 3
#define CONSTANT_Float 4
#define CONSTANT_Long 5
#define CONSTANT_Double 6
#define CONSTANT_NameAndType 12
#define CONSTANT_Utf8 1
#define CONSTANT_MethodHandle 15
#define CONSTANT_MethodType 16
#define CONSTANT_InvokeDynamic 18

/*
 * Presumably class-level access_flags bits (not referenced by the parser
 * yet, which only prints the raw flag word) -- confirm against the spec.
 */
#define ACC_PUBLIC 0x0001
#define ACC_FINAL 0x0010
#define ACC_SUPER 0x0020
#define ACC_INTERFACE 0x0200
#define ACC_ABSTRACT 0X0400
#define ACC_SYNTHETIC 0x1000
#define ACC_ANNOTATION 0x2000
#define ACC_ENUM 0x4000

/*
 * Presumably method-level access_flags bits (also not referenced by the
 * parser yet).
 * NOTE(review): METHOD_ACC_SYNCHRONIED looks like a misspelling of
 * "SYNCHRONIZED"; left as is because renaming would change the interface.
 */
#define METHOD_ACC_PUBLIC 0x0001
#define METHOD_ACC_PRIVATE 0x0002
#define METHOD_ACC_PROTECTED 0x0004
#define METHOD_ACC_STATIC 0x0008
#define METHOD_ACC_FINAL 0x0010
#define METHOD_ACC_SYNCHRONIED 0x0020
#define METHOD_ACC_BRIDGE 0x0040
#define METHOD_ACC_VARARGS 0x0080
#define METHOD_ACC_NATIVE 0x0100
#define METHOD_ACC_ABSTRACT 0x0400
#define METHOD_ACC_STRICT 0x0800
#define METHOD_ACC_SYNTHETIC 0x1000

/* Verification type tags; __parse_verification_type_info() switches on these. */
#define ITEM_Top 0
#define ITEM_Integer 1
#define ITEM_Float 2
#define ITEM_Double 3
#define ITEM_Long 4
#define ITEM_Null 5
#define ITEM_UninitializedThis 6
#define ITEM_Object 7
#define ITEM_Uninitialized 8

/*
 * One slot of the in-memory constant table built by parse_class_constant():
 * for Utf8 entries `index` is the pool index and `base` points to a
 * malloc'd, NUL-terminated copy of the string bytes. Other entry kinds are
 * not stored by the parser.
 */
struct constant_info_st {
u2 index;
u1 *base;
}__attribute__ ((packed));

/* Generic constant pool entry: a tag byte followed by tag-specific data. */
struct cp_info {
u1 tag;
u1 info[];
}__attribute__ ((packed));

/*
 * Payload layouts for the individual constant pool entry kinds.
 *
 * The tag member is commented out in every struct: parse_class_constant()
 * reads the tag byte on its own and then fills the remaining members one
 * at a time with the CLASS_READ_* macros, using these structs as local
 * scratch variables.
 */
struct CONSTANT_Class_info {
//u1 tag;
u2 name_index;
}__attribute__ ((packed));

/* Not instantiated by the parser, which reuses CONSTANT_Methodref_info. */
struct CONSTANT_Fieldref_info {
//u1 tag;
u2 class_index;
u2 name_and_type_index;
}__attribute__ ((packed));

/* Used by the parser for Fieldref, Methodref and InterfaceMethodref alike. */
struct CONSTANT_Methodref_info {
//u1 tag;
u2 class_index;
u2 name_and_type_index;
}__attribute__ ((packed));

/*
 * Not instantiated by the parser, which reuses CONSTANT_Methodref_info.
 * NOTE(review): `name_and_type_inex` looks like a misspelling of
 * `name_and_type_index`; left as is to keep the member name stable.
 */
struct CONSTANT_InterfaceMethodref_info {
//u1 tag;
u2 class_index;
u2 name_and_type_inex;
}__attribute__ ((packed));

struct CONSTANT_String_info {
//u1 tag;
u2 string_index;
}__attribute__ ((packed));

struct CONSTANT_Integer_info {
//u1 tag;
u4 bytes;
}__attribute__ ((packed));

struct CONSTANT_Float_info {
//u1 tag;
u4 bytes;
}__attribute__ ((packed));

/* Two u4 halves, high part first. */
struct CONSTANT_Long_info {
//u1 tag;
u4 high_bytes;
u4 low_bytes;
}__attribute__ ((packed));

/* Two u4 halves, high part first. */
struct CONSTANT_Double_info {
//u1 tag;
u4 high_bytes;
u4 low_bytes;
}__attribute__ ((packed));

struct CONSTANT_NameAndType_info {
//u1 tag;
u2 name_index;
u2 descriptor_index;
}__attribute__ ((packed));

/* `length` bytes of string data follow in the flexible `bytes` member. */
struct CONSTANT_Utf8_info {
//u1 tag;
u2 length;
u1 bytes[];
}__attribute__ ((packed));

struct CONSTANT_MethodHandle_info {
//u1 tag;
u1 reference_kind;
u2 reference_index;
}__attribute__ ((packed));

struct CONSTANT_MethodType_info {
//u1 tag;
u2 descriptor_index;
}__attribute__ ((packed));

struct CONSTANT_InvokeDynamic_info {
//u1 tag;
u2 bootstrap_method_attr_index;
u2 name_and_type_index;
}__attribute__ ((packed));

#endif

////////////////////////////////////////////////////////////////////////

classreader.c:

/*
* classreader.c - jvm class file parser.
*
* (c) wzt 2012 http://www.cloud-sec.org
*
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>

#include "jvm.h"

/* Parser state shared by every function in this file. */

/* Descriptor of the class file; opened by mmap_class_file(), closed by mmap_exit(). */
static int class_fd;
/* Size of the class file in bytes, taken from st_size; also the mapping length. */
static int class_file_len;
/* Start address of the read-only mapping of the class file. */
static void *class_start_mem;
/* Read cursor into the mapping; each parse step reads at it and moves it forward. */
static char *p_mem;
/* Table indexed by constant pool index; allocated and filled by parse_class_constant(). */
static struct constant_info_st *constant_info;

/*
 * Map the whole class file read-only into memory.
 *
 * On success the descriptor is left in class_fd, the file size in
 * class_file_len and the mapping address in class_start_mem.
 *
 * Returns 0 on success, -1 on failure (the descriptor is closed again).
 */
int mmap_class_file(const char *class_file)
{
	struct stat f_stat;

	class_fd = open(class_file, O_RDONLY);
	if (class_fd == -1) {
		perror("open");
		return -1;
	}

	/* Query the descriptor we just opened instead of resolving the path twice. */
	if (fstat(class_fd, &f_stat) == -1) {
		perror("fstat");
		close(class_fd);
		return -1;
	}

	/* A zero-length mapping is rejected by mmap; report it explicitly. */
	if (f_stat.st_size <= 0) {
		fprintf(stderr, "%s: empty file\n", class_file);
		close(class_fd);
		return -1;
	}

	class_file_len = f_stat.st_size;
	printf("%s file len: %d\n", class_file, class_file_len);

	class_start_mem = mmap(NULL, class_file_len, PROT_READ, MAP_PRIVATE, class_fd, 0);
	/* mmap reports failure with MAP_FAILED, never with NULL. */
	if (class_start_mem == MAP_FAILED) {
		perror("mmap");
		class_start_mem = NULL;
		close(class_fd);
		return -1;
	}
	printf("mmap %s at %p\n", class_file, class_start_mem);

	return 0;
}

/*
 * Tear down the class file mapping and close its descriptor.
 *
 * Returns 0 on success. If munmap fails, -1 is returned and the descriptor
 * is left open.
 */
int mmap_exit(void)
{
	int unmap_rc = munmap(class_start_mem, class_file_len);

	if (unmap_rc == -1) {
		perror("munmap");
		return -1;
	}

	close(class_fd);

	return 0;
}

int parse_class_magic(void)
{
u4 class_magic;

/* read class magic number. */
CLASS_READ_U4(class_magic, (*(u4 *)p_mem))
p_mem = 4;

printf("magic: 0x%x\n", class_magic);
if (class_magic != JVM_CLASS_MAGIC) {
printf("jvm class magic not match.\n");
return -1;
}
printf("jvm class magic match: 0x%x\n", class_magic);
return 0;
}

int parse_class_version(void)
{
u2 minor_version, major_version;
u2 constant_pool_count;

/* read class minor_version. */
CLASS_READ_U2(minor_version, (*(u2 *)p_mem))
p_mem = 2;
printf("jvm class minor_version: %d\n", minor_version);

/* read class major_version. */
CLASS_READ_U2(major_version, (*(u2 *)p_mem))
p_mem = 2;
printf("jvm class major_version: %d\n", major_version);

return 0;
}

/*
 * Parse the constant pool: read constant_pool_count, allocate the
 * constant_info table and walk entries 1 .. constant_pool_count - 1,
 * printing each one.
 *
 * Only Utf8 entries are stored: constant_info[idx].base receives a
 * malloc'd, NUL-terminated copy of the string. Every other slot keeps
 * base == NULL because the table is zero-initialised.
 *
 * Returns 0 on success, -1 on allocation failure, on an empty pool count
 * or on an unknown tag (whose size is unknown, so parsing cannot continue).
 */
int parse_class_constant(void)
{
	u2 constant_pool_count;
	u1 constant_tag;
	u2 idx;

	printf("\n-----------parse contant pool count----------------------:\n\n");
	/* read constant_pool_count */
	CLASS_READ_U2(constant_pool_count, (*(u2 *)p_mem));
	p_mem += 2;
	printf("jvm constant_pool_count: %d\n", constant_pool_count);

	/* The count is "number of entries + 1", so zero cannot be valid. */
	if (constant_pool_count == 0) {
		printf("invalid constant_pool_count.\n");
		return -1;
	}

	/* Zero-filled so that non-Utf8 slots have a NULL base pointer. */
	constant_info = calloc(constant_pool_count, sizeof(*constant_info));
	if (!constant_info) {
		printf("Malloc failed.\n");
		return -1;
	}

	for (idx = 1; idx < constant_pool_count; idx++) {
		CLASS_READ_U1(constant_tag, (*(u1 *)p_mem));
		p_mem += 1;
		printf("- idx: %2d constant tag: %d\t", idx, (int)constant_tag);
		switch (constant_tag) {
		case CONSTANT_Fieldref:
		case CONSTANT_Methodref:
		case CONSTANT_InterfaceMethodref:
		{
			/* All three kinds share the same two-index layout. */
			struct CONSTANT_Methodref_info methodref_info;

			CLASS_READ_U2(methodref_info.class_index, (*(u2 *)p_mem));
			p_mem += 2;
			assert(methodref_info.class_index > 0 &&
			       methodref_info.class_index < constant_pool_count);

			CLASS_READ_U2(methodref_info.name_and_type_index, (*(u2 *)p_mem));
			p_mem += 2;
			assert(methodref_info.name_and_type_index > 0 &&
			       methodref_info.name_and_type_index < constant_pool_count);

			printf("class_index: %d, name_and_type_index: %d\n",
			       methodref_info.class_index,
			       methodref_info.name_and_type_index);
			break;
		}
		case CONSTANT_Class:
		{
			struct CONSTANT_Class_info class_info;

			CLASS_READ_U2(class_info.name_index, (*(u2 *)p_mem));
			p_mem += 2;
			assert(class_info.name_index > 0 &&
			       class_info.name_index < constant_pool_count);
			printf("name_index: %d\n", class_info.name_index);
			break;
		}
		case CONSTANT_String:
		{
			struct CONSTANT_String_info string_info;

			CLASS_READ_U2(string_info.string_index, (*(u2 *)p_mem));
			p_mem += 2;
			assert(string_info.string_index > 0 &&
			       string_info.string_index < constant_pool_count);
			printf("string index: %d\n", string_info.string_index);
			break;
		}
		case CONSTANT_Long:
		{
			struct CONSTANT_Long_info long_info;

			/* high_bytes and low_bytes are u4 each: 8 bytes in total. */
			CLASS_READ_U4(long_info.high_bytes, (*(u4 *)p_mem));
			p_mem += 4;

			CLASS_READ_U4(long_info.low_bytes, (*(u4 *)p_mem));
			p_mem += 4;

			printf("high bytes: %d, low bytes: %d\n",
			       long_info.high_bytes, long_info.low_bytes);

			/* An 8-byte constant occupies two pool indices; skip the second. */
			idx++;
			break;
		}
		case CONSTANT_Integer:
		{
			struct CONSTANT_Integer_info integer_info;

			CLASS_READ_U4(integer_info.bytes, (*(u4 *)p_mem));
			p_mem += 4;
			printf("bytes: %d\n", integer_info.bytes);
			break;
		}
		case CONSTANT_Float:
		{
			struct CONSTANT_Float_info float_info;

			CLASS_READ_U4(float_info.bytes, (*(u4 *)p_mem));
			p_mem += 4;
			printf("bytes: %d\n", float_info.bytes);
			break;
		}
		case CONSTANT_Double:
		{
			struct CONSTANT_Double_info double_info;

			CLASS_READ_U4(double_info.high_bytes, (*(u4 *)p_mem));
			p_mem += 4;

			CLASS_READ_U4(double_info.low_bytes, (*(u4 *)p_mem));
			p_mem += 4;
			printf("high_bytes: %d, low_bytes: %d\n",
			       double_info.high_bytes, double_info.low_bytes);

			/* An 8-byte constant occupies two pool indices; skip the second. */
			idx++;
			break;
		}
		case CONSTANT_NameAndType:
		{
			struct CONSTANT_NameAndType_info name_type_info;

			CLASS_READ_U2(name_type_info.name_index, (*(u2 *)p_mem));
			p_mem += 2;

			CLASS_READ_U2(name_type_info.descriptor_index, (*(u2 *)p_mem));
			p_mem += 2;

			printf("name_index: %d, descriptor_index: %d\n",
			       name_type_info.name_index, name_type_info.descriptor_index);
			break;
		}
		case CONSTANT_MethodHandle:
		{
			struct CONSTANT_MethodHandle_info method_handle_info;

			CLASS_READ_U1(method_handle_info.reference_kind, (*(u1 *)p_mem));
			p_mem += 1;

			CLASS_READ_U2(method_handle_info.reference_index, (*(u2 *)p_mem));
			p_mem += 2;

			printf("reference_kind: %d, reference_index: %d\n",
			       method_handle_info.reference_kind,
			       method_handle_info.reference_index);
			break;
		}
		case CONSTANT_MethodType:
		{
			struct CONSTANT_MethodType_info method_type_info;

			CLASS_READ_U2(method_type_info.descriptor_index, (*(u2 *)p_mem));
			p_mem += 2;

			printf("descriptor_index %d\n", method_type_info.descriptor_index);
			break;
		}
		case CONSTANT_InvokeDynamic:
		{
			struct CONSTANT_InvokeDynamic_info invoke_dyc_info;

			CLASS_READ_U2(invoke_dyc_info.bootstrap_method_attr_index, (*(u2 *)p_mem));
			p_mem += 2;

			CLASS_READ_U2(invoke_dyc_info.name_and_type_index, (*(u2 *)p_mem));
			p_mem += 2;

			printf("bootstrap_method_attr_index: %d, name_and_type_index: %d\n",
			       invoke_dyc_info.bootstrap_method_attr_index,
			       invoke_dyc_info.name_and_type_index);
			break;
		}
		case CONSTANT_Utf8:
		{
			u2 len;
			char *buf;

			CLASS_READ_U2(len, (*(u2 *)p_mem));
			p_mem += 2;

			/* One extra byte so the copy can be used as a C string. */
			buf = malloc(len + 1);
			if (!buf) {
				printf("Malloc failed.\n");
				return -1;
			}

			memcpy(buf, p_mem, len);
			buf[len] = '\0';
			printf("len: %d\t%s\n", len, buf);
			p_mem += len;

			constant_info[idx].index = idx;
			constant_info[idx].base = (u1 *)buf;
			break;
		}
		default:
			/* Unknown tag: its payload size is unknown, so stop here. */
			printf("unknown constant tag.\n");
			return -1;
		}
	}
	printf("\n");

	return 0;
}

int parse_class_access_flag(void)
{
u2 access_flag;

/* read class access flag. */
CLASS_READ_U2(access_flag, (*(u2 *)p_mem))
p_mem = 2;

printf("access_flag: 0x%x\n", access_flag);
return 0;
}
int parse_class_this_super(void)
{
u2 this_class;
u2 super_class;

CLASS_READ_U2(this_class, (*(u2 *)p_mem))
p_mem = 2;

CLASS_READ_U2(super_class, (*(u2 *)p_mem))
p_mem = 2;

printf("this_class: %d\tsuper_class: %d\n\n", this_class, super_class);
return 0;
}

/*
 * Read interfaces_count, then read and print that many 2-byte interface
 * indices, advancing the cursor past all of them. Always returns 0.
 */
int parse_class_interface(void)
{
	u2 interfaces_count;
	u2 idx, index;

	CLASS_READ_U2(interfaces_count, (*(u2 *)p_mem));
	p_mem += 2;
	printf("interfaces_count: %d\n", interfaces_count);

	for (idx = 0; idx < interfaces_count; idx++) {
		CLASS_READ_U2(index, (*(u2 *)p_mem));
		p_mem += 2;
		printf("index: %d\n", index);
	}

	return 0;
}

int parse_class_filed(void)
{
u2 fileds_count;
u2 idx;

CLASS_READ_U2(fileds_count, (*(u2 *)p_mem))
p_mem = 2;
printf("filed_count: %d\n", fileds_count);

return 0;
}
int __parse_exception_table(int len)
{
u2 start_pc, end_pc;
u2 handler_pc, catch_type;
u2 idx;

for (idx = 0; idx < len; idx ) {
CLASS_READ_U2(start_pc, (*(u2 *)p_mem))
p_mem = 2;
printf("start_pc: %d\n", start_pc);

CLASS_READ_U2(end_pc, (*(u2 *)p_mem))
p_mem = 2;
printf("end_pc: %d\n", end_pc);

CLASS_READ_U2(handler_pc, (*(u2 *)p_mem))
p_mem = 2;
printf("handler_pc: %d\n", handler_pc);

CLASS_READ_U2(catch_type, (*(u2 *)p_mem))
p_mem = 2;
printf("catch_type: %d\n", catch_type);
}

return 0;
}

int __parse_line_number_table(void)
{
u4 attribute_length;
u2 line_number_table_length;
u2 start_pc, line_number;
u2 idx;

CLASS_READ_U4(attribute_length, (*(u4 *)p_mem))
p_mem = 4;
printf("\t\tattribute_length: %d\n", attribute_length);

CLASS_READ_U2(line_number_table_length, (*(u2 *)p_mem))
p_mem = 2;
printf("\t\tline_number_table_length: %d\n", line_number_table_length);

for (idx = 0; idx < line_number_table_length; idx ) {
CLASS_READ_U2(start_pc, (*(u2 *)p_mem))
p_mem = 2;
printf("\t\tstart_pc: %d\n", start_pc);

CLASS_READ_U2(line_number, (*(u2 *)p_mem))
p_mem = 2;
printf("\t\tline_number: %d\n", line_number);
}

return 0;
}

/*
 * Read and print `number` verification type entries starting at the
 * cursor. Every entry starts with a 1-byte tag; ITEM_Object and
 * ITEM_Uninitialized are followed by one extra u2 (cpool_index / offset).
 *
 * Returns 0 on success, -1 as soon as an unknown tag is met.
 */
int __parse_verification_type_info(u1 number)
{
	u1 idx, tag;

	for (idx = 0; idx < number; idx++) {
		CLASS_READ_U1(tag, (*(u1 *)p_mem));
		p_mem += 1;
		printf("\t\ttag: %d\n", tag);
		switch (tag) {
		case ITEM_Top:
			printf("\t\tITEM_Top.\n");
			break;
		case ITEM_Integer:
			printf("\t\tITEM_Integer.\n");
			break;
		case ITEM_Float:
			printf("\t\tITEM_float.\n");
			break;
		case ITEM_Double:
			printf("\t\tITEM_Double.\n");
			break;
		case ITEM_Long:
			printf("\t\tITEM_Long.\n");
			break;
		case ITEM_Null:
			/* Was reported as ITEM_Long by mistake. */
			printf("\t\tITEM_Null.\n");
			break;
		case ITEM_UninitializedThis:
			printf("\t\tITEM_UninitializedThis.\n");
			break;
		case ITEM_Object:
		{
			u2 cpool_index;

			printf("\t\tITEM_Object.\n");
			CLASS_READ_U2(cpool_index, (*(u2 *)p_mem));
			p_mem += 2;
			printf("\t\tcpool_index: %d\n", cpool_index);
			break;
		}
		case ITEM_Uninitialized:
		{
			u2 offset;

			printf("\t\tITEM_Uninitialized.\n");
			CLASS_READ_U2(offset, (*(u2 *)p_mem));
			p_mem += 2;
			printf("\t\toffset: %d\n", offset);
			break;
		}
		default:
			return -1;
		}
	}

	return 0;
}

/*
 * Read and print `number` stack map frames starting at the cursor.
 *
 * The first byte of each frame selects its layout:
 *   0..63    same_frame (offset_delta is the frame type itself)
 *   64..127  same_locals_1_stack_item_frame (one verification type follows)
 *   128..246 reserved, treated as an error
 *   247      same_locals_1_stack_item_frame_extended
 *   248..250 chop_frame
 *   251      same_frame_extended
 *   252..254 append_frame (frame_type - 251 verification types follow)
 *   255      full_frame (explicit locals and stack item lists)
 *
 * Returns 0 on success, -1 on a reserved frame type or when a
 * verification type entry cannot be parsed.
 */
int __parse_stack_map_frame(u2 number)
{
	u1 frame_type;
	u2 offset_delta;	/* u2: the explicit offsets are two bytes wide */
	u2 idx;

	for (idx = 0; idx < number; idx++) {
		CLASS_READ_U1(frame_type, (*(u1 *)p_mem));
		p_mem += 1;
		printf("\t\tframe_type: %d\n", frame_type);

		if (frame_type <= 63) {
			offset_delta = frame_type;
			printf("\t\tsame_frame\toffset_delta: %d\n", offset_delta);
		} else if (frame_type <= 127) {
			offset_delta = frame_type - 64;
			printf("\t\tsame_locals_l_stack_item_frame\toffset_delta: %d\n",
			       offset_delta);
			if (__parse_verification_type_info(1) == -1)
				return -1;
		} else if (frame_type < 247) {
			printf("\t\treserved frame_type.\n");
			return -1;
		} else if (frame_type == 247) {
			CLASS_READ_U2(offset_delta, (*(u2 *)p_mem));
			p_mem += 2;
			printf("\t\tsame_locals_l_stack_item_frame_extendedn\toffset_delta: %d\n",
			       offset_delta);
			if (__parse_verification_type_info(1) == -1)
				return -1;
		} else if (frame_type <= 250) {
			CLASS_READ_U2(offset_delta, (*(u2 *)p_mem));
			p_mem += 2;
			/* Was mislabelled as same_locals_l_stack_item_frame_extended. */
			printf("\t\tchop_frame\toffset_delta: %d\n", offset_delta);
		} else if (frame_type == 251) {
			CLASS_READ_U2(offset_delta, (*(u2 *)p_mem));
			p_mem += 2;
			printf("\t\tsame_frame_extended\toffset_delta: %d\n", offset_delta);
		} else if (frame_type <= 254) {
			u1 locals_num;

			CLASS_READ_U2(offset_delta, (*(u2 *)p_mem));
			p_mem += 2;
			printf("\t\tappend_frame\toffset_delta: %d\n", offset_delta);

			locals_num = frame_type - 251;
			printf("\t\tlocals_num: %d\n", locals_num);

			if (__parse_verification_type_info(locals_num) == -1)
				return -1;
		} else {
			u2 number_of_locals;
			u2 number_of_stack_items;
			u2 item;

			CLASS_READ_U2(offset_delta, (*(u2 *)p_mem));
			p_mem += 2;
			printf("\t\tfull_frame\toffset_delta: %d\n", offset_delta);

			CLASS_READ_U2(number_of_locals, (*(u2 *)p_mem));
			p_mem += 2;
			printf("\t\tnumber_of_locals: %d\n", number_of_locals);
			/* One entry per call: the helper's count parameter is only a u1. */
			for (item = 0; item < number_of_locals; item++) {
				if (__parse_verification_type_info(1) == -1)
					return -1;
			}

			CLASS_READ_U2(number_of_stack_items, (*(u2 *)p_mem));
			p_mem += 2;
			printf("\t\tnumber_of_stack_items: %d\n", number_of_stack_items);
			for (item = 0; item < number_of_stack_items; item++) {
				if (__parse_verification_type_info(1) == -1)
					return -1;
			}
		}
	}

	return 0;
}
int __parse_stack_map_table(void)
{
u4 attribute_length;
u2 number_of_entries;
u2 idx;

CLASS_READ_U4(attribute_length, (*(u4 *)p_mem))
p_mem = 4;
printf("\t\tattribute_length: %d\n", attribute_length);

CLASS_READ_U2(number_of_entries, (*(u2 *)p_mem))
p_mem = 2;
printf("\t\tnumber_of_entries: %d\n", number_of_entries);

__parse_stack_map_frame(number_of_entries);

return 0;
}
/* attribute_name_index has been parsed before. */
int parse_code_attribute(void)
{
u2 attribute_name_index;
u4 attribute_length;
u2 max_stack;
u2 max_locals;
u4 code_length;
u1 *code;
u2 exception_table_length;
u2 attributes_count;
u2 idx;

CLASS_READ_U4(attribute_length, (*(u4 *)p_mem))
p_mem = 4;
printf("\tattribute_length: %d\n", attribute_length);

CLASS_READ_U2(max_stack, (*(u2 *)p_mem))
p_mem = 2;
printf("\tmax_stack: %d\n", max_stack);

CLASS_READ_U2(max_locals, (*(u2 *)p_mem))
p_mem = 2;
printf("\tmax_locals: %d\n", max_locals);

CLASS_READ_U4(code_length, (*(u4 *)p_mem))
p_mem = 4;
printf("\tcode_length: %d\n", code_length);

code = (u1 *)malloc(code_length 1);
if (!code) {
printf("Malloc failed.\n");
return -1;
}
memcpy(code, p_mem, code_length);
code[code_length] = '\0';
p_mem = code_length;

CLASS_READ_U2(exception_table_length, (*(u2 *)p_mem))
p_mem = 2;
printf("\texception_table_length: %d\n", exception_table_length);

__parse_exception_table(exception_table_length);

CLASS_READ_U2(attributes_count, (*(u2 *)p_mem))
p_mem = 2;
printf("\tattributes_count: %d\n", attributes_count);

/* parse attributes */
for (idx = 0; idx < attributes_count; idx ) {
CLASS_READ_U2(attribute_name_index, (*(u2 *)p_mem))
p_mem = 2;
printf("\tidx: %d attribute_name_index: %d", idx 1, attribute_name_index);

if (!strcmp(constant_info[attribute_name_index].base, "LineNumberTable")) {
printf("\n\tparse LineNumberTable:\n");
__parse_line_number_table();
}
if (!strcmp(constant_info[attribute_name_index].base, "StackMapTable")) {
printf("\n\tparse StackMapTable:\n");
__parse_stack_map_table();
}
if (!strcmp(constant_info[attribute_name_index].base, "LocalVariableTable")) {
;
}
if (!strcmp(constant_info[attribute_name_index].base, "LocalVariableTypeTable")) {
;
}
if (!strcmp(constant_info[attribute_name_index].base, "StackMapTable")) {
;
}
}

return 0;
}

int parse_class_method(void)
{
u2 method_count;
u2 access_flags, name_index;
u2 descriptor_index, attributes_count;
u2 idx;

printf("\n---------------parse class method-------------------------:\n\n");
CLASS_READ_U2(method_count, (*(u2 *)p_mem))
p_mem = 2;
printf("method_count: %d\n", method_count);

for (idx = 0; idx < method_count; idx ) {
CLASS_READ_U2(access_flags, (*(u2 *)p_mem))
p_mem = 2;
printf("access_flags: 0x%x\n", access_flags);

CLASS_READ_U2(name_index, (*(u2 *)p_mem))
p_mem = 2;
printf("name_index: %d\n", name_index);

CLASS_READ_U2(descriptor_index, (*(u2 *)p_mem))
p_mem = 2;
printf("descriptor_index: %d\n", descriptor_index);

CLASS_READ_U2(attributes_count, (*(u2 *)p_mem))
p_mem = 2;
printf("attributes_count: %d\n\n", attributes_count);

/* parse attributes */
CLASS_READ_U2(name_index, (*(u2 *)p_mem))
p_mem = 2;
printf("attritbutes name_index: %d\n", name_index);

if (!strcmp(constant_info[name_index].base, "Code")) {
printf("parse code attribute:\n");
parse_code_attribute();
}
if (!strcmp(constant_info[name_index].base, "Exceptions")) {
;
}
if (!strcmp(constant_info[name_index].base, "Signature")) {
;
}
}

return 0;
}

/*
 * Map `class_file` and run every parse stage in file order: magic,
 * version, constant pool, access flags, this/super class, interfaces,
 * fields and methods. The first stage that returns -1 stops the chain.
 *
 * The mapping is released before returning in both the success and the
 * failure case.
 *
 * Returns 0 when every stage succeeded, -1 otherwise.
 */
int jvm_parse_class_file(const char *class_file)
{
	int rc = -1;

	assert(class_file != NULL);
	if (mmap_class_file(class_file) == -1)
		return -1;

	/* Start reading at the first byte of the mapping. */
	p_mem = class_start_mem;

	/* && short-circuits, so a failing stage prevents the later ones. */
	if (parse_class_magic() != -1 &&
	    parse_class_version() != -1 &&
	    parse_class_constant() != -1 &&
	    parse_class_access_flag() != -1 &&
	    parse_class_this_super() != -1 &&
	    parse_class_interface() != -1 &&
	    parse_class_filed() != -1 &&
	    parse_class_method() != -1)
		rc = 0;

	mmap_exit();
	return rc;
}

/*
 * Print a one-line usage message to stdout. `proc` is the program name
 * to show; the argument placeholder names the class file to parse.
 */
void jvm_usage(const char *proc)
{
	fprintf(stdout, "usage: %s <class_file>\n", proc);
}

/*
 * Entry point: parse the class file named by the first argument.
 *
 * Without an argument the usage text is printed and 0 is returned.
 * Otherwise the exit status is 0 when parsing succeeded and 1 when it
 * failed.
 */
int main(int argc, char **argv)
{
	if (argc < 2) {
		/* argv[0] may be missing when argc is 0. */
		jvm_usage(argc > 0 ? argv[0] : "classreader");
		return 0;
	}

	if (jvm_parse_class_file(argv[1]) == -1)
		return 1;

	return 0;
}


////////////////////////////////////////////////////////////////////////////
root@localhost.localdomain # gcc -o classreader classreader.c -w
root@localhost.localdomain # ./classreader test.class
test.class file len: 462
mmap test.class at 0x2b0b78fa5000
magic: 0xcafebabe
jvm class magic match: 0xcafebabe
jvm class minor_version: 0
jvm class major_version: 50

-----------parse contant pool count----------------------:

jvm constant_pool_count: 30
- idx: 1 constant tag: 10 class_index: 6, name_and_type_index: 16
- idx: 2 constant tag: 9 class_index: 17, name_and_type_index: 18
- idx: 3 constant tag: 8 string index: 19
- idx: 4 constant tag: 10 class_index: 20, name_and_type_index: 21
- idx: 5 constant tag: 7 name_index: 22
- idx: 6 constant tag: 7 name_index: 23
- idx: 7 constant tag: 1 len: 6
- idx: 8 constant tag: 1 len: 3 ()V
- idx: 9 constant tag: 1 len: 4 Code
- idx: 10 constant tag: 1 len: 15 LineNumberTable
- idx: 11 constant tag: 1 len: 4 main
- idx: 12 constant tag: 1 len: 22 ([Ljava/lang/String;)V
- idx: 13 constant tag: 1 len: 13 StackMapTable
- idx: 14 constant tag: 1 len: 10 SourceFile
- idx: 15 constant tag: 1 len: 9 test.java
- idx: 16 constant tag: 12 name_index: 7, descriptor_index: 8
- idx: 17 constant tag: 7 name_index: 24
- idx: 18 constant tag: 12 name_index: 25, descriptor_index: 26
- idx: 19 constant tag: 1 len: 4 hehe
- idx: 20 constant tag: 7 name_index: 27
- idx: 21 constant tag: 12 name_index: 28, descriptor_index: 29
- idx: 22 constant tag: 1 len: 4 test
- idx: 23 constant tag: 1 len: 16 java/lang/Object
- idx: 24 constant tag: 1 len: 16 java/lang/System
- idx: 25 constant tag: 1 len: 3 out
- idx: 26 constant tag: 1 len: 21 Ljava/io/PrintStream;
- idx: 27 constant tag: 1 len: 19 java/io/PrintStream
- idx: 28 constant tag: 1 len: 7 println
- idx: 29 constant tag: 1 len: 21 (Ljava/lang/String;)V

access_flag: 0x21
this_class: 5 super_class: 6

interfaces_count: 0
filed_count: 0

---------------parse class method-------------------------:

method_count: 2
access_flags: 0x1
name_index: 7
descriptor_index: 8
attributes_count: 1

attritbutes name_index: 9
parse code attribute:
attribute_length: 29
max_stack: 1
max_locals: 1
code_length: 5
exception_table_length: 0
attributes_count: 1
idx: 1 attribute_name_index: 10
parse LineNumberTable:
attribute_length: 6
line_number_table_length: 1
start_pc: 0
line_number: 5
access_flags: 0x9
name_index: 11
descriptor_index: 12
attributes_count: 1

attritbutes name_index: 9
parse code attribute:
attribute_length: 77
max_stack: 2
max_locals: 2
code_length: 24
exception_table_length: 0
attributes_count: 2
idx: 1 attribute_name_index: 10
parse LineNumberTable:
attribute_length: 22
line_number_table_length: 5
start_pc: 0
line_number: 7
start_pc: 2
line_number: 9
start_pc: 9
line_number: 10
start_pc: 17
line_number: 9
start_pc: 23
line_number: 11
idx: 2 attribute_name_index: 13
parse StackMapTable:
attribute_length: 7
number_of_entries: 2
frame_type: 252
append_frame offset_delta: 4
locals_num: 1
tag: 1
ITEM_Integer.
frame_type: 18
same_frame offset_delta: 18
root@localhost.localdomain #