User:Kmcguire/JavaClassLoadAndExecute

From OSDev.wiki
Revision as of 01:04, 14 May 2012 by Pancakes (talk | contribs) (added link the extended project based from sources)
Jump to navigation Jump to search

This page is about reading and interpreting Java bytecode. The code examples below are old and are by no means complete. However, they may serve some purpose as guidance. I have been working on a VM and have come a long way from the code below, but my code has also become more complex, and for someone who is interested in writing a VM for their OS the code below may prove to be much more useful than my full project.

You can find the project here. It is still simple for the effort I have put in, but it is much larger. I hope to finish it to a point where it can be useful as a base for usage in my own toy OS. Or, useful in yours. I have made an effort so far not to rely on anything but the bare minimum of the standard library. Also, one day I will change out my malloc usage for a more in-house solution.

https://code.google.com/p/rhino-java-virtual-machine/

Here are some resources which will allow you to implement a full virtual machine:

http://murrayc.com/learning/java/java_classfileformat.shtml
http://en.wikipedia.org/wiki/Java_bytecode_instruction_listings
http://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html#jvms-4.7.3

Like I said the code below is sort of a guide so you can kind of see what is important to implement. That will help keep you going by being able to run your test files while you build it. For instance most of the constant pool types are not going to be used but by looking at just the specification it is hard to tell.

Also, most instructions are not used. So you can pretty much work through it as you go by adding a catch-all that reports any unknown opcode it finds.

Below is an example of loading multiple class files and interpreting some basic instructions. Also demonstrated is maintaining type information on the stack and in local variables. No garbage collection is needed for the Python code. In the C example code the infrastructure for garbage collection is there, but no code written to actually perform the collection.

C Example

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Short integer aliases used throughout this file.
// NOTE(review): the widths are implied by the names only; nothing here checks
// that e.g. `unsigned int` is 32 bits on the target compiler -- confirm.
typedef unsigned long long int  uint64;
typedef signed long long int    int64;
typedef unsigned int            uint32;
typedef unsigned short          uint16;
typedef unsigned char           uint8;
typedef signed int              int32;
typedef signed short            int16;
typedef signed char             int8;

// Status codes returned by the jvm_* functions: success is positive,
// every error is a distinct negative value.
#define JVM_SUCCESS                      1
#define JVM_ERROR_METHODNOTFOUND        -1
#define JVM_ERROR_OUTOFMEMORY           -2
#define JVM_ERROR_UNKNOWNOPCODE         -3
#define JVM_ERROR_CLASSNOTFOUND         -4

// Type tags stored next to every operand-stack slot and local variable.
// These are enumerated values, not independent bit masks: 0x3, 0x5, 0x7 and
// 0x9 all share bit 0 with JVM_STACK_ISOBJECTREF, so a tag must be compared
// with `==` rather than tested with `&`.
// NOTE(review): JVM_STACK_ISARRAY jumps to 0x10 instead of continuing the
// sequence -- possibly meant as a combinable flag; confirm intent.
#define JVM_STACK_ISOBJECTREF   0x00000001
#define JVM_STACK_ISBYTE        0x00000002
#define JVM_STACK_ISCHAR        0x00000003
#define JVM_STACK_ISDOUBLE      0x00000004
#define JVM_STACK_ISFLOAT       0x00000005
#define JVM_STACK_ISINT         0x00000006
#define JVM_STACK_ISLONG        0x00000007
#define JVM_STACK_ISSHORT       0x00000008
#define JVM_STACK_ISBOOL        0x00000009
#define JVM_STACK_ISARRAY       0x00000010

// Operand stack stored as two parallel arrays: data[i] is the value of slot i
// and flags[i] is its JVM_STACK_IS* type tag. `pos` is the index of the next
// free slot (so it is also the number of entries); `max` is the capacity that
// was passed to jvm_StackInit.
typedef struct _JVMStack {
  uint64                *data;
  uint32                *flags;
  uint32                pos;
  uint32                max;
} JVMStack;

// One typed value: used for local variables and as the destination of
// jvm_StackPop. `flags` holds a JVM_STACK_IS* tag describing `data`; object
// references are stored in `data` as a JVMObject pointer cast to uint64.
typedef struct _JVMLocal {
  uint64                data;
  uint32                flags;
} JVMLocal;

/*
  Set up an empty operand stack with room for `max` entries.
  Both backing arrays are heap allocated; the allocation results are stored
  as-is (not checked here).
*/
void jvm_StackInit(JVMStack *stack, uint32 max) {
  uint64 *values = (uint64*)malloc(sizeof(uint64) * max);
  uint32 *tags = (uint32*)malloc(sizeof(uint32) * max);

  stack->data = values;
  stack->flags = tags;
  stack->max = max;
  stack->pos = 0;
}

/*
  Report how many entries are currently on the stack.
  Callers use the result as a truth value: non-zero means "more to pop".
*/
int jvm_StackMore(JVMStack *stack) {
  uint32 depth = stack->pos;

  return depth;
}

/*
  Push `value` with its JVM_STACK_IS* tag `flags` onto the stack.
  A push onto a full stack is reported on stderr and dropped instead of
  writing past the end of the backing arrays.
*/
void jvm_StackPush(JVMStack *stack, uint64 value, uint32 flags) {
  if (stack->pos >= stack->max) {
    fprintf(stderr, "jvm_StackPush: operand stack overflow (max %u)\n", stack->max);
    return;
  }
  stack->flags[stack->pos] = flags;
  stack->data[stack->pos] = value;
  stack->pos++;
}

/*
  Pop the top entry into `local` (value and type tag).
  Popping an empty stack is reported on stderr and yields a zeroed `local`;
  previously `pos` wrapped around and the arrays were read far out of bounds.
*/
void jvm_StackPop(JVMStack *stack, JVMLocal *local) {
  if (stack->pos == 0) {
    fprintf(stderr, "jvm_StackPop: operand stack underflow\n");
    local->data = 0;
    local->flags = 0;
    return;
  }
  stack->pos--;
  local->flags = stack->flags[stack->pos];
  local->data = stack->data[stack->pos];
}

// Common header of every constant-pool entry. Each concrete entry type below
// starts with this header so a JVMConstPoolItem* can be cast to the concrete
// type once `type` (a TAG_* value) is known.
typedef struct _JVMConstPoolItem {
  uint8			type;
} JVMConstPoolItem;

// TAG_METHODREF entry. Despite the field names, jvm_ExecuteObjectMethod uses
// `nameIndex` to look up a JVMConstPoolClassInfo and `descIndex` to look up a
// JVMConstPoolNameAndType.
typedef struct _JVMConstPoolMethodRef {
  JVMConstPoolItem	hdr;
  uint32		nameIndex;		// first u16 of the entry; used as the class-info index
  uint32		descIndex;		// second u16 of the entry; used as the name-and-type index
} JVMConstPoolMethodRef;

// TAG_CLASSINFO entry: `nameIndex` refers to the Utf8 entry holding the class name.
typedef struct _JVMConstPoolClassInfo {
  JVMConstPoolItem	hdr;
  uint32		nameIndex;
} JVMConstPoolClassInfo;

// TAG_UTF8 entry: `size` bytes of text; the loader stores `string` with an
// extra terminating 0 byte so it can be used as a C string.
typedef struct _JVMConstPoolUtf8 {
  JVMConstPoolItem	hdr;
  uint16		size;
  uint8			*string;
} JVMConstPoolUtf8;

// TAG_NAMEANDTYPE entry: pool indices of the name and of the type descriptor
// (both are looked up as Utf8 entries by jvm_ExecuteObjectMethod).
typedef struct _JVMConstPoolNameAndType {
  JVMConstPoolItem	hdr;
  uint32		nameIndex;
  uint32		descIndex;
} JVMConstPoolNameAndType;

// TAG_FIELDREF entry: the two u16 values read from the class file, in order.
typedef struct _JVMConstPoolFieldRef {
  JVMConstPoolItem	hdr;
  uint32		classIndex;
  uint32		nameAndTypeIndex;
} JVMConstPoolFieldRef;

// One field record: the four u16 values jvm_LoadClass reads per field.
typedef struct _JVMClassField {
  uint16		accessFlags;
  uint16		nameIndex;
  uint16		descIndex;
  uint16		attrCount;
} JVMClassField;

// One attribute: constant-pool index of its name plus `length` bytes of
// payload copied into the heap buffer `info`.
typedef struct _JVMAttribute {
  uint16		nameIndex;
  uint32		length;
  uint8			*info;
} JVMAttribute;

// One method record and its `attrCount` attributes (the attribute named
// "Code" is the one jvm_ExecuteObjectMethod looks for).
typedef struct _JVMMethod {
  uint16		accessFlags;
  uint16		nameIndex;
  uint16		descIndex;
  uint16		attrCount;
  JVMAttribute		*attrs;
} JVMMethod;

// A loaded class file. `pool` is stored 0-based: the entry the class file
// calls index i lives at pool[i - 1], which is why every lookup in this file
// subtracts 1. `poolCnt` is the count field as read from the file.
typedef struct _JVMClass {
  uint16		poolCnt;
  JVMConstPoolItem	**pool;
  uint16		accessFlags;
  uint16		thisClass;
  uint16		superClass;
  uint16		ifaceCnt;
  uint16		*interfaces;
  uint16		fieldCnt;
  JVMClassField		*fields;
  uint16		methodCnt;
  JVMMethod		*methods;
  uint16		attrCnt;
  JVMAttribute		*attrs;
} JVMClass;

// Node of the singly linked list of classes held by a bundle.
// NOTE(review): `nameSpace` is never written or read in the code shown here.
typedef struct _JVMBundleClass {
  struct _JVMBundleClass	*next;
  JVMClass			*jclass;
  const char                    *nameSpace;
} JVMBundleClass;

// A bundle is the set of loaded classes that lookups search; `first` is the
// list head and must be 0 for an empty bundle.
typedef struct _JVMBundle {
  JVMBundleClass		*first;
} JVMBundle;

// Read cursor over an in-memory byte buffer: `pos` is the offset of the next
// byte to read, `size` the total number of bytes in `data`.
typedef struct _JVMMemoryStream {
  uint8			*data;
  uint32		pos;
  uint32		size;
} JVMMemoryStream;

// A live object instance. `next` chains all objects from JVM.objects,
// `class` is the class it was created from, and `stackCnt` counts references
// held by operand stacks and local variables.
// NOTE(review): `fields` and `refs` are zeroed on creation and otherwise
// unused in the code shown here -- presumably reserved for field storage and
// garbage collection.
typedef struct _JVMObject {
  struct _JVMObject             *next;
  JVMClass                      *class;
  uint64                        *fields;
  struct _JVMObject             *refs;
  int32                         stackCnt;
  
} JVMObject;

// VM-wide state: head of the list of every object created so far.
typedef struct _JVM {
  JVMObject             *objects;
} JVM;


// java stores all integers in big-endian
//
// noths()/nothl() convert a 16/32-bit value between class-file (big-endian)
// order and host order. Define LENDIAN on little-endian hosts and BENDIAN on
// big-endian hosts. Every expansion is fully parenthesised so the macros can
// be used inside larger expressions, and each byte is masked before it is
// shifted so no bits above the intended width leak into the result.
// The argument is still evaluated more than once: do not pass expressions
// with side effects.
#define LENDIAN
#ifdef LENDIAN
#define noths(x) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))
#define nothl(x) ((((x) >> 24) & 0xffu) | (((x) >> 8) & 0xff00u) | \
                  (((x) & 0xff00u) << 8) | (((x) & 0xffu) << 24))
#endif
#ifdef BENDIAN
#define noths(x) (x)
#define nothl(x) (x)
#endif

/*
  Point the stream `m` at the caller-owned buffer `buf` of `size` bytes and
  rewind the read position to the start. The buffer is not copied.
*/
void msWrap(JVMMemoryStream *m, void *buf, uint32 size) {
  m->data = (uint8*)buf;
  m->size = size;
  m->pos = 0;
}

/*
  Read a big-endian 32-bit value and advance the stream by 4 bytes.

  The value is assembled byte by byte, so the result does not depend on host
  byte order and no unaligned or type-punned load is performed (the class
  file gives no alignment guarantee). If fewer than 4 bytes remain, the
  stream is moved to its end and 0 is returned.
*/
uint32 msRead32(JVMMemoryStream *m) {
  const uint8 *p;

  if (m->pos > m->size || m->size - m->pos < 4) {
    m->pos = m->size;
    return 0;
  }
  p = &m->data[m->pos];
  m->pos += 4;
  return ((uint32)p[0] << 24) | ((uint32)p[1] << 16) |
         ((uint32)p[2] << 8) | (uint32)p[3];
}
/*
  Read a big-endian 16-bit value and advance the stream by 2 bytes.

  Assembled byte by byte for the same reasons as the 32-bit reader: host
  byte order and alignment do not matter. If fewer than 2 bytes remain, the
  stream is moved to its end and 0 is returned.
*/
uint16 msRead16(JVMMemoryStream *m) {
  const uint8 *p;

  if (m->pos > m->size || m->size - m->pos < 2) {
    m->pos = m->size;
    return 0;
  }
  p = &m->data[m->pos];
  m->pos += 2;
  return (uint16)(((uint32)p[0] << 8) | (uint32)p[1]);
}

/*
  Read one byte and advance the stream by 1.
  At end of stream the position is left at the end and 0 is returned instead
  of reading past the buffer.
*/
uint8 msRead8(JVMMemoryStream *m) {
  if (m->pos >= m->size) {
    m->pos = m->size;
    return 0;
  }
  return m->data[m->pos++];
}

/*
  Copy `sz` bytes from the stream into `buf` and advance the stream.

  If fewer than `sz` bytes remain, only the available bytes are copied, the
  rest of `buf` is zero filled and the stream ends up at its end; the source
  buffer is never read out of bounds. Returns `buf` (which must have room
  for `sz` bytes; a null `buf` is tolerated and only advances the stream).
*/
uint8* msRead(JVMMemoryStream *m, uint32 sz, uint8 *buf) {
  uint32 avail = m->pos < m->size ? m->size - m->pos : 0;
  uint32 n = sz < avail ? sz : avail;

  if (buf) {
    if (n)
      memcpy(buf, m->data + m->pos, n);
    if (n < sz)
      memset(buf + n, 0, sz - n);
  }
  m->pos += n;
  return buf;
}

// Constant-pool entry tags understood by jvm_LoadClass; any other tag value
// makes the loader print an error and exit.
#define TAG_METHODREF			10
#define TAG_CLASSINFO			7
#define TAG_NAMEANDTYPE			12
#define TAG_UTF8			1
#define TAG_FIELDREF			9

JVMClass* jvm_LoadClass(JVMMemoryStream *m) {
  uint16			vmin;
  uint16			vmaj;
  uint32			magic;
  uint16			cpoolcnt;
  uint8				tag;
  JVMConstPoolItem		**pool;
  JVMConstPoolItem		*pi;
  int				x, y;
  JVMConstPoolMethodRef		*pimr;
  JVMConstPoolClassInfo		*pici;
  JVMConstPoolUtf8		*piu8;
  JVMConstPoolNameAndType	*pint;
  JVMConstPoolFieldRef		*pifr;
  JVMClass			*class;
  
  magic = msRead32(m);
  vmin = msRead16(m);
  vmaj = msRead16(m);
  
  class = (JVMClass*)malloc(sizeof(JVMClass));
  /*
    ==================================
    LOAD CONSTANT POOL TABLE
    ==================================
  */
  cpoolcnt = msRead16(m); 
  pool = (JVMConstPoolItem**)malloc(sizeof(JVMConstPoolItem*) * cpoolcnt);
  class->poolCnt = cpoolcnt;
  class->pool = pool;
  fprintf(stderr, "cpoolcnt:%u", cpoolcnt);
  for (x = 0; x < cpoolcnt - 1; ++x) {
    tag = msRead8(m);
    switch (tag) {
      case TAG_METHODREF:
	pimr = (JVMConstPoolMethodRef*)malloc(sizeof(JVMConstPoolMethodRef));
	pool[x] = (JVMConstPoolItem*)pimr;
	pimr->nameIndex = msRead16(m);
	pimr->descIndex = msRead16(m);
	pimr->hdr.type = TAG_METHODREF;
	fprintf(stderr, "nameIndex:%u descIndex:%u\n", pimr->nameIndex, pimr->descIndex);
	break;
      case TAG_CLASSINFO:
	pici = (JVMConstPoolClassInfo*)malloc(sizeof(JVMConstPoolClassInfo));
	pool[x] = (JVMConstPoolItem*)pici;
	pici->nameIndex = msRead16(m);
	pici->hdr.type = TAG_CLASSINFO;
	break;
      case TAG_UTF8:
	piu8 = (JVMConstPoolUtf8*)malloc(sizeof(JVMConstPoolUtf8));
	pool[x] = (JVMConstPoolItem*)piu8;
	piu8->size = msRead16(m);
	piu8->string = (uint8*)malloc(piu8->size + 1);
	msRead(m, piu8->size, piu8->string);
	piu8->string[piu8->size] = 0;
	piu8->hdr.type = TAG_UTF8;
	fprintf(stderr, "TAG_UTF8: size:%u string:%s\n", piu8->size, piu8->string);
	break;
      case TAG_NAMEANDTYPE:
	pint = (JVMConstPoolNameAndType*)malloc(sizeof(JVMConstPoolNameAndType));
	pool[x] = (JVMConstPoolItem*)pint;
	pint->nameIndex = msRead16(m);
	pint->descIndex = msRead16(m);
	pint->hdr.type = TAG_NAMEANDTYPE;
	fprintf(stderr, "TAG_NAMEANDTYPE: nameIndex:%u descIndex:%u\n", pint->nameIndex,
		pint->descIndex);
	break;
      case TAG_FIELDREF:
	pifr = (JVMConstPoolFieldRef*)malloc(sizeof(JVMConstPoolFieldRef));
	pool[x] = (JVMConstPoolItem*)pifr;
	pifr->classIndex = msRead16(m);
	pifr->nameAndTypeIndex = msRead16(m);
	pifr->hdr.type = TAG_FIELDREF;
	fprintf(stderr, "classIndex:%u nameAndTypeIndex:%u\n", pifr->classIndex, 
		pifr->nameAndTypeIndex);
	break;
      default:
	fprintf(stderr, "unknown tag %u in constant pool\n\n", tag);
	exit(-1);
    }
  }
  /*
    ====================
    LOAD SMALL PARAMETERS
    ====================
  */
  class->accessFlags = msRead16(m);
  class->thisClass = msRead16(m);
  class->superClass = msRead16(m);
  /*
    =====================
    LOAD INTERFACES
    =====================
  */
  class->ifaceCnt = msRead16(m);
  printf("class->ifaceCnt:%u\n", class->ifaceCnt);
  class->interfaces = (uint16*)malloc(class->ifaceCnt);
  for (x = 0; x < class->ifaceCnt; ++x)
    class->interfaces[x] = msRead16(m);
  /*
    ======================
    LOAD FIELDS
    ======================
  */
  class->fieldCnt = msRead16(m);
  class->fields = (JVMClassField*)malloc(sizeof(JVMClassField) * class->fieldCnt);
  for (x = 0; x < class->fieldCnt; ++x) {
    class->fields[x].accessFlags = msRead16(m);
    class->fields[x].nameIndex = msRead16(m);
    class->fields[x].descIndex = msRead16(m);
    class->fields[x].attrCount = msRead16(m);
    fprintf(stderr, "accessFlags:%u nameIndex:%u descIndex:%u attrCount:%u\n",
	    class->fields[x].accessFlags,
	    class->fields[x].nameIndex,
	    class->fields[x].descIndex,
	    class->fields[x].attrCount
    );
  }
  /*
    =======================
    LOAD METHODS
    =======================
  */
  class->methodCnt = msRead16(m);
  class->methods = (JVMMethod*)malloc(sizeof(JVMMethod) * class->methodCnt);
  for (x = 0; x < class->methodCnt; ++x) {
    class->methods[x].accessFlags = msRead16(m);
    class->methods[x].nameIndex = msRead16(m);
    class->methods[x].descIndex = msRead16(m);
    class->methods[x].attrCount = msRead16(m);
    class->methods[x].attrs = (JVMAttribute*)malloc(sizeof(JVMAttribute) * 
      class->methods[x].attrCount);
    for (y = 0; y < class->methods[x].attrCount; ++y) {
      class->methods[x].attrs[y].nameIndex = msRead16(m);
      class->methods[x].attrs[y].length = msRead32(m);
      class->methods[x].attrs[y].info = (uint8*)malloc(
	      class->methods[x].attrs[y].length);
      fprintf(stderr, "name:%s\n", 
	      ((JVMConstPoolUtf8*)class->pool[class->methods[x].attrs[y].nameIndex - 1])->string
	      );
      fprintf(stderr, "attrlen:%u\n", class->methods[x].attrs[y].length);
      msRead(m, class->methods[x].attrs[y].length, class->methods[x].attrs[y].info);
    }
  }
  /*
    ======================
    LOAD ATTRIBUTES
    ======================
  */
  class->attrCnt = msRead16(m);
  class->attrs = (JVMAttribute*)malloc(sizeof(JVMAttribute) * class->attrCnt);
  for (x = 0; x < class->attrCnt; ++x) {
    class->attrs[x].nameIndex = msRead16(m);
    class->attrs[x].length = msRead32(m);
    class->attrs[x].info = (uint8*)malloc(class->attrs[x].length);
    msRead(m, class->attrs[x].length, class->attrs[x].info);
  }
  
  return class;
}

/*
  Look up a loaded class by its fully qualified name (for example
  "java/lang/Object") among the classes of `bundle`.

  Each candidate's own name is resolved through its constant pool
  (thisClass -> class info -> Utf8) and compared with `className`; every
  comparison is traced on stdout. Returns the matching class, or 0 when the
  bundle does not contain it.
*/
JVMClass* jvm_FindClassInBundle(JVMBundle *bundle, const char *className) {
  JVMBundleClass *node = bundle->first;

  while (node != 0) {
    JVMClass *candidate = node->jclass;
    uint32 selfIndex = candidate->thisClass;
    JVMConstPoolClassInfo *info = (JVMConstPoolClassInfo*)candidate->pool[selfIndex - 1];
    JVMConstPoolUtf8 *name = (JVMConstPoolUtf8*)candidate->pool[info->nameIndex - 1];

    printf("looking for class [%s]=?[%s]\n", className, name->string);
    if (!strcmp((const char*)name->string, className))
      return candidate;

    node = node->next;
  }
  /*
    This is where you want to do code to search externally to the
    bundle, and if needed load the missing class into memory.
  */
  return 0;
}

/*
  Find the method of `jclass` whose name equals `methodName` and whose type
  descriptor equals `methodType` (for example "<init>" and "()V").

  Every candidate is traced on stderr. Returns a pointer into
  jclass->methods, or 0 (after printing a message) when nothing matches.
*/
JVMMethod* jvm_FindMethodInClass(JVMClass *jclass, const char *methodName, const char *methodType) {
  int idx = 0;

  while (idx < jclass->methodCnt) {
    JVMMethod *candidate = &jclass->methods[idx];
    /// name and descriptor both live in the constant pool as Utf8 entries
    JVMConstPoolUtf8 *name = (JVMConstPoolUtf8*)jclass->pool[candidate->nameIndex - 1];
    JVMConstPoolUtf8 *desc = (JVMConstPoolUtf8*)jclass->pool[candidate->descIndex - 1];

    fprintf(stderr, "findmeth:%s%s\n", name->string, desc->string);
    if (strcmp((const char*)name->string, methodName) == 0 &&
        strcmp((const char*)desc->string, methodType) == 0)
      return candidate;
    ++idx;
  }
  fprintf(stderr, "could not find method %s of type %s\n", methodName, methodType);
  return 0;
}

/*
  Tell whether a method type descriptor such as "(I)V" declares a void
  return type.

  Returns 1 when the character right after the closing ')' is 'V', and 0
  otherwise -- including for a null pointer or a descriptor without ')',
  which previously made the scan run past the end of the string.
*/
int jvm_IsMethodReturnTypeVoid(const char *typestr) {
  const char *close;

  if (!typestr)
    return 0;
  /// go all the way to the return type part
  close = strchr(typestr, ')');
  if (!close)
    return 0;
  /// anything other than 'V' is non-void
  return close[1] == 'V';
}

/*
  Count the arguments declared by a method type descriptor.

  Descriptor grammar handled here:
      B byte    C char    D double   F float   I int   J long
      S short   Z boolean
      L classname;        object reference (one argument, any name length)
      [ type              array: any number of '[' prefixes plus the element
                          type count as ONE argument

  Each argument counts once regardless of its width (long and double too).
  Returns 0 for a null pointer or a string that does not start with '('.
  Scanning stops at ')' or at the end of the string, so a malformed
  descriptor can no longer be read out of bounds.
*/
int jvm_GetMethodTypeArgumentCount(const char *typestr) {
  const char    *p;
  int           c;

  if (!typestr || typestr[0] != '(')
    return 0;

  c = 0;
  /// read each type, starting after the opening parenthesis
  for (p = typestr + 1; *p && *p != ')'; ++p) {
    /// array dimensions prefix the element type they belong to
    while (*p == '[')
      ++p;
    /// a class name runs until its semi-colon, however long it is
    if (*p == 'L') {
      while (*p && *p != ';')
        ++p;
    }
    /// truncated descriptor: do not count the incomplete type
    if (!*p || *p == ')')
      break;
    c++;
  }

  return c;
}

static int g_dbg_ec = 0;

int jvm_ExecuteObjectMethod(JVM *jvm, JVMBundle *bundle, JVMClass *jclass, JVMObject *jobject,
                         const char *methodName, const char *methodType,
                         JVMLocal *_locals, uint8 localCnt, JVMLocal *_result) {
  JVMMethod                     *method;
  JVMConstPoolUtf8              *a;
  int                           x, y;
  JVMLocal                      *locals;
  uint32                        codesz;
  uint8                         *code;
  uint8                         opcode;
  JVMStack                      stack;
  JVMLocal                      result;

  JVMClass                      *_jclass;
  JVMObject                     *_jobject;
  JVMMethod                     *_method;
  JVMConstPoolMethodRef         *b;
  JVMConstPoolClassInfo         *c;
  JVMConstPoolNameAndType       *d;
  int                           argcnt;
  uint8                         *mclass;
  uint8                         *mmethod;
  uint8                         *mtype;

  g_dbg_ec++;
  if (g_dbg_ec == 3) {
    printf("g_dbg_ec=%u\n", g_dbg_ec);
    exit(-9);
  }
  
  jvm_StackInit(&stack, 1024);
  
  printf("executing %s\n", methodName);
  
  /// 255 should be maximum local addressable
  locals = (JVMLocal*)malloc(sizeof(JVMLocal) * 256);
  /// copy provided arguments into locals
  for (x = 0; x < localCnt; ++x) {
    locals[x].data = _locals[x].data;
    locals[x].flags = _locals[x].flags;
  }
  /// load object into local_0
  locals[0].data = (uint64)jobject;
  locals[0].flags = JVM_STACK_ISOBJECTREF;
  /// the stack references the objects
  jobject->stackCnt++;
  /// find method specifiee
  method = jvm_FindMethodInClass(jclass, methodName, methodType);
  if (!method) {
    fprintf(stderr, "JVM_ERROR_METHODNOTFOUND");
    return JVM_ERROR_METHODNOTFOUND;
  }
  /// find code attribute
  for (x = 0; x < method->attrCount; ++x) {
    a = (JVMConstPoolUtf8*)jclass->pool[method->attrs[x].nameIndex - 1];
    if (strcmp(a->string, "Code") == 0) {
      code = method->attrs[x].info;
      codesz = method->attrs[x].length;
      break;
    }
  }

  printf("execute code\n");
  /// execute code
  for (x = 0; x < codesz;) {
    opcode = code[x];
    printf("opcode(%u/%u):%x\n", x, codesz, opcode);
    switch (opcode) {
      /// nop: no operation
      case 0:
        x += 2;
        break;
      /// aload
      case 0x19:
        y = code[x+1];
        jvm_StackPush(&stack, locals[y].data, locals[y].flags);
        if (locals[x].flags == JVM_STACK_ISOBJECTREF)
          ((JVMObject*)locals[0].data)->stackCnt++;
        x += 2;
        break;
      /// aload_0: load a reference onto the stack from local variable 0
      case 0x2a:
        jvm_StackPush(&stack, locals[0].data, locals[0].flags);
        if (locals[0].flags == JVM_STACK_ISOBJECTREF)
          ((JVMObject*)locals[0].data)->stackCnt++;
        x += 1;
        break;
      /// aload_1
      case 0x2b:
        jvm_StackPush(&stack, locals[1].data, locals[1].flags);
        if (locals[1].flags == JVM_STACK_ISOBJECTREF)
          ((JVMObject*)locals[1].data)->stackCnt++;
        x += 1;
        break;
      /// aload_2
      case 0x2c:
        jvm_StackPush(&stack, locals[2].data, locals[2].flags);
        if (locals[2].flags == JVM_STACK_ISOBJECTREF)
          ((JVMObject*)locals[2].data)->stackCnt++;
        x += 1;
        break;
      /// aload_3
      case 0x2d:
        jvm_StackPush(&stack, locals[3].data, locals[3].flags);
        if (locals[3].flags == JVM_STACK_ISOBJECTREF)
          ((JVMObject*)locals[3].data)->stackCnt++;
        x += 1;
        break;
      /// invokevirtual
      case 0xb6:
      /// invokespecial
      case 0xb7:
         /*
            (1) verify objref is indeed an object reference
                use the type info on the stack
            (2) verify objref is a reference to the described object
            (3) verify the number of arguments are correct
         */
         y = code[x+1] << 8 | code[x+2];
         
         b = (JVMConstPoolMethodRef*)jclass->pool[y - 1];
         c = (JVMConstPoolClassInfo*)jclass->pool[b->nameIndex - 1];
         a = (JVMConstPoolUtf8*)jclass->pool[c->nameIndex - 1];
         // a->string is className of class we are calling method on
         mclass = a->string;

         /// if java/lang/Object just pretend we did
         if (strcmp(mclass, "java/lang/Object") == 0) {
          printf("caught java/lang/Object call and skipped it\n");
          x +=3 ;
          break;
         }
         
         d = (JVMConstPoolNameAndType*)jclass->pool[b->descIndex - 1];
         a = (JVMConstPoolUtf8*)jclass->pool[d->nameIndex - 1];
         // a->string is the method of the class
         _jclass = jvm_FindClassInBundle(bundle, mclass);
         mmethod = a->string;

         a = (JVMConstPoolUtf8*)jclass->pool[d->descIndex - 1];
         // a->string is the type description of the method
         mtype = a->string;

         _method = jvm_FindMethodInClass(_jclass, mmethod, mtype);
         
         argcnt = jvm_GetMethodTypeArgumentCount(a->string);
         
         printf("invokespecial: %s:%s[%u] in %s\n", mmethod, mtype, argcnt, mclass);

         /// pop locals from stack into local variable array
         _locals = (JVMLocal*)malloc(sizeof(JVMLocal) * (argcnt + 2));
         _locals[0].data = (uint64)jobject;
         _locals[0].flags = JVM_STACK_ISOBJECTREF;
         for (y = 0; y < argcnt; ++y) {
           jvm_StackPop(&stack, &result); 
           _locals[y + 1].data = result.data;
           _locals[y + 1].flags = result.flags;
         }
         /// pop object reference from stack
         jvm_StackPop(&stack, &result);
         if (!(result.flags & JVM_STACK_ISOBJECTREF)) {
           fprintf(stderr, "object from stack is not object reference!");
           exit(-8);
         }
         
         /// stays the same since we poped it then placed it into locals
         //((JVMObject*)result.data)->stackCnt

         _locals[0].data = result.data;
         _locals[1].flags = result.flags;

         printf("########################");
         jvm_ExecuteObjectMethod(jvm, bundle, _jclass, jobject, mmethod, mtype, locals, argcnt + 1, &result);
         free(_locals);

         printf("@@@@@@@@@@@%s\n", mtype);

         /// need to know if it was a void return or other
         if (!jvm_IsMethodReturnTypeVoid(mtype)) {
          /// push result onto stack
          printf("return type not void!\n");
          jvm_StackPush(&stack, result.data, result.flags);
         } else {
           printf("return type void..\n");
         }
         
         x += 3;
         break;
      /// return: void from method
      case 0xb1:
         /// need to go through and decrement reference count of objects on stack and local
         while (jvm_StackMore(&stack)) {
           jvm_StackPop(&stack, &result);
           if (result.flags & JVM_STACK_ISOBJECTREF) {
             fprintf(stderr, "function return void decrement stack object ref %u\n", ((JVMObject*)result.data)->stackCnt);
             ((JVMObject*)result.data)->stackCnt--;
           }
         }
         /// also do local variables the same, decrement obj references
         for (y = 0; y < 256; ++y) {
           if (locals[y].flags & JVM_STACK_ISOBJECTREF) {
              fprintf(stderr, "function return void decrement local object ref %u\n", ((JVMObject*)result.data)->stackCnt);
              ((JVMObject*)result.data)->stackCnt--;
           }
         }
         return JVM_SUCCESS;
      default:
        fprintf(stderr, "unknown opcode %x\n", opcode);
        return JVM_ERROR_UNKNOWNOPCODE;
    }
  }
  
  return JVM_SUCCESS;
}

/*
  Instantiate the class named `className` from `bundle`.

  The new zero-filled object is linked at the head of jvm->objects and its
  no-argument constructor "<init>" with descriptor "()V" is executed.
  `*out` receives the object, or 0 when the class is unknown or memory is
  exhausted. Returns JVM_ERROR_CLASSNOTFOUND, JVM_ERROR_OUTOFMEMORY, or
  whatever running the constructor returns.
*/
int jvm_CreateObject(JVM *jvm, JVMBundle *bundle, const char *className, JVMObject **out) {
  JVMLocal                      ctorResult;
  JVMLocal                      ctorArgs[1];
  JVMObject                     *instance;
  JVMClass                      *klass;

  *out = 0;
  /// find class and create instance
  printf("create object with class %s\n", className);
  klass = jvm_FindClassInBundle(bundle, className);
  if (klass == 0)
    return JVM_ERROR_CLASSNOTFOUND;

  instance = (JVMObject*)malloc(sizeof(JVMObject));
  if (instance == 0)
    return JVM_ERROR_OUTOFMEMORY;
  *out = instance;

  memset(instance, 0, sizeof(JVMObject));
  instance->class = klass;
  /// link us into global object chain
  instance->next = jvm->objects;
  jvm->objects = instance;

  /// the receiver is the only argument of the default constructor
  ctorArgs[0].flags = JVM_STACK_ISOBJECTREF;
  ctorArgs[0].data = (uint64)instance;
  /// call default constructor (no arguments)
  return jvm_ExecuteObjectMethod(jvm, bundle, klass, instance, "<init>", "()V", ctorArgs, 1, &ctorResult);
}

/*
  Read the complete file at `path` into a newly allocated buffer.

  On success the buffer is returned (the caller frees it) and `*size` is set
  to the number of bytes read. On any failure -- file cannot be opened,
  its length cannot be determined, out of memory, short read -- 0 is
  returned and `*size` is set to 0.
*/
uint8* jvm_ReadWholeFile(const char *path, uint32 *size) {
  uint8         *buf;
  FILE          *fp;
  long          len;

  *size = 0;
  fp = fopen(path, "rb");
  if (!fp)
    return 0;

  buf = 0;
  len = -1;
  if (fseek(fp, 0, SEEK_END) == 0)
    len = ftell(fp);
  if (len >= 0 && fseek(fp, 0, SEEK_SET) == 0) {
    /* never ask for 0 bytes so an empty file still yields a valid buffer */
    buf = (uint8*)malloc(len > 0 ? (size_t)len : 1);
    if (buf && fread(buf, 1, (size_t)len, fp) != (size_t)len) {
      free(buf);
      buf = 0;
    }
  }
  fclose(fp);

  if (buf)
    *size = (uint32)len;
  return buf;
}

/*
  Insert `jclass` at the head of the bundle's class list.

  The bundle's `first` pointer must already be initialised (0 for an empty
  bundle). If the list node cannot be allocated the class is not added and
  a message is printed on stderr.
*/
void jvm_AddClassToBundle(JVMBundle *jbundle, JVMClass *jclass) {
  JVMBundleClass                *jbclass;

  jbclass = (JVMBundleClass*)malloc(sizeof(JVMBundleClass));
  if (!jbclass) {
    fprintf(stderr, "jvm_AddClassToBundle: out of memory\n");
    return;
  }
  jbclass->jclass = jclass;
  /* no namespace information is available here; do not leave it dangling */
  jbclass->nameSpace = 0;

  jbclass->next = jbundle->first;
  jbundle->first = jbclass;
}

int main(int argc, char *argv[])
{
  uint8			*buf;
  JVMMemoryStream	m;
  JVMClass		*jclass;
  JVMBundle		jbundle;
  JVMBundleClass	*jbclass;
  JVM                   jvm;
  JVMObject             *jobject;
  uint32                size;
  int                   result;

  buf = jvm_ReadWholeFile("Apple.class", &size);
  msWrap(&m, buf, size);
  jclass = jvm_LoadClass(&m);
  jvm_AddClassToBundle(&jbundle, jclass);
  
  buf = jvm_ReadWholeFile("Test.class", &size);
  msWrap(&m, buf, size);
  jclass = jvm_LoadClass(&m);
  jvm_AddClassToBundle(&jbundle, jclass);

  jvm.objects = 0;

  /// create initial object
  result = jvm_CreateObject(&jvm, &jbundle, "Test", &jobject);
  
  return 1;
}

Python Example

#!/usr/bin/python3.1
import os
import sys
import struct
import pprint

def funpack(fd, fmt):
	"""Read as many bytes from fd as the struct format fmt needs and unpack them.

	Always returns a tuple, even for a single-value format.
	"""
	raw = fd.read(struct.calcsize(fmt))
	return struct.unpack_from(fmt, raw)

def javaReadClass(fd):
	"""Parse a Java class file from the binary file object fd.

	Returns a dict with the keys:
		'vermin', 'vermaj'  version numbers from the header
		'constPool'         dict mapping the 1-based constant pool index to an
		                    entry dict ('type' holds the tag number)
		'name', 'super'     class and super class names (bytes)
		'ifaces'            dict position -> class-info entry of each interface
		'fields'            dict 1-based position -> field dict
		'methods'           dict method name (bytes) -> method dict
		'attrs'             list of class attribute dicts

	Only the constant pool tags 1 (utf8), 7 (class info), 9 (field ref),
	10 (method ref) and 12 (name and type) are supported; any other tag
	raises Exception. Methods are keyed by name only, so overloads replace
	each other.
	"""
	def readAttrs(count):
		# attribute records are laid out the same for fields, methods and the class
		attrs = []
		for _ in range(count):
			attr = {}
			attrs.append(attr)
			attr['name_index'], attr['length'] = funpack(fd, '>HI')
			attr['name'] = constPool[attr['name_index']]['value']
			attr['info'] = fd.read(attr['length'])
		return attrs

	hdr = {}
	magic, vermin, vermaj = funpack(fd, '>IHH')
	print('magic:%x vermin:%x vermaj:%x' % (magic, vermin, vermaj))
	constPoolCnt = funpack(fd, '>H')[0]
	# =========== const pool =========
	constPool = {}
	hdr['vermin'] = vermin
	hdr['vermaj'] = vermaj
	hdr['constPool'] = constPool
	# the count field is one larger than the number of entries; indices start at 1
	for x in range(1, constPoolCnt):
		tag = fd.read(1)
		e = {}
		if tag[0] == 10:
			# method ref
			e['name_index'], e['descriptor_index'] = funpack(fd, '>HH')
		elif tag[0] == 7:
			# classinfo
			e['name_index'] = funpack(fd, '>H')[0]
		elif tag[0] == 1:
			# utf8
			sz = funpack(fd, '>H')[0]
			e['value'] = fd.read(sz)
		elif tag[0] == 12:
			# nameandtype
			e['name_index'], e['descriptor_index'] = funpack(fd, '>HH')
		elif tag[0] == 9:
			# fieldref
			e['classIndex'], e['nameAndTypeIndex'] = funpack(fd, '>HH')
		else:
			raise Exception('unknown tag %s' % tag[0])
		e['type'] = tag[0]
		constPool[x] = e
	# -------------------------
	classAccessFlags, thisClass, superClass, ifaceCnt = funpack(fd, '>HHHH')
	print('accessFlags:%x' % classAccessFlags)
	cinfo = constPool[thisClass]
	hdr['name'] = constPool[cinfo['name_index']]['value']
	cinfo = constPool[superClass]
	hdr['super'] = constPool[cinfo['name_index']]['value']
	# ========= interfaces =========
	ifaces = {}
	hdr['ifaces'] = ifaces
	for x in range(ifaceCnt):
		# each interface is a constant pool index of a class-info entry
		iface = funpack(fd, '>H')[0]
		ifaces[x] = constPool[iface]
	# -------------------------------
	# ========= fields ==============
	fieldsCnt = funpack(fd, '>H')[0]
	fields = {}
	hdr['fields'] = fields
	for x in range(fieldsCnt):
		field = {}
		fields[x + 1] = field
		field['accessFlags'], field['name_index'], \
			field['descriptor_index'], field['attrCnt'] = \
				funpack(fd, '>HHHH')
		field['name'] = constPool[field['name_index']]['value']
		# the field's attributes follow inline and must be consumed,
		# otherwise everything after this field is read misaligned
		field['attrs'] = readAttrs(field['attrCnt'])
	# -------------------------------
	# ========= methods ========
	methCnt = funpack(fd, '>H')[0]
	methods = {}
	hdr['methods'] = methods
	for x in range(methCnt):
		method = {}
		methods[x] = method
		method['access_flags'], method['name_index'], \
			method['descriptor_index'], method['attrCnt'] = \
				funpack(fd, '>HHHH')
		method['attrs'] = readAttrs(method['attrCnt'])
	# -------------------------------
	# ========== attributes =========
	attrCnt = funpack(fd, '>H')[0]
	hdr['attrs'] = readAttrs(attrCnt)
	# ======== resolve some data =======
	__methods = {}
	for k in hdr['methods']:
		meth = hdr['methods'][k]
		meth['name'] = hdr['constPool'][meth['name_index']]['value']
		meth['descriptor'] = hdr['constPool'][meth['descriptor_index']]['value']
		__methods[meth['name']] = meth
	hdr['methods'] = __methods
	return hdr

def javaGetClassMethodAndCode(jclass, methName):
	"""Return (method, code) for the method named methName of jclass.

	code is the raw 'info' payload of the method's attribute named b'Code'.
	Raises Exception when the method does not exist or has no such attribute.
	"""
	candidate = jclass['methods'].get(methName)
	if candidate is not None:
		# look for code attribute
		for attribute in candidate['attrs']:
			if attribute['name'] == b'Code':
				return (candidate, attribute['info'])

	raise Exception('not found %s::%s' % (jclass['name'], methName))

def mkhexstr(data):
	"""Format a sequence of byte values as two-digit hex, each followed by a space."""
	return ''.join('%02x ' % value for value in data)

# Basic type tags stored in Var.btype for every stack slot, local and field.
TYPE_UNK = 0
TYPE_OBJ = 1
TYPE_INT = 2
TYPE_SHORT = 3
TYPE_LONG = 4
TYPE_NULL = 5
TYPE_FLOAT = 6
TYPE_DOUBLE = 7

# Printable name of each type tag; used by Var.__repr__ for debugging output.
dbgmap = {
	TYPE_UNK:		'UNKNOWN',
	TYPE_OBJ:		'OBJECT',
	TYPE_INT:		'INT',
	TYPE_SHORT:		'SHORT',
	TYPE_LONG:		'LONG',
	TYPE_NULL:		'NULL',
	TYPE_FLOAT:		'FLOAT',
	TYPE_DOUBLE:	'DOUBLE',
}

class Obj:
	"""Plain record for one instantiated Java object."""
	# class the object was created from (dict produced by javaReadClass)
	jclass = None
	# per-object field values
	fields = None
	# methods copied from the class
	methods = None

'''
	Variant structure with easier printing support for debugging.
'''
class Var:
	"""Tagged value: btype is one of the TYPE_* tags, value the payload."""
	btype = 0
	value = None
	def __repr__(self):
		# objects print their field table, everything else prints tag name and value
		if self.btype != TYPE_OBJ:
			return '<%s:%s>' % (dbgmap[self.btype], self.value)
		return '<OBJECT fields=%s>' % self.value.fields

'''
	The stack contains variant like objects. We store
	basic type information on each stack item. 
'''
def var_mk_unk():
	"""Create a Var tagged TYPE_UNK (value left at its default)."""
	unknown = Var()
	unknown.btype = TYPE_UNK
	return unknown
def var_mk_obj(obj):
	"""Create a Var tagged TYPE_OBJ that refers to obj."""
	ref = Var()
	ref.value = obj
	ref.btype = TYPE_OBJ
	return ref
def var_mk_float(f):
	"""Create a Var tagged TYPE_FLOAT holding f."""
	boxed = Var()
	boxed.value = f
	boxed.btype = TYPE_FLOAT
	return boxed
def var_mk_null():
	"""Create a Var tagged TYPE_NULL (value left at its default)."""
	null = Var()
	null.btype = TYPE_NULL
	return null
def var_mk_long(l):
	"""Create a Var tagged TYPE_LONG holding l."""
	boxed = Var()
	boxed.value = l
	boxed.btype = TYPE_LONG
	return boxed
def var_mk_int(i):
	"""Create a Var tagged TYPE_INT holding i."""
	boxed = Var()
	boxed.value = i
	boxed.btype = TYPE_INT
	return boxed

'''
	(1) create object structure
	(2) find specified class in bundle via textual classname
	(3) create fields in object
	(4) create methods in object (**needed??**)
	(5) push object reference to local_0
	(6) execute <init> method of objet
	(7) add object to system instanced objects list
	(8) return the java object
'''
def javaObjectInstance(jsys, jclassname):
	"""Create an instance of the class named jclassname and run its constructor.

	The class is looked up in jsys.bundle. The new object gets one slot of
	unknown type per declared field (numbered from 0 in declaration order)
	and a copy of the class's method table; it is passed as local 0 to the
	class's b'<init>' method and then appended to jsys.objects.
	Returns the new object.
	"""
	jobj = Obj()
	# look through bundle and find jclassname
	jclass = jsys.bundle[jclassname]

	# initialize fields: every field needs its own slot (previously the slot
	# index was never advanced, so all fields shared slot 0)
	fields = {}
	for slot in range(len(jclass['fields'])):
		fields[slot] = var_mk_unk()
	methods = {}
	for method in jclass['methods']:
		methods[method] = jclass['methods'][method]
	jobj.methods = methods
	jobj.fields = fields
	jobj.jclass = jclass
	jobj.name = jclassname
	# need to execute initialization method
	# <init>
	local = {}
	local[0] = var_mk_obj(jobj)
	javaExecuteClassMethod(jsys, jobj, jclass, b'<init>', local)

	# make system ref obj created
	jsys.objects.append(jobj)
	return jobj

'''
	(1) get the code for the method in the javaClass
	(2) execute each opcode
'''
# Single-byte opcodes that pop the stack top into a fixed local slot
# (value = slot number).
_JAVA_STORE_SLOT = {
	0x4b: 0, 0x4c: 1, 0x4d: 2, 0x4e: 3,	# astore_0 .. astore_3
	0x3f: 0, 0x40: 1, 0x41: 2, 0x42: 3,	# lstore_0 .. lstore_3
}
# Single-byte opcodes that push a fixed local slot (value = slot number).
_JAVA_LOAD_SLOT = {
	0x1a: 0, 0x1b: 1, 0x1c: 2, 0x1d: 3,	# iload_0 .. iload_3
	0x2a: 0, 0x2b: 1, 0x2c: 2, 0x2d: 3,	# aload_0 .. aload_3
}
# Two-byte forms: the slot number is the operand byte after the opcode.
_JAVA_STORE_INDEXED = (0x3a, 0x37)	# astore, lstore
_JAVA_LOAD_INDEXED = (0x15,)		# iload
# Integer arithmetic; a was pushed first, b is the value on top of the stack.
_JAVA_INT_BINOP = {
	0x60: lambda a, b: b + a,		# iadd
	0x64: lambda a, b: int(a - b),	# isub
	0x68: lambda a, b: int(a * b),	# imul
	0x6c: lambda a, b: int(a / b),	# idiv
}

def _javaResolveMethodRef(constPool, ndx):
	'''Follow method reference ndx to (class name, method name, descriptor).'''
	m = constPool[ndx]
	# On a method reference, 'name_index' leads to the class entry and
	# 'descriptor_index' to the name-and-type entry.
	name = constPool[m['name_index']]
	desc = constPool[m['descriptor_index']]
	print('..name:%s desc:%s' % (name, desc))
	_name = constPool[name['name_index']]['value']
	name = constPool[desc['name_index']]['value']
	desc = constPool[desc['descriptor_index']]['value']
	return _name, name, desc

def _javaPopCallLocals(stack):
	'''Pop call arguments and the receiver; return (objref, callee locals).'''
	# Everything above the first object reference is treated as an argument.
	# NOTE(review): this heuristic cannot handle object-typed arguments; the
	# descriptor would have to be parsed to know the real argument count.
	popped = []
	while stack[-1].btype != TYPE_OBJ:
		popped.append(stack.pop(-1))
	objref = stack.pop(-1)
	# popped is last-argument-first; locals are numbered first-argument-first
	# starting at 1, with the receiver in slot 0
	sz = len(popped)
	nlocal = {}
	for k, arg in enumerate(popped):
		nlocal[sz - k] = arg
	nlocal[0] = objref
	return objref, nlocal

def _javaReturnsVoid(desc):
	'''True when a method descriptor ends in ")V" (bytes or str).'''
	return desc[-2:] in (b')V', ')V')

def javaExecuteClassMethod(jsys, jobj, jclass, methName, local):
	'''
		Interpret the bytecode of method methName of jclass.

		jsys      -- system object (class bundle and object list)
		jobj      -- object the method runs on; only its name is used here,
		             for the trace output
		jclass    -- class structure providing the method code and the
		             constant pool
		methName  -- name of the method to look up
		local     -- dict of local variable slots; slot 0 holds the receiver

		Returns the popped Var for ireturn, a null Var for return, 0 when
		putfield is attempted on a non-object, and None when the code runs
		off its end without a return instruction.

		Raises Exception for an unknown opcode, for putstatic (not
		implemented) and for new on a class that is not in the bundle.
	'''
	method, code = javaGetClassMethodAndCode(jclass, methName)
	print(method)

	print('\033[31mjavaExecuteClassMethod(class:%s method:%s\033[37m' % (jobj.name, methName))
	print(mkhexstr(code))
	# execution stack for method
	stack = []

	x = 0
	csz = len(code)
	while x < csz:
		opcode = code[x]
		print('===%02x===' % opcode)
		print(stack)
		# nop: single byte, no operands
		if opcode == 0x00:
			x = x + 1
			continue
		# astore / lstore: opcode plus one operand byte
		if opcode in _JAVA_STORE_INDEXED:
			ndx = code[x+1]
			local[ndx] = stack.pop(-1)
			x = x + 2
			continue
		# astore_<n> / lstore_<n>
		if opcode in _JAVA_STORE_SLOT:
			local[_JAVA_STORE_SLOT[opcode]] = stack.pop(-1)
			x = x + 1
			continue
		# iload: opcode plus one operand byte
		if opcode in _JAVA_LOAD_INDEXED:
			ndx = code[x+1]
			stack.append(local[ndx])
			x = x + 2
			continue
		# iload_<n> / aload_<n>
		if opcode in _JAVA_LOAD_SLOT:
			stack.append(local[_JAVA_LOAD_SLOT[opcode]])
			x = x + 1
			continue
		# aconst_null: push null onto the stack
		if opcode == 0x01:
			stack.append(var_mk_null())
			x = x + 1
			continue
		# iconst_m1 .. iconst_5: push int(-1) .. int(5)
		if 0x02 <= opcode <= 0x08:
			stack.append(var_mk_int(opcode - 0x03))
			x = x + 1
			continue
		# lconst_0 / lconst_1: push long(0) / long(1)
		if opcode == 0x09 or opcode == 0x0a:
			stack.append(var_mk_long(opcode - 0x09))
			x = x + 1
			continue
		# fconst_0 .. fconst_2: push float(0.0) .. float(2.0)
		if 0x0b <= opcode <= 0x0d:
			stack.append(var_mk_float(float(opcode - 0x0b)))
			x = x + 1
			continue
		# bipush: push a byte onto the stack as an integer value
		if opcode == 0x10:
			byte = code[x+1]
			# the operand is a signed byte; code[] yields 0..255
			if byte > 0x7f:
				byte = byte - 0x100
			stack.append(var_mk_int(byte))
			x = x + 2
			continue
		# dup: duplicate the value on top stack
		if opcode == 0x59:
			stack.append(stack[-1])
			x = x + 1
			continue
		# iadd / isub / imul / idiv
		if opcode in _JAVA_INT_BINOP:
			value1 = stack.pop(-1)
			value2 = stack.pop(-1)
			stack.append(var_mk_int(_JAVA_INT_BINOP[opcode](value2.value, value1.value)))
			x = x + 1
			continue
		# ireturn: return integer from method
		if opcode == 0xac:
			return stack.pop(-1)
		# return: return void from method
		if opcode == 0xb1:
			return var_mk_null()
		# putstatic: set static field to value in the class
		if opcode == 0xb3:
			# parenthesised on purpose: '+' binds tighter than '<<'
			frefndx = (code[x+1] << 8) | code[x+2]
			print('0xb3', frefndx)
			raise Exception('not implemented')
		# getfield
		if opcode == 0xb4:
			ndx = code[x+1] << 8 | code[x+2]
			objref = stack.pop(-1)
			stack.append(objref.value.fields[ndx])
			x = x + 3
			continue
		# putfield: set field to value in object
		if opcode == 0xb5:
			ndx = code[x+1] << 8 | code[x+2]
			value = stack.pop(-1)
			objref = stack.pop(-1)
			print('objref:%s' % objref)
			if objref.btype != TYPE_OBJ:
				print('attempt to set field on non-object')
				return 0
			objref.value.fields[ndx] = value
			print('putfield:%x\n' % ndx)
			x = x + 3
			continue
		# invokespecial
		if opcode == 0xb7:
			ndx = code[x+1] << 8 | code[x+2]
			_name, name, desc = _javaResolveMethodRef(jclass['constPool'], ndx)

			# The call consumes the receiver and its arguments even when the
			# target is skipped below. They go into a fresh dict so that the
			# current frame's own 'local' is left untouched.
			objref, nlocal = _javaPopCallLocals(stack)

			# just ignore built-in stuff for now until
			# i can try to get it actually created when
			# get a chance
			if _name == b'java/lang/Object':
				print('\033[32mattempted call for %s:%s' % (name, desc))
				x = x + 3
				continue

			print('_name:%s name:%s descriptor:%s' % (_name, name, desc))

			ret = javaExecuteClassMethod(jsys, objref.value, objref.value.jclass, name, nlocal)
			# only a non-void method leaves a result on the caller's stack
			if not _javaReturnsVoid(desc):
				stack.append(ret)
			x = x + 3
			continue
		# new
		if opcode == 0xbb:
			ndx = code[x+1] << 8 | code[x+2]
			constPool = jclass['constPool']
			m = constPool[ndx]
			m = constPool[m['name_index']]['value']
			# find the class in our bundle; loading it from disk
			# in the relative directory is not implemented
			if m not in jsys.bundle:
				raise Exception('Not In Bundle! [not-implemented]')
			# new java object
			njobj = javaObjectInstance(jsys, m)
			stack.append(var_mk_obj(njobj))
			x = x + 3
			continue
		# invokevirtual
		if opcode == 0xb6:
			ndx = code[x+1] << 8 | code[x+2]
			# receiver and arguments go into a fresh dict; the current
			# frame's 'local' must survive the call
			objref, nlocal = _javaPopCallLocals(stack)

			print('m:%s' % jclass['constPool'][ndx])
			_name, name, desc = _javaResolveMethodRef(jclass['constPool'], ndx)
			print(_name, name, desc)

			# objref is the actual instance of an object
			# name identifies the virtual method
			# desc describes the return type and arguments
			ret = javaExecuteClassMethod(jsys, objref.value, objref.value.jclass, name, nlocal)
			print('~~~ ret from call ~~~')
			print('@@', ret)
			# only a non-void method leaves a result on the caller's stack
			if not _javaReturnsVoid(desc):
				stack.append(ret)
			x = x + 3
			continue

		print('stack:%s' % stack)
		raise Exception('opcode not understood %x' % opcode)

class JavaSystem:
	'''
		Holds the state of one virtual machine: the loaded classes and
		every object instantiated so far.
	'''
	def __init__(self):
		# Created per instance: as class attributes the dict and list
		# would be shared by every JavaSystem ever constructed.
		# loaded classes, keyed by class name
		self.bundle = {}
		# all objects created through javaObjectInstance
		self.objects = []

jsys = JavaSystem()

# Load every class the demo needs into the bundle, keyed by class name.
# 'with' closes the file even when javaReadClass raises.
for path in ('Apple.class', 'Test.class'):
	with open(path, 'rb') as fd:
		jclass = javaReadClass(fd)
	jsys.bundle[jclass['name']] = jclass

jobj = javaObjectInstance(jsys, b'Test')

# Run Test.main with the new object in local slot 0. The class is looked
# up explicitly instead of relying on whichever file was loaded last.
jclass = jsys.bundle[b'Test']
local = {}
local[0] = var_mk_obj(jobj)
ret = javaExecuteClassMethod(jsys, jobj, jclass, b'main', local)
print('------ return -------')
pprint.pprint(ret)