代码虚拟机

代码虚拟化简介

虚拟化是用一套自定义的字节码来替换程序中的native指令，而字节码在执行的时候又由程序中的解释器来解释执行。自定义的字节码只有解释器才能识别，一般工具无法识别这些字节码，所以基于虚拟机的保护相对其他保护而言要更加难破解。这种关系与解释型语言类似：例如Python脚本不是系统可执行文件，不能直接在系统中运行，需要相应的解释器才能运行。

虚拟化技术应用广泛，如sandbox、程序保护壳等。很多时候为了防止恶意代码对我们的系统造成破坏，需要一个sandbox，使程序运行在sandbox中，即使恶意代码破坏系统，也只是破坏了sandbox，不会对系统造成影响。还有如vmp，shielden这些加密壳就是内置了一个虚拟机来实现对程序代码的保护。
基于虚拟机的代码保护也算是代码混淆技术的一种。代码混淆技术对保护代码很有效果，但是也存在着副作用，比如会或多或少降低程序效率，这一点在基于虚拟机的保护中格外突出，所以大多数基于虚拟机的保护都只是保护了其中比较重要的部分。
在基于虚拟机的保护技术中，自定义的字节码与native指令之间通常存在映射关系：一条native指令对应一条或多条字节码。由于这种映射是自定义的，为了增加虚拟机保护被破解的难度，在对被保护代码进行转换时可以随机生成多套不同的字节码。

实现细节

1.定义一套字节码
这里只定义了几条常用的指令，可以根据需要再进行扩展。

// Opcode bytes of the custom instruction set. Each value is the first byte
// of an encoded instruction in the bytecode stream.
enum OPCODES
{
    MOV       = 0xA0, // mov
    XOR       = 0xA1, // xor
    CMP       = 0xA2, // cmp
    RET       = 0xA3, // ret
    SYS_READ  = 0xA4, // read system call
    SYS_WRITE = 0xA5, // write system call
    JNZ       = 0xA6  // jnz
};

2.实现解释器
实现解释器需要虚拟出一些自定义字节码运行的环境，与真实的运行环境类似，需要处理器、堆、栈，这里先定义一个虚拟的处理器：

// One dispatch-table entry: an opcode byte paired with the function that
// interprets it.
struct opcode_t
{
    unsigned char opcode;  // opcode byte value
    void (*func)(void *);  // handler called for this opcode
};
typedef struct opcode_t vm_opcode;
// Virtual processor: the complete register state of the interpreter plus its
// opcode dispatch table.
struct processor_t
{
    int r1;                          // virtual general-purpose register r1
    int r2;                          // virtual general-purpose register r2
    int r3;                          // virtual general-purpose register r3
    int r4;                          // virtual general-purpose register r4
    int flag;                        // virtual flag register, plays a role similar to eflags
    unsigned char *eip;              // virtual instruction pointer: address of the bytecode byte being interpreted
    vm_opcode op_table[OPCODE_NUM];  // dispatch table: every opcode with its handler
};
typedef struct processor_t vm_processor;

要保护的代码逻辑比较简单，所以只需要一个处理器就可以了，堆和栈不是必须的。程序中有一个全局的heap_buf用来存储数据，可以把这个缓冲区空间理解成堆或栈。
有了上面的两个结构，下面实现解释器：

// Execute the single instruction that proc->eip points at.
//
// Looks up the opcode byte at *proc->eip in proc->op_table and calls the
// matching handler with proc as its argument. This function never moves eip
// itself; any advance is done by the handler.
//
// If no table entry matches, the byte cannot be decoded. Nothing would move
// eip in that case, so a caller that loops on this function would spin
// forever; instead the offending byte is reported on stderr and the process
// exits with EXIT_FAILURE.
void exec_opcode(vm_processor *proc)
{
    const unsigned char op = *proc->eip;

    for (int i = 0; i < OPCODE_NUM; i++) {
        if (proc->op_table[i].opcode == op) {
            // Found the handler for this opcode: let it interpret the instruction.
            proc->op_table[i].func((void *)proc);
            return;
        }
    }

    fprintf(stderr, "vm: unknown opcode 0x%02x\n", (unsigned)op);
    exit(EXIT_FAILURE);
}
// Interpreter main loop: runs the bytecode embedded in target_func.
void vm_interp(vm_processor *proc)
{
    // First bytecode byte of the protected code. The +4 skips the function
    // entry code the compiler emits in front of the embedded bytes.
    // NOTE(review): that size depends on compiler and target; confirm it for
    // the build in use.
    unsigned char *entry = (unsigned char *) target_func + 4;

    // Interpret one instruction per iteration until eip reaches a RET opcode.
    for (proc->eip = entry; *proc->eip != RET; ) {
        exec_opcode(proc);
    }
}

其中target_func是被保护的代码，解释器用eip逐条执行target_func里的字节码。解释每条字节码时，首先判断它是哪一条指令，接着调用该指令对应的处理函数。
了解了整体思路之后，程序的运行过程其实很简单，直接看完整代码就能懂。

完整代码

codevm.h

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define OPCODE_NUM 7 // number of opcodes; also the length of the processor's dispatch table
#define HEAP_SIZE_MAX 1024 // size in bytes of the buffer allocated for heap_buf

char *heap_buf; // data buffer the bytecode addresses by offset; stands in for the VM's heap/stack
// Opcode bytes of the custom instruction set. Each value is the first byte
// of an encoded instruction in the bytecode stream.
enum OPCODES
{
    MOV       = 0xA0, // mov
    XOR       = 0xA1, // xor
    CMP       = 0xA2, // cmp
    RET       = 0xA3, // ret
    SYS_READ  = 0xA4, // read system call
    SYS_WRITE = 0xA5, // write system call
    JNZ       = 0xA6  // jnz
};

// Register selector bytes. The values 0x10..0x14 match the selector byte that
// vm_mov decodes after the MOV opcode.
enum REGISTERS
{
    R1   = 0x10,
    R2   = 0x11,
    R3   = 0x12,
    R4   = 0x13,
    EIP  = 0x14, // NOTE(review): vm_mov treats selector 0x14 as "load one byte of heap_buf into r1", not as eip
    FLAG = 0x15  // no case for this value in vm_mov
};
// One dispatch-table entry: an opcode byte paired with the function that
// interprets it.
struct opcode_t
{
    unsigned char opcode;  // opcode byte value
    void (*func)(void *);  // handler called for this opcode
};
typedef struct opcode_t vm_opcode;
// Virtual processor: the complete register state of the interpreter plus its
// opcode dispatch table.
struct processor_t
{
    int r1;                          // virtual general-purpose register r1
    int r2;                          // virtual general-purpose register r2
    int r3;                          // virtual general-purpose register r3
    int r4;                          // virtual general-purpose register r4
    int flag;                        // virtual flag register, plays a role similar to eflags
    unsigned char *eip;              // virtual instruction pointer: address of the bytecode byte being interpreted
    vm_opcode op_table[OPCODE_NUM];  // dispatch table: every opcode with its handler
};
typedef struct processor_t vm_processor;

codevm.c

# include "codevm.h"
// The protected code: its native instructions are replaced by custom
// bytecode, emitted as raw bytes into the function body.
//
// Nothing in this file calls this function; vm_interp reads its bytes as data.
// The number at the start of each comment is the byte offset of that
// instruction from the first bytecode byte.
//
// Every jnz displacement is relative to the end of the 2-byte jnz itself and
// must land on ERROR (offset 121). The last one was 0x0f, which landed at
// offset 122, in the middle of the ERROR mov, so a mismatch on input[7] hung
// the interpreter. It is now 0x0e, following the -0x0b pattern of the others.
void target_func()
{
    __asm__ __volatile__(
        ".byte 0xa0, 0x10, 0x00, 0x00, 0x00, 0x00\n\t" /*   0: mov r1, 0x00   ; heap offset of the input buffer */
        ".byte 0xa0, 0x11, 0x12, 0x00, 0x00, 0x00\n\t" /*   6: mov r2, 0x12   ; number of bytes to read */
        ".byte 0xa4\n\t"                               /*  12: read           ; read the user input */

        ".byte 0xa0, 0x14, 0x00, 0x00, 0x00, 0x00\n\t" /*  13: mov r1, input[0] */
        ".byte 0xa0, 0x11, 0x29, 0x00, 0x00, 0x00\n\t" /*  19: mov r2, 0x29   ; xor key, kept in r2 for all checks */
        ".byte 0xa1\n\t"                               /*  25: xor r1, r2 */
        ".byte 0xa2, 0x20\n\t"                         /*  26: cmp r1, flag[0] */
        ".byte 0xa6, 0x5b\n\t"                         /*  28: jnz ERROR      ; taken when the bytes differ */

        /* The same check for input[1] .. input[7]: mov / xor / cmp / jnz ERROR. */
        ".byte 0xa0, 0x14, 0x01, 0x00, 0x00, 0x00, 0xa1, 0xa2, 0x21, 0xa6, 0x50\n\t" /*  30 */
        ".byte 0xa0, 0x14, 0x02, 0x00, 0x00, 0x00, 0xa1, 0xa2, 0x22, 0xa6, 0x45\n\t" /*  41 */
        ".byte 0xa0, 0x14, 0x03, 0x00, 0x00, 0x00, 0xa1, 0xa2, 0x23, 0xa6, 0x3a\n\t" /*  52 */
        ".byte 0xa0, 0x14, 0x04, 0x00, 0x00, 0x00, 0xa1, 0xa2, 0x24, 0xa6, 0x2f\n\t" /*  63 */
        ".byte 0xa0, 0x14, 0x05, 0x00, 0x00, 0x00, 0xa1, 0xa2, 0x25, 0xa6, 0x24\n\t" /*  74 */
        ".byte 0xa0, 0x14, 0x06, 0x00, 0x00, 0x00, 0xa1, 0xa2, 0x26, 0xa6, 0x19\n\t" /*  85 */
        ".byte 0xa0, 0x14, 0x07, 0x00, 0x00, 0x00, 0xa1, 0xa2, 0x27, 0xa6, 0x0e\n\t" /*  96 */

        ".byte 0xa0, 0x10, 0x30, 0x00, 0x00, 0x00\n\t" /* 107: mov r1, 0x30   ; heap offset of the success message */
        ".byte 0xa0, 0x11, 0x09, 0x00, 0x00, 0x00\n\t" /* 113: mov r2, 0x09   ; its length */
        ".byte 0xa5\n\t"                               /* 119: write */
        ".byte 0xa3\n\t"                               /* 120: ret */

        /* ERROR: */
        ".byte 0xa0, 0x10, 0x40, 0x00, 0x00, 0x00\n\t" /* 121: mov r1, 0x40   ; heap offset of the error message */
        ".byte 0xa0, 0x11, 0x07, 0x00, 0x00, 0x00\n\t" /* 127: mov r2, 0x07   ; its length */
        ".byte 0xa5\n\t"                               /* 133: write */
        ".byte 0xa3"                                   /* 134: ret */
    );
}
// Handler for XOR: r1 = r1 ^ r2.
void vm_xor(vm_processor *proc)
{
    // Both operands are implicit (r1 and r2); the result replaces r1.
    proc->r1 ^= proc->r2;
    // The instruction is just the opcode byte, so step over one byte.
    proc->eip++;
}
// Handler for CMP: compares r1 with one byte of heap_buf and records the
// result in flag (1 = equal, 0 = not equal).
void vm_cmp(vm_processor *proc)
{
    // The byte following the opcode is the heap_buf offset of the right-hand operand.
    unsigned char offset = proc->eip[1];
    char rhs = heap_buf[offset];

    proc->flag = (proc->r1 == rhs) ? 1 : 0;

    // Two-byte instruction: opcode + offset.
    proc->eip += 2;
}
// Handler for JNZ: jumps forward when the previous comparison found a
// mismatch (flag == 0); otherwise clears flag and falls through.
void vm_jnz(vm_processor *proc)
{
    // The byte after the opcode is an unsigned displacement, counted from the
    // end of this instruction.
    unsigned char displacement = proc->eip[1];

    // Step over the two-byte instruction (opcode + displacement) first.
    proc->eip += 2;

    if (proc->flag != 0) {
        // Operands were equal: no jump, reset flag.
        proc->flag = 0;
        return;
    }

    // Operands differed: take the jump by moving eip directly.
    proc->eip += displacement;
}
// Handler for RET. Deliberately empty: it changes no processor state.
// vm_interp stops its loop when it sees the RET opcode, before dispatching it.
void vm_ret(vm_processor *proc)
{
    (void)proc; // nothing to do
}
// Handler for SYS_READ: reads from standard input into heap_buf.
// Operands are implicit: r1 is the destination offset inside heap_buf and r2
// is the number of bytes requested.
void vm_read(vm_processor *proc)
{
    // Forward straight to read(2). The return value is not inspected, so a
    // short read or an error goes unnoticed here.
    read(STDIN_FILENO, heap_buf + proc->r1, proc->r2);

    // The instruction is just the opcode byte.
    proc->eip++;
}
// Handler for SYS_WRITE: writes bytes from heap_buf to standard output.
// Operands are implicit, as for SYS_READ: r1 is the source offset inside
// heap_buf and r2 is the number of bytes to write.
void vm_write(vm_processor *proc)
{
    // Forward straight to write(2); the return value is not inspected.
    write(STDOUT_FILENO, heap_buf + proc->r1, proc->r2);

    // The instruction is just the opcode byte.
    proc->eip++;
}
// Handler for MOV.
//
// Encoding (6 bytes): opcode, one selector byte, then an int-sized operand
// stored in the code stream in host byte order. Two forms are implemented:
//   selector 0x10..0x13: load the operand, as an immediate, into r1..r4
//   selector 0x14:       treat the operand as an offset into heap_buf and
//                        load the byte found there into r1
// Any other selector leaves the registers untouched; eip still advances.
void vm_mov(vm_processor *proc)
{
    const unsigned char selector = proc->eip[1];
    int operand;

    // The operand sits at an arbitrary byte offset in the code stream. Copy it
    // out rather than dereferencing a cast int pointer, which would be a
    // misaligned, aliasing-violating access.
    memcpy(&operand, proc->eip + 2, sizeof operand);

    switch (selector) {
        case 0x10:
            proc->r1 = operand;
            break;
        case 0x11:
            proc->r2 = operand;
            break;
        case 0x12:
            proc->r3 = operand;
            break;
        case 0x13:
            proc->r4 = operand;
            break;
        case 0x14:
            // NOTE(review): the offset is not range-checked against the size of heap_buf.
            proc->r1 = heap_buf[operand];
            break;
        default:
            // Unknown selector: nothing is written.
            break;
    }

    // Step over the whole 6-byte instruction.
    proc->eip += 6;
}
// Execute the single instruction that proc->eip points at.
//
// Looks up the opcode byte at *proc->eip in proc->op_table and calls the
// matching handler with proc as its argument. This function never moves eip
// itself; any advance is done by the handler.
//
// If no table entry matches, the byte cannot be decoded. Nothing would move
// eip in that case, so a caller that loops on this function would spin
// forever; instead the offending byte is reported on stderr and the process
// exits with EXIT_FAILURE.
void exec_opcode(vm_processor *proc)
{
    const unsigned char op = *proc->eip;

    for (int i = 0; i < OPCODE_NUM; i++) {
        if (proc->op_table[i].opcode == op) {
            // Found the handler for this opcode: let it interpret the instruction.
            proc->op_table[i].func((void *)proc);
            return;
        }
    }

    fprintf(stderr, "vm: unknown opcode 0x%02x\n", (unsigned)op);
    exit(EXIT_FAILURE);
}
// Interpreter main loop: runs the bytecode embedded in target_func.
void vm_interp(vm_processor *proc)
{
    // First bytecode byte of the protected code. The +4 skips the function
    // entry code the compiler emits in front of the embedded bytes.
    // NOTE(review): that size depends on compiler and target; confirm it for
    // the build in use.
    unsigned char *entry = (unsigned char *) target_func + 4;

    // Interpret one instruction per iteration until eip reaches a RET opcode.
    for (proc->eip = entry; *proc->eip != RET; ) {
        exec_opcode(proc);
    }
}
// Initialise the virtual processor and the global data buffer.
//
// Clears r1..r4 and flag, fills the dispatch table with one entry per opcode,
// allocates heap_buf (HEAP_SIZE_MAX bytes) and stores the constants the
// bytecode refers to by offset:
//   0x20: "syclover"   (8 bytes)  expected result of the xor check
//   0x30: "success!\n" (9 bytes)
//   0x40: "error!\n"   (7 bytes)
// proc->eip is not set here; vm_interp assigns it.
//
// If the allocation fails, the error is reported and the process exits with
// EXIT_FAILURE. The buffer is owned by the global heap_buf and is not freed
// by this function.
void init_vm_proc(vm_processor *proc)
{
    proc->r1 = 0;
    proc->r2 = 0;
    proc->r3 = 0;
    proc->r4 = 0;
    proc->flag = 0;

    // Bind each opcode to its handler. Handlers take a vm_processor *, while
    // the table stores them as void (*)(void *), hence the casts.
    proc->op_table[0].opcode = MOV;
    proc->op_table[0].func = (void (*)(void *)) vm_mov;

    proc->op_table[1].opcode = XOR;
    proc->op_table[1].func = (void (*)(void *)) vm_xor;

    proc->op_table[2].opcode = CMP;
    proc->op_table[2].func = (void (*)(void *)) vm_cmp;

    proc->op_table[3].opcode = SYS_READ;
    proc->op_table[3].func = (void (*)(void *)) vm_read;

    proc->op_table[4].opcode = SYS_WRITE;
    proc->op_table[4].func = (void (*)(void *)) vm_write;

    proc->op_table[5].opcode = RET;
    proc->op_table[5].func = (void (*)(void *)) vm_ret;

    proc->op_table[6].opcode = JNZ;
    proc->op_table[6].func = (void (*)(void *)) vm_jnz;

    // Zero-filled allocation: vm_read does not check how many bytes it
    // received, so after a short read the bytecode would otherwise compare
    // indeterminate bytes.
    heap_buf = calloc(HEAP_SIZE_MAX, 1);
    if (heap_buf == NULL) {
        perror("calloc");
        exit(EXIT_FAILURE);
    }

    // Constants referenced by the bytecode.
    memcpy(heap_buf + 0x20, "syclover", 8);
    memcpy(heap_buf + 0x30, "success!\n", 9);
    memcpy(heap_buf + 0x40, "error!\n", 7);
}
// Expected input (the flag): ZPJEF_L[
// Each of its bytes XOR 0x29 yields the matching byte of "syclover".
int main()
{
    vm_processor cpu = {0};

    init_vm_proc(&cpu); // set up registers, dispatch table and data buffer
    vm_interp(&cpu);    // interpret the bytecode embedded in target_func

    return 0;
}

虚拟机保护效果

我的运行环境是Ubuntu16.04 x64，运行结果如下：

用IDA打开的结果如下：

总结

这里只是对虚拟机代码保护的原理进行介绍，实际应用中不会这么简单，比如，需要考虑如何将native指令替换为自定义字节码等。想要深入学习虚拟化技术还是非常复杂的。很多国外的文章中还提到过一种基于LLVM-IR的虚拟机保护，有机会要继续学习一下。
reference
动手实现代码虚拟机