PE结构

梳理下PE的文件结构

PE整体结构

PE结构可以大致分为:

  • DOS部分
  • NT头
  • 节表(块表)
  • 节数据(块数据)
  • 调试信息

DOS头

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// Legacy MS-DOS header found at the very start of the file.
// For PE parsing only two members matter: e_magic (the DOS signature) and
// e_lfanew (the file offset at which the NT headers begin).
typedef struct _IMAGE_DOS_HEADER {      // DOS .EXE header
WORD e_magic; // Magic number (DOS signature; a fixed marker that never changes)
WORD e_cblp; // Bytes on last page of file
WORD e_cp; // Pages in file
WORD e_crlc; // Relocations
WORD e_cparhdr; // Size of header in paragraphs
WORD e_minalloc; // Minimum extra paragraphs needed
WORD e_maxalloc; // Maximum extra paragraphs needed
WORD e_ss; // Initial (relative) SS value
WORD e_sp; // Initial SP value
WORD e_csum; // Checksum
WORD e_ip; // Initial IP value
WORD e_cs; // Initial (relative) CS value
WORD e_lfarlc; // File address of relocation table
WORD e_ovno; // Overlay number
WORD e_res[4]; // Reserved words
WORD e_oemid; // OEM identifier (for e_oeminfo)
WORD e_oeminfo; // OEM information; e_oemid specific
WORD e_res2[10]; // Reserved words
LONG e_lfanew; // File address of new exe header (offset of the NT headers)
} IMAGE_DOS_HEADER, *PIMAGE_DOS_HEADER;

IMAGE_DOS_HEADER结构体的大小为64字节。在该结构中必须知道两个重要成员: e_magic与e_lfanew

  • e_magic: DOS签名(signature)
  • e_lfanew: 指示NT头的偏移

主要通过它里面的e_magic和e_lfanew来判断该文件是否是PE文件格式。

NT头

PE文件头由PE文件头标志,标准PE头,扩展PE头三部分组成。PE文件头标志自然是50 45 00 00,也就是’PE’(后跟两个0字节),我们从结构体的角度看一下PE文件头的详细信息

1
2
3
4
5
// NT headers (32-bit variant): the PE signature followed by the file header
// and the optional header.
typedef struct _IMAGE_NT_HEADERS {
DWORD Signature; // NT header signature => 4 bytes
IMAGE_FILE_HEADER FileHeader; // File header => 20 bytes
IMAGE_OPTIONAL_HEADER32 OptionalHeader; // Optional (extended PE) header => 224 bytes (0xE0) for 32-bit, 240 bytes (0xF0) for 64-bit
} IMAGE_NT_HEADERS32, *PIMAGE_NT_HEADERS32;

文件头

文件头是表现文件大致属性的IMAGE_FILE_HEADER结构体。

1
2
3
4
5
6
7
8
9
// File header: coarse properties of the image (target machine, section count,
// optional-header size and attribute flags).
typedef struct _IMAGE_FILE_HEADER {
WORD Machine; // Target platform: any = 0, Intel 386 and later = 14C, x64 = 8664 (hex)
WORD NumberOfSections; // Number of sections
DWORD TimeDateStamp; // Timestamp filled in by the compiler
DWORD PointerToSymbolTable; // Debugging related
DWORD NumberOfSymbols; // Debugging related
WORD SizeOfOptionalHeader; // Size of the optional (extended PE) header
WORD Characteristics; // File attributes => bit flags; convert the hex value to binary and look up which bits are set
} IMAGE_FILE_HEADER, *PIMAGE_FILE_HEADER;

这个结构体主要包含了,CPU的Machine码,节区数量,需要装载的可选头的大小和文件属性等信息。

可选头

可选头包含了很多信息,重点关注:

  • 代码起始地址 AddressOfEntryPoint
  • 加载基址 ImageBase。执行PE文件时,PE装载器先创建进程,再将文件载入内存,然后再把EIP寄存器的值设为ImageBase+AddressOfEntryPoint
  • 文件对齐和节区对齐的值
  • PE头的大小和Image的大小
  • IMAGE_DATA_DIRECTORY的表(包含了很多重要的表,例如导入表导出表等)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// Optional header (32-bit layout). Key members: AddressOfEntryPoint, ImageBase,
// SectionAlignment / FileAlignment, SizeOfImage / SizeOfHeaders and the
// DataDirectory array (import table, export table, ...).
typedef struct _IMAGE_OPTIONAL_HEADER {
//
// Standard fields.
//

WORD Magic; // PE32: 10B, PE64: 20B (hex)
BYTE MajorLinkerVersion;
BYTE MinorLinkerVersion;
DWORD SizeOfCode; // Total size of all code sections; filled in by the compiler, not relied upon (may be altered)
DWORD SizeOfInitializedData; // Total size of all initialized-data sections; filled in by the compiler, not relied upon (may be altered)
DWORD SizeOfUninitializedData; // Total size of all uninitialized-data sections; filled in by the compiler, not relied upon (may be altered)
DWORD AddressOfEntryPoint; // RVA of the program entry point
DWORD BaseOfCode; // RVA where the code section starts
DWORD BaseOfData; // RVA where the data section starts

//
// NT additional fields.
//

DWORD ImageBase; // Preferred base address of the image in memory (default load address)
DWORD SectionAlignment; // Alignment of sections in memory
DWORD FileAlignment; // Alignment of sections in the file (improves runtime efficiency)
WORD MajorOperatingSystemVersion;
WORD MinorOperatingSystemVersion;
WORD MajorImageVersion;
WORD MinorImageVersion;
WORD MajorSubsystemVersion;
WORD MinorSubsystemVersion;
DWORD Win32VersionValue;
DWORD SizeOfImage; // Size of the whole PE image once mapped in memory; may exceed the real size, must be a multiple of SectionAlignment
DWORD SizeOfHeaders; // Size of all headers plus the section table, rounded up to the file alignment
DWORD CheckSum; // Image checksum; required for some system .dll files to detect modification
WORD Subsystem;
WORD DllCharacteristics; // Image characteristics (not specific to DLLs); bit flags, look the set bits up in the attribute table
DWORD SizeOfStackReserve;
DWORD SizeOfStackCommit;
DWORD SizeOfHeapReserve;
DWORD SizeOfHeapCommit;
DWORD LoaderFlags;
DWORD NumberOfRvaAndSizes;
IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES]; // Data directory table (array of structures)
} IMAGE_OPTIONAL_HEADER32, *PIMAGE_OPTIONAL_HEADER32;

节区头

节区头是由IMAGE_SECTION_HEADER结构体组成的数组,每个结构体对应一个节区。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// One entry of the section table; each entry describes a single section
// (its location in memory and in the file, its sizes and its attributes).
typedef struct _IMAGE_SECTION_HEADER {
BYTE Name[IMAGE_SIZEOF_SHORT_NAME]; // ASCII string, user-definable; only 8 bytes are kept
union { // Real size of the section before alignment; this value may be inaccurate
DWORD PhysicalAddress;
DWORD VirtualSize;
} Misc;
DWORD VirtualAddress; // Offset of the section in memory (RVA)
DWORD SizeOfRawData; // Size of the section in the file after alignment
DWORD PointerToRawData; // Offset of the section within the file
DWORD PointerToRelocations;
DWORD PointerToLinenumbers;
WORD NumberOfRelocations;
WORD NumberOfLinenumbers;
DWORD Characteristics; // Section attributes
} IMAGE_SECTION_HEADER, *PIMAGE_SECTION_HEADER;

主要包含了节区的偏移地址,对齐和节的属性等信息

导出表

导出表(Export Table)一般是DLL文件用的比较多,exe文件很少有导出表,导出表的数据结构如下

1
2
3
4
5
6
7
8
9
10
11
12
13
// Export directory: describes the functions a module (typically a DLL) exports
// through three parallel tables (addresses, names, name ordinals).
typedef struct _IMAGE_EXPORT_DIRECTORY {
DWORD Characteristics;
DWORD TimeDateStamp;
WORD MajorVersion;
WORD MinorVersion;
DWORD Name; // Points to the file-name string of the exporting module
DWORD Base; // Starting ordinal of the exported functions
DWORD NumberOfFunctions; // Total number of exported functions
DWORD NumberOfNames; // Number of functions exported by name
DWORD AddressOfFunctions; // RVA of the export address table
DWORD AddressOfNames; // RVA of the export name table
DWORD AddressOfNameOrdinals; // RVA of the export ordinal table
} IMAGE_EXPORT_DIRECTORY, *PIMAGE_EXPORT_DIRECTORY;

导入表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Import descriptor: one entry per imported DLL. The import directory is an
// array of these entries, ended by an all-zero descriptor.
typedef struct _IMAGE_IMPORT_DESCRIPTOR {
union {
DWORD Characteristics; // 0 for terminating null import descriptor
DWORD OriginalFirstThunk; // RVA of the INT (array of IMAGE_THUNK_DATA)
} DUMMYUNIONNAME;
DWORD TimeDateStamp; // 0 if not bound,
// -1 if bound, and real date\time stamp
// in IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT (new BIND)
// O.W. date/time stamp of DLL bound to (Old BIND)

DWORD ForwarderChain; // -1 if no forwarders
DWORD Name; // RVA of the DLL name, a zero-terminated string
DWORD FirstThunk; // RVA of the IAT (array of IMAGE_THUNK_DATA)
} IMAGE_IMPORT_DESCRIPTOR;
typedef IMAGE_IMPORT_DESCRIPTOR UNALIGNED *PIMAGE_IMPORT_DESCRIPTOR;

可以看到,OriginalFirstThunk 和 FirstThunk 分别指向 INT 和 IAT。在PE文件加载之前,INT 和 IAT 是两个独立的数组,但二者的内容是一样的(每一项都指向同一个 IMAGE_IMPORT_BY_NAME 或同一个序号),只是访问的入口不同而已,下图可以完美的解释

但是上图只是PE文件加载前的情况,PE文件一旦运行起来,就会变成下图的情况

PE-Viewer

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
// ConsoleApplication3.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include <stdio.h>
#include <Windows.h>
#include <stdlib.h>

/*
 * Converts a relative virtual address (RVA) into an offset inside the raw
 * file by locating the section whose range contains it.
 *
 * pNtHeader  NT headers of the image; the section table is taken from here.
 * Rva        RVA to translate.
 *
 * Returns the file offset, or 0 when no section contains the RVA. Before this
 * fix the function fell off its end in that case, which is undefined
 * behaviour for a function returning a value.
 */
DWORD RVAOffset(PIMAGE_NT_HEADERS pNtHeader, DWORD Rva) {
    PIMAGE_SECTION_HEADER pSectionHeader = (PIMAGE_SECTION_HEADER)IMAGE_FIRST_SECTION(pNtHeader);
    for (int i = 0; i < pNtHeader->FileHeader.NumberOfSections; i++) {
        DWORD SectionBeginRva = pSectionHeader[i].VirtualAddress;
        DWORD SectionEndRva = SectionBeginRva + pSectionHeader[i].SizeOfRawData;
        /* Half-open range [begin, end): the end address itself belongs to
           whatever follows, not to this section. */
        if (Rva >= SectionBeginRva && Rva < SectionEndRva) {
            return (Rva - SectionBeginRva) + pSectionHeader[i].PointerToRawData;
        }
    }
    return 0;
}

int main(int argc, char* argv[])
{
HANDLE hFile;
HANDLE hMapping;
WCHAR szFilePath[MAX_PATH];
LPVOID ImageBase;
PIMAGE_DOS_HEADER pDH = NULL;//指向IMAGE_DOS结构的指针
PIMAGE_NT_HEADERS pNtH = NULL;//指向IMAGE_NT结构的指针
PIMAGE_FILE_HEADER pFH = NULL;//指向IMAGE_FILE结构的指针
PIMAGE_OPTIONAL_HEADER pOH = NULL;//指向IMAGE_OPTIONALE结构的指针
OPENFILENAME ofn;//定义结构,调用打开对话框选择要分析的文件及其保存路径

memset(szFilePath, 0, MAX_PATH);
memset(&ofn, 0, sizeof(ofn));

//打开一个窗口,选择文件
ofn.lStructSize = sizeof(ofn);
ofn.hwndOwner = NULL;
ofn.hInstance = GetModuleHandle(NULL);
ofn.nMaxFile = MAX_PATH;
ofn.Flags = OFN_PATHMUSTEXIST | OFN_FILEMUSTEXIST | OFN_HIDEREADONLY;
ofn.lpstrInitialDir = L".";
ofn.lpstrFile = szFilePath;
ofn.lpstrTitle = L"chose a PE file --by w00d";
ofn.lpstrFilter = L"*.*\0*.*\0";
if (!GetOpenFileName(&ofn)) {
printf("打开文件错误:%d\n", GetLastError());
return 0;
}

hFile = CreateFile(szFilePath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
if (!hFile) {
MessageBox(NULL, L"打开文件错误", NULL, MB_OK);
return 0;
}

hMapping = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
if (!hMapping) {
printf("创建映射错误%d", GetLastError());
CloseHandle(hFile);
return 0;
}
ImageBase = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
if (!ImageBase) {
printf("文件映射错误:%d", GetLastError());
CloseHandle(hMapping);
CloseHandle(hFile);
return 0;
}
/************************************************************************/
/* PE头的判断 */
/************************************************************************/
printf("--------------------PEheader------------------------\n");
pDH = (PIMAGE_DOS_HEADER)ImageBase;
if (pDH->e_magic != IMAGE_DOS_SIGNATURE) //判断是否是MZ
{
printf("Not a valid PE file 2!\n");
CloseHandle(hMapping);
CloseHandle(hFile);
return 0;
}

pNtH = (PIMAGE_NT_HEADERS)((DWORD)pDH + pDH->e_lfanew); //判断是否为PE格式
if (pNtH->Signature != IMAGE_NT_SIGNATURE)
{
printf("Not a valid PE file 3!\n");
CloseHandle(hMapping);
CloseHandle(hFile);
return 0;
}
printf("PE e_lfanew is: 0x%x\n", pNtH);
/************************************************************************/
/* FileHeader */
/************************************************************************/
pFH = &pNtH->FileHeader;
printf("-----------------FileHeader------------------------\n");
printf("NumberOfSections: %d\n", pFH->NumberOfSections);
printf("SizeOfOptionalHeader: %d\n", pFH->SizeOfOptionalHeader);
/************************************************************************/
/* OptionalHeader */
/************************************************************************/
pOH = &pNtH->OptionalHeader;
printf("-----------------OptionalHeader---------------------\n");
printf("SizeOfCode:0x%08x\n", pOH->SizeOfCode);
printf("AddressOfEntryPoint: 0x%08X\n", pOH->AddressOfEntryPoint);
printf("ImageBase is 0x%x\n", ImageBase);
printf("SectionAlignment: 0x%08x\n", pOH->SectionAlignment);
printf("FileAlignment: 0x%08x\n", pOH->FileAlignment);
printf("SizeOfImage: 0x%08x\n", pOH->SizeOfImage);
printf("SizeOfHeaders: 0x%08x\n", pOH->SizeOfHeaders);
printf("NumberOfRvaAndSizes: 0x%08x\n", pOH->NumberOfRvaAndSizes);
/************************************************************************/
/* SectionTable */
/************************************************************************/
int SectionNumber = 0;
DWORD SectionHeaderOffset = (DWORD)pNtH + 24 + (DWORD)pFH->SizeOfOptionalHeader; //节表位置的计算
printf("--------------------SectionTable---------------------\n");
for (SectionNumber; SectionNumber < pFH->NumberOfSections; SectionNumber++) {
PIMAGE_SECTION_HEADER pSh = (PIMAGE_SECTION_HEADER)(SectionHeaderOffset + 40 * SectionNumber);
printf("%d 's Name is %s\n", SectionNumber + 1, pSh->Name);
printf("VirtualAddress: 0x%08X\n", (DWORD)pSh->VirtualAddress);
printf("SizeOfRawData: 0x%08X\n", (DWORD)pSh->SizeOfRawData);
printf("PointerToRawData: 0x%08X\n", (DWORD)pSh->PointerToRawData);
}
/************************************************************************/
/* ExportTable */
/************************************************************************/
printf("--------------------ExportTable----------------------\n");
DWORD Export_table_offset = RVAOffset(pNtH, (DWORD)pNtH->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress);
PIMAGE_EXPORT_DIRECTORY pExportDirectory = (PIMAGE_EXPORT_DIRECTORY)((DWORD)ImageBase + Export_table_offset);
DWORD EXport_table_offset_Name = (DWORD)ImageBase + RVAOffset(pNtH, pExportDirectory->Name);
DWORD * pNameOfAddress = (DWORD *)((DWORD)ImageBase + RVAOffset(pNtH, pExportDirectory->AddressOfNames));
DWORD * pFunctionOfAdress = (DWORD *)((DWORD)ImageBase + RVAOffset(pNtH, pExportDirectory->AddressOfFunctions));
WORD * pNameOrdinalOfAddress = (WORD *)((DWORD)ImageBase + RVAOffset(pNtH, pExportDirectory->AddressOfNameOrdinals));
printf("Name:%s\n", EXport_table_offset_Name);
printf("NameOfAddress:%08X\n", RVAOffset(pNtH, pExportDirectory->AddressOfNames));
printf("FunctionOfAdress:%08X\n", RVAOffset(pNtH, pExportDirectory->AddressOfFunctions));
printf("NameOrdinalOfAddress:%08X\n", RVAOffset(pNtH, pExportDirectory->AddressOfNameOrdinals));
if (pExportDirectory->NumberOfFunctions == 0) {
puts("!!!!!!!!!!!!!!!!!NO EXPORT!!!!!!!!!!!!!!!!!!!!!");
if (hFile != INVALID_HANDLE_VALUE)
{
CloseHandle(hFile);
}
if (hMapping != NULL)
{
CloseHandle(hMapping);
}
if (ImageBase != NULL)
{
UnmapViewOfFile(ImageBase);
}
}
printf("NumberOfNames:%d\n", pExportDirectory->NumberOfNames);
printf("NumberOfFunctions:%d\n", pExportDirectory->NumberOfFunctions);
/************************************************************************/
/* ImportTable */
/************************************************************************/
printf("--------------------ImportTable----------------------\n");
int cont = 0;
do
{
DWORD dwImportOffset = RVAOffset(pNtH, pNtH->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress);
dwImportOffset = dwImportOffset + cont;
PIMAGE_IMPORT_DESCRIPTOR pImport = (PIMAGE_IMPORT_DESCRIPTOR)((DWORD)ImageBase + dwImportOffset);
if (pImport->OriginalFirstThunk == 0 && pImport->TimeDateStamp == 0 && pImport->ForwarderChain == 0 && pImport->Name == 0 && pImport->FirstThunk == 0)
break;
DWORD dwOriginalFirstThunk = (DWORD)ImageBase + RVAOffset(pNtH, pImport->OriginalFirstThunk);
DWORD dwFirstThunk = (DWORD)ImageBase + RVAOffset(pNtH, pImport->FirstThunk);
DWORD dwName = (DWORD)ImageBase + RVAOffset(pNtH, pImport->Name);
printf("---------Import File Name: %s\n", dwName);
if (dwOriginalFirstThunk == 0x00000000)
{
dwOriginalFirstThunk = dwFirstThunk;
}
DWORD* pdwTrunkData = (DWORD*)dwOriginalFirstThunk;
int n = 0, x = 0;
while (pdwTrunkData[n] != 0) {
DWORD TrunkData = pdwTrunkData[n];
if (TrunkData < IMAGE_ORDINAL_FLAG32)//名字导入
{
PIMAGE_IMPORT_BY_NAME pInportByName = (PIMAGE_IMPORT_BY_NAME)((DWORD)ImageBase + RVAOffset(pNtH, TrunkData));
printf("ImportByName: %s\n", pInportByName->Name);
}
else
{
DWORD FunNumber = (DWORD)(TrunkData - IMAGE_ORDINAL_FLAG32);
printf("ImportByNumber: %-4d \n", FunNumber);
}
if (x != 0 && x % 3 == 0) printf("\n");
n++;
x++;
}
cont = cont + 40;//其实这里的40不是太理解,这个导入表不应该只有一个么,为什么是个循环
} while (1);
{
if (ImageBase)
{
UnmapViewOfFile(ImageBase);
}
if (hMapping)
{
CloseHandle(hMapping);
}
if (hFile != INVALID_HANDLE_VALUE)
{
CloseHandle(hFile);
}
return 0;
}
return 0;
}
-------------本文结束感谢您的阅读-------------
+ +