首页
社区
课程
招聘
[原创] 笔记 - 学习使用活跃变量分析来清除基本块内的垃圾指令
发表于: 2025-1-19 15:28 2334

[原创] 笔记 - 学习使用活跃变量分析来清除基本块内的垃圾指令

2025-1-19 15:28
2334

理论

图片描述
图片描述
图片描述
图片描述

判断是否为垃圾指令(insn 表示一条指令):
def[insn] ≠ ∅ 且 def[insn] ∩ out[insn] = ∅ <==> insn 是垃圾指令(def 为空集的指令不参与判断,直接跳过)

代码演示

使用x86汇编代码做例子
代码使用了capstone反汇编库

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

#include <capstone/capstone.h>
#pragma comment(lib, "capstone.lib")
#define max(a,b) (((a) > (b)) ? (a) : (b))
#define min(a,b) (((a) < (b)) ? (a) : (b))
 
// Set difference: returns the elements of `a` that do not occur in `b`.
//
// Both vectors are taken by value because they are sorted locally before
// std::set_difference runs. When both inputs are non-empty the result is
// sorted ascending; when `b` is empty, `a` is handed back exactly as passed
// in (unsorted). T must be default-constructible and ordered by operator<.
template <typename T>
std::vector<T> my_set_diff(std::vector<T> a, std::vector<T> b) {
    if (a.empty()) {
        return {};
    }
    if (b.empty()) {
        return a;
    }
    std::sort(a.begin(), a.end());
    std::sort(b.begin(), b.end());
    // A difference can never hold more elements than `a`: size the output for
    // that worst case, write straight into it and trim the unused tail. This
    // avoids both the function-like max() macro and a second copy of the result.
    std::vector<T> ret(a.size());
    ret.erase(std::set_difference(a.begin(), a.end(), b.begin(), b.end(), ret.begin()), ret.end());
    return ret;
}
// Set union: returns every element that occurs in `a` or in `b`.
//
// Both vectors are taken by value because they are sorted locally before
// std::set_union runs. When both inputs are non-empty the result is sorted
// ascending; when one side is empty the other is handed back exactly as
// passed in (unsorted). T must be default-constructible and ordered by
// operator<.
template <typename T>
std::vector<T> my_set_union(std::vector<T> a, std::vector<T> b) {
    if (a.empty()) {
        return b;
    }
    if (b.empty()) {
        return a;
    }
    std::sort(a.begin(), a.end());
    std::sort(b.begin(), b.end());
    // A union holds at most |a| + |b| elements: size the output for that worst
    // case, write straight into it and trim the unused tail.
    std::vector<T> ret(a.size() + b.size());
    ret.erase(std::set_union(a.begin(), a.end(), b.begin(), b.end(), ret.begin()), ret.end());
    return ret;
}
// Set intersection: returns the elements that occur in both `a` and `b`.
//
// Both vectors are taken by value because they are sorted locally before
// std::set_intersection runs. The result is sorted ascending and is empty
// whenever either input is empty. T must be default-constructible and ordered
// by operator<.
template <typename T>
std::vector<T> my_set_intersection(std::vector<T> a, std::vector<T> b) {
    if (a.empty() || b.empty()) {
        return {};
    }
    std::sort(a.begin(), a.end());
    std::sort(b.begin(), b.end());
    // An intersection is no larger than the smaller input. The bound is
    // computed by hand so the code does not depend on the min() macro.
    const auto capacity = (a.size() < b.size()) ? a.size() : b.size();
    std::vector<T> ret(capacity);
    ret.erase(std::set_intersection(a.begin(), a.end(), b.begin(), b.end(), ret.begin()), ret.end());
    return ret;
}
 
// Disassembles exactly one instruction starting at `codePtr`.
//
// Parameters:
//   hCs      - open Capstone handle.
//   codePtr  - first byte to decode.
//   virAddr  - address assigned to the decoded instruction.
//   codeSize - number of bytes available at `codePtr`.
//   insn     - receives a shallow copy of the decoded cs_insn.
//   detail   - optional buffer receiving a copy of the instruction detail;
//              `insn->detail` is set to this pointer (nullptr when omitted).
//
// Returns CS_ERR_OK when one instruction was decoded, otherwise a non-OK code
// and `*insn` / `*detail` are left untouched.
cs_err disasm(csh hCs, const uint8_t* codePtr, uint64_t virAddr, uint64_t codeSize, cs_insn* insn, cs_detail* detail) {
    cs_insn* decoded = nullptr;
    const size_t decodedCount = cs_disasm(hCs, codePtr, static_cast<size_t>(codeSize), virAddr, 1, &decoded);
    if (decodedCount == 0) {
        // cs_disasm signals failure by returning 0. cs_errno may still report
        // CS_ERR_OK in that case (NOTE(review): seen for undecodable bytes -
        // confirm for the Capstone version in use), and returning that value
        // would make callers treat the untouched `insn` as a valid decode.
        // CS_ERR_SKIPDATA is used as the stand-in for "bytes are not an
        // instruction" because cs_err has no dedicated code for it.
        const cs_err reported = cs_errno(hCs);
        return (reported != CS_ERR_OK) ? reported : CS_ERR_SKIPDATA;
    }

    std::memcpy(insn, decoded, sizeof(cs_insn));
    // decoded->detail is null when CS_OPT_DETAIL is off; copy only if both exist.
    if (detail != nullptr && decoded->detail != nullptr) {
        std::memcpy(detail, decoded->detail, sizeof(cs_detail));
    }
    // Re-point the copy at the caller's buffer: the Capstone-owned detail is
    // released by cs_free below.
    insn->detail = detail;

    cs_free(decoded, 1);
    return CS_ERR_OK;
}
// Decodes one instruction through disasm() and, on success, moves the cursor
// past it: `codePtr` and `virAddr` grow by the instruction length and
// `codeSize` shrinks by the same amount. On failure the cursor is left alone
// and the error from disasm() is returned unchanged.
cs_err disasm_iter(csh hCs, uint8_t*& codePtr, uint64_t& virAddr, uint64_t& codeSize, cs_insn* insn, cs_detail* detail) {
    const cs_err result = disasm(hCs, codePtr, virAddr, codeSize, insn, detail);
    if (result == CS_ERR_OK) {
        const auto consumed = insn->size;
        codePtr += consumed;
        virAddr += consumed;
        codeSize -= consumed;
    }
    return result;
}
// A list of register ids, stored as uint16_t.
using regs = std::vector<uint16_t>;
// Pair of register lists describing what a single instruction touches.
struct accessRegs {
    regs reads;   // registers the instruction reads
    regs writes;  // registers the instruction writes
};
// Asks Capstone (cs_regs_access) which registers `insn` reads and writes and
// returns both lists. The counters start at zero, so if the query fills in
// nothing both lists come back empty.
accessRegs get_regs_access(csh hCs, cs_insn* insn) {
    cs_regs readBuf{};
    cs_regs writeBuf{};
    uint8_t readCount = 0;
    uint8_t writeCount = 0;
    cs_regs_access(hCs, insn, readBuf, &readCount, writeBuf, &writeCount);

    accessRegs result;
    result.reads.assign(readBuf, readBuf + readCount);
    result.writes.assign(writeBuf, writeBuf + writeCount);
    return result;
}
 
// One decoded instruction, kept so it can be re-disassembled and printed later.
struct Insn {
    uint8_t addr[16];   // raw instruction bytes (filled from cs_insn::bytes)
    size_t size;        // instruction length in bytes
    uint64_t viraddr;   // address of the instruction
    std::string opstr;  // "mnemonic\toperands" text used for display
};
// Individual EFLAGS bits tracked by the liveness analysis.
enum class x86_flag : uint8_t { AF, CF, SF, ZF, PF, OF, TF, IF, DF, NT, RF };
// FPU condition-code bits C0..C3 tracked by the liveness analysis.
enum class x86_fpuflag : uint8_t { C0, C1, C2, C3 };
 
// ----------------------------------
std::vector<x86_flag> calc_eflags_use(csh hCs, const Insn& ins); // use set of the instruction over the individual EFLAGS bits
std::vector<x86_flag> calc_eflags_def(csh hCs, const Insn& ins); // def set of the instruction over the individual EFLAGS bits

std::vector<x86_fpuflag> calc_fpuflags_use(csh hCs, const Insn& ins); // use set of the instruction over the individual FPU flag bits
std::vector<x86_fpuflag> calc_fpuflags_def(csh hCs, const Insn& ins); // def set of the instruction over the individual FPU flag bits
 
void RemoveDeadCode(char* x86code, size_t codesize, bool is64, uint64_t virtualAddressBegin) {
    csh hCs = 0;
    if (!cs_support(CS_ARCH_X86)) {
        std::cout << "当前Capstone不支持X86架构" << std::endl;
        return;
    }
    if (cs_open(CS_ARCH_X86, ((is64) ? CS_MODE_64 : CS_MODE_32), &hCs) != CS_ERR_OK) {
        std::cout << "打开Capstone句柄失败" << std::endl;
        return;
    }
    if (cs_option(hCs, CS_OPT_DETAIL, CS_OPT_ON) != CS_ERR_OK) {
        std::cout << "无法开启Detail" << std::endl;
        return;
    }
 
    uint8_t* addr = (uint8_t*)x86code;
    size_t size = codesize;
    uint64_t virAddr = virtualAddressBegin;
 
    std::vector<Insn> bb;
    std::vector<std::vector<Insn>> bbs;
    while (size > 0 && (virAddr < (virtualAddressBegin + codesize))) {
        cs_insn cinsn{};
        cs_detail cdetail{};
        cs_err err = CS_ERR_OK;
        if ((err = disasm_iter(hCs, addr, virAddr, size, &cinsn, &cdetail)) != CS_ERR_OK) {
            std::cout << "反汇编发生错误: " << err << std::endl;
            break;
        }
 
        bool end_sign1 = (cs_insn_group(hCs, &cinsn, X86_GRP_JUMP) ||
            cs_insn_group(hCs, &cinsn, X86_GRP_BRANCH_RELATIVE) ||
            cs_insn_group(hCs, &cinsn, X86_GRP_CALL) && cinsn.id != X86_INS_CALL);
        bool end_sign2 = (cinsn.id == X86_INS_RET || cinsn.id == X86_INS_RETF ||
            cinsn.id == X86_INS_RETFQ || cs_insn_group(hCs, &cinsn, X86_GRP_INT));
        bool end_sign3 = cs_insn_group(hCs, &cinsn, X86_GRP_PRIVILEGE);
 
        if (end_sign1 || end_sign2 || end_sign3) { // 基本块终止,收集过的指令全部加入基本块
            if (!bb.empty()) {
                bbs.push_back(bb);
                bb = {};
            }
        }
        else {
            std::string opstr = cinsn.mnemonic;
            opstr += "\t";
            opstr += cinsn.op_str;
            Insn ins = {};
            ins.opstr = opstr;
            ins.size = cinsn.size;
            ins.viraddr = cinsn.address;
            memcpy(&ins.addr, cinsn.bytes, cinsn.size);
            bb.push_back(ins);
        }
    }
    if (!bb.empty()) { // 退出循环后,把剩下来的指令也加入到基本块中
        bbs.push_back(bb);
        bb = {};
    }
 
    std::cout << "\"---->\" 表示该指令可以被删除" << std::endl;
    bool changed = false; // 迭代标志
    for (std::vector<Insn> mbb : bbs) {
        std::cout << "@@@-------------------------------------------------------";
        std::cout << std::endl;
 
        std::vector<bool> delList;
        delList.resize(mbb.size());
 
        do {
            changed = false;
            if (mbb.empty()) {
                continue;
            }
            std::vector<std::vector<uint16_t>> use;
            std::vector<std::vector<uint16_t>> def;
            std::vector<std::vector<x86_flag>> use_flags;
            std::vector<std::vector<x86_flag>> def_flags;
            std::vector<std::vector<x86_fpuflag>> use_fpuflags;
            std::vector<std::vector<x86_fpuflag>> def_fpuflags;
 
            for (size_t i = 0; i < mbb.size(); i++) {
                // use 和 def 可以直接从汇编代码中解析出来,所以算做已知量
                cs_insn cinsn{};
                cs_detail cdetail{};
                disasm(hCs, mbb[i].addr, mbb[i].viraddr, mbb[i].size, &cinsn, &cdetail);
 
                auto access = get_regs_access(hCs, &cinsn); // 获取指令对寄存器的读写列表
 
                use.push_back(my_set_diff(access.reads, { X86_REG_EFLAGS })); // 指令的 use
                def.push_back(my_set_diff(access.writes, { X86_REG_EFLAGS })); // 指令的 def
 
                use_flags.push_back(calc_eflags_use(hCs, mbb[i])); // 针对指令中对具体标志位的 use
                def_flags.push_back(calc_eflags_def(hCs, mbb[i])); // 针对指令中对具体标志位的 def
 
                use_fpuflags.push_back(calc_fpuflags_use(hCs, mbb[i])); // 针对指令中对具体fpu标志位的 use
                def_fpuflags.push_back(calc_fpuflags_def(hCs, mbb[i])); // 针对指令中对具体fpu标志位的 def
            }
 
            std::vector<std::vector<uint16_t>> in;
            std::vector<std::vector<uint16_t>> out;
            std::vector<std::vector<x86_flag>> in_flags;
            std::vector<std::vector<x86_flag>> out_flags;
            std::vector<std::vector<x86_fpuflag>> in_fpuflags;
            std::vector<std::vector<x86_fpuflag>> out_fpuflags;
 
            in.resize(mbb.size());
            out.resize(mbb.size());
            in_flags.resize(mbb.size());
            out_flags.resize(mbb.size());
            in_fpuflags.resize(mbb.size());
            out_fpuflags.resize(mbb.size());
 
            // 给基本块最后一个指令的out集合赋值的意义是:如果这个基本块退出,有哪些寄存器是会被下一个函数入口所继续使用的
            // 为了保证最大兼容性,索性直接把x86里的所有寄存器都加了进来
            out.back() = { X86_REG_AH, X86_REG_AL, X86_REG_AX, X86_REG_BH, X86_REG_BL,
                X86_REG_BP, X86_REG_BPL, X86_REG_BX, X86_REG_CH, X86_REG_CL,
                X86_REG_CS, X86_REG_CX, X86_REG_DH, X86_REG_DI, X86_REG_DIL,
                X86_REG_DL, X86_REG_DS, X86_REG_DX, X86_REG_EAX, X86_REG_EBP,
                X86_REG_EBX, X86_REG_ECX, X86_REG_EDI, X86_REG_EDX,
                X86_REG_EIP, X86_REG_EIZ, X86_REG_ES, X86_REG_ESI, X86_REG_ESP,
                X86_REG_FPSW, X86_REG_FS, X86_REG_GS, X86_REG_IP, X86_REG_RAX,
                X86_REG_RBP, X86_REG_RBX, X86_REG_RCX, X86_REG_RDI, X86_REG_RDX,
                X86_REG_RIP, X86_REG_RIZ, X86_REG_RSI, X86_REG_RSP, X86_REG_SI,
                X86_REG_SIL, X86_REG_SP, X86_REG_SPL, X86_REG_SS, X86_REG_CR0,
                X86_REG_CR1, X86_REG_CR2, X86_REG_CR3, X86_REG_CR4, X86_REG_CR5,
                X86_REG_CR6, X86_REG_CR7, X86_REG_CR8, X86_REG_CR9, X86_REG_CR10,
                X86_REG_CR11, X86_REG_CR12, X86_REG_CR13, X86_REG_CR14, X86_REG_CR15,
                X86_REG_DR0, X86_REG_DR1, X86_REG_DR2, X86_REG_DR3, X86_REG_DR4,
                X86_REG_DR5, X86_REG_DR6, X86_REG_DR7, X86_REG_DR8, X86_REG_DR9,
                X86_REG_DR10, X86_REG_DR11, X86_REG_DR12, X86_REG_DR13, X86_REG_DR14,
                X86_REG_DR15, X86_REG_FP0, X86_REG_FP1, X86_REG_FP2, X86_REG_FP3,
                X86_REG_FP4, X86_REG_FP5, X86_REG_FP6, X86_REG_FP7,
                X86_REG_K0, X86_REG_K1, X86_REG_K2, X86_REG_K3, X86_REG_K4,
                X86_REG_K5, X86_REG_K6, X86_REG_K7, X86_REG_MM0, X86_REG_MM1,
                X86_REG_MM2, X86_REG_MM3, X86_REG_MM4, X86_REG_MM5, X86_REG_MM6,
                X86_REG_MM7, X86_REG_R8, X86_REG_R9, X86_REG_R10, X86_REG_R11,
                X86_REG_R12, X86_REG_R13, X86_REG_R14, X86_REG_R15,
                X86_REG_ST0, X86_REG_ST1, X86_REG_ST2, X86_REG_ST3,
                X86_REG_ST4, X86_REG_ST5, X86_REG_ST6, X86_REG_ST7,
                X86_REG_XMM0, X86_REG_XMM1, X86_REG_XMM2, X86_REG_XMM3, X86_REG_XMM4,
                X86_REG_XMM5, X86_REG_XMM6, X86_REG_XMM7, X86_REG_XMM8, X86_REG_XMM9,
                X86_REG_XMM10, X86_REG_XMM11, X86_REG_XMM12, X86_REG_XMM13, X86_REG_XMM14,
                X86_REG_XMM15, X86_REG_XMM16, X86_REG_XMM17, X86_REG_XMM18, X86_REG_XMM19,
                X86_REG_XMM20, X86_REG_XMM21, X86_REG_XMM22, X86_REG_XMM23, X86_REG_XMM24,
                X86_REG_XMM25, X86_REG_XMM26, X86_REG_XMM27, X86_REG_XMM28, X86_REG_XMM29,
                X86_REG_XMM30, X86_REG_XMM31, X86_REG_YMM0, X86_REG_YMM1, X86_REG_YMM2,
                X86_REG_YMM3, X86_REG_YMM4, X86_REG_YMM5, X86_REG_YMM6, X86_REG_YMM7,
                X86_REG_YMM8, X86_REG_YMM9, X86_REG_YMM10, X86_REG_YMM11, X86_REG_YMM12,
                X86_REG_YMM13, X86_REG_YMM14, X86_REG_YMM15, X86_REG_YMM16, X86_REG_YMM17,
                X86_REG_YMM18, X86_REG_YMM19, X86_REG_YMM20, X86_REG_YMM21, X86_REG_YMM22,
                X86_REG_YMM23, X86_REG_YMM24, X86_REG_YMM25, X86_REG_YMM26, X86_REG_YMM27,
                X86_REG_YMM28, X86_REG_YMM29, X86_REG_YMM30, X86_REG_YMM31, X86_REG_ZMM0,
                X86_REG_ZMM1, X86_REG_ZMM2, X86_REG_ZMM3, X86_REG_ZMM4, X86_REG_ZMM5,
                X86_REG_ZMM6, X86_REG_ZMM7, X86_REG_ZMM8, X86_REG_ZMM9, X86_REG_ZMM10,
                X86_REG_ZMM11, X86_REG_ZMM12, X86_REG_ZMM13, X86_REG_ZMM14, X86_REG_ZMM15,
                X86_REG_ZMM16, X86_REG_ZMM17, X86_REG_ZMM18, X86_REG_ZMM19, X86_REG_ZMM20,
                X86_REG_ZMM21, X86_REG_ZMM22, X86_REG_ZMM23, X86_REG_ZMM24, X86_REG_ZMM25,
                X86_REG_ZMM26, X86_REG_ZMM27, X86_REG_ZMM28, X86_REG_ZMM29, X86_REG_ZMM30,
                X86_REG_ZMM31, X86_REG_R8B, X86_REG_R9B, X86_REG_R10B, X86_REG_R11B,
                X86_REG_R12B, X86_REG_R13B, X86_REG_R14B, X86_REG_R15B, X86_REG_R8D,
                X86_REG_R9D, X86_REG_R10D, X86_REG_R11D, X86_REG_R12D, X86_REG_R13D,
                X86_REG_R14D, X86_REG_R15D, X86_REG_R8W, X86_REG_R9W, X86_REG_R10W,
                X86_REG_R11W, X86_REG_R12W, X86_REG_R13W, X86_REG_R14W, X86_REG_R15W,
                X86_REG_BND0, X86_REG_BND1, X86_REG_BND2, X86_REG_BND3,
            };
            out_flags.back() = { x86_flag::AF, x86_flag::CF, x86_flag::SF, x86_flag::ZF,
                x86_flag::PF, x86_flag::OF, x86_flag::TF, x86_flag::IF, x86_flag::DF,
                x86_flag::NT, x86_flag::RF
            };
            out_fpuflags.back() = { x86_fpuflag::C0, x86_fpuflag::C1, x86_fpuflag::C2, x86_fpuflag::C3 };
 
            // in 和 out 需要根据 use 和 def,使用 活跃变量数据流方程 进行求解
            for (size_t i = 0; i < mbb.size(); i++) {
                size_t j = (mbb.size() - 1) - i; // 倒序索引 (因为活性变量分析本身就是从后往前分析的)
                std::vector<uint16_t> tem = my_set_diff(out[j], def[j]);
                in[j] = my_set_union(use[j], tem); // 计算 in
 
                std::vector<x86_flag> tem_flag = my_set_diff(out_flags[j], def_flags[j]);
                in_flags[j] = my_set_union(use_flags[j], tem_flag); // 计算 in_flags
 
                std::vector<x86_fpuflag> tem_fpuflag = my_set_diff(out_fpuflags[j], def_fpuflags[j]);
                in_fpuflags[j] = my_set_union(use_fpuflags[j], tem_fpuflag); // 计算 in_fpuflags
 
                if (j != 0) {
                    out[j - 1] = in[j]; // 计算 out
                    out_flags[j - 1] = in_flags[j]; // 计算 out_flags
                    out_fpuflags[j - 1] = in_fpuflags[j]; // 计算 out_fpuflags
                }
            }
 
            for (size_t i = 0; i < mbb.size(); i++) {
                if (def[i].empty() && def_flags[i].empty() && def_fpuflags[i].empty()) { // 如果def为空集,则直接跳过
                    continue;
                }
                if (delList[i]) { // 如果此前已经被标记过垃圾指令,则跳过
                    continue;
                }
                if (my_set_intersection(def[i], out[i]).empty() &&
                    my_set_intersection(def_flags[i], out_flags[i]).empty() &&
                    my_set_intersection(def_fpuflags[i], out_fpuflags[i]).empty()) { // 判断 mbb[i] 是否为垃圾指令
                    changed = true; // 准备进行新一轮迭代
                    delList[i] = true; // 标记垃圾指令
                }
            }
        } while (changed);
 
        // 最终优化展示
        for (size_t i = 0; i < mbb.size(); i++) {
            if (delList[i]) {
                std::cout << "| ---->"; // 可被删除的
            }
            else {
                std::cout << "|      ";
            }
            std::cout << std::hex << std::uppercase;
            std::cout << "\t0x" << mbb[i].viraddr << "\t\t" << mbb[i].opstr << std::endl;
        }
        std::cout << "--------------------------------------------------------@@@";
        std::cout << std::endl << std::endl;
    }
 
    cs_close(&hCs);
}
 
int main() {
    /*     mov eax, 5
    *      mov ebx, 10
    *      add eax, ebx
    *      mov ecx, 20
    *      mov edx, 30
    *      add eax, ebx
    *      mov edx, 10
    *      int3         --------------> int3只是方便拿来划分基本块用的
    *      stc
    *      clc
    *      cld
    *      std
    */
    char x86code[] = "\xB8\x05\x00\x00\x00\xBB\x10\x00\x00\x00\x01\xD8\xB9\x20\x00\x00\x00\xBA\x30\x00\x00\x00\x01\xD8\xBA\x10\x00\x00\x00\xCC\xF9\xF8\xFC\xFD";
    bool is64 = false;
    uint64_t virtualAddressBegin = 0x00000000;
 
    RemoveDeadCode(x86code, sizeof(x86code) - 1, is64, virtualAddressBegin);
    return 0;
}
 
std::vector<x86_flag> calc_eflags_use(csh hCs, const Insn& ins) {
    cs_insn cinsn{};
    cs_detail cdetail{};
    disasm(hCs, ins.addr, ins.viraddr, ins.size, &cinsn, &cdetail);
    uint64_t eflags = cdetail.x86.eflags;
 
    std::vector<x86_flag> ret = {};
 
    // TEST:    Instruction tests flag
    std::vector<uint64_t> useChecks = { X86_EFLAGS_TEST_OF, X86_EFLAGS_TEST_SF, X86_EFLAGS_TEST_ZF,
        X86_EFLAGS_TEST_PF, X86_EFLAGS_TEST_CF, X86_EFLAGS_TEST_NT, X86_EFLAGS_TEST_DF,
        X86_EFLAGS_TEST_RF, X86_EFLAGS_TEST_IF, X86_EFLAGS_TEST_TF, X86_EFLAGS_TEST_AF
    };
     
    for (uint64_t check : useChecks) {
        if (eflags & check) {
            x86_flag ret_flag;
            switch (check){
                case X86_EFLAGS_TEST_OF: ret_flag = x86_flag::OF; break;
                case X86_EFLAGS_TEST_SF: ret_flag = x86_flag::SF; break;
                case X86_EFLAGS_TEST_ZF: ret_flag = x86_flag::ZF; break;
                case X86_EFLAGS_TEST_PF: ret_flag = x86_flag::PF; break;
                case X86_EFLAGS_TEST_CF: ret_flag = x86_flag::CF; break;
                case X86_EFLAGS_TEST_NT: ret_flag = x86_flag::NT; break;
                case X86_EFLAGS_TEST_DF: ret_flag = x86_flag::DF; break;
                case X86_EFLAGS_TEST_RF: ret_flag = x86_flag::RF; break;
                case X86_EFLAGS_TEST_IF: ret_flag = x86_flag::IF; break;
                case X86_EFLAGS_TEST_TF: ret_flag = x86_flag::TF; break;
                case X86_EFLAGS_TEST_AF: ret_flag = x86_flag::AF; break;
            }
            ret.push_back(ret_flag);
        }
    }
    return ret;
}
std::vector<x86_flag> calc_eflags_def(csh hCs, const Insn& ins) {
    cs_insn cinsn{};
    cs_detail cdetail{};
    disasm(hCs, ins.addr, ins.viraddr, ins.size, &cinsn, &cdetail);
    uint64_t eflags = cdetail.x86.eflags;
 
    std::vector<x86_flag> ret = {};
 
    // MODIFY:   Instruction modifies flag (either sets or resets depending on operands).
    // PRIOR:    Instruction restores prior value of flag
    // SET:      Instruction sets flag
    // RESET:    Instruction resets flag
    std::vector<uint64_t> defChecks = { X86_EFLAGS_PRIOR_OF, X86_EFLAGS_PRIOR_SF, X86_EFLAGS_PRIOR_ZF,
        X86_EFLAGS_PRIOR_AF, X86_EFLAGS_PRIOR_PF, X86_EFLAGS_PRIOR_CF, X86_EFLAGS_PRIOR_TF,
        X86_EFLAGS_PRIOR_IF, X86_EFLAGS_PRIOR_DF, X86_EFLAGS_PRIOR_NT, X86_EFLAGS_MODIFY_AF,
        X86_EFLAGS_MODIFY_CF, X86_EFLAGS_MODIFY_SF, X86_EFLAGS_MODIFY_ZF, X86_EFLAGS_MODIFY_PF,
        X86_EFLAGS_MODIFY_OF, X86_EFLAGS_MODIFY_TF, X86_EFLAGS_MODIFY_IF, X86_EFLAGS_MODIFY_DF,
        X86_EFLAGS_MODIFY_NT, X86_EFLAGS_MODIFY_RF, X86_EFLAGS_SET_CF, X86_EFLAGS_SET_DF,
        X86_EFLAGS_SET_IF, X86_EFLAGS_SET_OF, X86_EFLAGS_SET_SF, X86_EFLAGS_SET_ZF, X86_EFLAGS_SET_AF,
        X86_EFLAGS_SET_PF, X86_EFLAGS_RESET_OF, X86_EFLAGS_RESET_CF, X86_EFLAGS_RESET_DF,
        X86_EFLAGS_RESET_IF, X86_EFLAGS_RESET_SF, X86_EFLAGS_RESET_AF, X86_EFLAGS_RESET_TF,
        X86_EFLAGS_RESET_NT, X86_EFLAGS_RESET_PF, X86_EFLAGS_RESET_RF, X86_EFLAGS_RESET_ZF };
 
    for (uint64_t check : defChecks) {
        if (eflags & check) {
            x86_flag ret_flag;
            switch (check) {
                case X86_EFLAGS_MODIFY_AF: ret_flag = x86_flag::AF; break;
                case X86_EFLAGS_MODIFY_CF: ret_flag = x86_flag::CF; break;
                case X86_EFLAGS_MODIFY_SF: ret_flag = x86_flag::SF; break;
                case X86_EFLAGS_MODIFY_ZF: ret_flag = x86_flag::ZF; break;
                case X86_EFLAGS_MODIFY_PF: ret_flag = x86_flag::PF; break;
                case X86_EFLAGS_MODIFY_OF: ret_flag = x86_flag::OF; break;
                case X86_EFLAGS_MODIFY_TF: ret_flag = x86_flag::TF; break;
                case X86_EFLAGS_MODIFY_IF: ret_flag = x86_flag::IF; break;
                case X86_EFLAGS_MODIFY_DF: ret_flag = x86_flag::DF; break;
                case X86_EFLAGS_MODIFY_NT: ret_flag = x86_flag::NT; break;
                case X86_EFLAGS_MODIFY_RF: ret_flag = x86_flag::RF; break;
 
                case X86_EFLAGS_PRIOR_OF: ret_flag = x86_flag::OF; break;
                case X86_EFLAGS_PRIOR_SF: ret_flag = x86_flag::SF; break;
                case X86_EFLAGS_PRIOR_ZF: ret_flag = x86_flag::ZF; break;
                case X86_EFLAGS_PRIOR_AF: ret_flag = x86_flag::AF; break;
                case X86_EFLAGS_PRIOR_PF: ret_flag = x86_flag::PF; break;
                case X86_EFLAGS_PRIOR_CF: ret_flag = x86_flag::CF; break;
                case X86_EFLAGS_PRIOR_TF: ret_flag = x86_flag::TF; break;
                case X86_EFLAGS_PRIOR_IF: ret_flag = x86_flag::IF; break;
                case X86_EFLAGS_PRIOR_DF: ret_flag = x86_flag::DF; break;
                case X86_EFLAGS_PRIOR_NT: ret_flag = x86_flag::NT; break;
 
                case X86_EFLAGS_SET_CF: ret_flag = x86_flag::CF; break;
                case X86_EFLAGS_SET_DF: ret_flag = x86_flag::DF; break;
                case X86_EFLAGS_SET_IF: ret_flag = x86_flag::IF; break;
                case X86_EFLAGS_SET_OF: ret_flag = x86_flag::OF; break;
                case X86_EFLAGS_SET_SF: ret_flag = x86_flag::SF; break;
                case X86_EFLAGS_SET_ZF: ret_flag = x86_flag::ZF; break;
                case X86_EFLAGS_SET_AF: ret_flag = x86_flag::AF; break;
                case X86_EFLAGS_SET_PF: ret_flag = x86_flag::PF; break;
 
                case X86_EFLAGS_RESET_OF: ret_flag = x86_flag::OF; break;
                case X86_EFLAGS_RESET_CF: ret_flag = x86_flag::CF; break;
                case X86_EFLAGS_RESET_DF: ret_flag = x86_flag::DF; break;
                case X86_EFLAGS_RESET_IF: ret_flag = x86_flag::IF; break;
                case X86_EFLAGS_RESET_SF: ret_flag = x86_flag::SF; break;
                case X86_EFLAGS_RESET_AF: ret_flag = x86_flag::AF; break;
                case X86_EFLAGS_RESET_TF: ret_flag = x86_flag::TF; break;
                case X86_EFLAGS_RESET_NT: ret_flag = x86_flag::NT; break;
                case X86_EFLAGS_RESET_PF: ret_flag = x86_flag::PF; break;
                case X86_EFLAGS_RESET_RF: ret_flag = x86_flag::RF; break;
                case X86_EFLAGS_RESET_ZF: ret_flag = x86_flag::ZF; break;
            }
            ret.push_back(ret_flag);
        }
    }
 
    return ret;
}
std::vector<x86_fpuflag> calc_fpuflags_use(csh hCs, const Insn& ins) {
    cs_insn cinsn{};
    cs_detail cdetail{};
    disasm(hCs, ins.addr, ins.viraddr, ins.size, &cinsn, &cdetail);
    uint64_t fpuflags = cdetail.x86.fpu_flags;
    std::vector<x86_fpuflag> ret = {};
 
    std::vector<uint32_t> useChecks = {
        X86_FPU_FLAGS_TEST_C0, X86_FPU_FLAGS_TEST_C1, X86_FPU_FLAGS_TEST_C2, X86_FPU_FLAGS_TEST_C3
    };
 
    for (uint32_t check : useChecks) {
        if (fpuflags & check) {
            x86_fpuflag ret_flag;
            switch (check) {
                case X86_FPU_FLAGS_TEST_C0: ret_flag = x86_fpuflag::C0; break;
                case X86_FPU_FLAGS_TEST_C1: ret_flag = x86_fpuflag::C1; break;
                case X86_FPU_FLAGS_TEST_C2: ret_flag = x86_fpuflag::C2; break;
                case X86_FPU_FLAGS_TEST_C3: ret_flag = x86_fpuflag::C3; break;
            }
            ret.push_back(ret_flag);
        }
    }
    return ret;
}
std::vector<x86_fpuflag> calc_fpuflags_def(csh hCs, const Insn& ins) {
    cs_insn cinsn{};
    cs_detail cdetail{};
    disasm(hCs, ins.addr, ins.viraddr, ins.size, &cinsn, &cdetail);
    uint64_t fpuflags = cdetail.x86.fpu_flags;
    std::vector<x86_fpuflag> ret = {};
 
    std::vector<uint32_t> defChecks = {
        X86_FPU_FLAGS_MODIFY_C0, X86_FPU_FLAGS_MODIFY_C1, X86_FPU_FLAGS_MODIFY_C2, X86_FPU_FLAGS_MODIFY_C3,
        X86_FPU_FLAGS_RESET_C0, X86_FPU_FLAGS_RESET_C1, X86_FPU_FLAGS_RESET_C2, X86_FPU_FLAGS_RESET_C3,
        X86_FPU_FLAGS_SET_C0, X86_FPU_FLAGS_SET_C1, X86_FPU_FLAGS_SET_C2, X86_FPU_FLAGS_SET_C3
    };
 
    for (uint32_t check : defChecks) {
        if (fpuflags & check) {
            x86_fpuflag ret_flag;
            switch (check) {
                case X86_FPU_FLAGS_MODIFY_C0: ret_flag = x86_fpuflag::C0; break;
                case X86_FPU_FLAGS_MODIFY_C1: ret_flag = x86_fpuflag::C1; break;
                case X86_FPU_FLAGS_MODIFY_C2: ret_flag = x86_fpuflag::C2; break;
                case X86_FPU_FLAGS_MODIFY_C3: ret_flag = x86_fpuflag::C3; break;
 
                case X86_FPU_FLAGS_RESET_C0: ret_flag = x86_fpuflag::C0; break;
                case X86_FPU_FLAGS_RESET_C1: ret_flag = x86_fpuflag::C1; break;
                case X86_FPU_FLAGS_RESET_C2: ret_flag = x86_fpuflag::C2; break;
                case X86_FPU_FLAGS_RESET_C3: ret_flag = x86_fpuflag::C3; break;
 
                case X86_FPU_FLAGS_SET_C0: ret_flag = x86_fpuflag::C0; break;
                case X86_FPU_FLAGS_SET_C1: ret_flag = x86_fpuflag::C1; break;
                case X86_FPU_FLAGS_SET_C2: ret_flag = x86_fpuflag::C2; break;
                case X86_FPU_FLAGS_SET_C3: ret_flag = x86_fpuflag::C3; break;
            }
            ret.push_back(ret_flag);
        }
    }
    return ret;
}

运行结果图:
图片描述

关于 EFlags

对于eflags,capstone是写了一堆宏,然后让我们自行与detail->x86.eflags来判断,所以...只好自己写了各个标志位的enum

1
// Individual EFLAGS bits, one enumerator per flag.
enum class x86_flag : uint8_t { AF, CF, SF, ZF, PF, OF, TF, IF, DF, NT, RF };

然后 Intel 规定一条指令对 EFLAGS 的影响有 7 种形式,分别是:(来自 Volume 1 -> Appendix A EFLAGS Cross-Reference)

  • T Instruction tests flag.
  • M Instruction modifies flag (either sets or resets depending on operands).
  • 0 Instruction resets flag.
  • 1 Instruction sets flag.
  • — Instruction's effect on flag is undefined.
  • R Instruction restores prior value of flag.
  • Blank Instruction does not affect flag.
    图片描述
    按照其内容,我将 T 归为了 use
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
std::vector<x86_flag> calc_eflags_use(csh hCs, const Insn& ins) {
    cs_insn cinsn{};
    cs_detail cdetail{};
    disasm(hCs, ins.addr, ins.viraddr, ins.size, &cinsn, &cdetail);
    uint64_t eflags = cdetail.x86.eflags;
 
    std::vector<x86_flag> ret = {};
 
    // TEST:    Instruction tests flag
    std::vector<uint64_t> useChecks = { X86_EFLAGS_TEST_OF, X86_EFLAGS_TEST_SF, X86_EFLAGS_TEST_ZF,
        X86_EFLAGS_TEST_PF, X86_EFLAGS_TEST_CF, X86_EFLAGS_TEST_NT, X86_EFLAGS_TEST_DF,
        X86_EFLAGS_TEST_RF, X86_EFLAGS_TEST_IF, X86_EFLAGS_TEST_TF, X86_EFLAGS_TEST_AF
    };
     
    for (uint64_t check : useChecks) {
        if (eflags & check) {
            x86_flag ret_flag;
            switch (check){
                case X86_EFLAGS_TEST_OF: ret_flag = x86_flag::OF; break;
                case X86_EFLAGS_TEST_SF: ret_flag = x86_flag::SF; break;
                case X86_EFLAGS_TEST_ZF: ret_flag = x86_flag::ZF; break;
                case X86_EFLAGS_TEST_PF: ret_flag = x86_flag::PF; break;
                case X86_EFLAGS_TEST_CF: ret_flag = x86_flag::CF; break;
                case X86_EFLAGS_TEST_NT: ret_flag = x86_flag::NT; break;
                case X86_EFLAGS_TEST_DF: ret_flag = x86_flag::DF; break;
                case X86_EFLAGS_TEST_RF: ret_flag = x86_flag::RF; break;
                case X86_EFLAGS_TEST_IF: ret_flag = x86_flag::IF; break;
                case X86_EFLAGS_TEST_TF: ret_flag = x86_flag::TF; break;
                case X86_EFLAGS_TEST_AF: ret_flag = x86_flag::AF; break;
            }
            ret.push_back(ret_flag);
        }
    }
    return ret;
}

而对于 M、R、1、0 这四种形式,我则将它们归为了 def

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
std::vector<x86_flag> calc_eflags_def(csh hCs, const Insn& ins) {
    cs_insn cinsn{};
    cs_detail cdetail{};
    disasm(hCs, ins.addr, ins.viraddr, ins.size, &cinsn, &cdetail);
    uint64_t eflags = cdetail.x86.eflags;
 
    std::vector<x86_flag> ret = {};
 
    // MODIFY:   Instruction modifies flag (either sets or resets depending on operands).
    // PRIOR:    Instruction restores prior value of flag
    // SET:      Instruction sets flag
    // RESET:    Instruction resets flag
    std::vector<uint64_t> defChecks = { X86_EFLAGS_PRIOR_OF, X86_EFLAGS_PRIOR_SF, X86_EFLAGS_PRIOR_ZF,
        X86_EFLAGS_PRIOR_AF, X86_EFLAGS_PRIOR_PF, X86_EFLAGS_PRIOR_CF, X86_EFLAGS_PRIOR_TF,
        X86_EFLAGS_PRIOR_IF, X86_EFLAGS_PRIOR_DF, X86_EFLAGS_PRIOR_NT, X86_EFLAGS_MODIFY_AF,
        X86_EFLAGS_MODIFY_CF, X86_EFLAGS_MODIFY_SF, X86_EFLAGS_MODIFY_ZF, X86_EFLAGS_MODIFY_PF,
        X86_EFLAGS_MODIFY_OF, X86_EFLAGS_MODIFY_TF, X86_EFLAGS_MODIFY_IF, X86_EFLAGS_MODIFY_DF,
        X86_EFLAGS_MODIFY_NT, X86_EFLAGS_MODIFY_RF, X86_EFLAGS_SET_CF, X86_EFLAGS_SET_DF,
        X86_EFLAGS_SET_IF, X86_EFLAGS_SET_OF, X86_EFLAGS_SET_SF, X86_EFLAGS_SET_ZF, X86_EFLAGS_SET_AF,
        X86_EFLAGS_SET_PF, X86_EFLAGS_RESET_OF, X86_EFLAGS_RESET_CF, X86_EFLAGS_RESET_DF,
        X86_EFLAGS_RESET_IF, X86_EFLAGS_RESET_SF, X86_EFLAGS_RESET_AF, X86_EFLAGS_RESET_TF,
        X86_EFLAGS_RESET_NT, X86_EFLAGS_RESET_PF, X86_EFLAGS_RESET_RF, X86_EFLAGS_RESET_ZF };
 
    for (uint64_t check : defChecks) {
        if (eflags & check) {
            x86_flag ret_flag;
            switch (check) {
                case X86_EFLAGS_MODIFY_AF: ret_flag = x86_flag::AF; break;
                case X86_EFLAGS_MODIFY_CF: ret_flag = x86_flag::CF; break;
                case X86_EFLAGS_MODIFY_SF: ret_flag = x86_flag::SF; break;
                case X86_EFLAGS_MODIFY_ZF: ret_flag = x86_flag::ZF; break;
                case X86_EFLAGS_MODIFY_PF: ret_flag = x86_flag::PF; break;
                case X86_EFLAGS_MODIFY_OF: ret_flag = x86_flag::OF; break;
                case X86_EFLAGS_MODIFY_TF: ret_flag = x86_flag::TF; break;
                case X86_EFLAGS_MODIFY_IF: ret_flag = x86_flag::IF; break;
                case X86_EFLAGS_MODIFY_DF: ret_flag = x86_flag::DF; break;
                case X86_EFLAGS_MODIFY_NT: ret_flag = x86_flag::NT; break;
                case X86_EFLAGS_MODIFY_RF: ret_flag = x86_flag::RF; break;
 
                case X86_EFLAGS_PRIOR_OF: ret_flag = x86_flag::OF; break;
                case X86_EFLAGS_PRIOR_SF: ret_flag = x86_flag::SF; break;
                case X86_EFLAGS_PRIOR_ZF: ret_flag = x86_flag::ZF; break;
                case X86_EFLAGS_PRIOR_AF: ret_flag = x86_flag::AF; break;
                case X86_EFLAGS_PRIOR_PF: ret_flag = x86_flag::PF; break;
                case X86_EFLAGS_PRIOR_CF: ret_flag = x86_flag::CF; break;
                case X86_EFLAGS_PRIOR_TF: ret_flag = x86_flag::TF; break;
                case X86_EFLAGS_PRIOR_IF: ret_flag = x86_flag::IF; break;
                case X86_EFLAGS_PRIOR_DF: ret_flag = x86_flag::DF; break;
                case X86_EFLAGS_PRIOR_NT: ret_flag = x86_flag::NT; break;
 
                case X86_EFLAGS_SET_CF: ret_flag = x86_flag::CF; break;
                case X86_EFLAGS_SET_DF: ret_flag = x86_flag::DF; break;
                case X86_EFLAGS_SET_IF: ret_flag = x86_flag::IF; break;
                case X86_EFLAGS_SET_OF: ret_flag = x86_flag::OF; break;
                case X86_EFLAGS_SET_SF: ret_flag = x86_flag::SF; break;
                case X86_EFLAGS_SET_ZF: ret_flag = x86_flag::ZF; break;
                case X86_EFLAGS_SET_AF: ret_flag = x86_flag::AF; break;
                case X86_EFLAGS_SET_PF: ret_flag = x86_flag::PF; break;
 
                case X86_EFLAGS_RESET_OF: ret_flag = x86_flag::OF; break;
                case X86_EFLAGS_RESET_CF: ret_flag = x86_flag::CF; break;
                case X86_EFLAGS_RESET_DF: ret_flag = x86_flag::DF; break;
                case X86_EFLAGS_RESET_IF: ret_flag = x86_flag::IF; break;
                case X86_EFLAGS_RESET_SF: ret_flag = x86_flag::SF; break;
                case X86_EFLAGS_RESET_AF: ret_flag = x86_flag::AF; break;
                case X86_EFLAGS_RESET_TF: ret_flag = x86_flag::TF; break;
                case X86_EFLAGS_RESET_NT: ret_flag = x86_flag::NT; break;
                case X86_EFLAGS_RESET_PF: ret_flag = x86_flag::PF; break;
                case X86_EFLAGS_RESET_RF: ret_flag = x86_flag::RF; break;
                case X86_EFLAGS_RESET_ZF: ret_flag = x86_flag::ZF; break;
            }
            ret.push_back(ret_flag);
        }
    }
 
    return ret;
}

接下来类似地,用这些函数去创建 eflags 的 use、def、in、out 集合:按照和寄存器一样的流程解析出 use、def,再计算出 in、out;最后把它的 def/out 交集结果与寄存器的 def/out 交集结果用 && 连起来判断就可以了

1
2
3
4
5
6
if (my_set_intersection(def[i], out[i]).empty() &&
    my_set_intersection(def_flags[i], out_flags[i]).empty() &&
    my_set_intersection(def_fpuflags[i], out_fpuflags[i]).empty()) { // mbb[i] is garbage when none of its register, EFLAGS or FPU-flag defs is in the matching out set
    changed = true; // request another iteration
    delList[i] = true; // mark the instruction as garbage
}

关于 fpu flags

这只是我顺手写的,为了保证最大兼容性,写一下这个应该也无妨

参考

【利用活跃变量分析来去掉vmp的大部分垃圾指令】 https://bbs.kanxue.com/thread-265950.htm
【C++利用活跃变量分析清除基本块中的垃圾指令】
https://bbs.kanxue.com/thread-277825.htm
【中南大学 编译原理 - 课时42: 数据流分析(2) 】
839K9s2c8@1M7s2y4Q4x3@1q4Q4x3V1k6Q4x3V1k6%4N6%4N6Q4x3X3g2T1K9h3I4A6j5X3W2D9K9g2)9J5k6h3y4G2L8g2)9J5c8Y4k6A6k6r3g2G2i4K6u0r3b7W2j5I4K9W2f1@1P5e0q4i4y4%4k6s2


[培训]内核驱动高级班,冲击BAT一流互联网大厂工作,每周日13:00-18:00直播授课

最后于 2025-1-20 08:51 被LingMo0412编辑 ,原因:
收藏
免费 2
支持
分享
最新回复 (0)
游客
登录 | 注册 方可回帖
返回