BFD Tutorial
Table of Contents
1. BFD Tutorial
1.1. Overview
bfd (binary file descriptor library) 是 binutils 中用来读写 object 文件的库, binutils 中的 gas/ld/objdump/readelf/objcopy 等都依赖 bfd 去读写 object file.
bfd 支持不同平台上的不同的 object 文件格式, 包括 elf, a.out, coff 等.
bfd 把 object file 抽象成三部分:
- section
- symtab
- reloc
bfd 主要用来读写可重定位 (relocatable) 的 object file, 所以已经 link 完成的文件中的特有的数据 (例如 elf 的 segment/phdr) 它并不关注.
1.2. Read ELF
测试用的 test.obj:
#include <stdio.h>

/* Initialized global; the symbol table dump lists it in .data. */
int a = 10;

/* Prints the current value of a followed by a newline. */
void foo () {
    printf("%d\n", a);
}

/* Entry point: calls foo() once and exits with status 0. */
int main(int argc, char *argv[]) {
    foo();
    return 0;
}
// 2022-03-04 11:42 #include <assert.h> #include <bfd.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> void read_reloc(bfd *abfd) { printf("---------- %s ----------\n", __FUNCTION__); int symtab_size = bfd_get_symtab_upper_bound(abfd); assert(symtab_size > 0); asymbol **symbol_table = (asymbol **)malloc(symtab_size); bfd_canonicalize_symtab(abfd, symbol_table); asection *text_section = bfd_get_section_by_name(abfd, ".text"); int reloc_size = bfd_get_reloc_upper_bound(abfd, text_section); arelent **reloc_table = (arelent **)malloc(reloc_size); int num_reloc = bfd_canonicalize_reloc(abfd, text_section, reloc_table, symbol_table); printf("reloc_size: %d, num_reloc: %d\n", reloc_size, num_reloc); for (int i = 0; i < num_reloc; i++) { arelent *reloc = reloc_table[i]; printf( "offset: 0x%lx rel: %-20s symbol: %-10s addend: %ld\n", reloc->address, reloc->howto->name, (*(reloc->sym_ptr_ptr))->name, reloc->addend); } } void read_symtab(bfd *abfd) { printf("---------- %s ----------\n", __FUNCTION__); int symtab_size = bfd_get_symtab_upper_bound(abfd); assert(symtab_size > 0); asymbol **symbol_table = (asymbol **)malloc(symtab_size); int num_symbols = bfd_canonicalize_symtab(abfd, symbol_table); printf("symtabl size: %d, num of symbols: %d\n", symtab_size, num_symbols); symbol_info info; for (int i = 0; i < num_symbols; i++) { if (symbol_table[i]->section == NULL) { continue; } bfd_symbol_info(symbol_table[i], &info); printf( "section: %-20s, symbol: %-25s -> 0x%lx, type: %x\n", symbol_table[i]->section->name, info.name, info.value, info.type); } } void read_section(bfd *abfd) { asection *section = bfd_get_section_by_name(abfd, ".text"); printf(".text vma: 0x%lx\n", section->vma); char *buffer = (char *)malloc(section->size); bfd_get_section_contents(abfd, section, buffer, 0, section->size); for (int i = 0; i < section->size; i++) { printf("%.2x ", buffer[i] & 0xff); } printf("\n"); } int main(int argc, char *argv[]) { bfd_init(); /* bfd_openr 
用来读 */ /* bfd_openw 用来写 */ bfd *abfd = bfd_openr("/tmp/test.obj", NULL); printf("target: %s\n", bfd_get_target(abfd)); assert(abfd != NULL); /* abfd 支持三种格式, 其中 elf 的 executable, o, so 都算 bfd_object */ /* * if (bfd_check_format(abfd, bfd_archive)) { * printf("found archive\n"); * } * if (bfd_check_format(abfd, bfd_core)) { * printf("found core\n"); * } */ assert(bfd_check_format(abfd, bfd_object) != 0); read_symtab(abfd); read_reloc(abfd); read_section(abfd); return 0; }
target: elf64-x86-64 ---------- read_symtab ---------- symtabl size: 120, num of symbols: 14 section: *ABS* , symbol: C-src-ZDhXTO.c -> 0x0, type: 61 section: .text , symbol: .text -> 0x0, type: 74 section: .data , symbol: .data -> 0x0, type: 64 section: .bss , symbol: .bss -> 0x0, type: 62 section: .rodata , symbol: .rodata -> 0x0, type: 72 section: .note.GNU-stack , symbol: .note.GNU-stack -> 0x0, type: 6e section: .note.gnu.property , symbol: .note.gnu.property -> 0x0, type: 72 section: .eh_frame , symbol: .eh_frame -> 0x0, type: 72 section: .comment , symbol: .comment -> 0x0, type: 6e section: .data , symbol: a -> 0x0, type: 44 section: .text , symbol: foo -> 0x0, type: 54 section: *UND* , symbol: _GLOBAL_OFFSET_TABLE_ -> 0x0, type: 55 section: *UND* , symbol: printf -> 0x0, type: 55 section: .text , symbol: main -> 0x24, type: 54 ---------- read_reloc ---------- reloc_size: 40, num_reloc: 4 offset: 0xa rel: R_X86_64_PC32 symbol: a addend: -4 offset: 0x13 rel: R_X86_64_PC32 symbol: .rodata addend: -4 offset: 0x1d rel: R_X86_64_PLT32 symbol: printf addend: -4 offset: 0x3d rel: R_X86_64_PLT32 symbol: foo addend: -4 .text vma: 0x0 f3 0f 1e fa 55 48 89 e5 8b 05 00 00 00 00 89 c6 48 8d 3d 00 00 00 00 b8 00 00 00 00 e8 00 00 00 00 90 5d c3 f3 0f 1e fa 55 48 89 e5 48 83 ec 10 89 7d fc 48 89 75 f0 b8 00 00 00 00 e8 00 00 00 00 b8 00 00 00 00 c9 c3
1.2.1. API
1.2.1.1. bfd_openr
读文件
1.2.1.2. bfd_check_format
bfd 支持三种 format:
- bfd_object
- bfd_archive
- bfd_core
其中 elf executable, so, o 都属于 bfd_object
1.2.1.3. bfd_get_symtab_upper_bound
获得 symtab 的大小, 用户代码需要据此分配空间以便后续读出 symtab
1.2.1.4. bfd_canonicalize_symtab
读 symtab
asymbol 的主要成员有:
- name
value
symbol value 是指它在其 section 中的 offset
section
symbol 所属的 section, 重定位时需要根据 section 和 value 确定 symbol 的地址
1.2.1.5. bfd_get_section_by_name
查找 section
1.2.1.6. bfd_get_section_contents
读取 section 的数据
1.2.1.7. bfd_get_reloc_upper_bound
和 symbol 类似, 读取 reloc 的大小
1.2.1.8. bfd_canonicalize_reloc
读取 reloc
arelent 的主要成员有:
address
表示需要 patch 的地址
sym_ptr_ptr
要查找的 symbol
howto
reloc 的类型, 决定了 symbol 的真实地址要如何写到 address
addend
得到 symbol 的真实地址后需要加上 addend 作为最终的地址, 例如 array 的索引就是一种常见的 addend
1.3. Write ELF
用 bfd 生成 o 文件: test_write_section.o, test_write_reloc.o
// 2022-03-04 13:31 #include <assert.h> #include <bfd.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> void write_reloc() { // int a = 10; // int foo() { return a; } bfd* abfd = bfd_openw("/tmp/test_write_reloc.o", "elf64-x86-64"); bfd_set_arch_mach(abfd, bfd_arch_i386, bfd_mach_x86_64); assert(abfd != NULL); bfd_set_format(abfd, bfd_object); asection* data_section = bfd_make_section_old_way(abfd, ".data"); asection* text_section = bfd_make_section_old_way(abfd, ".text"); /* two symbol: foo & a */ asymbol* symbol_table[3]; memset(symbol_table, 0, sizeof(symbol_table)); symbol_table[0] = bfd_make_empty_symbol(abfd); symbol_table[0]->name = "foo"; symbol_table[0]->section = text_section; symbol_table[0]->flags = BSF_GLOBAL | BSF_FUNCTION; symbol_table[0]->value = 0; symbol_table[1] = bfd_make_empty_symbol(abfd); symbol_table[1]->name = "a"; symbol_table[1]->section = data_section; symbol_table[1]->flags = BSF_GLOBAL | BSF_OBJECT; symbol_table[1]->value = 0; bfd_set_symtab( abfd, symbol_table, sizeof(symbol_table) / sizeof(symbol_table[0]) - 1); /* text */ char buffer[] = {0xf3, 0x0f, 0x1e, 0xfa, 0x55, 0x48, 0x89, 0xe5, 0x8b, 0x05, 0x00, 0x00, 0x00, 0x00, 0x5d, 0xc3}; int size = sizeof(buffer) / sizeof(buffer[0]); bfd_set_section_flags( text_section, SEC_CODE | SEC_HAS_CONTENTS | SEC_RELOC); bfd_set_section_size(text_section, size); /* data */ bfd_set_section_flags(data_section, SEC_DATA | SEC_HAS_CONTENTS); bfd_set_section_size(data_section, 4); int x = 0xa; bfd_set_section_contents(abfd, text_section, buffer, 0, size); bfd_set_section_contents(abfd, data_section, (char*)&x, 0, 4); /* reloc */ /* one reloc: a */ arelent* reloc_table[2]; memset(reloc_table, 0, sizeof(reloc_table)); reloc_table[0] = bfd_alloc(abfd, sizeof(arelent)); reloc_table[0]->address = 0xa; reloc_table[0]->addend = -4; /* R_X86_64_PC32 */ reloc_table[0]->howto = bfd_reloc_type_lookup(abfd, BFD_RELOC_32_PCREL); reloc_table[0]->sym_ptr_ptr = &symbol_table[1]; 
bfd_set_reloc( abfd, text_section, reloc_table, sizeof(reloc_table) / sizeof(reloc_table[0]) - 1); bfd_close(abfd); } void write_symbol() { bfd* abfd = bfd_openw("/tmp/test_write_symbol.o", "elf64-x86-64"); bfd_set_arch_mach(abfd, bfd_arch_i386, bfd_mach_x86_64); assert(abfd != NULL); bfd_set_format(abfd, bfd_object); asymbol* symbol_table[2]; memset(symbol_table, 0, sizeof(symbol_table)); symbol_table[0] = bfd_make_empty_symbol(abfd); symbol_table[0]->name = "hello"; symbol_table[0]->section = bfd_make_section_old_way(abfd, ".text"); symbol_table[0]->flags = BSF_GLOBAL; symbol_table[0]->value = 0; bfd_set_symtab( abfd, symbol_table, sizeof(symbol_table) / sizeof(symbol_table[0]) - 1); bfd_close(abfd); } void write_section() { /* int foo() {return 10;} */ bfd* abfd = bfd_openw("/tmp/test_write_section.o", "elf64-x86-64"); bfd_set_arch_mach(abfd, bfd_arch_i386, bfd_mach_x86_64); assert(abfd != NULL); bfd_set_format(abfd, bfd_object); /* write symtab */ asymbol* symbol_table[2]; memset(symbol_table, 0, sizeof(symbol_table)); asection* text_section = bfd_make_section_old_way(abfd, ".text"); symbol_table[0] = bfd_make_empty_symbol(abfd); symbol_table[0]->name = "foo2"; symbol_table[0]->section = text_section; symbol_table[0]->flags = BSF_GLOBAL | BSF_FUNCTION; symbol_table[0]->value = 0; bfd_set_symtab( abfd, symbol_table, sizeof(symbol_table) / sizeof(symbol_table[0]) - 1); /* write section */ char buffer[] = {0xf3, 0x0f, 0x1e, 0xfa, 0x55, 0x48, 0x89, 0xe5, 0xb8, 0x0a, 0x00, 0x00, 0x00, 0x5d, 0xc3}; int size = sizeof(buffer) / sizeof(buffer[0]); bfd_set_section_flags(text_section, SEC_CODE | SEC_HAS_CONTENTS); bfd_set_section_size(text_section, size); bfd_set_section_contents(abfd, text_section, buffer, 0, size); bfd_close(abfd); } int main(int argc, char* argv[]) { write_symbol(); write_section(); write_reloc(); return 0; }
readelf -a /tmp/test_write_reloc.o|tail -n 16
Relocation section '.rela.text' at offset 0xd8 contains 1 entry: Offset Info Type Sym. Value Sym. Name + Addend 00000000000a 000400000002 R_X86_64_PC32 0000000000000000 a - 4 The decoding of unwind sections for machine type Advanced Micro Devices X86-64 is not currently supported. Symbol table '.symtab' contains 5 entries: Num: Value Size Type Bind Vis Ndx Name 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND 1: 0000000000000000 0 SECTION LOCAL DEFAULT 1 2: 0000000000000000 0 SECTION LOCAL DEFAULT 2 3: 0000000000000000 0 FUNC GLOBAL DEFAULT 2 foo 4: 0000000000000000 0 OBJECT GLOBAL DEFAULT 1 a No version information found in this file.
链接手动生成的 test_write_reloc.o 和 test_write_section.o
// 2022-03-05 19:48
#include <stdio.h>
#include <stdlib.h>

/* foo is defined in test_write_reloc.o */
extern int foo();
/* foo2 is defined in test_write_section.o */
extern int foo2();

/* Prints the results of foo() and foo2() on one line. */
int main(int argc, char *argv[]) {
    int from_reloc_obj = foo();
    int from_section_obj = foo2();

    printf("%d %d\n", from_reloc_obj, from_section_obj);
    return 0;
}
10 10
1.3.1. API
1.3.1.1. bfd_openw
bfd_openw("test_write_reloc.o", "elf64-x86-64"), 创建一个 x86_64 elf 文件
1.3.1.2. bfd_set_arch_mach
1.3.1.3. bfd_set_format
1.3.1.4. bfd_make_section_old_way
查找或创建一个 section
1.3.1.5. bfd_make_empty_symbol
创建一个 asymbol. 创建 symbol 中 flags 非常重要, 例如需要指定 BSF_GLOBAL 后该 symbol 才能被 linker 找到.
1.3.1.6. bfd_set_symtab
设置 symtab, 其中 symtab 数组的最后一项需要为 NULL, 且传入的 count 不包含该项
1.3.1.7. bfd_set_section_flags
例如 .text 需要设置为 SEC_CODE | SEC_HAS_CONTENTS, 缺少某些 flag 会导致某些功能不工作, 例如, 如果 SEC_HAS_CONTENTS 没有置位, 则后面 bfd_set_section_contents 会直接失败
1.3.1.8. bfd_set_section_size
设置 section 大小
1.3.1.9. bfd_set_section_contents
向 section 写数据. bfd 代码里对上述函数调用的顺序有要求, 例如 bfd_set_section_contents 后无法再 bfd_set_section_size (即使操作的是另一个 section)
1.3.1.10. bfd_reloc_type_lookup
查找 reloc howto
1.3.1.11. bfd_set_reloc
写 reloc
Backlinks
ELF (ELF > ELF Section > static linker 相关 > rel.text): BFD 操作 reloc 相关的功能就是在操作 rel.text
GDB Target Arch (GDB Target Arch > porting new arch > porting): gdb 的 disass 依赖 opcodes. core, exec 数据的读取依赖 bfd
Linker Relaxation (Linker Relaxation > impls): linker relaxation 的实现在 BFD 中, 以 riscv 为例, 在 elfnn-riscv.c 中, 具体的:
Linker Relocation (Linker Relocation): 编译器生成 object file 时, symbol 地址在 assembling 阶段无法确定, assembler 只能通过 BFD 写入重定位信息到 rel.text, 后面需要 link editor 进行 relocation
binutils (binutils > BFD): BFD Tutorial