From 0b6fc8561226e3cb97c780512ee803baf3cf9462 Mon Sep 17 00:00:00 2001 From: Garrett Dickinson Date: Sun, 5 Feb 2023 23:11:57 -0600 Subject: [PATCH] Rework ELF data into structs, break up main.rs --- notes/jan31.md | 13 ++++++ src/elf.rs | 88 +++++++++++++++++++++++++++++++++++ src/main.rs | 103 ++++++++++++++++++----------------------- src/util.rs | 58 +++++++++++++++++++++++ testing/hello.o | Bin 0 -> 1504 bytes testing/hello.stripped | Bin 0 -> 14472 bytes 6 files changed, 205 insertions(+), 57 deletions(-) create mode 100644 notes/jan31.md create mode 100644 src/elf.rs create mode 100644 src/util.rs create mode 100644 testing/hello.o create mode 100755 testing/hello.stripped diff --git a/notes/jan31.md b/notes/jan31.md new file mode 100644 index 0000000..46e80eb --- /dev/null +++ b/notes/jan31.md @@ -0,0 +1,13 @@ +# Lecture notes 1/31/2023 + +## Setup +- Load file into byte array +- check if ELF +- parse file header +- get offset and length of text section + - virtual addresses and size +- get function addresses + +## Disassembly +- Use something like [Capstone](https://www.capstone-engine.org/) + - feed address and offset in linear sweep to find instruction \ No newline at end of file diff --git a/src/elf.rs b/src/elf.rs new file mode 100644 index 0000000..735a05d --- /dev/null +++ b/src/elf.rs @@ -0,0 +1,88 @@ + +// Generic ELF information offsets. + +pub const MAGIC_NUMBER: &[u8] = &[0x7F,0x45,0x4C,0x46]; +pub const ARCH_OFFSET: u8 = 0x04; // x86 or x64 indicator; 1 byte +pub const ENDIAN_OFFSET: u8 = 0x05; // Endian offset (1 - little, 2 - big); 1 byte +pub const ABI_OFFSET: u8 = 0x07; // ABI identifier; 1 byte +pub const TYPE_OFFSET: u8 = 0x10; // Object type identifier; 2 bytes +pub const MACHINE_OFFSET: u8 = 0x12; // Instruction set type; 2 bytes + + +// Offsets for file header entry points and table information. 
+// Arrayed offsets are split by architecture: +// 0 : x86 +// 1 : x86_64 + +pub const ENTRYPOINT_OFFSET: u8 = 0x18; +pub const PHOFF_OFFSET: [u8; 2] = [0x1C, 0x20]; // Program header table offset (e_phoff); 4 bytes on x86, 8 bytes on x86_64 +pub const SHOFF_OFFSET: [u8; 2] = [0x20, 0x28]; // Section header table offset (e_shoff); 4 bytes on x86, 8 bytes on x86_64 +pub const EHSIZE_OFFSET: [u8; 2] = [0x28, 0x34]; // Size of this ELF file header (e_ehsize); 2 bytes +pub const PHENTSIZE_OFFSET: [u8; 2] = [0x28, 0x34]; // Size of one program header table entry (e_phentsize); 2 bytes. FIXME: values duplicate EHSIZE_OFFSET; the ELF spec places e_phentsize at 0x2A / 0x36 +pub const PHNUM_OFFSET: [u8; 2] = [0x2C, 0x38]; // Number of entries in the program header table (e_phnum); 2 bytes +pub const SHENTSIZE_OFFSET: [u8; 2] = [0x2E, 0x3A]; // Size of one section header table entry (e_shentsize); 2 bytes +pub const SHNUM_OFFSET: [u8; 2] = [0x30, 0x3C]; // Number of entries in the section header table (e_shnum); 2 bytes +pub const SHSTRNDX_OFFSET: [u8; 2] = [0x32, 0x3E]; // Index of the section header that contains section names (e_shstrndx); 2 bytes + + +#[derive(Debug)] +pub enum ArchitecureType { + X86, + X86_64, + Unknown +} + + +#[derive(Debug)] +pub enum EndianType { + Big, + Little, + Unknown +} + + +#[derive(Debug)] +pub struct FileHeader { + pub arch: ArchitecureType, + pub is_x86_64: bool, + pub endian: EndianType, + pub abi: u8, + pub elf_type: u8, + pub isa: u8, + pub entryoff: u8, + pub phoff: u8, + pub shoff: u8, + pub ehsize: u8, + pub phentsize: u8, + pub phnum: u8, + pub shentsize: u8, + pub shnum: u8, + pub shstrndx: u8 +} + + +#[derive(Debug)] +pub struct ProgramHeader { + pub program_type: u8, + pub flags: u8, + pub offset: u8, + pub vaddr: u8, + pub paddr: u8, + pub filesz: u8, + pub align: u8, +} + + +#[derive(Debug)] +pub struct SectionHeader { + pub name: u8, + pub section_type: u8, + pub flags: u8, + pub addr: u8, + pub offset: u8, + pub size: u8, + pub link: u8, + pub info: u8, + pub addralign: u8, + pub entsize: u8 +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 80c2866..7474916 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,38 +8,10 @@ use std::path; use std::env; use std::fs; 
use std::process::exit; -use std::collections::HashMap; - -// ELF Header Sizes - -const ELF_FILE_HEADER_LENGTH: [u8; 2] = [0x34, 0x40]; - - -// Generic ELF information offsets. - -const ELF_MAGIC_NUMBER: &[u8] = &[0x7F,0x45,0x4C,0x46]; -const ELF_ARCH_OFFSET: u8 = 0x04; // x86 or x64 indiicator; 1 byte -const ELF_ENDIAN_OFFSET: u8 = 0x05; // Endian offset (1 - little, 2 - big); 1 byte -const ELF_ABI_OFFSET: u8 = 0x07; // ABI identifier; 1 byte -const ELF_TYPE_OFFSET: u8 = 0x10; // Object type identifier; 2 bytes -const ELF_MACHINE_OFFSET: u8 = 0x12; // Instruction set type; 2 bytes - - -// Offsets for file header entry points and table inforamtion. -// Arrayed offset are split by architecture: -// 0 : x86 -// 1 : x86_64 - -const ELF_ENTRYPOINT_OFFSET: u8 = 0x18; -const ELF_PHOFF_OFFSET: [u8; 2] = [0x1C, 0x20]; // Program header table pointer; 2 bytes -const ELF_SHOFF_OFFSET: [u8; 2] = [0x20, 0x28]; // Section table pointer; 2 bytes -const ELF_EHSIZE_OFFSET: [u8; 2] = [0x28, 0x34]; // Program header table entry size pointer; 2 bytes -const ELF_PHENTSIZE_OFFSET: [u8; 2] = [0x28, 0x34]; // Section table pointer; 2 bytes -const ELF_PHNUM_OFFSET: [u8; 2] = [0x2C, 0x38]; // Program header table number of entries pointer; 2 bytes -const ELF_SHENTSIZE_OFFSET: [u8; 2] = [0x2E, 0x3A]; // Size of section header table; 2 bytes -const ELF_SHNUM_OFFSET: [u8; 2] = [0x30, 0x3C]; // Number of entries in section table pointer; 2 bytes -const ELF_SHSTRNDX_OFFSET: [u8; 2] = [0x32, 0x3E]; // Index of section header that contains names; 2 bytes +// Import modules +mod elf; +mod util; fn main() { @@ -49,14 +21,14 @@ fn main() { // Grab our filepath from our options if &args.len() < &2 { // No file given, terminate - println!("[Error] Please provied a file to open..."); + println!("[Error] Please provide a filepath to open"); exit(0); } let file_path: &String = &args[1]; if path::Path::new(file_path).exists() { - println!("File exists, reading '{}'", file_path); + println!("File exists, 
reading '{}'...", file_path); let contents: Result, std::io::Error> = fs::read(file_path); @@ -64,16 +36,18 @@ fn main() { let bytes: &Vec = &contents.expect(""); let magic_num: &[u8] = &bytes[0..4]; - if magic_num == ELF_MAGIC_NUMBER { + if magic_num == elf::MAGIC_NUMBER { println!("Found ELF Magic Number..."); println!("Parsing File Header..."); // Build the File Header data structure - let file_header_map = build_fild_header(bytes); + let file_header: elf::FileHeader = build_file_header(bytes); - for (key, value) in &file_header_map { - println!("{}: {}", key, value); - } + // Build Program Header data structure + //let program_header: elf::ProgramHeader = build_program_header(bytes, file_header.is_x86_64); + + println!("{:?}", file_header); + //println!("{:?}", program_header); } else { println!("[Error] Could not find magic number, is this an ELF executable?") @@ -88,29 +62,44 @@ fn main() { } -fn build_fild_header(data: &Vec) -> HashMap{ - let mut file_header: HashMap = HashMap::new(); +fn build_file_header(data: &Vec) -> elf::FileHeader { // Determine x86 or x64 architecture // 0 : x86 // 1 : x64 - let arch: u8 = (data[ELF_ARCH_OFFSET as usize] - 1).into(); + let arch: usize = (data[elf::ARCH_OFFSET as usize] - 1).into(); - file_header.insert("e_arch".to_string(), data[ELF_ARCH_OFFSET as usize]); - file_header.insert("e_endian".to_string(), data[ELF_ENDIAN_OFFSET as usize]); - file_header.insert("e_abi".to_string(), data[ELF_ABI_OFFSET as usize]); - file_header.insert("e_type".to_string(), data[ELF_TYPE_OFFSET as usize]); - file_header.insert("e_machine".to_string(), data[ELF_MACHINE_OFFSET as usize]); - - file_header.insert("e_entry".to_string(), data[ELF_ENTRYPOINT_OFFSET as usize]); - file_header.insert("e_phoff".to_string(), data[ELF_PHOFF_OFFSET[arch as usize] as usize]); - file_header.insert("e_shoff".to_string(), data[ELF_SHOFF_OFFSET[arch as usize] as usize]); - file_header.insert("e_ehsize".to_string(), data[ELF_EHSIZE_OFFSET[arch as usize] as 
usize]); - file_header.insert("e_phentsize".to_string(), data[ELF_PHENTSIZE_OFFSET[arch as usize] as usize]); - file_header.insert("e_phnum".to_string(), data[ELF_PHNUM_OFFSET[arch as usize] as usize]); - file_header.insert("e_shentsize".to_string(), data[ELF_SHENTSIZE_OFFSET[arch as usize] as usize]); - file_header.insert("e_shnum".to_string(), data[ELF_SHNUM_OFFSET[arch as usize] as usize]); - file_header.insert("e_shstrndx".to_string(), data[ELF_SHSTRNDX_OFFSET[arch as usize] as usize]); + let file_header: elf::FileHeader = elf::FileHeader { + arch: util::parse_architecture(data[elf::ARCH_OFFSET as usize]), + is_x86_64: arch != 0, + endian: util::parse_endian(data[elf::ENDIAN_OFFSET as usize]), + abi: data[elf::ABI_OFFSET as usize], + elf_type: data[elf::TYPE_OFFSET as usize], + isa: data[elf::MACHINE_OFFSET as usize], + entryoff: data[elf::ENTRYPOINT_OFFSET as usize], + phoff: data[elf::PHOFF_OFFSET[arch] as usize], + shoff: data[elf::SHOFF_OFFSET[arch] as usize], + ehsize: data[elf::EHSIZE_OFFSET[arch] as usize], + phentsize: data[elf::PHENTSIZE_OFFSET[arch] as usize], + phnum: data[elf::PHNUM_OFFSET[arch] as usize], + shentsize: data[elf::SHENTSIZE_OFFSET[arch] as usize], + shnum: data[elf::SHNUM_OFFSET[arch] as usize], + shstrndx: data[elf::SHSTRNDX_OFFSET[arch] as usize], + }; return file_header; -} \ No newline at end of file +} + + +// fn build_program_header(data: &Vec, is_x86_64: bool) -> elf::ProgramHeader { + +// let arch: i8 = if is_x86_64 { 1 } else { 0 }; + +// let mut program_header: elf::ProgramHeader; + +// // let mut program_header: elf::ProgramHeader = elf::ProgramHeader { +// // arch: util::parse_architecture(data[elf::ARCH_OFFSET as usize]) +// // }; + +// return program_header; +// } \ No newline at end of file diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..83c3735 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,58 @@ +use crate::elf::{self, EndianType, ArchitecureType}; + + +pub fn parse_endian(endian: u8) -> 
elf::EndianType { + match endian { + 0x00 => return EndianType::Big, + 0x01 => return EndianType::Little, + _ => return EndianType::Unknown + } +} + + +pub fn parse_architecture(arch: u8) -> elf::ArchitecureType { + match arch { + 0x01 => return ArchitecureType::X86, + 0x02 => return ArchitecureType::X86_64, + _ => return ArchitecureType::Unknown + } +} + + +pub fn parse_abi(abi: u8) -> String { + match abi { + 0x00 => "SystemV".to_string(), + 0x01 => "HP-UX".to_string(), + 0x02 => "NetBSD".to_string(), + 0x03 => "Linux".to_string(), + 0x04 => "GNU Hurd".to_string(), + 0x06 => "Solaris".to_string(), + 0x07 => "AIX".to_string(), + 0x08 => "IRIX".to_string(), + 0x09 => "FreeBSD".to_string(), + 0x0C => "OpenBSD".to_string(), + 0x0D => "OpenVMS".to_string(), + + // Match unknown ABI + _ => "Unknown".to_string() + } +} + + +pub fn parse_isa(isa: u8) -> String { + match isa { + 0x03 => "Intel x86".to_string(), + 0x3E => "AMD x86-64".to_string(), + + // Matching just for fun, maybe future functionality? 
o.O + 0x14 => "PowerPC".to_string(), + 0x15 => "PowerPC 64-bit".to_string(), + 0x32 => "IA_64".to_string(), + 0x28 => "Arm".to_string(), + 0xB7 => "Arm 64-bit".to_string(), + + // Match unknown ISA + _ => "Unknown".to_string() + } +} + diff --git a/testing/hello.o b/testing/hello.o new file mode 100644 index 0000000000000000000000000000000000000000..fa94a8e95f7266af6c38f23ec1e03e980b2e7d28 GIT binary patch literal 1504 zcmbtTPiqrV5T8wJtJuaU6@-ec2%^wFveiTNAi+?ZL-9}uf*`s{x2Brijhh!}J?KsB zLFg9{`Y}8fJb3j>2)&AzB0`74&4&H8IsiI$tqyZ()K zXO!rX9|Y=_s?P_bTn*kVEU5bJn+8UXaXvLZ$iDorP29#FurWTMnSzn`dX>D*nm0Q& zd3uL0aG7x&&^3IPsA8L7_0KZ zeg2>xrlh(QhZ_kEvW-Nq!{@AV#Q1UNUxx)|>Vo?QhrhKlA~oEKb=YC9vx4_T z%A$U#bhN2St%W)y)fo;+Woe;w(>RV2wk4^K6z^busKf3{qd!kZYLKOaDAQXUgG^EH z>3SB%k?G|Bn9t$Z$%K=pVRV_RnBi;FEGw8%!v{_)RByx?WEQR@I7rk#S( l6m!&Ztsx3ZHx0~)HS=j8x-C3Jd z)M$JV+G72qVnO=D9|{!}6!JqQQt=656`}qRTCtL%)mXK}2N}T$W&$nrR zn+Eb~IT)FVGmW4H88R|&F1ef&A^|En4(64yL-a5X$pAq3i{nL^ zOcM3?Mf^!rV35bDjMeLavDTS*tJGzSAb6}Y?>*tYCp_S!9&D*U zP|B+s(y8v2=7w~#KAp-G2I~h`wA8mWTluWjjXcw&(RXpS`w|V>oZ-a10Cghbh83<_E{}MV88j6z6)+Vr6)+Vr6)+Vr6*%=P@MF~lzuF_;mfJhZK5tOUeq`K@ zcpuv%Z#OAg)2APBduRr)U-#BxE1z})~`vKIG2udB1yY<%Dy1IMyw?f& z*(OWL_M246V0FooiuvWc>A@A-`5FGy_nvnd2dtg%KT5lVn*5A+FWIAns@C?C4g0+GG zp{X%hkBGI7i09vbEWA#Ue>Fd?v|3!2hhjjm+0oH?uJ8TU8NDezsqGR1@M|=CK=;fQc3mLZ% zjm4~GR%3mv;H$Cwo0_b~=A~j*%F6fU-JF}~Ru(PMb#epB%4A)~>dh3afn0XL$+<%T zQ+FXn%c)aIFeuMzwC5ftmrrFgGn6>_a!xwI1ZfSV zUCts!dum#}S!v{*t;%wpK~iaraf+g|$%LCwmeUvS$tC)ocwdrwgQok4Cvv$2rOphr zX{bbhiaOKu?tETZTeJQBPKN)6_}9YM1J9-6JeP@s_akuqVIa&q=x0-d=REjxt^Y?n z<~el!ZzjkOzX!no*91i_^P*`bwSxSel0Pt7jMR-Vzk|#t=$b+PxUT>)KGrIS^S_b! zTto12-vK_ahxK#h+JTSj|2DF5%HZQZ2E=_!x|y*in{5f&_?`eh?gKzt65%G{0bnCFX2TO_*WJ!y1}t5LB!bFqxb%U1Ztj+Kyijt^#>3h B`OyFX literal 0 HcmV?d00001