View difference between Paste ID: hh8wtmXf and
SHOW: | | - or go back to the newest paste.
1-
1+
#include <unistd.h>
2
#include <inttypes.h>
3
#include <stdio.h>
4
#include <fcntl.h>
5
#include <err.h>
6
#include <elf.h>
7
8
9
/* Text the generated program prints; it is stored in blob.e_unused3 below. */
#define MESSAGE "hello, world\n"
10
/* Length of MESSAGE without its terminating NUL; used as the write(2) byte count. */
#define MESSAGE_SZ (sizeof(MESSAGE) - 1)
11
12
/*
 * Smallest possible (?) proper hello world for ELF amd64 linux.
 *
 * The code itself is:
 *	movb $13, %dl
 *	lea 2f(%rip), %rsi
 *	inc %eax
 *	mov %eax, %edi
 * 1:	syscall
 *	movb $60, %al
 *	jmp 1b
 * 2:	.string	"hello, world\n"
 *
 * The only wrong thing is the exit code (%edi still holds 1 when exit is called).
 * We depend on the kernel zeroing registers for us, but that should be a safe
 * assumption because the kernel doesn't want to leak registers to userland anyway
 * (unless there's some ABI I'm not aware of).
 *
 * The code is stuffed into unused fields in the ELF headers with relative jumps gluing it
 * together. The string is stuffed into the nicely contiguous space provided to us by
 * e_shoff (section headers are irrelevant for executables), e_flags and e_ehsize, which
 * exactly fits the One True Hello World Message.
 *
 * The program header overlaps with the elf header with some trickery involving a good guess
 * about what the kernel actually does with p_flags (see comment for e_unused4).
 *
 * I am not sure since when the Linux kernel supports ET_DYN binaries. Maybe it always has?
 * ET_DYN allows us to pull off a horrible horrible trick (see comment for e_unused1).
 * This wouldn't work without ET_DYN unless we can find some way to load the address of the
 * message into rsi with some shorter instruction.
 */
42
43
/*
 * When defined, the program header is laid over the tail of the ELF header:
 * p_type is aliased to e_phnum and the separate p_type/p_flags fields are
 * omitted from struct blob. Undefine to emit a non-overlapping program header.
 */
#define OVERLAP
44
45
int
46
main(int argc, char **argv)
47
{
48
#define RIP_OFFSET(from_field, to_field) ((char *)&blob.to_field - (char *)&blob.from_field)
49
	struct blob {
50
		unsigned char e_ident[8];
51
		unsigned char e_unused1[8];
52
		uint16_t e_type;
53
		uint16_t e_machine;
54
		unsigned char e_unused2[4];		/* e_version */
55
		uint64_t e_entry;
56
		uint64_t e_phoff;
57
		unsigned char e_unused3[8 + 4 + 2];	/* e_shoff + e_flags + e_ehsize */
58
		uint16_t e_phentsize;
59
		uint16_t e_phnum;
60
		unsigned char e_unused4[2 + 2 + 2];	/* e_shentsize + e_shnum + e_shstrndx */
61
62
#ifndef OVERLAP
63
		uint32_t p_type;
64
		uint32_t p_flags;
65
#else
66
#define p_type e_phnum
67
#endif
68
		uint64_t p_offset;
69
		uint64_t p_vaddr;
70
		unsigned char p_unused1[8];
71
		uint64_t p_filesz;
72
		uint64_t p_memsz;
73
		uint64_t p_align;
74
	} __attribute__((__packed__)) blob = {
75
		.e_ident = { 0x7f, 'E', 'L', 'F', ELFCLASS64, ELFDATA2LSB, EV_CURRENT, 0x00 },
76
		.e_unused1 = {
77
			/*
78
			 * First 8 bytes of e_ident must be what they are. The next 8 bytes
79
			 * are free for all so we make that our entry point.  We use
80
			 * "lea MESSAGEOFFSET(%rip), %rsi"  to load the address for write(2).
81
			 * lea is a 7 byte instruction. The 8th byte is a relative jump
82
			 * instruction with the jump offset conveniently provided to us by
83
			 * e_type, which is ET_DYN = 3. This lets us skip e_machine and
84
			 * execute the next instructions from e_version.
85
			 */
86
			0x48, 0x8d, 0x35, RIP_OFFSET(e_unused1[7], e_unused3), 0x00, 0x00, 0x00,
87
			0xeb
88
		},
89
		.e_type = ET_DYN,
90
		/* 0x03, 0x00 */
91
		.e_machine = EM_X86_64,
92
		/* 0x3e, 0x00 */
93
		.e_unused2 = {
94
			/*
95
			 * mov $0xe,%dl   - length of string for write(2)
96
			 * jmp <to rest of code>
97
			 */
98
			0xb2, MESSAGE_SZ, 				
99
			0xeb, RIP_OFFSET(e_unused2[4], e_unused4[2])
100
		},
101
		.e_entry = 8,
102
		.e_phoff = ((char *)&blob.p_type - (char *)&blob),
103
		.e_unused3 = MESSAGE,
104
		.e_phentsize = sizeof(Elf64_Phdr),
105
		.e_phnum = 1,
106
		/* 0x01 0x00 */
107
		.e_unused4 = {
108
			0x00, 0x00, 		/* Must be zero if OVERLAP */
109
			0xff, 0xc0,		/* inc %eax - syscall number for read is 1. */
110
			/* 
111
			 * The original code was:
112
			 *  mov $1, %al
113
			 *  mov %eax, %edi
114
			 * This set up the syscall number to 1 (write) and fd to 1 (stdout)
115
			 * and could be done in 4 bytes. But we can't use mov here, registers
116
			 * after exec come pre-zeroed, so we inc %eax instead. This is because
117
			 * the mov instruction is 0xb0, but we need the lowest bit of it set so
118
			 * we can make this and the phdr overlap.
119
			 *
120
			 * The lowest bit must be set because it overlaps with the p_flags in
121
			 * phdr and that specifies that the loaded region is executable. We don't
122
			 * need to specify readability (even though we do read from the region
123
			 * since the message is in it) because amd64 doesn't have an executable
124
			 * but not readable PTE. Unfortunately "inc" also makes the region
125
			 * writeable, so we violate W^X. Looking for a better instruction.
126
			 */
127
			0xeb, RIP_OFFSET(e_unused4[6], p_unused1)
128
		},
129
#ifndef OVERLAP
130
		/* This can overlap with .e_phnum. */
131
		.p_type = PT_LOAD,
132
		/* 0x01 0x00 0x00 0x00 - must be this */
133
		.p_flags = PF_R|PF_X,
134
		/* 0x03 0x00 0x00 0x00 - can be anything as long as the lowest bit is set. */
135
#endif
136
		.p_offset = 0,
137
		/* 0x0000000000000000 - must be 0 */
138
		.p_vaddr = 0,
139
		/* 0x0000000000000000 - must be 0 */
140
		.p_unused1 = {
141
			/*
142
			 * mov %eax, %edi
143
			 * syscall
144
			 * mov $0x3c, %al
145
			 * jmp <back to the syscall instruction>
146
			 */
147
			0x89, 0xc7,
148
			0x0f, 0x05,
149
			0xb0, 0x3c,
150
			0xeb, 0xfa,
151
		},
152
		.p_filesz = sizeof(blob),
153
		.p_memsz = sizeof(blob),
154
		.p_align = 0,
155
	};
156
157
	int fd;
158
159
	if ((fd = open("a.out", O_CREAT|O_RDWR|O_TRUNC, 0755)) == -1)
160
		err(1, "open");
161
162
	if (write(fd, &blob, sizeof(blob)) != sizeof(blob))
163
		err(1, "write");
164
	
165
	return 0;
166
}