Codice macchina x86_64 automodificante, 123 byte
0f b6 0f 31 c0 eb 11 0f be c9 8d 04 80 8d 44 41 d0 0f b6 4f 01 48 ff c7 83 f9 21 75 ea b9 21 21 21 a1 33 0f 0f bc c9 81 c1 ff 07 00 00 c1 e9 03 0f b6 c9 89 ca 09 c2 74 35 55 48 89 e5 c7 45 fc 59 58 5c 5e 8a 4c 0d fc 88 0d 15 00 00 00 f3 0f 2a c8 83 f8 02 5d 7c 1f ff c8 0f 57 c0 f3 0f 2a c0 f3 0f 5e c1 83 f8 01 0f 28 c8 7f eb c3 f3 0f 10 05 03 01 00 00 c3 0f 28 c1 c3
Perché i linguaggi interpretati sarebbero in grado di eseguire dinamicamente il codice con fantasia eval
, ma non un semplice codice macchina?
Provalo con:
#include <stdio.h>
#include <sys/mman.h>
#include <errno.h>
char ff[] = "\x0f\xb6\x0f\x31\xc0\xeb\x11\x0f\xbe\xc9\x8d\x04\x80\x8d\x44\x41\xd0\x0f\xb6\x4f\x01\x48\xff\xc7\x83\xf9\x21\x75\xea\xb9\x21\x21\x21\xa1\x33\x0f\x0f\xbc\xc9\x81\xc1\xff\x07\x00\x00\xc1\xe9\x03\x0f\xb6\xc9\x89\xca\x09\xc2\x74\x35\x55\x48\x89\xe5\xc7\x45\xfc\x59\x58\x5c\x5e\x8a\x4c\x0d\xfc\x88\x0d\x15\x00\x00\x00\xf3\x0f\x2a\xc8\x83\xf8\x02\x5d\x7c\x1f\xff\xc8\x0f\x57\xc0\xf3\x0f\x2a\xc0\xf3\x0f\x5e\xc1\x83\xf8\x01\x0f\x28\xc8\x7f\xeb\xc3\xf3\x0f\x10\x05\x03\x01\x00\x00\xc3\x0f\x28\xc1\xc3";
int main()
{
char* page = (char*)((unsigned long)((char*)ff) & (~0xfffLL));
if (mprotect(page, 0x1000, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) {
perror("mprotect");
return -1;
}
float (*f)(char*) = (float (*)(char*))ff;
char* testcases[] = { "0!","1!","2!","3!","4!","5!","6!","7!","8!","9!","10!",
"0!!","1!!","2!!","3!!","4!!","5!!","6!!","7!!","8!!","9!!","10!!",
"0!!!","1!!!","2!!!","3!!!","4!!!","5!!!","6!!!","7!!!","8!!!","9!!!","10!!!",
"0!!!!","1!!!!","2!!!!","3!!!!","4!!!!","5!!!!","6!!!!","7!!!!","8!!!!","9!!!!","10!!!!",
};
for (int i = 0; i < 44; i++) {
printf("%s -> %f\n", testcases[i], f(testcases[i]));
}
}
Montaggio:
_f:
100000d4f: 0f b6 0f movzx ecx, byte ptr [rdi]
100000d52: 31 c0 xor eax, eax
100000d54: eb 11 jmp 17 <_f+18>
100000d56: 0f be c9 movsx ecx, cl
100000d59: 8d 04 80 lea eax, [rax + 4*rax]
100000d5c: 8d 44 41 d0 lea eax, [rcx + 2*rax - 48]
100000d60: 0f b6 4f 01 movzx ecx, byte ptr [rdi + 1]
100000d64: 48 ff c7 inc rdi
100000d67: 83 f9 21 cmp ecx, 33
100000d6a: 75 ea jne -22 <_f+7>
100000d6c: b9 21 21 21 a1 mov ecx, 2703302945
100000d71: 33 0f xor ecx, dword ptr [rdi]
100000d73: 0f bc c9 bsf ecx, ecx
100000d76: 81 c1 ff 07 00 00 add ecx, 2047
100000d7c: c1 e9 03 shr ecx, 3
100000d7f: 0f b6 c9 movzx ecx, cl
100000d82: 89 ca mov edx, ecx
100000d84: 09 c2 or edx, eax
100000d86: 74 35 je 53 <_f+6E>
100000d88: 55 push rbp
100000d89: 48 89 e5 mov rbp, rsp
100000d8c: c7 45 fc 59 58 5c 5e mov dword ptr [rbp - 4], 1583110233
100000d93: 8a 4c 0d fc mov cl, byte ptr [rbp + rcx - 4]
100000d97: 88 0d 15 00 00 00 mov byte ptr [rip + 21], cl
100000d9d: f3 0f 2a c8 cvtsi2ss xmm1, eax
100000da1: 83 f8 02 cmp eax, 2
100000da4: 5d pop rbp
100000da5: 7c 1f jl 31 <_f+77>
100000da7: ff c8 dec eax
100000da9: 0f 57 c0 xorps xmm0, xmm0
100000dac: f3 0f 2a c0 cvtsi2ss xmm0, eax
100000db0: f3 0f 5e c1 divss xmm0, xmm1
100000db4: 83 f8 01 cmp eax, 1
100000db7: 0f 28 c8 movaps xmm1, xmm0
100000dba: 7f eb jg -21 <_f+58>
100000dbc: c3 ret
100000dbd: f3 0f 10 05 03 01 00 00 movss xmm0, dword ptr [rip + 259]
100000dc5: c3 ret
100000dc6: 0f 28 c1 movaps xmm0, xmm1
100000dc9: c3 ret
Le spiegazioni verranno aggiunte in seguito. L'idea di base è quella di modificare divss xmm0, xmm1
l'istruzione a 0x100000db0
e sostituirlo con un mulss
, addss
, subss
o divss
secondo operando fornito. Un piccolo trucco viene utilizzato anche per analizzare la stringa di input.
Assemblea generata con:
float f (char* s)
{
int x;
for (x=0; *s != '!'; s++) {
x=10*x + (*s-'0');
}
unsigned char op = (__builtin_ctz(*(unsigned int *)s ^ 0xa1212121)-1) >> 3;
if (x == 0 && op == 0) {
return 1;
}
unsigned int lookup = 0x5e5c5859;
unsigned char new_code = ((unsigned char*)&lookup)[op];
asm("movb %0, 0x15(%%rip)" : : "r" (new_code));
float sum;
for (sum = x--; x>0; x--) {
sum = x / sum;
}
return sum;
}