diff -rup linux-2.6.23.12/arch/i386/kernel/cpu/common.c linux-2.6.23.12-execshield/arch/i386/kernel/cpu/common.c
--- linux-2.6.23.12/arch/i386/kernel/cpu/common.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/i386/kernel/cpu/common.c	2008-01-14 14:48:18.000000000 +0200
@@ -470,6 +470,13 @@ static void __cpuinit identify_cpu(struc
 	if (disable_pse)
 		clear_bit(X86_FEATURE_PSE, c->x86_capability);
 
+	if (exec_shield != 0) {
+#ifdef CONFIG_X86_PAE
+		if (!test_bit(X86_FEATURE_NX, c->x86_capability))
+#endif
+			clear_bit(X86_FEATURE_SEP, c->x86_capability);
+	}
+
 	/* If the model name is still unset, do table lookup. */
 	if ( !c->x86_model_id[0] ) {
 		char *p;
diff -rup linux-2.6.23.12/arch/i386/kernel/process.c linux-2.6.23.12-execshield/arch/i386/kernel/process.c
--- linux-2.6.23.12/arch/i386/kernel/process.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/i386/kernel/process.c	2008-01-14 14:50:14.000000000 +0200
@@ -669,6 +669,8 @@ struct task_struct fastcall * __switch_t
 
 	__unlazy_fpu(prev_p);
 
+	if (next_p->mm)
+		load_user_cs_desc(cpu, next_p->mm);
 
 	/* we're going to use this soon, after a few expensive things */
 	if (next_p->fpu_counter > 5)
@@ -949,3 +951,59 @@ unsigned long arch_align_stack(unsigned
 		sp -= get_random_int() % 8192;
 	return sp & ~0xf;
 }
+
+void arch_add_exec_range(struct mm_struct *mm, unsigned long limit)
+{
+	if (limit > mm->context.exec_limit) {
+		mm->context.exec_limit = limit;
+		set_user_cs(&mm->context.user_cs, limit);
+		if (mm == current->mm) {
+			preempt_disable();
+			load_user_cs_desc(smp_processor_id(), mm);
+			preempt_enable();
+		}
+	}
+}
+
+void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end)
+{
+	struct vm_area_struct *vma;
+	unsigned long limit = PAGE_SIZE;
+
+	if (old_end == mm->context.exec_limit) {
+		for (vma = mm->mmap; vma; vma = vma->vm_next)
+			if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+				limit = vma->vm_end;
+
+		mm->context.exec_limit = limit;
+		set_user_cs(&mm->context.user_cs, limit);
+		if (mm == current->mm) {
+			preempt_disable();
+			load_user_cs_desc(smp_processor_id(), mm);
+			preempt_enable();
+		}
+	}
+}
+
+void arch_flush_exec_range(struct mm_struct *mm)
+{
+	mm->context.exec_limit = 0;
+	set_user_cs(&mm->context.user_cs, 0);
+}
+
+/*
+ * Generate random brk address between 128MB and 160MB. (if the layout
+ * allows it.)
+ */
+void randomize_brk(unsigned long old_brk)
+{
+	unsigned long new_brk, range_start, range_end;
+
+	range_start = 0x08000000;
+	if (current->mm->brk >= range_start)
+		range_start = current->mm->brk;
+	range_end = range_start + 0x02000000;
+	new_brk = randomize_range(range_start, range_end, 0);
+	if (new_brk)
+		current->mm->brk = new_brk;
+}
diff -rup linux-2.6.23.12/arch/i386/kernel/smp.c linux-2.6.23.12-execshield/arch/i386/kernel/smp.c
--- linux-2.6.23.12/arch/i386/kernel/smp.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/i386/kernel/smp.c	2008-01-14 14:51:17.000000000 +0200
@@ -22,6 +22,7 @@
 #include <linux/cpu.h>
 #include <linux/module.h>
 
+#include <asm/desc.h>
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
@@ -316,6 +317,9 @@ fastcall void smp_invalidate_interrupt(s
 
 	cpu = get_cpu();
 
+	if (current->active_mm)
+		load_user_cs_desc(cpu, current->active_mm);
+
 	if (!cpu_isset(cpu, flush_cpumask))
 		goto out;
 	/*
diff -rup linux-2.6.23.12/arch/i386/kernel/sysenter.c linux-2.6.23.12-execshield/arch/i386/kernel/sysenter.c
--- linux-2.6.23.12/arch/i386/kernel/sysenter.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/i386/kernel/sysenter.c	2008-01-14 14:52:28.000000000 +0200
@@ -16,6 +16,7 @@
 #include <linux/string.h>
 #include <linux/elf.h>
 #include <linux/mm.h>
+#include
 #include <linux/err.h>
 #include <linux/module.h>
@@ -283,7 +284,8 @@ int arch_setup_additional_pages(struct l
 	if (compat)
 		addr = VDSO_HIGH_BASE;
 	else {
-		addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+		addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
+
 		if (IS_ERR_VALUE(addr)) {
 			ret = addr;
 			goto up_fail;
diff -rup linux-2.6.23.12/arch/i386/kernel/traps.c linux-2.6.23.12-execshield/arch/i386/kernel/traps.c
--- linux-2.6.23.12/arch/i386/kernel/traps.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/i386/kernel/traps.c	2008-01-14 15:10:33.000000000 +0200
@@ -583,7 +583,88 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", inv
 DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
 DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
-DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+
+/*
+ * lazy-check for CS validity on exec-shield binaries:
+ *
+ * the original non-exec stack patch was written by
+ * Solar Designer <solar at openwall.com>. Thanks!
+ */
+static int
+check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)
+{
+	struct desc_struct *desc1, *desc2;
+	struct vm_area_struct *vma;
+	unsigned long limit;
+
+	if (current->mm == NULL)
+		return 0;
+
+	limit = -1UL;
+	if (current->mm->context.exec_limit != -1UL) {
+		limit = PAGE_SIZE;
+		spin_lock(&current->mm->page_table_lock);
+		for (vma = current->mm->mmap; vma; vma = vma->vm_next)
+			if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+				limit = vma->vm_end;
+		spin_unlock(&current->mm->page_table_lock);
+		if (limit >= TASK_SIZE)
+			limit = -1UL;
+		current->mm->context.exec_limit = limit;
+	}
+	set_user_cs(&current->mm->context.user_cs, limit);
+
+	desc1 = &current->mm->context.user_cs;
+	desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;
+
+	if (desc1->a != desc2->a || desc1->b != desc2->b) {
+		/*
+		 * The CS was not in sync - reload it and retry the
+		 * instruction. If the instruction still faults then
+		 * we won't hit this branch next time around.
+		 */
+		if (print_fatal_signals >= 2) {
+			printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n",
+				error_code, error_code/8, regs->eip,
+				smp_processor_id());
+			printk(KERN_ERR "exec_limit: %08lx, user_cs: %08lx/%08lx, CPU_cs: %08lx/%08lx.\n",
+				current->mm->context.exec_limit,
+				desc1->a, desc1->b, desc2->a, desc2->b);
+		}
+		load_user_cs_desc(cpu, current->mm);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * The fixup code for errors in iret jumps to here (iret_exc). It loses
+ * the original trap number and error code. The bogus trap 32 and error
+ * code 0 are what the vanilla kernel delivers via:
+ * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+ *
+ * NOTE: Because of the final "1" in the macro we need to enable interrupts.
+ *
+ * In case of a general protection fault in the iret instruction, we
+ * need to check for a lazy CS update for exec-shield.
+ */
+fastcall void do_iret_error(struct pt_regs *regs, long error_code)
+{
+	int ok;
+
+	local_irq_enable();
+	ok = check_lazy_exec_limit(get_cpu(), regs, error_code);
+	put_cpu();
+	if (!ok && notify_die(DIE_TRAP, "iret exception", regs,
+			error_code, 32, SIGSEGV) != NOTIFY_STOP) {
+		siginfo_t info;
+		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		info.si_code = ILL_BADSTK;
+		info.si_addr = 0;
+		do_trap(32, SIGSEGV, "iret exception", 0, regs, error_code,
+			&info);
+	}
+}
 
 fastcall void __kprobes do_general_protection(struct pt_regs * regs,
 					      long error_code)
@@ -591,6 +672,7 @@ fastcall void __kprobes do_general_prote
 	int cpu = get_cpu();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	struct thread_struct *thread = &current->thread;
+	int ok;
 
 	/*
 	 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
@@ -617,7 +699,6 @@ fastcall void __kprobes do_general_prote
 		put_cpu();
 		return;
 	}
-	put_cpu();
 
 	if (regs->eflags & VM_MASK)
 		goto gp_in_vm86;
@@ -625,6 +706,22 @@ fastcall void __kprobes do_general_prote
 	if (!user_mode(regs))
 		goto gp_in_kernel;
 
+	ok = check_lazy_exec_limit(cpu, regs, error_code);
+
+	put_cpu();
+
+	if (ok)
+		return;
+
+	if (print_fatal_signals) {
+		printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n",
+			error_code, error_code/8, regs->eip, smp_processor_id());
+		printk(KERN_ERR "exec_limit: %08lx, user_cs: %08lx/%08lx.\n",
+			current->mm->context.exec_limit,
+			current->mm->context.user_cs.a,
+			current->mm->context.user_cs.b);
+	}
+
 	current->thread.error_code = error_code;
 	current->thread.trap_no = 13;
 	if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
@@ -638,11 +735,13 @@ fastcall void __kprobes do_general_prote
 	return;
 
 gp_in_vm86:
+	put_cpu();
 	local_irq_enable();
 	handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
 	return;
 
 gp_in_kernel:
+	put_cpu();
 	if (!fixup_exception(regs)) {
 		current->thread.error_code = error_code;
 		current->thread.trap_no = 13;
diff -rup linux-2.6.23.12/arch/i386/mm/init.c linux-2.6.23.12-execshield/arch/i386/mm/init.c
--- linux-2.6.23.12/arch/i386/mm/init.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/i386/mm/init.c	2008-01-14 15:13:03.000000000 +0200
@@ -485,7 +485,7 @@ EXPORT_SYMBOL_GPL(__supported_pte_mask);
  * Control non executable mappings.
  *
  * on      Enable
- * off     Disable
+ * off     Disable (disables exec-shield too)
  */
 static int __init noexec_setup(char *str)
 {
@@ -497,6 +497,7 @@ static int __init noexec_setup(char *str
 	} else if (!strcmp(str,"off")) {
 		disable_nx = 1;
 		__supported_pte_mask &= ~_PAGE_NX;
+		exec_shield = 0;
 	} else
 		return -EINVAL;
 
@@ -563,7 +564,11 @@ void __init paging_init(void)
 	set_nx();
 	if (nx_enabled)
 		printk("NX (Execute Disable) protection: active\n");
+	else
 #endif
+	if (exec_shield)
+		printk(KERN_INFO "Using x86 segment limits to approximate "
+			"NX protection\n");
 
 	pagetable_init();
diff -rup linux-2.6.23.12/arch/i386/mm/mmap.c linux-2.6.23.12-execshield/arch/i386/mm/mmap.c
--- linux-2.6.23.12/arch/i386/mm/mmap.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/i386/mm/mmap.c	2008-01-14 17:19:01.000000000 +0200
@@ -63,15 +63,17 @@ void arch_pick_mmap_layout(struct mm_str
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
-	if (sysctl_legacy_va_layout ||
+	if (!(2 & exec_shield) && (sysctl_legacy_va_layout ||
 			(current->personality & ADDR_COMPAT_LAYOUT) ||
-			current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
+			current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 		mm->unmap_area = arch_unmap_area;
 	} else {
 		mm->mmap_base = mmap_base(mm);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+		if (!(current->personality & READ_IMPLIES_EXEC))
+			mm->get_unmapped_exec_area = arch_get_unmapped_exec_area;
 		mm->unmap_area = arch_unmap_area_topdown;
 	}
 }
diff -rup linux-2.6.23.12/arch/i386/mm/pageattr.c linux-2.6.23.12-execshield/arch/i386/mm/pageattr.c
--- linux-2.6.23.12/arch/i386/mm/pageattr.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/i386/mm/pageattr.c	2008-01-14 15:19:03.000000000 +0200
@@ -8,6 +8,8 @@
 #include <linux/highmem.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/kernel.h>
+#include <asm/sections.h>
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
@@ -152,6 +154,13 @@ __change_page_attr(struct page *page, pg
 	BUG_ON(PageHighMem(page));
 	address = (unsigned long)page_address(page);
 
+	if (address >= (unsigned long)__start_rodata &&
+			address <= (unsigned long)__end_rodata &&
+			(pgprot_val(prot) & _PAGE_RW)) {
+		pgprot_val(prot) &= ~(_PAGE_RW);
+		add_taint(TAINT_MACHINE_CHECK);
+	}
+
 	kpte = lookup_address(address);
 	if (!kpte)
 		return -EINVAL;
diff -rup linux-2.6.23.12/arch/ia64/ia32/binfmt_elf32.c linux-2.6.23.12-execshield/arch/ia64/ia32/binfmt_elf32.c
--- linux-2.6.23.12/arch/ia64/ia32/binfmt_elf32.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/ia64/ia32/binfmt_elf32.c	2008-01-14 16:39:20.000000000 +0200
@@ -226,7 +226,7 @@ elf32_set_personality (void)
 }
 
 static unsigned long
-elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
+elf32_map(struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)
 {
 	unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK;
diff -rup linux-2.6.23.12/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.23.12-execshield/arch/x86_64/ia32/ia32_binfmt.c
--- linux-2.6.23.12/arch/x86_64/ia32/ia32_binfmt.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/x86_64/ia32/ia32_binfmt.c	2008-01-14 15:24:52.000000000 +0200
@@ -206,7 +206,6 @@ elf_core_copy_task_xfpregs(struct task_s
 #define elf_check_arch(x) \
 	((x)->e_machine == EM_386)
 
-extern int force_personality32;
 #define ELF_EXEC_PAGESIZE PAGE_SIZE
 #define ELF_HWCAP (boot_cpu_data.x86_capability[0])
 
@@ -221,8 +220,7 @@ do {							\
 		set_thread_flag(TIF_ABI_PENDING);		\
 	else							\
 		clear_thread_flag(TIF_ABI_PENDING);		\
-	/* XXX This overwrites the user set personality */	\
-	current->personality |= force_personality32;		\
+								\
 } while (0)
 
 /* Override some function names */
diff -rup linux-2.6.23.12/arch/x86_64/kernel/process.c linux-2.6.23.12-execshield/arch/x86_64/kernel/process.c
--- linux-2.6.23.12/arch/x86_64/kernel/process.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/x86_64/kernel/process.c	2008-01-14 15:25:29.000000000 +0200
@@ -715,11 +715,6 @@ void set_personality_64bit(void)
 	/* Make sure to be in 64bit mode */
 	clear_thread_flag(TIF_IA32);
 
-	/* TBD: overwrites user setup. Should have two bits.
-	   But 64bit processes have always behaved this way,
-	   so it's not too bad. The main problem is just that
-	   32bit childs are affected again. */
-	current->personality &= ~READ_IMPLIES_EXEC;
 }
 
 asmlinkage long sys_fork(struct pt_regs *regs)
diff -rup linux-2.6.23.12/arch/x86_64/kernel/setup64.c linux-2.6.23.12-execshield/arch/x86_64/kernel/setup64.c
--- linux-2.6.23.12/arch/x86_64/kernel/setup64.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/x86_64/kernel/setup64.c	2008-01-14 15:26:38.000000000 +0200
@@ -39,46 +39,6 @@ char boot_cpu_stack[IRQSTACKSIZE] __attr
 unsigned long __supported_pte_mask __read_mostly = ~0UL;
 static int do_not_nx __cpuinitdata = 0;
 
-/* noexec=on|off
-Control non executable mappings for 64bit processes.
-
-on	Enable(default)
-off	Disable
-*/
-static int __init nonx_setup(char *str)
-{
-	if (!str)
-		return -EINVAL;
-	if (!strncmp(str, "on", 2)) {
-		__supported_pte_mask |= _PAGE_NX;
-		do_not_nx = 0;
-	} else if (!strncmp(str, "off", 3)) {
-		do_not_nx = 1;
-		__supported_pte_mask &= ~_PAGE_NX;
-	}
-	return 0;
-}
-early_param("noexec", nonx_setup);
-
-int force_personality32 = 0;
-
-/* noexec32=on|off
-Control non executable heap for 32bit processes.
-To control the stack too use noexec=off
-
-on	PROT_READ does not imply PROT_EXEC for 32bit processes
-off	PROT_READ implies PROT_EXEC (default)
-*/
-static int __init nonx32_setup(char *str)
-{
-	if (!strcmp(str, "on"))
-		force_personality32 &= ~READ_IMPLIES_EXEC;
-	else if (!strcmp(str, "off"))
-		force_personality32 |= READ_IMPLIES_EXEC;
-	return 1;
-}
-__setup("noexec32=", nonx32_setup);
-
 /*
  * Great future plan:
  * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
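
[Note on the i386 mechanism above; illustration only, not part of the patch.] Exec Shield approximates NX on plain i386 by shrinking the limit of the per-mm user code segment down to the highest PROT_EXEC mapping (mm->context.exec_limit). An instruction fetch above that limit raises #GP, and do_general_protection() either reloads a stale descriptor via check_lazy_exec_limit() or delivers SIGSEGV. The descriptor arithmetic is the two-word set_user_cs() helper from the include/asm-i386/desc.h hunk further down. The following stand-alone C sketch reproduces just that arithmetic (PAGE_SIZE = 4096 is an assumption matching i386 with 4 kB pages; it only prints the words, it never touches a GDT):

#include <stdio.h>

#define PAGE_SIZE 4096UL

struct desc_struct {
	unsigned long a, b;
};

/* Mirrors set_user_cs() from the patch: convert the byte limit to
 * 4 kB-granular pages, then scatter the 20 limit bits into the two
 * descriptor words. */
static void set_user_cs(struct desc_struct *desc, unsigned long limit)
{
	limit = (limit - 1) / PAGE_SIZE;
	desc->a = limit & 0xffff;                  /* limit bits 15..0  */
	desc->b = (limit & 0xf0000) | 0x00c0fb00;  /* limit bits 19..16 */
}

int main(void)
{
	struct desc_struct cs;
	unsigned long limits[] = { PAGE_SIZE, 0x01000000UL, 0xc0000000UL, -1UL };
	int i;

	for (i = 0; i < 4; i++) {
		set_user_cs(&cs, limits[i]);
		printf("exec_limit %08lx -> descriptor %08lx:%08lx\n",
		       limits[i], cs.b, cs.a);
	}
	return 0;
}

The fixed 0x00c0fb00 constant encodes granularity = 4 kB, 32-bit default size, DPL 3 and a present read/execute code type, so only the limit bits vary from mm to mm; the "limit - 1" accounts for segment limits being inclusive.
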
diff -rup linux-2.6.23.12/arch/x86_64/mm/fault.c linux-2.6.23.12-execshield/arch/x86_64/mm/fault.c
--- linux-2.6.23.12/arch/x86_64/mm/fault.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/x86_64/mm/fault.c	2008-01-14 15:27:29.000000000 +0200
@@ -87,7 +87,7 @@ static noinline int is_prefetch(struct p
 	instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
 	max_instr = instr + 15;
 
-	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE64)
 		return 0;
 
 	while (scan_more && instr < max_instr) {
diff -rup linux-2.6.23.12/arch/x86_64/mm/mmap.c linux-2.6.23.12-execshield/arch/x86_64/mm/mmap.c
--- linux-2.6.23.12/arch/x86_64/mm/mmap.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/arch/x86_64/mm/mmap.c	2008-01-14 15:30:50.000000000 +0200
@@ -1,29 +1,91 @@
-/* Copyright 2005 Andi Kleen, SuSE Labs.
- * Licensed under GPL, v.2
+/*
+ *  linux/arch/x86-64/mm/mmap.c
+ *
+ *  flexible mmap layout support
+ *
+ * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *
+ * Started by Ingo Molnar <mingo at elte.hu>
  */
+
+#include <linux/personality.h>
 #include <linux/mm.h>
 #include <linux/random.h>
 #include <linux/sched.h>
-#include <asm/ia32.h>
-/* Notebook: move the mmap code from sys_x86_64.c over here. */
+
+/*
+ * Top of mmap area (just below the process stack).
+ *
+ * Leave an at least ~128 MB hole.
+ */
+#define MIN_GAP (128*1024*1024)
+#define MAX_GAP (TASK_SIZE/6*5)
+
+static inline unsigned long mmap_base(void)
+{
+	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+
+	if (gap < MIN_GAP)
+		gap = MIN_GAP;
+	else if (gap > MAX_GAP)
+		gap = MAX_GAP;
+
+	return TASK_SIZE - (gap & PAGE_MASK);
+}
+
+static inline int mmap_is_legacy(void)
+{
+	/*
+	 * Force standard allocation for 64 bit programs.
+	 */
+	if (!test_thread_flag(TIF_IA32))
+		return 1;
+
+	if (current->personality & ADDR_COMPAT_LAYOUT)
+		return 1;
+
+	if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)
+		return 1;
+
+	return sysctl_legacy_va_layout;
+}
+
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
-#ifdef CONFIG_IA32_EMULATION
-	if (current_thread_info()->flags & _TIF_IA32)
-		return ia32_pick_mmap_layout(mm);
-#endif
-	mm->mmap_base = TASK_UNMAPPED_BASE;
-	if (current->flags & PF_RANDOMIZE) {
-		/* Add 28bit randomness which is about 40bits of address space
-		   because mmap base has to be page aligned.
-		   or ~1/128 of the total user VM
-		   (total user address space is 47bits) */
-		unsigned rnd = get_random_int() & 0xfffffff;
-		mm->mmap_base += ((unsigned long)rnd) << PAGE_SHIFT;
+	/*
+	 * Fall back to the standard layout if the personality
+	 * bit is set, or if the expected stack growth is unlimited:
+	 */
+	if (mmap_is_legacy()) {
+		mm->mmap_base = TASK_UNMAPPED_BASE;
+		mm->get_unmapped_area = arch_get_unmapped_area;
+		mm->unmap_area = arch_unmap_area;
+	} else {
+		mm->mmap_base = mmap_base();
+		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+		mm->unmap_area = arch_unmap_area_topdown;
 	}
-	mm->get_unmapped_area = arch_get_unmapped_area;
-	mm->unmap_area = arch_unmap_area;
 }
diff -rup linux-2.6.23.12/drivers/char/random.c linux-2.6.23.12-execshield/drivers/char/random.c
--- linux-2.6.23.12/drivers/char/random.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/drivers/char/random.c	2008-01-14 15:32:27.000000000 +0200
@@ -1639,13 +1639,19 @@ EXPORT_SYMBOL(secure_dccp_sequence_numbe
  */
 unsigned int get_random_int(void)
 {
+	unsigned int val = 0;
+
+#ifdef CONFIG_X86_TSC
+	rdtscl(val);
+#endif
+
 	/*
 	 * Use IP's RNG. It suits our purpose perfectly: it re-keys itself
 	 * every second, from the entropy pool (and thus creates a limited
 	 * drain on it), and uses halfMD4Transform within the second. We
 	 * also mix it with jiffies and the PID:
 	 */
-	return secure_ip_id((__force __be32)(current->pid + jiffies));
+	return secure_ip_id((__force __be32)(current->pid + jiffies + (int)val));
 }
 
 /*
diff -rup linux-2.6.23.12/fs/binfmt_elf.c linux-2.6.23.12-execshield/fs/binfmt_elf.c
--- linux-2.6.23.12/fs/binfmt_elf.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/fs/binfmt_elf.c	2008-01-14 16:50:13.000000000 +0200
@@ -45,7 +45,7 @@
 
 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
 static int load_elf_library(struct file *);
-static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
+static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);
 
 /*
  * If we don't support core dumping, then supply a NULL so we
@@ -80,7 +80,7 @@ static struct linux_binfmt elf_format = 
 	.hasvdso	= 1
 };
 
-#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
+#define BAD_ADDR(x) IS_ERR_VALUE(x)
 
 static int set_brk(unsigned long start, unsigned long end)
 {
@@ -295,33 +295,70 @@ create_elf_tables(struct linux_binprm *b
 #ifndef elf_map
 
 static unsigned long elf_map(struct file *filep, unsigned long addr,
-		struct elf_phdr *eppnt, int prot, int type)
+		struct elf_phdr *eppnt, int prot, int type,
+		unsigned long total_size)
 {
 	unsigned long map_addr;
-	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
+	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
+	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
+
+	addr = ELF_PAGESTART(addr);
+	size = ELF_PAGEALIGN(size);
 
-	down_write(&current->mm->mmap_sem);
 	/* mmap() will return -EINVAL if given a zero size, but a
 	 * segment with zero filesize is perfectly valid */
-	if (eppnt->p_filesz + pageoffset)
-		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
-				   eppnt->p_filesz + pageoffset, prot, type,
-				   eppnt->p_offset - pageoffset);
-	else
-		map_addr = ELF_PAGESTART(addr);
+	if (!size)
+		return addr;
+
+	down_write(&current->mm->mmap_sem);
+	/*
+	 * total_size is the size of the ELF (interpreter) image.
+	 * The _first_ mmap needs to know the full size, otherwise
+	 * randomization might put this image into an overlapping
+	 * position with the ELF binary image. (since size < total_size)
+	 * So we first map the 'big' image - and unmap the remainder at
+	 * the end. (which unmap is needed for ELF images with holes.)
+	 */
+	if (total_size) {
+		total_size = ELF_PAGEALIGN(total_size);
+		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
+		if (!BAD_ADDR(map_addr))
+			do_munmap(current->mm, map_addr+size, total_size-size);
+	} else
+		map_addr = do_mmap(filep, addr, size, prot, type, off);
+
 	up_write(&current->mm->mmap_sem);
 	return(map_addr);
 }
 
 #endif /* !elf_map */
 
+static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
+{
+	int i, first_idx = -1, last_idx = -1;
+
+	for (i = 0; i < nr; i++) {
+		if (cmds[i].p_type == PT_LOAD) {
+			last_idx = i;
+			if (first_idx == -1)
+				first_idx = i;
+		}
+	}
+	if (first_idx == -1)
+		return 0;
+
+	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
+				ELF_PAGESTART(cmds[first_idx].p_vaddr);
+}
+
+
 /* This is much more generalized than the library routine read function,
    so we keep this separate.  Technically the library read function
    is only provided so that we can read a.out libraries that have
    an ELF header */
 
 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
-		struct file *interpreter, unsigned long *interp_load_addr)
+		struct file *interpreter, unsigned long *interp_map_addr,
+		unsigned long no_base)
 {
 	struct elf_phdr *elf_phdata;
 	struct elf_phdr *eppnt;
@@ -329,6 +366,7 @@ static unsigned long load_elf_interp(str
 	int load_addr_set = 0;
 	unsigned long last_bss = 0, elf_bss = 0;
 	unsigned long error = ~0UL;
+	unsigned long total_size;
 	int retval, i, size;
 
 	/* First of all, some simple consistency checks */
@@ -367,6 +405,12 @@ static unsigned long load_elf_interp(str
 		goto out_close;
 	}
 
+	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
+	if (!total_size) {
+		error = -EINVAL;
+		goto out_close;
+	}
+
 	eppnt = elf_phdata;
 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
 		if (eppnt->p_type == PT_LOAD) {
@@ -384,9 +428,14 @@ static unsigned long load_elf_interp(str
 			vaddr = eppnt->p_vaddr;
 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 				elf_type |= MAP_FIXED;
+			else if (no_base && interp_elf_ex->e_type == ET_DYN)
+				load_addr = -vaddr;
 
 			map_addr = elf_map(interpreter, load_addr + vaddr,
-					   eppnt, elf_prot, elf_type);
+					   eppnt, elf_prot, elf_type, total_size);
+			total_size = 0;
+			if (!*interp_map_addr)
+				*interp_map_addr = map_addr;
 			error = map_addr;
 			if (BAD_ADDR(map_addr))
 				goto out_close;
@@ -452,8 +501,7 @@ static unsigned long load_elf_interp(str
 		goto out_close;
 	}
 
-	*interp_load_addr = load_addr;
-	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
+	error = load_addr;
 
 out_close:
 	kfree(elf_phdata);
@@ -550,12 +598,13 @@ static int load_elf_binary(struct linux_
 	int elf_exec_fileno;
 	int retval, i;
 	unsigned int size;
-	unsigned long elf_entry, interp_load_addr = 0;
+	unsigned long elf_entry;
+	unsigned long interp_load_addr = 0;
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long reloc_func_desc = 0;
 	char passed_fileno[6];
 	struct files_struct *files;
-	int executable_stack = EXSTACK_DEFAULT;
+	int executable_stack;
 	unsigned long def_flags = 0;
 	struct {
 		struct elfhdr elf_ex;
@@ -622,6 +671,7 @@ static int load_elf_binary(struct linux_
 	fd_install(elf_exec_fileno = retval, bprm->file);
 
 	elf_ppnt = elf_phdata;
+	executable_stack = EXSTACK_DEFAULT;
 	elf_bss = 0;
 	elf_brk = 0;
 
@@ -728,6 +778,11 @@ static int load_elf_binary(struct linux_
 			break;
 		}
 
+	if (current->personality == PER_LINUX && (exec_shield & 2)) {
+		executable_stack = EXSTACK_DISABLE_X;
+		current->flags |= PF_RANDOMIZE;
+	}
+
 	/* Some simple consistency checks for the interpreter */
 	if (elf_interpreter) {
 		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
@@ -780,6 +835,15 @@ static int load_elf_binary(struct linux_
 	if (retval)
 		goto out_free_dentry;
 
+#ifdef CONFIG_X86_32
+	/*
+	 * Turn off the CS limit completely if exec-shield disabled or
+	 * NX active:
+	 */
+	if (!exec_shield || executable_stack != EXSTACK_DISABLE_X || nx_enabled)
+		arch_add_exec_range(current->mm, -1);
+#endif
+
 	/* Discard our unneeded old files struct */
 	if (files) {
 		put_files_struct(files);
@@ -793,7 +857,8 @@ static int load_elf_binary(struct linux_
 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
 	   may depend on the personality.  */
 	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
-	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
+	if (!(exec_shield & 2) &&
+			elf_read_implies_exec(loc->elf_ex, executable_stack))
 		current->personality |= READ_IMPLIES_EXEC;
 
 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
@@ -814,9 +879,8 @@ static int load_elf_binary(struct linux_
 	current->mm->start_stack = bprm->p;
 
 	/* Now we do a little grungy work by mmaping the ELF image into
-	   the correct location in memory.  At this point, we assume that
-	   the image should be loaded at fixed address, not at a variable
-	   address. */
+	   the correct location in memory. */
+
 	for(i = 0, elf_ppnt = elf_phdata;
 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 		int elf_prot = 0, elf_flags;
@@ -870,11 +934,15 @@ static int load_elf_binary(struct linux_
 			 * default mmap base, as well as whatever program they
 			 * might try to exec.  This is because the brk will
 			 * follow the loader, and is not movable.  */
+#ifdef CONFIG_X86
+			load_bias = 0;
+#else
 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
+#endif
 		}
 
 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
-				elf_prot, elf_flags);
+				elf_prot, elf_flags, 0);
 		if (BAD_ADDR(error)) {
 			send_sig(SIGKILL, current, 0);
 			retval = IS_ERR((void *)error) ?
@@ -950,13 +1018,24 @@ static int load_elf_binary(struct linux_
 	}
 
 	if (elf_interpreter) {
-		if (interpreter_type == INTERPRETER_AOUT)
+		if (interpreter_type == INTERPRETER_AOUT) {
 			elf_entry = load_aout_interp(&loc->interp_ex,
 						     interpreter);
-		else
+		} else {
+			unsigned long uninitialized_var(interp_map_addr);
+
 			elf_entry = load_elf_interp(&loc->interp_elf_ex,
 						    interpreter,
-						    &interp_load_addr);
+						    &interp_map_addr,
+						    load_bias);
+			if (!BAD_ADDR(elf_entry)) {
+				/*
+				 * load_elf_interp() returns relocation
+				 * adjustment
+				 */
+				interp_load_addr = elf_entry;
+				elf_entry += loc->interp_elf_ex.e_entry;
+			}
+		}
 		if (BAD_ADDR(elf_entry)) {
 			force_sig(SIGSEGV, current);
 			retval = IS_ERR((void *)elf_entry) ?
@@ -977,7 +1056,6 @@ static int load_elf_binary(struct linux_
 		}
 	}
 
-	kfree(elf_phdata);
 
 	if (interpreter_type != INTERPRETER_AOUT)
 		sys_close(elf_exec_fileno);
@@ -988,10 +1066,12 @@ static int load_elf_binary(struct linux_
 	retval = arch_setup_additional_pages(bprm, executable_stack);
 	if (retval < 0) {
 		send_sig(SIGKILL, current, 0);
-		goto out;
+		goto out_free_fh;
 	}
 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 
+	kfree(elf_phdata);
+
 	compute_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
 	retval = create_elf_tables(bprm, &loc->elf_ex,
@@ -1010,6 +1090,11 @@ static int load_elf_binary(struct linux_
 	current->mm->end_data = end_data;
 	current->mm->start_stack = bprm->p;
 
+#ifdef __HAVE_ARCH_RANDOMIZE_BRK
+	if (current->flags & PF_RANDOMIZE)
+		randomize_brk(elf_brk);
+#endif
+
 	if (current->personality & MMAP_PAGE_ZERO) {
 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
 		   and some applications "depend" upon this behavior.
diff -rup linux-2.6.23.12/fs/proc/array.c linux-2.6.23.12-execshield/fs/proc/array.c
--- linux-2.6.23.12/fs/proc/array.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/fs/proc/array.c	2008-01-14 15:40:49.000000000 +0200
@@ -449,8 +449,12 @@ static int do_task_stat(struct task_stru
 	}
 	rcu_read_unlock();
 
-	if (!whole || num_threads < 2)
-		wchan = get_wchan(task);
+	if (!whole || num_threads < 2) {
+		wchan = 0;
+		if (current->uid == task->uid || current->euid == task->uid ||
+				capable(CAP_SYS_NICE))
+			wchan = get_wchan(task);
+	}
 	if (!whole) {
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
diff -rup linux-2.6.23.12/fs/proc/base.c linux-2.6.23.12-execshield/fs/proc/base.c
--- linux-2.6.23.12/fs/proc/base.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/fs/proc/base.c	2008-01-14 15:44:21.000000000 +0200
@@ -2057,7 +2057,7 @@ static const struct pid_entry tgid_base_
 	INF("cmdline",    S_IRUGO, pid_cmdline),
 	INF("stat",       S_IRUGO, tgid_stat),
 	INF("statm",      S_IRUGO, pid_statm),
-	REG("maps",       S_IRUGO, maps),
+	REG("maps",       S_IRUSR, maps),
 #ifdef CONFIG_NUMA
 	REG("numa_maps",  S_IRUGO, numa_maps),
 #endif
@@ -2069,7 +2069,7 @@ static const struct pid_entry tgid_base_
 	REG("mountstats", S_IRUSR, mountstats),
 #ifdef CONFIG_MMU
 	REG("clear_refs", S_IWUSR, clear_refs),
-	REG("smaps",      S_IRUGO, smaps),
+	REG("smaps",      S_IRUSR, smaps),
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr",       S_IRUGO|S_IXUGO, attr_dir),
@@ -2344,7 +2344,7 @@ static const struct pid_entry tid_base_s
 	INF("cmdline",   S_IRUGO, pid_cmdline),
 	INF("stat",      S_IRUGO, tid_stat),
 	INF("statm",     S_IRUGO, pid_statm),
-	REG("maps",      S_IRUGO, maps),
+	REG("maps",      S_IRUSR, maps),
#ifdef CONFIG_NUMA
 	REG("numa_maps", S_IRUGO, numa_maps),
 #endif
@@ -2355,7 +2355,7 @@ static const struct pid_entry tid_base_s
 	REG("mounts",    S_IRUGO, mounts),
 #ifdef CONFIG_MMU
 	REG("clear_refs", S_IWUSR, clear_refs),
-	REG("smaps",     S_IRUGO, smaps),
+	REG("smaps",     S_IRUSR, smaps),
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr",      S_IRUGO|S_IXUGO, attr_dir),
diff -rup linux-2.6.23.12/fs/proc/task_mmu.c linux-2.6.23.12-execshield/fs/proc/task_mmu.c
--- linux-2.6.23.12/fs/proc/task_mmu.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/fs/proc/task_mmu.c	2008-01-14 15:46:32.000000000 +0200
@@ -44,7 +44,11 @@ char *task_mem(struct mm_struct *mm, cha
 		"VmStk:\t%8lu kB\n"
 		"VmExe:\t%8lu kB\n"
 		"VmLib:\t%8lu kB\n"
-		"VmPTE:\t%8lu kB\n",
+		"VmPTE:\t%8lu kB\n"
+		"StaBrk:\t%08lx\n"
+		"Brk:\t%08lx\n"
+		"StaStk:\t%08lx\n"
+		,
 		hiwater_vm << (PAGE_SHIFT-10),
 		(total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
 		mm->locked_vm << (PAGE_SHIFT-10),
@@ -52,7 +56,13 @@ char *task_mem(struct mm_struct *mm, cha
 		total_rss << (PAGE_SHIFT-10),
 		data << (PAGE_SHIFT-10),
 		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
-		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
+		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10,
+		mm->start_brk, mm->brk, mm->start_stack);
+#ifdef CONFIG_X86_32
+	if (!nx_enabled)
+		buffer += sprintf(buffer,
+			"ExecLim:\t%08lx\n", mm->context.exec_limit);
+#endif
 	return buffer;
 }
 
@@ -135,6 +145,9 @@ static int show_map_internal(struct seq_
 {
 	struct proc_maps_private *priv = m->private;
 	struct task_struct *task = priv->task;
+#ifdef CONFIG_X86_32
+	struct mm_struct *tmm = get_task_mm(task);
+#endif
 	struct vm_area_struct *vma = v;
 	struct mm_struct *mm = vma->vm_mm;
 	struct file *file = vma->vm_file;
@@ -157,10 +170,20 @@ static int show_map_internal(struct seq_
 			vma->vm_end,
 			flags & VM_READ ? 'r' : '-',
 			flags & VM_WRITE ? 'w' : '-',
-			flags & VM_EXEC ? 'x' : '-',
+			(flags & VM_EXEC
+#ifdef CONFIG_X86_32
+				|| (!nx_enabled && tmm &&
+				(vma->vm_start < tmm->context.exec_limit))
+#endif
+			)
+				? 'x' : '-',
 			flags & VM_MAYSHARE ? 's' : 'p',
 			vma->vm_pgoff << PAGE_SHIFT,
 			MAJOR(dev), MINOR(dev), ino, &len);
+#ifdef CONFIG_X86_32
+	if (tmm)
+		mmput(tmm);
+#endif
 
 	/*
 	 * Print the dentry name for named mappings, and a
diff -rup linux-2.6.23.12/include/asm-i386/a.out.h linux-2.6.23.12-execshield/include/asm-i386/a.out.h
--- linux-2.6.23.12/include/asm-i386/a.out.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/asm-i386/a.out.h	2008-01-14 15:47:31.000000000 +0200
@@ -19,7 +19,7 @@ struct exec
 
 #ifdef __KERNEL__
 
-#define STACK_TOP	TASK_SIZE
+#define STACK_TOP	(TASK_SIZE - PAGE_SIZE)	/* 1 page for vdso */
 #define STACK_TOP_MAX	STACK_TOP
 
 #endif
diff -rup linux-2.6.23.12/include/asm-i386/desc.h linux-2.6.23.12-execshield/include/asm-i386/desc.h
--- linux-2.6.23.12/include/asm-i386/desc.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/asm-i386/desc.h	2008-01-14 15:49:08.000000000 +0200
@@ -216,6 +216,20 @@ static inline unsigned long get_desc_bas
 	return base;
 }
 
+static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
+{
+	limit = (limit - 1) / PAGE_SIZE;
+	desc->a = limit & 0xffff;
+	desc->b = (limit & 0xf0000) | 0x00c0fb00;
+}
+
+#define load_user_cs_desc(cpu, mm) \
+	get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs
+
+extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_flush_exec_range(struct mm_struct *mm);
+
 #else /* __ASSEMBLY__ */
 
 /*
diff -rup linux-2.6.23.12/include/asm-i386/elf.h linux-2.6.23.12-execshield/include/asm-i386/elf.h
--- linux-2.6.23.12/include/asm-i386/elf.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/asm-i386/elf.h	2008-01-14 15:50:46.000000000 +0200
@@ -150,6 +150,12 @@ struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
                                        int executable_stack);
 
+#ifdef CONFIG_X86_32
+#define __HAVE_ARCH_RANDOMIZE_BRK
+extern void randomize_brk(unsigned long old_brk);
+#endif
+
+
 extern unsigned int vdso_enabled;
 
 #define ARCH_DLINFO \
diff -rup linux-2.6.23.12/include/asm-i386/mmu.h linux-2.6.23.12-execshield/include/asm-i386/mmu.h
--- linux-2.6.23.12/include/asm-i386/mmu.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/asm-i386/mmu.h	2008-01-14 15:52:38.000000000 +0200
@@ -7,12 +7,18 @@
  * we put the segment information here.
  *
  * cpu_vm_mask is used to optimize ldt flushing.
+ * exec_limit is used to track the range PROT_EXEC
+ * mappings span.
  */
 typedef struct {
 	int size;
 	struct semaphore sem;
 	void *ldt;
 	void *vdso;
+#ifdef CONFIG_X86_32
+	struct desc_struct user_cs;
+	unsigned long exec_limit;
+#endif
 } mm_context_t;
 
 #endif
diff -rup linux-2.6.23.12/include/asm-i386/processor.h linux-2.6.23.12-execshield/include/asm-i386/processor.h
--- linux-2.6.23.12/include/asm-i386/processor.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/asm-i386/processor.h	2008-01-14 16:57:05.000000000 +0200
@@ -212,7 +212,10 @@ extern int bootloader_type;
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 3))
+#define TASK_UNMAPPED_BASE	PAGE_ALIGN(TASK_SIZE/3)
+
+#define __HAVE_ARCH_ALIGN_STACK
+extern unsigned long arch_align_stack(unsigned long sp);
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
@@ -399,6 +402,9 @@ struct thread_struct {
 	regs->xcs = __USER_CS;					\
 	regs->eip = new_eip;					\
 	regs->esp = new_esp;					\
+	preempt_disable();					\
+	load_user_cs_desc(smp_processor_id(), current->mm);	\
+	preempt_enable();					\
 } while (0)
 
 /* Forward declaration, a strange C thing */
diff -rup linux-2.6.23.12/include/asm-ia64/pgalloc.h linux-2.6.23.12-execshield/include/asm-ia64/pgalloc.h
--- linux-2.6.23.12/include/asm-ia64/pgalloc.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/asm-ia64/pgalloc.h	2008-01-14 15:55:58.000000000 +0200
@@ -1,6 +1,10 @@
 #ifndef _ASM_IA64_PGALLOC_H
 #define _ASM_IA64_PGALLOC_H
 
+#define arch_add_exec_range(mm, limit)		do { ; } while (0)
+#define arch_flush_exec_range(mm)		do { ; } while (0)
+#define arch_remove_exec_range(mm, limit)	do { ; } while (0)
+
 /*
  * This file contains the functions and defines necessary to allocate
  * page tables.
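
[Illustration only, not part of the patch.] The pgalloc.h stubs that follow turn the exec-limit hooks into no-ops on architectures that either have real page-level NX or simply no exec-shield support; only i386 implements them for real. The toy program below models what arch_remove_exec_range() in the arch/i386/kernel/process.c hunk above actually does: when the topmost executable mapping disappears, rescan the mapping list for the next-highest VM_EXEC end, never dropping below one page. The struct vma here is a hypothetical stand-in for the kernel's vm_area_struct:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define VM_EXEC   0x4UL	/* same bit value as the kernel flag */

struct vma {
	unsigned long start, end, flags;
	struct vma *next;
};

/* Highest end of any remaining PROT_EXEC mapping, at least one page;
 * mirrors the scan loop in arch_remove_exec_range(). */
static unsigned long recompute_exec_limit(const struct vma *mmap)
{
	unsigned long limit = PAGE_SIZE;
	const struct vma *v;

	for (v = mmap; v; v = v->next)
		if ((v->flags & VM_EXEC) && v->end > limit)
			limit = v->end;
	return limit;
}

int main(void)
{
	struct vma text  = { 0x08048000, 0x08050000, VM_EXEC, NULL };
	struct vma lib   = { 0x00110000, 0x00120000, VM_EXEC, &text };
	struct vma stack = { 0xbf000000, 0xc0000000, 0,       &lib };

	/* With the main binary's text mapped, CS reaches its end: */
	printf("exec_limit = %08lx\n", recompute_exec_limit(&stack));

	/* Dropping the text segment pulls the limit back to the library: */
	text.flags = 0;
	printf("exec_limit = %08lx\n", recompute_exec_limit(&stack));
	return 0;
}

Keeping the limit as low as possible is the point of the whole exercise: everything above it, including the stack, stops being fetchable as code on CPUs without NX.
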
diff -rup linux-2.6.23.12/include/asm-powerpc/pgalloc.h linux-2.6.23.12-execshield/include/asm-powerpc/pgalloc.h
--- linux-2.6.23.12/include/asm-powerpc/pgalloc.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/asm-powerpc/pgalloc.h	2008-01-14 15:56:32.000000000 +0200
@@ -2,6 +2,11 @@
 #define _ASM_POWERPC_PGALLOC_H
 #ifdef __KERNEL__
 
+/* Dummy functions since we don't support execshield on ppc */
+#define arch_add_exec_range(mm, limit)		do { ; } while (0)
+#define arch_flush_exec_range(mm)		do { ; } while (0)
+#define arch_remove_exec_range(mm, limit)	do { ; } while (0)
+
 #ifdef CONFIG_PPC64
 #include
 #else
diff -rup linux-2.6.23.12/include/asm-ppc/pgalloc.h linux-2.6.23.12-execshield/include/asm-ppc/pgalloc.h
--- linux-2.6.23.12/include/asm-ppc/pgalloc.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/asm-ppc/pgalloc.h	2008-01-14 15:57:06.000000000 +0200
@@ -39,5 +39,9 @@ extern void pte_free(struct page *pte);
 
 #define check_pgt_cache()	do { } while (0)
 
+#define arch_add_exec_range(mm, limit)		do { ; } while (0)
+#define arch_flush_exec_range(mm)		do { ; } while (0)
+#define arch_remove_exec_range(mm, limit)	do { ; } while (0)
+
 #endif /* _PPC_PGALLOC_H */
 #endif /* __KERNEL__ */
diff -rup linux-2.6.23.12/include/asm-s390/pgalloc.h linux-2.6.23.12-execshield/include/asm-s390/pgalloc.h
--- linux-2.6.23.12/include/asm-s390/pgalloc.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/asm-s390/pgalloc.h	2008-01-14 15:57:36.000000000 +0200
@@ -17,6 +17,10 @@
 #include <linux/gfp.h>
 #include <linux/mm.h>
 
+#define arch_add_exec_range(mm, limit)		do { ; } while (0)
+#define arch_flush_exec_range(mm)		do { ; } while (0)
+#define arch_remove_exec_range(mm, limit)	do { ; } while (0)
+
 #define check_pgt_cache()	do {} while (0)
 
 /*
diff -rup linux-2.6.23.12/include/asm-sparc/pgalloc.h linux-2.6.23.12-execshield/include/asm-sparc/pgalloc.h
--- linux-2.6.23.12/include/asm-sparc/pgalloc.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/asm-sparc/pgalloc.h	2008-01-14 15:58:20.000000000 +0200
@@ -65,4 +65,8 @@ BTFIXUPDEF_CALL(void, pte_free, struct p
 #define pte_free(pte)		BTFIXUP_CALL(pte_free)(pte)
 #define __pte_free_tlb(tlb, pte)	pte_free(pte)
 
+#define arch_add_exec_range(mm, limit)		do { ; } while (0)
+#define arch_flush_exec_range(mm)		do { ; } while (0)
+#define arch_remove_exec_range(mm, limit)	do { ; } while (0)
+
 #endif /* _SPARC_PGALLOC_H */
diff -rup linux-2.6.23.12/include/asm-sparc64/pgalloc.h linux-2.6.23.12-execshield/include/asm-sparc64/pgalloc.h
--- linux-2.6.23.12/include/asm-sparc64/pgalloc.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/asm-sparc64/pgalloc.h	2008-01-14 15:58:58.000000000 +0200
@@ -70,4 +70,8 @@ static inline void check_pgt_cache(void)
 	quicklist_trim(0, NULL, 25, 16);
 }
 
+#define arch_add_exec_range(mm, limit)		do { ; } while (0)
+#define arch_flush_exec_range(mm)		do { ; } while (0)
+#define arch_remove_exec_range(mm, limit)	do { ; } while (0)
+
 #endif /* _SPARC64_PGALLOC_H */
diff -rup linux-2.6.23.12/include/asm-x86_64/pgalloc.h linux-2.6.23.12-execshield/include/asm-x86_64/pgalloc.h
--- linux-2.6.23.12/include/asm-x86_64/pgalloc.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/asm-x86_64/pgalloc.h	2008-01-14 15:59:20.000000000 +0200
@@ -5,6 +5,13 @@
 #include <linux/threads.h>
 #include <linux/mm.h>
 
+#define arch_add_exec_range(mm, limit) \
+		do { (void)(mm), (void)(limit); } while (0)
+#define arch_flush_exec_range(mm) \
+		do { (void)(mm); } while (0)
+#define arch_remove_exec_range(mm, limit) \
+		do { (void)(mm), (void)(limit); } while (0)
+
 #define pmd_populate_kernel(mm, pmd, pte) \
 		set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
 #define pud_populate(mm, pud, pmd) \
diff -rup linux-2.6.23.12/include/linux/mm.h linux-2.6.23.12-execshield/include/linux/mm.h
--- linux-2.6.23.12/include/linux/mm.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/linux/mm.h	2008-01-14 16:00:34.000000000 +0200
@@ -1064,7 +1064,13 @@ extern int install_special_mapping(struc
 				   unsigned long addr, unsigned long len,
 				   unsigned long flags, struct page **pages);
 
-extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+extern unsigned long get_unmapped_area_prot(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, int);
+
+static inline unsigned long get_unmapped_area(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	return get_unmapped_area_prot(file, addr, len, pgoff, flags, 0);
+}
 
 extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
diff -rup linux-2.6.23.12/include/linux/resource.h linux-2.6.23.12-execshield/include/linux/resource.h
--- linux-2.6.23.12/include/linux/resource.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/linux/resource.h	2008-01-14 16:01:35.000000000 +0200
@@ -54,8 +54,11 @@ struct rlimit {
 /*
  * Limit the stack by to some sane default: root can always
  * increase this limit if needed..  8MB seems reasonable.
+ *
+ * (2MB more to cover randomization effects.)
  */
-#define _STK_LIM	(8*1024*1024)
+#define _STK_LIM	(10*1024*1024)
+#define EXEC_STACK_BIAS	(2*1024*1024)
 
 /*
  * GPG wants 32kB of mlocked memory, to make sure pass phrases
diff -rup linux-2.6.23.12/include/linux/sched.h linux-2.6.23.12-execshield/include/linux/sched.h
--- linux-2.6.23.12/include/linux/sched.h	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/include/linux/sched.h	2008-01-14 17:18:25.000000000 +0200
@@ -93,6 +93,9 @@ struct exec_domain;
 struct futex_pi_state;
 struct bio;
 
+extern int exec_shield;
+extern int print_fatal_signals;
+
 /*
  * List of flags we want to share for kernel threads,
  * if only because they are not used by them anyway.
@@ -301,6 +304,9 @@ extern unsigned long
 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
 		       unsigned long, unsigned long);
 extern unsigned long
+arch_get_unmapped_exec_area(struct file *, unsigned long, unsigned long,
+		       unsigned long, unsigned long);
+extern unsigned long
 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 			  unsigned long len, unsigned long pgoff,
 			  unsigned long flags);
@@ -345,6 +351,9 @@ struct mm_struct {
 	unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
+	unsigned long (*get_unmapped_exec_area) (struct file *filp,
+				unsigned long addr, unsigned long len,
+				unsigned long pgoff, unsigned long flags);
 	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
 	unsigned long mmap_base;		/* base of mmap area */
 	unsigned long task_size;		/* size of task vm space */
diff -rup linux-2.6.23.12/kernel/sysctl.c linux-2.6.23.12-execshield/kernel/sysctl.c
--- linux-2.6.23.12/kernel/sysctl.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/kernel/sysctl.c	2008-01-14 14:55:23.000000000 +0200
@@ -79,6 +79,26 @@ extern int maps_protect;
 extern int sysctl_stat_interval;
 extern int audit_argv_kb;
 
+int exec_shield = (1<<0);
+/* exec_shield is a bitmask:
+ *	       0: off; vdso at STACK_TOP, 1 page below TASK_SIZE
+ *	(1<<0) 1: on [also on if !=0]
+ *	(1<<1) 2: force noexecstack regardless of PT_GNU_STACK
+ * The old settings
+ *	(1<<2) 4: vdso just below .text of main (unless too low)
+ *	(1<<3) 8: vdso just below .text of PT_INTERP (unless too low)
+ * are ignored because the vdso is placed completely randomly.
+ */
+
+static int __init setup_exec_shield(char *str)
+{
+	get_option(&str, &exec_shield);
+
+	return 1;
+}
+
+__setup("exec-shield=", setup_exec_shield);
+
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 static int maxolduid = 65535;
 static int minolduid;
@@ -340,7 +360,14 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 	{
-		.ctl_name	= KERN_CORE_USES_PID,
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "exec-shield",
+		.data		= &exec_shield,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= KERN_CORE_USES_PID,
 		.procname	= "core_uses_pid",
 		.data		= &core_uses_pid,
 		.maxlen		= sizeof(int),
diff -rup linux-2.6.23.12/mm/mmap.c linux-2.6.23.12-execshield/mm/mmap.c
--- linux-2.6.23.12/mm/mmap.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/mm/mmap.c	2008-01-14 17:22:27.000000000 +0200
@@ -25,6 +25,7 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/random.h>
 
 #include <asm/uaccess.h>
@@ -373,6 +374,8 @@ static inline void
 __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct vm_area_struct *prev, struct rb_node *rb_parent)
 {
+	if (vma->vm_flags & VM_EXEC)
+		arch_add_exec_range(mm, vma->vm_end);
 	if (prev) {
 		vma->vm_next = prev->vm_next;
 		prev->vm_next = vma;
@@ -476,6 +479,8 @@ __vma_unlink(struct mm_struct *mm, struc
 	rb_erase(&vma->vm_rb, &mm->mm_rb);
 	if (mm->mmap_cache == vma)
 		mm->mmap_cache = prev;
+	if (vma->vm_flags & VM_EXEC)
+		arch_remove_exec_range(mm, vma->vm_end);
 }
 
 /*
@@ -781,6 +786,8 @@ struct vm_area_struct *vma_merge(struct 
 		} else					/* cases 2, 5, 7 */
 			vma_adjust(prev, prev->vm_start,
 				end, prev->vm_pgoff, NULL);
+		if (prev->vm_flags & VM_EXEC)
+			arch_add_exec_range(mm, prev->vm_end);
 		return prev;
 	}
 
@@ -933,7 +940,8 @@ unsigned long do_mmap_pgoff(struct file 
 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
 	 */
-	addr = get_unmapped_area(file, addr, len, pgoff, flags);
+	addr = get_unmapped_area_prot(file, addr, len, pgoff, flags,
+		prot & PROT_EXEC);
 	if (addr & ~PAGE_MASK)
 		return addr;
 
@@ -1406,13 +1414,17 @@ void arch_unmap_area_topdown(struct mm_s
 }
 
 unsigned long
-get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
-		unsigned long pgoff, unsigned long flags)
+get_unmapped_area_prot(struct file *file, unsigned long addr, unsigned long len,
+		unsigned long pgoff, unsigned long flags, int exec)
 {
 	unsigned long (*get_area)(struct file *, unsigned long,
 				  unsigned long, unsigned long, unsigned long);
 
-	get_area = current->mm->get_unmapped_area;
+	if (exec && current->mm->get_unmapped_exec_area)
+		get_area = current->mm->get_unmapped_exec_area;
+	else
+		get_area = current->mm->get_unmapped_area;
+
 	if (file && file->f_op && file->f_op->get_unmapped_area)
 		get_area = file->f_op->get_unmapped_area;
 	addr = get_area(file, addr, len, pgoff, flags);
@@ -1427,7 +1439,74 @@ get_unmapped_area(struct file *file, uns
 	return addr;
 }
 
-EXPORT_SYMBOL(get_unmapped_area);
+EXPORT_SYMBOL(get_unmapped_area_prot);
+
+#define SHLIB_BASE	0x00110000
+
+unsigned long arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0,
+		unsigned long len0, unsigned long pgoff, unsigned long flags)
+{
+	unsigned long addr = addr0, len = len0;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long tmp;
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED)
+		return addr;
+
+	if (!addr) {
+		addr = randomize_range(SHLIB_BASE, 0x01000000, len);
+	} else {
+		addr = PAGE_ALIGN(addr);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+				(!vma || addr + len <= vma->vm_start)) {
+			return addr;
+		}
+	}
+
+	addr = SHLIB_BASE;
+	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		/* At this point: (!vma || addr < vma->vm_end). */
+		if (TASK_SIZE - len < addr)
+			return -ENOMEM;
+
+		if (!vma || addr + len <= vma->vm_start) {
+			/*
+			 * Must not let a PROT_EXEC mapping get into the
+			 * brk area:
+			 */
+			if (addr + len > mm->brk)
+				goto failed;
+
+			/*
+			 * Up until the brk area we randomize addresses
+			 * as much as possible:
+			 */
+			if (addr >= 0x01000000) {
+				tmp = randomize_range(0x01000000,
+					PAGE_ALIGN(max(mm->start_brk,
+					(unsigned long)0x08000000)), len);
+				vma = find_vma(mm, tmp);
+				if (TASK_SIZE - len >= tmp &&
+						(!vma || tmp + len <= vma->vm_start))
+					return tmp;
+			}
+			/*
+			 * Ok, randomization didn't work out - return
+			 * the result of the linear search:
+			 */
+			return addr;
+		}
+		addr = vma->vm_end;
+	}
+
+failed:
+	return current->mm->get_unmapped_area(filp, addr0, len0, pgoff, flags);
+}
 
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
@@ -1502,6 +1581,14 @@ out:
 	return prev ? prev->vm_next : vma;
 }
 
+static int over_stack_limit(unsigned long sz)
+{
+	if (sz < EXEC_STACK_BIAS)
+		return 0;
+	return (sz - EXEC_STACK_BIAS) >
+			current->signal->rlim[RLIMIT_STACK].rlim_cur;
+}
+
 /*
  * Verify that the stack growth is acceptable and
 * update accounting. This is shared with both the
@@ -1518,7 +1605,7 @@ static int acct_stack_growth(struct vm_a
 		return -ENOMEM;
 
 	/* Stack limit test */
-	if (size > rlim[RLIMIT_STACK].rlim_cur)
+	if (over_stack_limit(size))
 		return -ENOMEM;
 
 	/* mlock limit tests */
@@ -1818,10 +1905,15 @@ int split_vma(struct mm_struct * mm, str
 	if (new->vm_ops && new->vm_ops->open)
 		new->vm_ops->open(new);
 
-	if (new_below)
+	if (new_below) {
+		unsigned long old_end = vma->vm_end;
+
 		vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
 			((addr - new->vm_start) >> PAGE_SHIFT), new);
-	else
+
+		if (vma->vm_flags & VM_EXEC)
+			arch_remove_exec_range(mm, old_end);
+	} else
 		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
 	return 0;
@@ -2037,6 +2129,7 @@ void exit_mmap(struct mm_struct *mm)
 	vm_unacct_memory(nr_accounted);
 	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
+	arch_flush_exec_range(mm);
 
 	/*
 	 * Walk the list again, actually closing and freeing it,
diff -rup linux-2.6.23.12/mm/mprotect.c linux-2.6.23.12-execshield/mm/mprotect.c
--- linux-2.6.23.12/mm/mprotect.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/mm/mprotect.c	2008-01-14 16:36:03.000000000 +0200
@@ -23,9 +23,14 @@
 #include <linux/swapops.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#ifdef CONFIG_X86
+#include <asm/desc.h>
+#endif
+
 static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
 		int dirty_accountable)
@@ -135,7 +140,7 @@ mprotect_fixup(struct vm_area_struct *vm
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long oldflags = vma->vm_flags;
 	long nrpages = (end - start) >> PAGE_SHIFT;
-	unsigned long charged = 0;
+	unsigned long charged = 0, old_end = vma->vm_end;
 	pgoff_t pgoff;
 	int error;
 	int dirty_accountable = 0;
@@ -201,6 +206,9 @@ success:
 		dirty_accountable = 1;
 	}
 
+	if (oldflags & VM_EXEC)
+		arch_remove_exec_range(current->mm, old_end);
+
 	if (is_vm_hugetlb_page(vma))
 		hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
 	else
diff -rup linux-2.6.23.12/mm/mremap.c linux-2.6.23.12-execshield/mm/mremap.c
--- linux-2.6.23.12/mm/mremap.c	2007-12-18 23:55:57.000000000 +0200
+++ linux-2.6.23.12-execshield/mm/mremap.c	2008-01-14 16:36:34.000000000 +0200
@@ -392,8 +392,8 @@ unsigned long do_mremap(unsigned long ad
 			if (vma->vm_flags & VM_MAYSHARE)
 				map_flags |= MAP_SHARED;
 
-			new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
-						vma->vm_pgoff, map_flags);
+			new_addr = get_unmapped_area_prot(vma->vm_file, 0, new_len,
+					vma->vm_pgoff, map_flags, vma->vm_flags & VM_EXEC);
 			if (new_addr & ~PAGE_MASK) {
 				ret = new_addr;
 				goto out;
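
[Usage note; illustration only, not part of the patch.] At runtime everything hangs off the exec_shield bitmask that the kernel/sysctl.c hunk registers: bit 0 enables the segment-limit NX approximation, and bit 1 additionally forces non-executable stacks (ignoring PT_GNU_STACK) and turns on brk/mmap randomization. It can be set at boot with exec-shield=N or at any time through /proc/sys/kernel/exec-shield; for example, "echo 3 > /proc/sys/kernel/exec-shield" enables both bits without a reboot. A small C check, assuming a kernel built with this patch (on anything else the open simply fails):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/exec-shield", "r");
	int val;

	if (!f) {
		perror("/proc/sys/kernel/exec-shield");
		return 1;
	}
	if (fscanf(f, "%d", &val) != 1) {
		fclose(f);
		fprintf(stderr, "unexpected sysctl contents\n");
		return 1;
	}
	fclose(f);

	printf("exec-shield = %d\n", val);
	printf("  segment-limit NX approximation: %s\n", val ? "on" : "off");
	printf("  forced non-exec stack + extra randomization: %s\n",
	       (val & 2) ? "on" : "off");
	return 0;
}

A quick functional test of the mechanism itself is to run a binary whose PT_GNU_STACK asks for an executable stack: with bit 1 set the request is ignored, and /proc/PID/maps (plus the ExecLim field this patch adds to /proc/PID/status on i386 without hardware NX) shows where the code-segment limit actually sits.
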