Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Tue, 23 Oct 2018 12:08:53 +0000 (13:08 +0100)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 23 Oct 2018 12:08:53 +0000 (13:08 +0100)
Pull locking and misc x86 updates from Ingo Molnar:
 "Lots of changes in this cycle - in part because locking/core attracted
  a number of related x86 low-level changes which were easier to handle in
  a single tree:

   - Linux Kernel Memory Consistency Model updates (Alan Stern, Paul E.
     McKenney, Andrea Parri)

   - lockdep scalability improvements and micro-optimizations (Waiman
     Long)

   - rwsem improvements (Waiman Long)

   - spinlock micro-optimization (Matthew Wilcox)

   - qspinlocks: Provide a liveness guarantee (more fairness) on x86.
     (Peter Zijlstra)

   - Add support for relative references in jump tables on arm64, x86
     and s390 to optimize jump labels (Ard Biesheuvel, Heiko Carstens)

   - Be a lot less permissive on weird (kernel address) uaccess faults
     on x86: BUG() when uaccess helpers fault on kernel addresses (Jann
     Horn)

   - macrofy x86 asm statements to un-confuse the GCC inliner. (Nadav
     Amit)

   - ... and a handful of other smaller changes as well"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (57 commits)
  locking/lockdep: Make global debug_locks* variables read-mostly
  locking/lockdep: Fix debug_locks off performance problem
  locking/pvqspinlock: Extend node size when pvqspinlock is configured
  locking/qspinlock_stat: Count instances of nested lock slowpaths
  locking/qspinlock, x86: Provide liveness guarantee
  x86/asm: 'Simplify' GEN_*_RMWcc() macros
  locking/qspinlock: Rework some comments
  locking/qspinlock: Re-order code
  locking/lockdep: Remove duplicated 'lock_class_ops' percpu array
  x86/defconfig: Enable CONFIG_USB_XHCI_HCD=y
  futex: Replace spin_is_locked() with lockdep
  locking/lockdep: Make class->ops a percpu counter and move it under CONFIG_DEBUG_LOCKDEP=y
  x86/jump-labels: Macrofy inline assembly code to work around GCC inlining bugs
  x86/cpufeature: Macrofy inline assembly code to work around GCC inlining bugs
  x86/extable: Macrofy inline assembly code to work around GCC inlining bugs
  x86/paravirt: Work around GCC inlining bugs when compiling paravirt ops
  x86/bug: Macrofy the BUG table section handling, to work around GCC inlining bugs
  x86/alternatives: Macrofy lock prefixes to work around GCC inlining bugs
  x86/refcount: Work around GCC inlining bug
  x86/objtool: Use asm macros to work around GCC inlining bugs
  ...

95 files changed:
Documentation/locking/lockstat.txt
Documentation/memory-barriers.txt
Makefile
arch/Kconfig
arch/arm64/Kconfig
arch/arm64/include/asm/jump_label.h
arch/arm64/kernel/jump_label.c
arch/s390/Kconfig
arch/s390/include/asm/jump_label.h
arch/s390/kernel/jump_label.c
arch/s390/kernel/vmlinux.lds.S
arch/x86/Kconfig
arch/x86/Makefile
arch/x86/configs/i386_defconfig
arch/x86/configs/x86_64_defconfig
arch/x86/entry/calling.h
arch/x86/include/asm/alternative-asm.h
arch/x86/include/asm/alternative.h
arch/x86/include/asm/asm.h
arch/x86/include/asm/atomic.h
arch/x86/include/asm/atomic64_64.h
arch/x86/include/asm/bitops.h
arch/x86/include/asm/bug.h
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/elf.h
arch/x86/include/asm/extable.h
arch/x86/include/asm/fpu/internal.h
arch/x86/include/asm/futex.h
arch/x86/include/asm/jump_label.h
arch/x86/include/asm/local.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/preempt.h
arch/x86/include/asm/ptrace.h
arch/x86/include/asm/qspinlock.h
arch/x86/include/asm/refcount.h
arch/x86/include/asm/rmwcc.h
arch/x86/include/asm/uaccess.h
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/jump_label.c
arch/x86/kernel/kprobes/core.c
arch/x86/kernel/macros.S [new file with mode: 0644]
arch/x86/kernel/module.c
arch/x86/kernel/traps.c
arch/x86/lib/checksum_32.S
arch/x86/lib/copy_user_64.S
arch/x86/lib/csum-copy_64.S
arch/x86/lib/getuser.S
arch/x86/lib/putuser.S
arch/x86/lib/usercopy_32.c
arch/x86/lib/usercopy_64.c
arch/x86/mm/extable.c
arch/x86/mm/fault.c
arch/x86/tools/relocs.c
arch/x86/um/asm/elf.h
arch/xtensa/kernel/Makefile
drivers/misc/lkdtm/core.c
drivers/misc/lkdtm/lkdtm.h
drivers/misc/lkdtm/usercopy.c
fs/namespace.c
include/asm-generic/bug.h
include/asm-generic/qrwlock.h
include/asm-generic/qspinlock.h
include/asm-generic/vmlinux.lds.h
include/linux/compiler.h
include/linux/debug_locks.h
include/linux/jump_label.h
include/linux/lockdep.h
include/linux/rwsem.h
include/linux/sched.h
init/main.c
kernel/cpu.c
kernel/futex.c
kernel/jump_label.c
kernel/locking/lockdep.c
kernel/locking/lockdep_internals.h
kernel/locking/lockdep_proc.c
kernel/locking/qspinlock.c
kernel/locking/qspinlock_paravirt.h
kernel/locking/qspinlock_stat.h
kernel/locking/rtmutex.c
kernel/locking/rwsem-xadd.c
kernel/locking/rwsem.c
kernel/locking/rwsem.h
kernel/module.c
lib/debug_locks.c
mm/maccess.c
scripts/Kbuild.include
scripts/mod/Makefile
tools/memory-model/Documentation/explanation.txt
tools/memory-model/Documentation/recipes.txt
tools/memory-model/README
tools/memory-model/linux-kernel.cat
tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
tools/memory-model/litmus-tests/README
tools/objtool/special.c

index 5786ad2..fdbeb0c 100644 (file)
@@ -91,7 +91,7 @@ Look at the current lock statistics:
 07                         &mm->mmap_sem-R:            37            100           1.31      299502.61      325629.52        3256.30         212344       34316685           0.10        7744.91    95016910.20           2.77
 08                         ---------------
 09                           &mm->mmap_sem              1          [<ffffffff811502a7>] khugepaged_scan_mm_slot+0x57/0x280
-19                           &mm->mmap_sem             96          [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
+10                           &mm->mmap_sem             96          [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
 11                           &mm->mmap_sem             34          [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
 12                           &mm->mmap_sem             17          [<ffffffff81127e71>] vm_munmap+0x41/0x80
 13                         ---------------
index 0d8d7ef..c1d9139 100644 (file)
@@ -471,8 +471,7 @@ And a couple of implicit varieties:
      operations after the ACQUIRE operation will appear to happen after the
      ACQUIRE operation with respect to the other components of the system.
      ACQUIRE operations include LOCK operations and both smp_load_acquire()
-     and smp_cond_acquire() operations. The later builds the necessary ACQUIRE
-     semantics from relying on a control dependency and smp_rmb().
+     and smp_cond_load_acquire() operations.
 
      Memory operations that occur before an ACQUIRE operation may appear to
      happen after it completes.
index 69fa5c0..2fc5732 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1073,7 +1073,7 @@ scripts: scripts_basic asm-generic gcc-plugins $(autoksyms_h)
 # version.h and scripts_basic is processed / created.
 
 # Listed in dependency order
-PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3
+PHONY += prepare archprepare macroprepare prepare0 prepare1 prepare2 prepare3
 
 # prepare3 is used to check if we are building in a separate output directory,
 # and if so do:
@@ -1096,7 +1096,9 @@ prepare2: prepare3 outputmakefile asm-generic
 prepare1: prepare2 $(version_h) $(autoksyms_h) include/generated/utsrelease.h
        $(cmd_crmodverdir)
 
-archprepare: archheaders archscripts prepare1 scripts_basic
+macroprepare: prepare1 archmacros
+
+archprepare: archheaders archscripts macroprepare scripts_basic
 
 prepare0: archprepare gcc-plugins
        $(Q)$(MAKE) $(build)=.
@@ -1164,6 +1166,9 @@ archheaders:
 PHONY += archscripts
 archscripts:
 
+PHONY += archmacros
+archmacros:
+
 PHONY += __headers
 __headers: $(version_h) scripts_basic uapi-asm-generic archheaders archscripts
        $(Q)$(MAKE) $(build)=scripts build_unifdef
index 6801123..9d32960 100644 (file)
@@ -359,6 +359,9 @@ config HAVE_PERF_USER_STACK_DUMP
 config HAVE_ARCH_JUMP_LABEL
        bool
 
+config HAVE_ARCH_JUMP_LABEL_RELATIVE
+       bool
+
 config HAVE_RCU_TABLE_FREE
        bool
 
index a8ae30f..c03cd0d 100644 (file)
@@ -105,6 +105,7 @@ config ARM64
        select HAVE_ARCH_BITREVERSE
        select HAVE_ARCH_HUGE_VMAP
        select HAVE_ARCH_JUMP_LABEL
+       select HAVE_ARCH_JUMP_LABEL_RELATIVE
        select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
        select HAVE_ARCH_KGDB
        select HAVE_ARCH_MMAP_RND_BITS
index 7e2b3e3..4720234 100644 (file)
 
 #define JUMP_LABEL_NOP_SIZE            AARCH64_INSN_SIZE
 
-static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+static __always_inline bool arch_static_branch(struct static_key *key,
+                                              bool branch)
 {
-       asm_volatile_goto("1: nop\n\t"
-                ".pushsection __jump_table,  \"aw\"\n\t"
-                ".align 3\n\t"
-                ".quad 1b, %l[l_yes], %c0\n\t"
-                ".popsection\n\t"
+       asm_volatile_goto(
+               "1:     nop                                     \n\t"
+                "      .pushsection    __jump_table, \"aw\"    \n\t"
+                "      .align          3                       \n\t"
+                "      .long           1b - ., %l[l_yes] - .   \n\t"
+                "      .quad           %c0 - .                 \n\t"
+                "      .popsection                             \n\t"
                 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
 
        return false;
@@ -40,13 +43,16 @@ l_yes:
        return true;
 }
 
-static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+static __always_inline bool arch_static_branch_jump(struct static_key *key,
+                                                   bool branch)
 {
-       asm_volatile_goto("1: b %l[l_yes]\n\t"
-                ".pushsection __jump_table,  \"aw\"\n\t"
-                ".align 3\n\t"
-                ".quad 1b, %l[l_yes], %c0\n\t"
-                ".popsection\n\t"
+       asm_volatile_goto(
+               "1:     b               %l[l_yes]               \n\t"
+                "      .pushsection    __jump_table, \"aw\"    \n\t"
+                "      .align          3                       \n\t"
+                "      .long           1b - ., %l[l_yes] - .   \n\t"
+                "      .quad           %c0 - .                 \n\t"
+                "      .popsection                             \n\t"
                 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
 
        return false;
@@ -54,13 +60,5 @@ l_yes:
        return true;
 }
 
-typedef u64 jump_label_t;
-
-struct jump_entry {
-       jump_label_t code;
-       jump_label_t target;
-       jump_label_t key;
-};
-
 #endif  /* __ASSEMBLY__ */
 #endif /* __ASM_JUMP_LABEL_H */
index e075641..646b956 100644 (file)
 void arch_jump_label_transform(struct jump_entry *entry,
                               enum jump_label_type type)
 {
-       void *addr = (void *)entry->code;
+       void *addr = (void *)jump_entry_code(entry);
        u32 insn;
 
        if (type == JUMP_LABEL_JMP) {
-               insn = aarch64_insn_gen_branch_imm(entry->code,
-                                                  entry->target,
+               insn = aarch64_insn_gen_branch_imm(jump_entry_code(entry),
+                                                  jump_entry_target(entry),
                                                   AARCH64_INSN_BRANCH_NOLINK);
        } else {
                insn = aarch64_insn_gen_nop();
index cc83135..039a341 100644 (file)
@@ -126,6 +126,7 @@ config S390
        select HAVE_ALIGNED_STRUCT_PAGE if SLUB
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_JUMP_LABEL
+       select HAVE_ARCH_JUMP_LABEL_RELATIVE
        select HAVE_ARCH_KASAN
        select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
        select HAVE_ARCH_SECCOMP_FILTER
index 40f6512..e2d3e6c 100644 (file)
  * We use a brcl 0,2 instruction for jump labels at compile time so it
  * can be easily distinguished from a hotpatch generated instruction.
  */
-static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+static inline bool arch_static_branch(struct static_key *key, bool branch)
 {
-       asm_volatile_goto("0:   brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
-               ".pushsection __jump_table, \"aw\"\n"
-               ".balign 8\n"
-               ".quad 0b, %l[label], %0\n"
-               ".popsection\n"
-               : : "X" (&((char *)key)[branch]) : : label);
-
+       asm_volatile_goto("0:   brcl    0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
+                         ".pushsection __jump_table,\"aw\"\n"
+                         ".balign      8\n"
+                         ".long        0b-.,%l[label]-.\n"
+                         ".quad        %0-.\n"
+                         ".popsection\n"
+                         : : "X" (&((char *)key)[branch]) : : label);
        return false;
 label:
        return true;
 }
 
-static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+static inline bool arch_static_branch_jump(struct static_key *key, bool branch)
 {
-       asm_volatile_goto("0:   brcl 15, %l[label]\n"
-               ".pushsection __jump_table, \"aw\"\n"
-               ".balign 8\n"
-               ".quad 0b, %l[label], %0\n"
-               ".popsection\n"
-               : : "X" (&((char *)key)[branch]) : : label);
-
+       asm_volatile_goto("0:   brcl 15,%l[label]\n"
+                         ".pushsection __jump_table,\"aw\"\n"
+                         ".balign      8\n"
+                         ".long        0b-.,%l[label]-.\n"
+                         ".quad        %0-.\n"
+                         ".popsection\n"
+                         : : "X" (&((char *)key)[branch]) : : label);
        return false;
 label:
        return true;
 }
 
-typedef unsigned long jump_label_t;
-
-struct jump_entry {
-       jump_label_t code;
-       jump_label_t target;
-       jump_label_t key;
-};
-
 #endif  /* __ASSEMBLY__ */
 #endif
index 43f8430..50a1798 100644 (file)
@@ -33,13 +33,13 @@ static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
 {
        /* brcl 15,offset */
        insn->opcode = 0xc0f4;
-       insn->offset = (entry->target - entry->code) >> 1;
+       insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1;
 }
 
 static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
                           struct insn *new)
 {
-       unsigned char *ipc = (unsigned char *)entry->code;
+       unsigned char *ipc = (unsigned char *)jump_entry_code(entry);
        unsigned char *ipe = (unsigned char *)expected;
        unsigned char *ipn = (unsigned char *)new;
 
@@ -59,6 +59,7 @@ static void __jump_label_transform(struct jump_entry *entry,
                                   enum jump_label_type type,
                                   int init)
 {
+       void *code = (void *)jump_entry_code(entry);
        struct insn old, new;
 
        if (type == JUMP_LABEL_JMP) {
@@ -69,13 +70,13 @@ static void __jump_label_transform(struct jump_entry *entry,
                jump_label_make_nop(entry, &new);
        }
        if (init) {
-               if (memcmp((void *)entry->code, &orignop, sizeof(orignop)))
+               if (memcmp(code, &orignop, sizeof(orignop)))
                        jump_label_bug(entry, &orignop, &new);
        } else {
-               if (memcmp((void *)entry->code, &old, sizeof(old)))
+               if (memcmp(code, &old, sizeof(old)))
                        jump_label_bug(entry, &old, &new);
        }
-       s390_kernel_write((void *)entry->code, &new, sizeof(new));
+       s390_kernel_write(code, &new, sizeof(new));
 }
 
 static int __sm_arch_jump_label_transform(void *data)
index cc3cbdc..21eb740 100644 (file)
@@ -65,6 +65,7 @@ SECTIONS
        __start_ro_after_init = .;
        .data..ro_after_init : {
                 *(.data..ro_after_init)
+               JUMP_TABLE_DATA
        }
        EXCEPTION_TABLE(16)
        . = ALIGN(PAGE_SIZE);
index e8de5de..45b94fa 100644 (file)
@@ -119,6 +119,7 @@ config X86
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_HUGE_VMAP              if X86_64 || X86_PAE
        select HAVE_ARCH_JUMP_LABEL
+       select HAVE_ARCH_JUMP_LABEL_RELATIVE
        select HAVE_ARCH_KASAN                  if X86_64
        select HAVE_ARCH_KGDB
        select HAVE_ARCH_MMAP_RND_BITS          if MMU
index 8f6e7eb..5b562e4 100644 (file)
@@ -193,7 +193,6 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTI
 # does binutils support specific instructions?
 asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
 asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
-asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
 avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
 avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
 avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
@@ -237,6 +236,13 @@ archscripts: scripts_basic
 archheaders:
        $(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
 
+archmacros:
+       $(Q)$(MAKE) $(build)=arch/x86/kernel arch/x86/kernel/macros.s
+
+ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s -Wa,-
+export ASM_MACRO_FLAGS
+KBUILD_CFLAGS += $(ASM_MACRO_FLAGS)
+
 ###
 # Kernel objects
 
index 0eb9f92..6c3ab05 100644 (file)
@@ -247,6 +247,7 @@ CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
+CONFIG_USB_XHCI_HCD=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_TT_NEWSCHED=y
 CONFIG_USB_OHCI_HCD=y
index e32fc1f..ac9ae48 100644 (file)
@@ -243,6 +243,7 @@ CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
+CONFIG_USB_XHCI_HCD=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_TT_NEWSCHED=y
 CONFIG_USB_OHCI_HCD=y
index 352e70c..708b46a 100644 (file)
@@ -338,7 +338,7 @@ For 32-bit we have the following conventions - kernel is built with
 .macro CALL_enter_from_user_mode
 #ifdef CONFIG_CONTEXT_TRACKING
 #ifdef HAVE_JUMP_LABEL
-       STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
+       STATIC_BRANCH_JMP l_yes=.Lafter_call_\@, key=context_tracking_enabled, branch=1
 #endif
        call enter_from_user_mode
 .Lafter_call_\@:
index 31b627b..8e4ea39 100644 (file)
@@ -7,16 +7,24 @@
 #include <asm/asm.h>
 
 #ifdef CONFIG_SMP
-       .macro LOCK_PREFIX
-672:   lock
+.macro LOCK_PREFIX_HERE
        .pushsection .smp_locks,"a"
        .balign 4
-       .long 672b - .
+       .long 671f - .          # offset
        .popsection
-       .endm
+671:
+.endm
+
+.macro LOCK_PREFIX insn:vararg
+       LOCK_PREFIX_HERE
+       lock \insn
+.endm
 #else
-       .macro LOCK_PREFIX
-       .endm
+.macro LOCK_PREFIX_HERE
+.endm
+
+.macro LOCK_PREFIX insn:vararg
+.endm
 #endif
 
 /*
index 4cd6a3b..d7faa16 100644 (file)
  */
 
 #ifdef CONFIG_SMP
-#define LOCK_PREFIX_HERE \
-               ".pushsection .smp_locks,\"a\"\n"       \
-               ".balign 4\n"                           \
-               ".long 671f - .\n" /* offset */         \
-               ".popsection\n"                         \
-               "671:"
-
-#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; "
-
+#define LOCK_PREFIX_HERE "LOCK_PREFIX_HERE\n\t"
+#define LOCK_PREFIX "LOCK_PREFIX "
 #else /* ! CONFIG_SMP */
 #define LOCK_PREFIX_HERE ""
 #define LOCK_PREFIX ""
index 990770f..21b0867 100644 (file)
 /* Exception table entry */
 #ifdef __ASSEMBLY__
 # define _ASM_EXTABLE_HANDLE(from, to, handler)                        \
-       .pushsection "__ex_table","a" ;                         \
-       .balign 4 ;                                             \
-       .long (from) - . ;                                      \
-       .long (to) - . ;                                        \
-       .long (handler) - . ;                                   \
+       ASM_EXTABLE_HANDLE from to handler
+
+.macro ASM_EXTABLE_HANDLE from:req to:req handler:req
+       .pushsection "__ex_table","a"
+       .balign 4
+       .long (\from) - .
+       .long (\to) - .
+       .long (\handler) - .
        .popsection
+.endm
+#else /* __ASSEMBLY__ */
+
+# define _ASM_EXTABLE_HANDLE(from, to, handler)                        \
+       "ASM_EXTABLE_HANDLE from=" #from " to=" #to             \
+       " handler=\"" #handler "\"\n\t"
+
+/* For C file, we already have NOKPROBE_SYMBOL macro */
+
+#endif /* __ASSEMBLY__ */
 
 # define _ASM_EXTABLE(from, to)                                        \
        _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
 
+# define _ASM_EXTABLE_UA(from, to)                             \
+       _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+
 # define _ASM_EXTABLE_FAULT(from, to)                          \
        _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
 
        _ASM_PTR (entry);                                       \
        .popsection
 
+#ifdef __ASSEMBLY__
 .macro ALIGN_DESTINATION
        /* check for bad alignment of destination */
        movl %edi,%ecx
        jmp copy_user_handle_tail
        .previous
 
-       _ASM_EXTABLE(100b,103b)
-       _ASM_EXTABLE(101b,103b)
+       _ASM_EXTABLE_UA(100b, 103b)
+       _ASM_EXTABLE_UA(101b, 103b)
        .endm
-
-#else
-# define _EXPAND_EXTABLE_HANDLE(x) #x
-# define _ASM_EXTABLE_HANDLE(from, to, handler)                        \
-       " .pushsection \"__ex_table\",\"a\"\n"                  \
-       " .balign 4\n"                                          \
-       " .long (" #from ") - .\n"                              \
-       " .long (" #to ") - .\n"                                \
-       " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n"    \
-       " .popsection\n"
-
-# define _ASM_EXTABLE(from, to)                                        \
-       _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
-
-# define _ASM_EXTABLE_FAULT(from, to)                          \
-       _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
-
-# define _ASM_EXTABLE_EX(from, to)                             \
-       _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
-
-# define _ASM_EXTABLE_REFCOUNT(from, to)                       \
-       _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
-
-/* For C file, we already have NOKPROBE_SYMBOL macro */
-#endif
+#endif /* __ASSEMBLY__ */
 
 #ifndef __ASSEMBLY__
 /*
index ce84388..ea3d952 100644 (file)
@@ -82,7 +82,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
  */
 static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
+       return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i);
 }
 #define arch_atomic_sub_and_test arch_atomic_sub_and_test
 
@@ -122,7 +122,7 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
  */
 static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
 {
-       GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
+       return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e);
 }
 #define arch_atomic_dec_and_test arch_atomic_dec_and_test
 
@@ -136,7 +136,7 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
  */
 static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
 {
-       GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
+       return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e);
 }
 #define arch_atomic_inc_and_test arch_atomic_inc_and_test
 
@@ -151,7 +151,7 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
  */
 static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
+       return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i);
 }
 #define arch_atomic_add_negative arch_atomic_add_negative
 
index 5f851d9..dadc20a 100644 (file)
@@ -73,7 +73,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
  */
 static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
+       return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i);
 }
 #define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
 
@@ -115,7 +115,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
  */
 static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
 {
-       GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
+       return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e);
 }
 #define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
 
@@ -129,7 +129,7 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
  */
 static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
 {
-       GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
+       return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e);
 }
 #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
 
@@ -144,7 +144,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
  */
 static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
+       return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i);
 }
 #define arch_atomic64_add_negative arch_atomic64_add_negative
 
index 9f645ba..124f919 100644 (file)
@@ -217,8 +217,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
  */
 static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts),
-                        *addr, "Ir", nr, "%0", c);
+       return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr);
 }
 
 /**
@@ -264,8 +263,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
  */
 static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr),
-                        *addr, "Ir", nr, "%0", c);
+       return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr);
 }
 
 /**
@@ -318,8 +316,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
  */
 static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-       GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc),
-                        *addr, "Ir", nr, "%0", c);
+       return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
 }
 
 static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
index 6804d66..5090035 100644 (file)
@@ -4,6 +4,8 @@
 
 #include <linux/stringify.h>
 
+#ifndef __ASSEMBLY__
+
 /*
  * Despite that some emulators terminate on UD2, we use it for WARN().
  *
 
 #define LEN_UD2                2
 
-#ifdef CONFIG_GENERIC_BUG
-
-#ifdef CONFIG_X86_32
-# define __BUG_REL(val)        ".long " __stringify(val)
-#else
-# define __BUG_REL(val)        ".long " __stringify(val) " - 2b"
-#endif
-
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-
-#define _BUG_FLAGS(ins, flags)                                         \
-do {                                                                   \
-       asm volatile("1:\t" ins "\n"                                    \
-                    ".pushsection __bug_table,\"aw\"\n"                \
-                    "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"   \
-                    "\t"  __BUG_REL(%c0) "\t# bug_entry::file\n"       \
-                    "\t.word %c1"        "\t# bug_entry::line\n"       \
-                    "\t.word %c2"        "\t# bug_entry::flags\n"      \
-                    "\t.org 2b+%c3\n"                                  \
-                    ".popsection"                                      \
-                    : : "i" (__FILE__), "i" (__LINE__),                \
-                        "i" (flags),                                   \
-                        "i" (sizeof(struct bug_entry)));               \
-} while (0)
-
-#else /* !CONFIG_DEBUG_BUGVERBOSE */
-
 #define _BUG_FLAGS(ins, flags)                                         \
 do {                                                                   \
-       asm volatile("1:\t" ins "\n"                                    \
-                    ".pushsection __bug_table,\"aw\"\n"                \
-                    "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n"   \
-                    "\t.word %c0"        "\t# bug_entry::flags\n"      \
-                    "\t.org 2b+%c1\n"                                  \
-                    ".popsection"                                      \
-                    : : "i" (flags),                                   \
+       asm volatile("ASM_BUG ins=\"" ins "\" file=%c0 line=%c1 "       \
+                    "flags=%c2 size=%c3"                               \
+                    : : "i" (__FILE__), "i" (__LINE__),                \
+                        "i" (flags),                                   \
                         "i" (sizeof(struct bug_entry)));               \
 } while (0)
 
-#endif /* CONFIG_DEBUG_BUGVERBOSE */
-
-#else
-
-#define _BUG_FLAGS(ins, flags)  asm volatile(ins)
-
-#endif /* CONFIG_GENERIC_BUG */
-
 #define HAVE_ARCH_BUG
 #define BUG()                                                  \
 do {                                                           \
@@ -82,4 +46,54 @@ do {                                                         \
 
 #include <asm-generic/bug.h>
 
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_GENERIC_BUG
+
+#ifdef CONFIG_X86_32
+.macro __BUG_REL val:req
+       .long \val
+.endm
+#else
+.macro __BUG_REL val:req
+       .long \val - 2b
+.endm
+#endif
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+.macro ASM_BUG ins:req file:req line:req flags:req size:req
+1:     \ins
+       .pushsection __bug_table,"aw"
+2:     __BUG_REL val=1b        # bug_entry::bug_addr
+       __BUG_REL val=\file     # bug_entry::file
+       .word \line             # bug_entry::line
+       .word \flags            # bug_entry::flags
+       .org 2b+\size
+       .popsection
+.endm
+
+#else /* !CONFIG_DEBUG_BUGVERBOSE */
+
+.macro ASM_BUG ins:req file:req line:req flags:req size:req
+1:     \ins
+       .pushsection __bug_table,"aw"
+2:     __BUG_REL val=1b        # bug_entry::bug_addr
+       .word \flags            # bug_entry::flags
+       .org 2b+\size
+       .popsection
+.endm
+
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
+#else /* CONFIG_GENERIC_BUG */
+
+.macro ASM_BUG ins:req file:req line:req flags:req size:req
+       \ins
+.endm
+
+#endif /* CONFIG_GENERIC_BUG */
+
+#endif /* __ASSEMBLY__ */
+
 #endif /* _ASM_X86_BUG_H */
index aced6c9..7d44272 100644 (file)
@@ -2,10 +2,10 @@
 #ifndef _ASM_X86_CPUFEATURE_H
 #define _ASM_X86_CPUFEATURE_H
 
-#include <asm/processor.h>
-
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
+#include <asm/processor.h>
 #include <asm/asm.h>
 #include <linux/bitops.h>
 
@@ -161,37 +161,10 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-       asm_volatile_goto("1: jmp 6f\n"
-                "2:\n"
-                ".skip -(((5f-4f) - (2b-1b)) > 0) * "
-                        "((5f-4f) - (2b-1b)),0x90\n"
-                "3:\n"
-                ".section .altinstructions,\"a\"\n"
-                " .long 1b - .\n"              /* src offset */
-                " .long 4f - .\n"              /* repl offset */
-                " .word %P[always]\n"          /* always replace */
-                " .byte 3b - 1b\n"             /* src len */
-                " .byte 5f - 4f\n"             /* repl len */
-                " .byte 3b - 2b\n"             /* pad len */
-                ".previous\n"
-                ".section .altinstr_replacement,\"ax\"\n"
-                "4: jmp %l[t_no]\n"
-                "5:\n"
-                ".previous\n"
-                ".section .altinstructions,\"a\"\n"
-                " .long 1b - .\n"              /* src offset */
-                " .long 0\n"                   /* no replacement */
-                " .word %P[feature]\n"         /* feature bit */
-                " .byte 3b - 1b\n"             /* src len */
-                " .byte 0\n"                   /* repl len */
-                " .byte 0\n"                   /* pad len */
-                ".previous\n"
-                ".section .altinstr_aux,\"ax\"\n"
-                "6:\n"
-                " testb %[bitnum],%[cap_byte]\n"
-                " jnz %l[t_yes]\n"
-                " jmp %l[t_no]\n"
-                ".previous\n"
+       asm_volatile_goto("STATIC_CPU_HAS bitnum=%[bitnum] "
+                         "cap_byte=\"%[cap_byte]\" "
+                         "feature=%P[feature] t_yes=%l[t_yes] "
+                         "t_no=%l[t_no] always=%P[always]"
                 : : [feature]  "i" (bit),
                     [always]   "i" (X86_FEATURE_ALWAYS),
                     [bitnum]   "i" (1 << (bit & 7)),
@@ -226,5 +199,44 @@ t_no:
 #define CPU_FEATURE_TYPEVAL            boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
                                        boot_cpu_data.x86_model
 
-#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
+#else /* __ASSEMBLY__ */
+
+.macro STATIC_CPU_HAS bitnum:req cap_byte:req feature:req t_yes:req t_no:req always:req        /* emits a jump at label 1, two .altinstructions records describing it, and an out-of-line bit test */
+1:                             /* 1..2: the original instruction */
+       jmp 6f                  /* as assembled, control goes to the bit test at label 6 */
+2:
+       .skip -(((5f-4f) - (2b-1b)) > 0) * ((5f-4f) - (2b-1b)),0x90     /* 0x90 fill of (5f-4f)-(2b-1b) bytes when that difference is positive, otherwise nothing (gas yields -1 for a true comparison, hence the negation) */
+3:                             /* 3: end of the original instruction plus any fill */
+       .section .altinstructions,"a"
+       .long 1b - .            /* src offset */
+       .long 4f - .            /* repl offset */
+       .word \always           /* always replace */
+       .byte 3b - 1b           /* src len */
+       .byte 5f - 4f           /* repl len */
+       .byte 3b - 2b           /* pad len */
+       .previous
+       .section .altinstr_replacement,"ax"
+4:                             /* 4..5: replacement body referenced by the record above */
+       jmp \t_no
+5:
+       .previous
+       .section .altinstructions,"a"
+       .long 1b - .            /* src offset */
+       .long 0                 /* no replacement */
+       .word \feature          /* feature bit */
+       .byte 3b - 1b           /* src len */
+       .byte 0                 /* repl len */
+       .byte 0                 /* pad len */
+       .previous
+       .section .altinstr_aux,"ax"
+6:                             /* 6: out-of-line test reached from the jump at label 1 */
+       testb \bitnum,\cap_byte /* test the mask \bitnum against the byte operand \cap_byte */
+       jnz \t_yes              /* masked bit is set */
+       jmp \t_no               /* masked bit is clear */
+       .previous
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
 #endif /* _ASM_X86_CPUFEATURE_H */
index 0d157d2..a357031 100644 (file)
@@ -62,8 +62,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
 #define R_X86_64_PC16          13      /* 16 bit sign extended pc relative */
 #define R_X86_64_8             14      /* Direct 8 bit sign extended  */
 #define R_X86_64_PC8           15      /* 8 bit sign extended pc relative */
-
-#define R_X86_64_NUM           16
+#define R_X86_64_PC64          24      /* Place relative 64-bit signed */
 
 /*
  * These are used to set parameters in the core dumps.
index f9c3a5d..d8c2198 100644 (file)
@@ -29,7 +29,8 @@ struct pt_regs;
                (b)->handler = (tmp).handler - (delta);         \
        } while (0)
 
-extern int fixup_exception(struct pt_regs *regs, int trapnr);
+extern int fixup_exception(struct pt_regs *regs, int trapnr,
+                          unsigned long error_code, unsigned long fault_addr);
 extern int fixup_bug(struct pt_regs *regs, int trapnr);
 extern bool ex_has_fault_handler(unsigned long ip);
 extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
index 69dcdf1..5f7290e 100644 (file)
@@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
                     "3: movl $-2,%[err]\n\t"                           \
                     "jmp 2b\n\t"                                       \
                     ".popsection\n\t"                                  \
-                    _ASM_EXTABLE(1b, 3b)                               \
+                    _ASM_EXTABLE_UA(1b, 3b)                            \
                     : [err] "=r" (err)                                 \
                     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)    \
                     : "memory")
index de4d688..13c83fe 100644 (file)
@@ -20,7 +20,7 @@
                     "3:\tmov\t%3, %1\n"                        \
                     "\tjmp\t2b\n"                              \
                     "\t.previous\n"                            \
-                    _ASM_EXTABLE(1b, 3b)                       \
+                    _ASM_EXTABLE_UA(1b, 3b)                    \
                     : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
                     : "i" (-EFAULT), "0" (oparg), "1" (0))
 
@@ -36,8 +36,8 @@
                     "4:\tmov\t%5, %1\n"                        \
                     "\tjmp\t3b\n"                              \
                     "\t.previous\n"                            \
-                    _ASM_EXTABLE(1b, 4b)                       \
-                    _ASM_EXTABLE(2b, 4b)                       \
+                    _ASM_EXTABLE_UA(1b, 4b)                    \
+                    _ASM_EXTABLE_UA(2b, 4b)                    \
                     : "=&a" (oldval), "=&r" (ret),             \
                       "+m" (*uaddr), "=&r" (tem)               \
                     : "r" (oparg), "i" (-EFAULT), "1" (0))
index 8c0de42..a5fb34f 100644 (file)
@@ -2,19 +2,6 @@
 #ifndef _ASM_X86_JUMP_LABEL_H
 #define _ASM_X86_JUMP_LABEL_H
 
-#ifndef HAVE_JUMP_LABEL
-/*
- * For better or for worse, if jump labels (the gcc extension) are missing,
- * then the entire static branch patching infrastructure is compiled out.
- * If that happens, the code in here will malfunction.  Raise a compiler
- * error instead.
- *
- * In theory, jump labels and the static branch patching infrastructure
- * could be decoupled to fix this.
- */
-#error asm/jump_label.h included on a non-jump-label kernel
-#endif
-
 #define JUMP_LABEL_NOP_SIZE 5
 
 #ifdef CONFIG_X86_64
 
 static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
-       asm_volatile_goto("1:"
-               ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
-               ".pushsection __jump_table,  \"aw\" \n\t"
-               _ASM_ALIGN "\n\t"
-               _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
-               ".popsection \n\t"
-               : :  "i" (key), "i" (branch) : : l_yes);
-
+       asm_volatile_goto("STATIC_BRANCH_NOP l_yes=\"%l[l_yes]\" key=\"%c0\" "
+                         "branch=\"%c1\""
+                       : :  "i" (key), "i" (branch) : : l_yes);
        return false;
 l_yes:
        return true;
@@ -48,13 +30,8 @@ l_yes:
 
 static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
 {
-       asm_volatile_goto("1:"
-               ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
-               "2:\n\t"
-               ".pushsection __jump_table,  \"aw\" \n\t"
-               _ASM_ALIGN "\n\t"
-               _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
-               ".popsection \n\t"
+       asm_volatile_goto("STATIC_BRANCH_JMP l_yes=\"%l[l_yes]\" key=\"%c0\" "
+                         "branch=\"%c1\""
                : :  "i" (key), "i" (branch) : : l_yes);
 
        return false;
@@ -62,49 +39,28 @@ l_yes:
        return true;
 }
 
-#ifdef CONFIG_X86_64
-typedef u64 jump_label_t;
-#else
-typedef u32 jump_label_t;
-#endif
-
-struct jump_entry {
-       jump_label_t code;
-       jump_label_t target;
-       jump_label_t key;
-};
-
 #else  /* __ASSEMBLY__ */
 
-.macro STATIC_JUMP_IF_TRUE target, key, def
-.Lstatic_jump_\@:
-       .if \def
-       /* Equivalent to "jmp.d32 \target" */
-       .byte           0xe9
-       .long           \target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
-       .else
-       .byte           STATIC_KEY_INIT_NOP
-       .endif
+.macro STATIC_BRANCH_NOP l_yes:req key:req branch:req
+.Lstatic_branch_nop_\@:
+       .byte STATIC_KEY_INIT_NOP
+.Lstatic_branch_no_after_\@:
        .pushsection __jump_table, "aw"
        _ASM_ALIGN
-       _ASM_PTR        .Lstatic_jump_\@, \target, \key
+       .long           .Lstatic_branch_nop_\@ - ., \l_yes - .
+       _ASM_PTR        \key + \branch - .
        .popsection
 .endm
 
-.macro STATIC_JUMP_IF_FALSE target, key, def
-.Lstatic_jump_\@:
-       .if \def
-       .byte           STATIC_KEY_INIT_NOP
-       .else
-       /* Equivalent to "jmp.d32 \target" */
-       .byte           0xe9
-       .long           \target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
-       .endif
+.macro STATIC_BRANCH_JMP l_yes:req key:req branch:req
+.Lstatic_branch_jmp_\@:
+       .byte 0xe9
+       .long \l_yes - .Lstatic_branch_jmp_after_\@
+.Lstatic_branch_jmp_after_\@:
        .pushsection __jump_table, "aw"
        _ASM_ALIGN
-       _ASM_PTR        .Lstatic_jump_\@, \target, \key + 1
+       .long           .Lstatic_branch_jmp_\@ - ., \l_yes - .
+       _ASM_PTR        \key + \branch - .
        .popsection
 .endm
 
index c91083c..349a47a 100644 (file)
@@ -53,7 +53,7 @@ static inline void local_sub(long i, local_t *l)
  */
 static inline bool local_sub_and_test(long i, local_t *l)
 {
-       GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e);
+       return GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, e, "er", i);
 }
 
 /**
@@ -66,7 +66,7 @@ static inline bool local_sub_and_test(long i, local_t *l)
  */
 static inline bool local_dec_and_test(local_t *l)
 {
-       GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e);
+       return GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, e);
 }
 
 /**
@@ -79,7 +79,7 @@ static inline bool local_dec_and_test(local_t *l)
  */
 static inline bool local_inc_and_test(local_t *l)
 {
-       GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e);
+       return GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, e);
 }
 
 /**
@@ -93,7 +93,7 @@ static inline bool local_inc_and_test(local_t *l)
  */
 static inline bool local_add_negative(long i, local_t *l)
 {
-       GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s);
+       return GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, s, "er", i);
 }
 
 /**
index 4b75acc..83ce282 100644 (file)
@@ -346,23 +346,11 @@ extern struct pv_lock_ops pv_lock_ops;
 #define paravirt_clobber(clobber)              \
        [paravirt_clobber] "i" (clobber)
 
-/*
- * Generate some code, and mark it as patchable by the
- * apply_paravirt() alternate instruction patcher.
- */
-#define _paravirt_alt(insn_string, type, clobber)      \
-       "771:\n\t" insn_string "\n" "772:\n"            \
-       ".pushsection .parainstructions,\"a\"\n"        \
-       _ASM_ALIGN "\n"                                 \
-       _ASM_PTR " 771b\n"                              \
-       "  .byte " type "\n"                            \
-       "  .byte 772b-771b\n"                           \
-       "  .short " clobber "\n"                        \
-       ".popsection\n"
-
 /* Generate patchable code, with the default asm parameters. */
-#define paravirt_alt(insn_string)                                      \
-       _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
+#define paravirt_call                                                  \
+       "PARAVIRT_CALL type=\"%c[paravirt_typenum]\""                   \
+       " clobber=\"%c[paravirt_clobber]\""                             \
+       " pv_opptr=\"%c[paravirt_opptr]\";"
 
 /* Simple instruction patching code. */
 #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
@@ -390,16 +378,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 
 int paravirt_disable_iospace(void);
 
-/*
- * This generates an indirect call based on the operation type number.
- * The type number, computed in PARAVIRT_PATCH, is derived from the
- * offset into the paravirt_patch_template structure, and can therefore be
- * freely converted back into a structure offset.
- */
-#define PARAVIRT_CALL                                  \
-       ANNOTATE_RETPOLINE_SAFE                         \
-       "call *%c[paravirt_opptr];"
-
 /*
  * These macros are intended to wrap calls through one of the paravirt
  * ops structs, so that they can be later identified and patched at
@@ -537,7 +515,7 @@ int paravirt_disable_iospace(void);
                /* since this condition will never hold */              \
                if (sizeof(rettype) > sizeof(unsigned long)) {          \
                        asm volatile(pre                                \
-                                    paravirt_alt(PARAVIRT_CALL)        \
+                                    paravirt_call                      \
                                     post                               \
                                     : call_clbr, ASM_CALL_CONSTRAINT   \
                                     : paravirt_type(op),               \
@@ -547,7 +525,7 @@ int paravirt_disable_iospace(void);
                        __ret = (rettype)((((u64)__edx) << 32) | __eax); \
                } else {                                                \
                        asm volatile(pre                                \
-                                    paravirt_alt(PARAVIRT_CALL)        \
+                                    paravirt_call                      \
                                     post                               \
                                     : call_clbr, ASM_CALL_CONSTRAINT   \
                                     : paravirt_type(op),               \
@@ -574,7 +552,7 @@ int paravirt_disable_iospace(void);
                PVOP_VCALL_ARGS;                                        \
                PVOP_TEST_NULL(op);                                     \
                asm volatile(pre                                        \
-                            paravirt_alt(PARAVIRT_CALL)                \
+                            paravirt_call                              \
                             post                                       \
                             : call_clbr, ASM_CALL_CONSTRAINT           \
                             : paravirt_type(op),                       \
@@ -694,6 +672,26 @@ struct paravirt_patch_site {
 extern struct paravirt_patch_site __parainstructions[],
        __parainstructions_end[];
 
+#else  /* __ASSEMBLY__ */
+
+/*
+ * This generates an indirect call based on the operation type number.
+ * The type number, computed in PARAVIRT_PATCH, is derived from the
+ * offset into the paravirt_patch_template structure, and can therefore be
+ * freely converted back into a structure offset.
+ */
+.macro PARAVIRT_CALL type:req clobber:req pv_opptr:req /* emits the indirect call and a .parainstructions record describing it */
+771:   ANNOTATE_RETPOLINE_SAFE /* 771: start of the recorded code */
+       call *\pv_opptr         /* indirect call through the operand passed as pv_opptr */
+772:   .pushsection .parainstructions,"a"      /* 772: end of the recorded code */
+       _ASM_ALIGN
+       _ASM_PTR 771b           /* address of the call site */
+       .byte \type             /* operation type number */
+       .byte 772b-771b         /* length in bytes of the code between 771 and 772 */
+       .short \clobber         /* clobber value supplied by the caller */
+       .popsection
+.endm
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_PARAVIRT_TYPES_H */
index 7f2dbd9..90cb2f3 100644 (file)
@@ -88,7 +88,7 @@ static __always_inline void __preempt_count_sub(int val)
  */
 static __always_inline bool __preempt_count_dec_and_test(void)
 {
-       GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e);
+       return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
 }
 
 /*
index 6de1fd3..5e58a74 100644 (file)
@@ -37,8 +37,10 @@ struct pt_regs {
        unsigned short __esh;
        unsigned short fs;
        unsigned short __fsh;
+       /* On interrupt, gs and __gsh store the vector number. */
        unsigned short gs;
        unsigned short __gsh;
+       /* On interrupt, this is the error code. */
        unsigned long orig_ax;
        unsigned long ip;
        unsigned short cs;
index 3e70bed..87623c6 100644 (file)
@@ -6,9 +6,24 @@
 #include <asm/cpufeature.h>
 #include <asm-generic/qspinlock_types.h>
 #include <asm/paravirt.h>
+#include <asm/rmwcc.h>
 
 #define _Q_PENDING_LOOPS       (1 << 9)
 
+#define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire
+static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
+{
+       u32 val = 0;            /* value handed back to the caller, assembled in the two steps below */
+
+       if (GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c,
+                            "I", _Q_PENDING_OFFSET))   /* locked btsl on bit _Q_PENDING_OFFSET; the macro is true when condition "c" (carry) holds afterwards */
+               val |= _Q_PENDING_VAL;  /* x86 BTS leaves the bit's previous value in carry, so this reports that the bit was already set */
+
+       val |= atomic_read(&lock->val) & ~_Q_PENDING_MASK;      /* bits outside _Q_PENDING_MASK come from a second, separate read; NOTE(review): the result is therefore not one atomic snapshot of the lock word */
+
+       return val;
+}
+
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
 extern void __pv_init_lock_hash(void);
index 19b9052..a8b5e1e 100644 (file)
@@ -4,6 +4,41 @@
  * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
  * PaX/grsecurity.
  */
+
+#ifdef __ASSEMBLY__
+
+#include <asm/asm.h>
+#include <asm/bug.h>
+
+.macro REFCOUNT_EXCEPTION counter:req  /* out-of-line trap body shared by the REFCOUNT_CHECK_* and REFCOUNT_ERROR macros */
+       .pushsection .text..refcount    /* the trap body is placed in .text..refcount, not inline */
+111:   lea \counter, %_ASM_CX  /* 111: target of the callers' forward jumps; loads the address of \counter into the CX register */
+112:   ud2                     /* 112: the faulting instruction named in the exception-table entry below */
+       ASM_UNREACHABLE
+       .popsection
+113:   _ASM_EXTABLE_REFCOUNT(112b, 113b)       /* 113: lies back in the invoking section; the entry pairs the ud2 at 112 with this address */
+.endm
+
+/* Trigger refcount exception if refcount result is negative. */
+.macro REFCOUNT_CHECK_LT_ZERO counter:req
+       js 111f                 /* sign flag set: jump to label 111 emitted by REFCOUNT_EXCEPTION on the next line */
+       REFCOUNT_EXCEPTION counter="\counter"
+.endm
+
+/* Trigger refcount exception if refcount result is zero or negative. */
+.macro REFCOUNT_CHECK_LE_ZERO counter:req
+       jz 111f                 /* zero flag set: jump to the same label 111; the negative case is covered by the nested macro below */
+       REFCOUNT_CHECK_LT_ZERO counter="\counter"
+.endm
+
+/* Trigger refcount exception unconditionally. */
+.macro REFCOUNT_ERROR counter:req
+       jmp 111f                /* always jump to label 111 emitted by REFCOUNT_EXCEPTION on the next line */
+       REFCOUNT_EXCEPTION counter="\counter"
+.endm
+
+#else /* __ASSEMBLY__ */
+
 #include <linux/refcount.h>
 #include <asm/bug.h>
 
  * central refcount exception. The fixup address for the exception points
  * back to the regular execution flow in .text.
  */
-#define _REFCOUNT_EXCEPTION                            \
-       ".pushsection .text..refcount\n"                \
-       "111:\tlea %[counter], %%" _ASM_CX "\n"         \
-       "112:\t" ASM_UD2 "\n"                           \
-       ASM_UNREACHABLE                                 \
-       ".popsection\n"                                 \
-       "113:\n"                                        \
-       _ASM_EXTABLE_REFCOUNT(112b, 113b)
-
-/* Trigger refcount exception if refcount result is negative. */
-#define REFCOUNT_CHECK_LT_ZERO                         \
-       "js 111f\n\t"                                   \
-       _REFCOUNT_EXCEPTION
-
-/* Trigger refcount exception if refcount result is zero or negative. */
-#define REFCOUNT_CHECK_LE_ZERO                         \
-       "jz 111f\n\t"                                   \
-       REFCOUNT_CHECK_LT_ZERO
-
-/* Trigger refcount exception unconditionally. */
-#define REFCOUNT_ERROR                                 \
-       "jmp 111f\n\t"                                  \
-       _REFCOUNT_EXCEPTION
 
 static __always_inline void refcount_add(unsigned int i, refcount_t *r)
 {
        asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
-               REFCOUNT_CHECK_LT_ZERO
+               "REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\""
                : [counter] "+m" (r->refs.counter)
                : "ir" (i)
                : "cc", "cx");
@@ -51,7 +63,7 @@ static __always_inline void refcount_add(unsigned int i, refcount_t *r)
 static __always_inline void refcount_inc(refcount_t *r)
 {
        asm volatile(LOCK_PREFIX "incl %0\n\t"
-               REFCOUNT_CHECK_LT_ZERO
+               "REFCOUNT_CHECK_LT_ZERO counter=\"%[counter]\""
                : [counter] "+m" (r->refs.counter)
                : : "cc", "cx");
 }
@@ -59,7 +71,7 @@ static __always_inline void refcount_inc(refcount_t *r)
 static __always_inline void refcount_dec(refcount_t *r)
 {
        asm volatile(LOCK_PREFIX "decl %0\n\t"
-               REFCOUNT_CHECK_LE_ZERO
+               "REFCOUNT_CHECK_LE_ZERO counter=\"%[counter]\""
                : [counter] "+m" (r->refs.counter)
                : : "cc", "cx");
 }
@@ -67,14 +79,17 @@ static __always_inline void refcount_dec(refcount_t *r)
 static __always_inline __must_check
 bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
-       GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
-                                 r->refs.counter, "er", i, "%0", e, "cx");
+
+       return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
+                                        "REFCOUNT_CHECK_LT_ZERO counter=\"%[var]\"",
+                                        r->refs.counter, e, "er", i, "cx");
 }
 
 static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
 {
-       GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
-                                r->refs.counter, "%0", e, "cx");
+       return GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
+                                       "REFCOUNT_CHECK_LT_ZERO counter=\"%[var]\"",
+                                       r->refs.counter, e, "cx");
 }
 
 static __always_inline __must_check
@@ -91,7 +106,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 
                /* Did we try to increment from/to an undesirable state? */
                if (unlikely(c < 0 || c == INT_MAX || result < c)) {
-                       asm volatile(REFCOUNT_ERROR
+                       asm volatile("REFCOUNT_ERROR counter=\"%[counter]\""
                                     : : [counter] "m" (r->refs.counter)
                                     : "cc", "cx");
                        break;
@@ -107,4 +122,6 @@ static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
        return refcount_add_not_zero(1, r);
 }
 
+#endif /* __ASSEMBLY__ */
+
 #endif
index 4914a3e..46ac84b 100644 (file)
@@ -2,56 +2,69 @@
 #ifndef _ASM_X86_RMWcc
 #define _ASM_X86_RMWcc
 
+/* This counts to 12. Any more, and it will return the 13th argument. */
+#define __RMWcc_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
+#define RMWcc_ARGS(X...) __RMWcc_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+#define __RMWcc_CONCAT(a, b) a ## b
+#define RMWcc_CONCAT(a, b) __RMWcc_CONCAT(a, b)
+
 #define __CLOBBERS_MEM(clb...) "memory", ## clb
 
 #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
 
 /* Use asm goto */
 
-#define __GEN_RMWcc(fullop, var, cc, clobbers, ...)                    \
-do {                                                                   \
+#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...)                   \
+({                                                                     \
+       bool c = false;                                                 \
        asm_volatile_goto (fullop "; j" #cc " %l[cc_label]"             \
-                       : : [counter] "m" (var), ## __VA_ARGS__         \
+                       : : [var] "m" (_var), ## __VA_ARGS__            \
                        : clobbers : cc_label);                         \
-       return 0;                                                       \
-cc_label:                                                              \
-       return 1;                                                       \
-} while (0)
-
-#define __BINARY_RMWcc_ARG     " %1, "
-
+       if (0) {                                                        \
+cc_label:      c = true;                                               \
+       }                                                               \
+       c;                                                              \
+})
 
 #else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
 
 /* Use flags output or a set instruction */
 
-#define __GEN_RMWcc(fullop, var, cc, clobbers, ...)                    \
-do {                                                                   \
+#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...)                   \
+({                                                                     \
        bool c;                                                         \
        asm volatile (fullop CC_SET(cc)                                 \
-                       : [counter] "+m" (var), CC_OUT(cc) (c)          \
+                       : [var] "+m" (_var), CC_OUT(cc) (c)             \
                        : __VA_ARGS__ : clobbers);                      \
-       return c;                                                       \
-} while (0)
-
-#define __BINARY_RMWcc_ARG     " %2, "
+       c;                                                              \
+})
 
 #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
 
-#define GEN_UNARY_RMWcc(op, var, arg0, cc)                             \
+#define GEN_UNARY_RMWcc_4(op, var, cc, arg0)                           \
        __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
 
-#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\
-       __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc,                 \
-                   __CLOBBERS_MEM(clobbers))
+#define GEN_UNARY_RMWcc_3(op, var, cc)                                 \
+       GEN_UNARY_RMWcc_4(op, var, cc, "%[var]")
 
-#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)                 \
-       __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc,                \
-                   __CLOBBERS_MEM(), vcon (val))
+#define GEN_UNARY_RMWcc(X...) RMWcc_CONCAT(GEN_UNARY_RMWcc_, RMWcc_ARGS(X))(X)
+
+#define GEN_BINARY_RMWcc_6(op, var, cc, vcon, _val, arg0)              \
+       __GEN_RMWcc(op " %[val], " arg0, var, cc,                       \
+                   __CLOBBERS_MEM(), [val] vcon (_val))
+
+#define GEN_BINARY_RMWcc_5(op, var, cc, vcon, val)                     \
+       GEN_BINARY_RMWcc_6(op, var, cc, vcon, val, "%[var]")
+
+#define GEN_BINARY_RMWcc(X...) RMWcc_CONCAT(GEN_BINARY_RMWcc_, RMWcc_ARGS(X))(X)
+
+#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, cc, clobbers...)     \
+       __GEN_RMWcc(op " %[var]\n\t" suffix, var, cc,                   \
+                   __CLOBBERS_MEM(clobbers))
 
-#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc,        \
-                                 clobbers...)                          \
-       __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc,  \
-                   __CLOBBERS_MEM(clobbers), vcon (val))
+#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, cc, vcon, _val, clobbers...)\
+       __GEN_RMWcc(op " %[val], %[var]\n\t" suffix, var, cc,           \
+                   __CLOBBERS_MEM(clobbers), [val] vcon (_val))
 
 #endif /* _ASM_X86_RMWcc */
index aae77eb..b5e58cc 100644 (file)
@@ -198,8 +198,8 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
                     "4:        movl %3,%0\n"                           \
                     "  jmp 3b\n"                                       \
                     ".previous\n"                                      \
-                    _ASM_EXTABLE(1b, 4b)                               \
-                    _ASM_EXTABLE(2b, 4b)                               \
+                    _ASM_EXTABLE_UA(1b, 4b)                            \
+                    _ASM_EXTABLE_UA(2b, 4b)                            \
                     : "=r" (err)                                       \
                     : "A" (x), "r" (addr), "i" (errret), "0" (err))
 
@@ -340,8 +340,8 @@ do {                                                                        \
                     "  xorl %%edx,%%edx\n"                             \
                     "  jmp 3b\n"                                       \
                     ".previous\n"                                      \
-                    _ASM_EXTABLE(1b, 4b)                               \
-                    _ASM_EXTABLE(2b, 4b)                               \
+                    _ASM_EXTABLE_UA(1b, 4b)                            \
+                    _ASM_EXTABLE_UA(2b, 4b)                            \
                     : "=r" (retval), "=&A"(x)                          \
                     : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1),  \
                       "i" (errret), "0" (retval));                     \
@@ -386,7 +386,7 @@ do {                                                                        \
                     "  xor"itype" %"rtype"1,%"rtype"1\n"               \
                     "  jmp 2b\n"                                       \
                     ".previous\n"                                      \
-                    _ASM_EXTABLE(1b, 3b)                               \
+                    _ASM_EXTABLE_UA(1b, 3b)                            \
                     : "=r" (err), ltype(x)                             \
                     : "m" (__m(addr)), "i" (errret), "0" (err))
 
@@ -398,7 +398,7 @@ do {                                                                        \
                     "3:        mov %3,%0\n"                            \
                     "  jmp 2b\n"                                       \
                     ".previous\n"                                      \
-                    _ASM_EXTABLE(1b, 3b)                               \
+                    _ASM_EXTABLE_UA(1b, 3b)                            \
                     : "=r" (err), ltype(x)                             \
                     : "m" (__m(addr)), "i" (errret), "0" (err))
 
@@ -474,7 +474,7 @@ struct __large_struct { unsigned long buf[100]; };
                     "3:        mov %3,%0\n"                            \
                     "  jmp 2b\n"                                       \
                     ".previous\n"                                      \
-                    _ASM_EXTABLE(1b, 3b)                               \
+                    _ASM_EXTABLE_UA(1b, 3b)                            \
                     : "=r"(err)                                        \
                     : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
 
@@ -602,7 +602,7 @@ extern void __cmpxchg_wrong_size(void)
                        "3:\tmov     %3, %0\n"                          \
                        "\tjmp     2b\n"                                \
                        "\t.previous\n"                                 \
-                       _ASM_EXTABLE(1b, 3b)                            \
+                       _ASM_EXTABLE_UA(1b, 3b)                         \
                        : "+r" (__ret), "=a" (__old), "+m" (*(ptr))     \
                        : "i" (-EFAULT), "q" (__new), "1" (__old)       \
                        : "memory"                                      \
@@ -618,7 +618,7 @@ extern void __cmpxchg_wrong_size(void)
                        "3:\tmov     %3, %0\n"                          \
                        "\tjmp     2b\n"                                \
                        "\t.previous\n"                                 \
-                       _ASM_EXTABLE(1b, 3b)                            \
+                       _ASM_EXTABLE_UA(1b, 3b)                         \
                        : "+r" (__ret), "=a" (__old), "+m" (*(ptr))     \
                        : "i" (-EFAULT), "r" (__new), "1" (__old)       \
                        : "memory"                                      \
@@ -634,7 +634,7 @@ extern void __cmpxchg_wrong_size(void)
                        "3:\tmov     %3, %0\n"                          \
                        "\tjmp     2b\n"                                \
                        "\t.previous\n"                                 \
-                       _ASM_EXTABLE(1b, 3b)                            \
+                       _ASM_EXTABLE_UA(1b, 3b)                         \
                        : "+r" (__ret), "=a" (__old), "+m" (*(ptr))     \
                        : "i" (-EFAULT), "r" (__new), "1" (__old)       \
                        : "memory"                                      \
@@ -653,7 +653,7 @@ extern void __cmpxchg_wrong_size(void)
                        "3:\tmov     %3, %0\n"                          \
                        "\tjmp     2b\n"                                \
                        "\t.previous\n"                                 \
-                       _ASM_EXTABLE(1b, 3b)                            \
+                       _ASM_EXTABLE_UA(1b, 3b)                         \
                        : "+r" (__ret), "=a" (__old), "+m" (*(ptr))     \
                        : "i" (-EFAULT), "r" (__new), "1" (__old)       \
                        : "memory"                                      \
index 953b3ce..ef8fd1f 100644 (file)
@@ -1315,7 +1315,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
                local_irq_disable();
                ist_end_non_atomic();
        } else {
-               if (!fixup_exception(regs, X86_TRAP_MC))
+               if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
                        mce_panic("Failed kernel mode recovery", &m, NULL);
        }
 
index eeea935..aac0c1f 100644 (file)
@@ -42,55 +42,40 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
                                         void *(*poker)(void *, const void *, size_t),
                                         int init)
 {
-       union jump_code_union code;
+       union jump_code_union jmp;
        const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
        const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
+       const void *expect, *code;
+       int line;
+
+       jmp.jump = 0xe9;
+       jmp.offset = jump_entry_target(entry) -
+                    (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
 
        if (early_boot_irqs_disabled)
                poker = text_poke_early;
 
        if (type == JUMP_LABEL_JMP) {
                if (init) {
-                       /*
-                        * Jump label is enabled for the first time.
-                        * So we expect a default_nop...
-                        */
-                       if (unlikely(memcmp((void *)entry->code, default_nop, 5)
-                                    != 0))
-                               bug_at((void *)entry->code, __LINE__);
+                       expect = default_nop; line = __LINE__;
                } else {
-                       /*
-                        * ...otherwise expect an ideal_nop. Otherwise
-                        * something went horribly wrong.
-                        */
-                       if (unlikely(memcmp((void *)entry->code, ideal_nop, 5)
-                                    != 0))
-                               bug_at((void *)entry->code, __LINE__);
+                       expect = ideal_nop; line = __LINE__;
                }
 
-               code.jump = 0xe9;
-               code.offset = entry->target -
-                               (entry->code + JUMP_LABEL_NOP_SIZE);
+               code = &jmp.code;
        } else {
-               /*
-                * We are disabling this jump label. If it is not what
-                * we think it is, then something must have gone wrong.
-                * If this is the first initialization call, then we
-                * are converting the default nop to the ideal nop.
-                */
                if (init) {
-                       if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
-                               bug_at((void *)entry->code, __LINE__);
+                       expect = default_nop; line = __LINE__;
                } else {
-                       code.jump = 0xe9;
-                       code.offset = entry->target -
-                               (entry->code + JUMP_LABEL_NOP_SIZE);
-                       if (unlikely(memcmp((void *)entry->code, &code, 5) != 0))
-                               bug_at((void *)entry->code, __LINE__);
+                       expect = &jmp.code; line = __LINE__;
                }
-               memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
+
+               code = ideal_nop;
        }
 
+       if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))
+               bug_at((void *)jump_entry_code(entry), line);
+
        /*
         * Make text_poke_bp() a default fallback poker.
         *
@@ -99,11 +84,14 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
         * always nop being the 'currently valid' instruction
         *
         */
-       if (poker)
-               (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
-       else
-               text_poke_bp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE,
-                            (void *)entry->code + JUMP_LABEL_NOP_SIZE);
+       if (poker) {
+               (*poker)((void *)jump_entry_code(entry), code,
+                        JUMP_LABEL_NOP_SIZE);
+               return;
+       }
+
+       text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE,
+                    (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
 }
 
 void arch_jump_label_transform(struct jump_entry *entry,
index b0d1e81..f72a47b 100644 (file)
@@ -1020,50 +1020,12 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
                 */
                if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
                        return 1;
-
-               /*
-                * In case the user-specified fault handler returned
-                * zero, try to fix up.
-                */
-               if (fixup_exception(regs, trapnr))
-                       return 1;
-
-               /*
-                * fixup routine could not handle it,
-                * Let do_page_fault() fix it.
-                */
        }
 
        return 0;
 }
 NOKPROBE_SYMBOL(kprobe_fault_handler);
 
-/*
- * Wrapper routine for handling exceptions.
- */
-int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
-                            void *data)
-{
-       struct die_args *args = data;
-       int ret = NOTIFY_DONE;
-
-       if (args->regs && user_mode(args->regs))
-               return ret;
-
-       if (val == DIE_GPF) {
-               /*
-                * To be potentially processing a kprobe fault and to
-                * trust the result from kprobe_running(), we have
-                * be non-preemptible.
-                */
-               if (!preemptible() && kprobe_running() &&
-                   kprobe_fault_handler(args->regs, args->trapnr))
-                       ret = NOTIFY_STOP;
-       }
-       return ret;
-}
-NOKPROBE_SYMBOL(kprobe_exceptions_notify);
-
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
        bool is_in_entry_trampoline_section = false;
diff --git a/arch/x86/kernel/macros.S b/arch/x86/kernel/macros.S
new file mode 100644 (file)
index 0000000..161c950
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * This file includes headers whose assembly part includes macros which are
+ * commonly used. The macros are precompiled into assembly file which is later
+ * assembled together with each compiled file.
+ */
+
+#include <linux/compiler.h>
+#include <asm/refcount.h>
+#include <asm/alternative-asm.h>
+#include <asm/bug.h>
+#include <asm/paravirt.h>
+#include <asm/asm.h>
+#include <asm/cpufeature.h>
+#include <asm/jump_label.h>
index f58336a..b052e88 100644 (file)
@@ -201,6 +201,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
                                goto overflow;
 #endif
                        break;
+               case R_X86_64_PC64:
+                       if (*(u64 *)loc != 0)
+                               goto invalid_relocation;
+                       val -= (u64)loc;
+                       *(u64 *)loc = val;
+                       break;
                default:
                        pr_err("%s: Unknown rela relocation: %llu\n",
                               me->name, ELF64_R_TYPE(rel[i].r_info));
index e6db475..16c95cb 100644 (file)
@@ -206,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
        }
 
        if (!user_mode(regs)) {
-               if (fixup_exception(regs, trapnr))
+               if (fixup_exception(regs, trapnr, error_code, 0))
                        return 0;
 
                tsk->thread.error_code = error_code;
@@ -551,11 +551,21 @@ do_general_protection(struct pt_regs *regs, long error_code)
 
        tsk = current;
        if (!user_mode(regs)) {
-               if (fixup_exception(regs, X86_TRAP_GP))
+               if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
                        return;
 
                tsk->thread.error_code = error_code;
                tsk->thread.trap_nr = X86_TRAP_GP;
+
+               /*
+                * To be potentially processing a kprobe fault and to
+                * trust the result from kprobe_running(), we have to
+                * be non-preemptible.
+                */
+               if (!preemptible() && kprobe_running() &&
+                   kprobe_fault_handler(regs, X86_TRAP_GP))
+                       return;
+
                if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
                               X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
                        die("general protection fault", regs, error_code);
@@ -838,7 +848,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
        cond_local_irq_enable(regs);
 
        if (!user_mode(regs)) {
-               if (fixup_exception(regs, trapnr))
+               if (fixup_exception(regs, trapnr, error_code, 0))
                        return;
 
                task->thread.error_code = error_code;
index 46e71a7..ad8e090 100644 (file)
@@ -273,11 +273,11 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
 
 #define SRC(y...)                      \
        9999: y;                        \
-       _ASM_EXTABLE(9999b, 6001f)
+       _ASM_EXTABLE_UA(9999b, 6001f)
 
 #define DST(y...)                      \
        9999: y;                        \
-       _ASM_EXTABLE(9999b, 6002f)
+       _ASM_EXTABLE_UA(9999b, 6002f)
 
 #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
 
index 020f75c..db4e5aa 100644 (file)
@@ -92,26 +92,26 @@ ENTRY(copy_user_generic_unrolled)
 60:    jmp copy_user_handle_tail /* ecx is zerorest also */
        .previous
 
-       _ASM_EXTABLE(1b,30b)
-       _ASM_EXTABLE(2b,30b)
-       _ASM_EXTABLE(3b,30b)
-       _ASM_EXTABLE(4b,30b)
-       _ASM_EXTABLE(5b,30b)
-       _ASM_EXTABLE(6b,30b)
-       _ASM_EXTABLE(7b,30b)
-       _ASM_EXTABLE(8b,30b)
-       _ASM_EXTABLE(9b,30b)
-       _ASM_EXTABLE(10b,30b)
-       _ASM_EXTABLE(11b,30b)
-       _ASM_EXTABLE(12b,30b)
-       _ASM_EXTABLE(13b,30b)
-       _ASM_EXTABLE(14b,30b)
-       _ASM_EXTABLE(15b,30b)
-       _ASM_EXTABLE(16b,30b)
-       _ASM_EXTABLE(18b,40b)
-       _ASM_EXTABLE(19b,40b)
-       _ASM_EXTABLE(21b,50b)
-       _ASM_EXTABLE(22b,50b)
+       _ASM_EXTABLE_UA(1b, 30b)
+       _ASM_EXTABLE_UA(2b, 30b)
+       _ASM_EXTABLE_UA(3b, 30b)
+       _ASM_EXTABLE_UA(4b, 30b)
+       _ASM_EXTABLE_UA(5b, 30b)
+       _ASM_EXTABLE_UA(6b, 30b)
+       _ASM_EXTABLE_UA(7b, 30b)
+       _ASM_EXTABLE_UA(8b, 30b)
+       _ASM_EXTABLE_UA(9b, 30b)
+       _ASM_EXTABLE_UA(10b, 30b)
+       _ASM_EXTABLE_UA(11b, 30b)
+       _ASM_EXTABLE_UA(12b, 30b)
+       _ASM_EXTABLE_UA(13b, 30b)
+       _ASM_EXTABLE_UA(14b, 30b)
+       _ASM_EXTABLE_UA(15b, 30b)
+       _ASM_EXTABLE_UA(16b, 30b)
+       _ASM_EXTABLE_UA(18b, 40b)
+       _ASM_EXTABLE_UA(19b, 40b)
+       _ASM_EXTABLE_UA(21b, 50b)
+       _ASM_EXTABLE_UA(22b, 50b)
 ENDPROC(copy_user_generic_unrolled)
 EXPORT_SYMBOL(copy_user_generic_unrolled)
 
@@ -156,8 +156,8 @@ ENTRY(copy_user_generic_string)
        jmp copy_user_handle_tail
        .previous
 
-       _ASM_EXTABLE(1b,11b)
-       _ASM_EXTABLE(3b,12b)
+       _ASM_EXTABLE_UA(1b, 11b)
+       _ASM_EXTABLE_UA(3b, 12b)
 ENDPROC(copy_user_generic_string)
 EXPORT_SYMBOL(copy_user_generic_string)
 
@@ -189,7 +189,7 @@ ENTRY(copy_user_enhanced_fast_string)
        jmp copy_user_handle_tail
        .previous
 
-       _ASM_EXTABLE(1b,12b)
+       _ASM_EXTABLE_UA(1b, 12b)
 ENDPROC(copy_user_enhanced_fast_string)
 EXPORT_SYMBOL(copy_user_enhanced_fast_string)
 
@@ -319,27 +319,27 @@ ENTRY(__copy_user_nocache)
        jmp copy_user_handle_tail
        .previous
 
-       _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
-       _ASM_EXTABLE(20b,.L_fixup_8b_copy)
-       _ASM_EXTABLE(21b,.L_fixup_8b_copy)
-       _ASM_EXTABLE(30b,.L_fixup_4b_copy)
-       _ASM_EXTABLE(31b,.L_fixup_4b_copy)
-       _ASM_EXTABLE(40b,.L_fixup_1b_copy)
-       _ASM_EXTABLE(41b,.L_fixup_1b_copy)
+       _ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
+       _ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
+       _ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
+       _ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
+       _ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
+       _ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
+       _ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
 ENDPROC(__copy_user_nocache)
 EXPORT_SYMBOL(__copy_user_nocache)
index 45a53df..a4a379e 100644 (file)
 
        .macro source
 10:
-       _ASM_EXTABLE(10b, .Lbad_source)
+       _ASM_EXTABLE_UA(10b, .Lbad_source)
        .endm
 
        .macro dest
 20:
-       _ASM_EXTABLE(20b, .Lbad_dest)
+       _ASM_EXTABLE_UA(20b, .Lbad_dest)
        .endm
 
+       /*
+        * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
+        * potentially unmapped kernel address.
+        */
        .macro ignore L=.Lignore
 30:
        _ASM_EXTABLE(30b, \L)
index 49b167f..74fdff9 100644 (file)
@@ -132,12 +132,12 @@ bad_get_user_8:
 END(bad_get_user_8)
 #endif
 
-       _ASM_EXTABLE(1b,bad_get_user)
-       _ASM_EXTABLE(2b,bad_get_user)
-       _ASM_EXTABLE(3b,bad_get_user)
+       _ASM_EXTABLE_UA(1b, bad_get_user)
+       _ASM_EXTABLE_UA(2b, bad_get_user)
+       _ASM_EXTABLE_UA(3b, bad_get_user)
 #ifdef CONFIG_X86_64
-       _ASM_EXTABLE(4b,bad_get_user)
+       _ASM_EXTABLE_UA(4b, bad_get_user)
 #else
-       _ASM_EXTABLE(4b,bad_get_user_8)
-       _ASM_EXTABLE(5b,bad_get_user_8)
+       _ASM_EXTABLE_UA(4b, bad_get_user_8)
+       _ASM_EXTABLE_UA(5b, bad_get_user_8)
 #endif
index 96dce5f..d2e5c9c 100644 (file)
@@ -94,10 +94,10 @@ bad_put_user:
        EXIT
 END(bad_put_user)
 
-       _ASM_EXTABLE(1b,bad_put_user)
-       _ASM_EXTABLE(2b,bad_put_user)
-       _ASM_EXTABLE(3b,bad_put_user)
-       _ASM_EXTABLE(4b,bad_put_user)
+       _ASM_EXTABLE_UA(1b, bad_put_user)
+       _ASM_EXTABLE_UA(2b, bad_put_user)
+       _ASM_EXTABLE_UA(3b, bad_put_user)
+       _ASM_EXTABLE_UA(4b, bad_put_user)
 #ifdef CONFIG_X86_32
-       _ASM_EXTABLE(5b,bad_put_user)
+       _ASM_EXTABLE_UA(5b, bad_put_user)
 #endif
index 7add8ba..71fb58d 100644 (file)
@@ -47,8 +47,8 @@ do {                                                                  \
                "3:     lea 0(%2,%0,4),%0\n"                            \
                "       jmp 2b\n"                                       \
                ".previous\n"                                           \
-               _ASM_EXTABLE(0b,3b)                                     \
-               _ASM_EXTABLE(1b,2b)                                     \
+               _ASM_EXTABLE_UA(0b, 3b)                                 \
+               _ASM_EXTABLE_UA(1b, 2b)                                 \
                : "=&c"(size), "=&D" (__d0)                             \
                : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0));     \
 } while (0)
@@ -153,44 +153,44 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size)
                       "101:   lea 0(%%eax,%0,4),%0\n"
                       "       jmp 100b\n"
                       ".previous\n"
-                      _ASM_EXTABLE(1b,100b)
-                      _ASM_EXTABLE(2b,100b)
-                      _ASM_EXTABLE(3b,100b)
-                      _ASM_EXTABLE(4b,100b)
-                      _ASM_EXTABLE(5b,100b)
-                      _ASM_EXTABLE(6b,100b)
-                      _ASM_EXTABLE(7b,100b)
-                      _ASM_EXTABLE(8b,100b)
-                      _ASM_EXTABLE(9b,100b)
-                      _ASM_EXTABLE(10b,100b)
-                      _ASM_EXTABLE(11b,100b)
-                      _ASM_EXTABLE(12b,100b)
-                      _ASM_EXTABLE(13b,100b)
-                      _ASM_EXTABLE(14b,100b)
-                      _ASM_EXTABLE(15b,100b)
-                      _ASM_EXTABLE(16b,100b)
-                      _ASM_EXTABLE(17b,100b)
-                      _ASM_EXTABLE(18b,100b)
-                      _ASM_EXTABLE(19b,100b)
-                      _ASM_EXTABLE(20b,100b)
-                      _ASM_EXTABLE(21b,100b)
-                      _ASM_EXTABLE(22b,100b)
-                      _ASM_EXTABLE(23b,100b)
-                      _ASM_EXTABLE(24b,100b)
-                      _ASM_EXTABLE(25b,100b)
-                      _ASM_EXTABLE(26b,100b)
-                      _ASM_EXTABLE(27b,100b)
-                      _ASM_EXTABLE(28b,100b)
-                      _ASM_EXTABLE(29b,100b)
-                      _ASM_EXTABLE(30b,100b)
-                      _ASM_EXTABLE(31b,100b)
-                      _ASM_EXTABLE(32b,100b)
-                      _ASM_EXTABLE(33b,100b)
-                      _ASM_EXTABLE(34b,100b)
-                      _ASM_EXTABLE(35b,100b)
-                      _ASM_EXTABLE(36b,100b)
-                      _ASM_EXTABLE(37b,100b)
-                      _ASM_EXTABLE(99b,101b)
+                      _ASM_EXTABLE_UA(1b, 100b)
+                      _ASM_EXTABLE_UA(2b, 100b)
+                      _ASM_EXTABLE_UA(3b, 100b)
+                      _ASM_EXTABLE_UA(4b, 100b)
+                      _ASM_EXTABLE_UA(5b, 100b)
+                      _ASM_EXTABLE_UA(6b, 100b)
+                      _ASM_EXTABLE_UA(7b, 100b)
+                      _ASM_EXTABLE_UA(8b, 100b)
+                      _ASM_EXTABLE_UA(9b, 100b)
+                      _ASM_EXTABLE_UA(10b, 100b)
+                      _ASM_EXTABLE_UA(11b, 100b)
+                      _ASM_EXTABLE_UA(12b, 100b)
+                      _ASM_EXTABLE_UA(13b, 100b)
+                      _ASM_EXTABLE_UA(14b, 100b)
+                      _ASM_EXTABLE_UA(15b, 100b)
+                      _ASM_EXTABLE_UA(16b, 100b)
+                      _ASM_EXTABLE_UA(17b, 100b)
+                      _ASM_EXTABLE_UA(18b, 100b)
+                      _ASM_EXTABLE_UA(19b, 100b)
+                      _ASM_EXTABLE_UA(20b, 100b)
+                      _ASM_EXTABLE_UA(21b, 100b)
+                      _ASM_EXTABLE_UA(22b, 100b)
+                      _ASM_EXTABLE_UA(23b, 100b)
+                      _ASM_EXTABLE_UA(24b, 100b)
+                      _ASM_EXTABLE_UA(25b, 100b)
+                      _ASM_EXTABLE_UA(26b, 100b)
+                      _ASM_EXTABLE_UA(27b, 100b)
+                      _ASM_EXTABLE_UA(28b, 100b)
+                      _ASM_EXTABLE_UA(29b, 100b)
+                      _ASM_EXTABLE_UA(30b, 100b)
+                      _ASM_EXTABLE_UA(31b, 100b)
+                      _ASM_EXTABLE_UA(32b, 100b)
+                      _ASM_EXTABLE_UA(33b, 100b)
+                      _ASM_EXTABLE_UA(34b, 100b)
+                      _ASM_EXTABLE_UA(35b, 100b)
+                      _ASM_EXTABLE_UA(36b, 100b)
+                      _ASM_EXTABLE_UA(37b, 100b)
+                      _ASM_EXTABLE_UA(99b, 101b)
                       : "=&c"(size), "=&D" (d0), "=&S" (d1)
                       :  "1"(to), "2"(from), "0"(size)
                       : "eax", "edx", "memory");
@@ -259,26 +259,26 @@ static unsigned long __copy_user_intel_nocache(void *to,
               "9:      lea 0(%%eax,%0,4),%0\n"
               "16:     jmp 8b\n"
               ".previous\n"
-              _ASM_EXTABLE(0b,16b)
-              _ASM_EXTABLE(1b,16b)
-              _ASM_EXTABLE(2b,16b)
-              _ASM_EXTABLE(21b,16b)
-              _ASM_EXTABLE(3b,16b)
-              _ASM_EXTABLE(31b,16b)
-              _ASM_EXTABLE(4b,16b)
-              _ASM_EXTABLE(41b,16b)
-              _ASM_EXTABLE(10b,16b)
-              _ASM_EXTABLE(51b,16b)
-              _ASM_EXTABLE(11b,16b)
-              _ASM_EXTABLE(61b,16b)
-              _ASM_EXTABLE(12b,16b)
-              _ASM_EXTABLE(71b,16b)
-              _ASM_EXTABLE(13b,16b)
-              _ASM_EXTABLE(81b,16b)
-              _ASM_EXTABLE(14b,16b)
-              _ASM_EXTABLE(91b,16b)
-              _ASM_EXTABLE(6b,9b)
-              _ASM_EXTABLE(7b,16b)
+              _ASM_EXTABLE_UA(0b, 16b)
+              _ASM_EXTABLE_UA(1b, 16b)
+              _ASM_EXTABLE_UA(2b, 16b)
+              _ASM_EXTABLE_UA(21b, 16b)
+              _ASM_EXTABLE_UA(3b, 16b)
+              _ASM_EXTABLE_UA(31b, 16b)
+              _ASM_EXTABLE_UA(4b, 16b)
+              _ASM_EXTABLE_UA(41b, 16b)
+              _ASM_EXTABLE_UA(10b, 16b)
+              _ASM_EXTABLE_UA(51b, 16b)
+              _ASM_EXTABLE_UA(11b, 16b)
+              _ASM_EXTABLE_UA(61b, 16b)
+              _ASM_EXTABLE_UA(12b, 16b)
+              _ASM_EXTABLE_UA(71b, 16b)
+              _ASM_EXTABLE_UA(13b, 16b)
+              _ASM_EXTABLE_UA(81b, 16b)
+              _ASM_EXTABLE_UA(14b, 16b)
+              _ASM_EXTABLE_UA(91b, 16b)
+              _ASM_EXTABLE_UA(6b, 9b)
+              _ASM_EXTABLE_UA(7b, 16b)
               : "=&c"(size), "=&D" (d0), "=&S" (d1)
               :  "1"(to), "2"(from), "0"(size)
               : "eax", "edx", "memory");
@@ -321,9 +321,9 @@ do {                                                                        \
                "3:     lea 0(%3,%0,4),%0\n"                            \
                "       jmp 2b\n"                                       \
                ".previous\n"                                           \
-               _ASM_EXTABLE(4b,5b)                                     \
-               _ASM_EXTABLE(0b,3b)                                     \
-               _ASM_EXTABLE(1b,2b)                                     \
+               _ASM_EXTABLE_UA(4b, 5b)                                 \
+               _ASM_EXTABLE_UA(0b, 3b)                                 \
+               _ASM_EXTABLE_UA(1b, 2b)                                 \
                : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2)   \
                : "3"(size), "0"(size), "1"(to), "2"(from)              \
                : "memory");                                            \
index 9c5606d..fefe644 100644 (file)
@@ -37,8 +37,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
                "3:     lea 0(%[size1],%[size8],8),%[size8]\n"
                "       jmp 2b\n"
                ".previous\n"
-               _ASM_EXTABLE(0b,3b)
-               _ASM_EXTABLE(1b,2b)
+               _ASM_EXTABLE_UA(0b, 3b)
+               _ASM_EXTABLE_UA(1b, 2b)
                : [size8] "=&c"(size), [dst] "=&D" (__d0)
                : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr));
        clac();
index 45f5d6c..6521134 100644 (file)
@@ -8,7 +8,8 @@
 #include <asm/kdebug.h>
 
 typedef bool (*ex_handler_t)(const struct exception_table_entry *,
-                           struct pt_regs *, int);
+                           struct pt_regs *, int, unsigned long,
+                           unsigned long);
 
 static inline unsigned long
 ex_fixup_addr(const struct exception_table_entry *x)
@@ -22,7 +23,9 @@ ex_fixup_handler(const struct exception_table_entry *x)
 }
 
 __visible bool ex_handler_default(const struct exception_table_entry *fixup,
-                                 struct pt_regs *regs, int trapnr)
+                                 struct pt_regs *regs, int trapnr,
+                                 unsigned long error_code,
+                                 unsigned long fault_addr)
 {
        regs->ip = ex_fixup_addr(fixup);
        return true;
@@ -30,7 +33,9 @@ __visible bool ex_handler_default(const struct exception_table_entry *fixup,
 EXPORT_SYMBOL(ex_handler_default);
 
 __visible bool ex_handler_fault(const struct exception_table_entry *fixup,
-                               struct pt_regs *regs, int trapnr)
+                               struct pt_regs *regs, int trapnr,
+                               unsigned long error_code,
+                               unsigned long fault_addr)
 {
        regs->ip = ex_fixup_addr(fixup);
        regs->ax = trapnr;
@@ -43,7 +48,9 @@ EXPORT_SYMBOL_GPL(ex_handler_fault);
  * result of a refcount inc/dec/add/sub.
  */
 __visible bool ex_handler_refcount(const struct exception_table_entry *fixup,
-                                  struct pt_regs *regs, int trapnr)
+                                  struct pt_regs *regs, int trapnr,
+                                  unsigned long error_code,
+                                  unsigned long fault_addr)
 {
        /* First unconditionally saturate the refcount. */
        *(int *)regs->cx = INT_MIN / 2;
@@ -96,7 +103,9 @@ EXPORT_SYMBOL(ex_handler_refcount);
  * out all the FPU registers) if we can't restore from the task's FPU state.
  */
 __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
-                                   struct pt_regs *regs, int trapnr)
+                                   struct pt_regs *regs, int trapnr,
+                                   unsigned long error_code,
+                                   unsigned long fault_addr)
 {
        regs->ip = ex_fixup_addr(fixup);
 
@@ -108,9 +117,79 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL_GPL(ex_handler_fprestore);
 
+/* Helper to check whether a uaccess fault indicates a kernel bug. */
+static bool bogus_uaccess(struct pt_regs *regs, int trapnr,
+                         unsigned long fault_addr)
+{
+       /* This is the normal case: #PF with a fault address in userspace. */
+       if (trapnr == X86_TRAP_PF && fault_addr < TASK_SIZE_MAX)
+               return false;
+
+       /*
+        * This code can be reached for machine checks, but only if the #MC
+        * handler has already decided that it looks like a candidate for fixup.
+        * This e.g. happens when attempting to access userspace memory which
+        * the CPU can't access because of uncorrectable bad memory.
+        */
+       if (trapnr == X86_TRAP_MC)
+               return false;
+
+       /*
+        * There are two remaining exception types we might encounter here:
+        *  - #PF for faulting accesses to kernel addresses
+        *  - #GP for faulting accesses to noncanonical addresses
+        * Complain about anything else.
+        */
+       if (trapnr != X86_TRAP_PF && trapnr != X86_TRAP_GP) {
+               WARN(1, "unexpected trap %d in uaccess\n", trapnr);
+               return false;
+       }
+
+       /*
+        * This is a faulting memory access in kernel space, on a kernel
+        * address, in a usercopy function. This can e.g. be caused by improper
+        * use of helpers like __put_user and by improper attempts to access
+        * userspace addresses in KERNEL_DS regions.
+        * The one (semi-)legitimate exception is probe_kernel_{read,write}(),
+        * which can be invoked from places like kgdb, /dev/mem (for reading)
+        * and privileged BPF code (for reading).
+        * The probe_kernel_*() functions set the kernel_uaccess_faults_ok flag
+        * to tell us that faulting on kernel addresses, and even noncanonical
+        * addresses, in a userspace accessor does not necessarily imply a
+        * kernel bug, root might just be doing weird stuff.
+        */
+       if (current->kernel_uaccess_faults_ok)
+               return false;
+
+       /* This is bad. Refuse the fixup so that we go into die(). */
+       if (trapnr == X86_TRAP_PF) {
+               pr_emerg("BUG: pagefault on kernel address 0x%lx in non-whitelisted uaccess\n",
+                        fault_addr);
+       } else {
+               pr_emerg("BUG: GPF in non-whitelisted uaccess (non-canonical address?)\n");
+       }
+       return true;
+}
+
+__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
+                                 struct pt_regs *regs, int trapnr,
+                                 unsigned long error_code,
+                                 unsigned long fault_addr)
+{
+       if (bogus_uaccess(regs, trapnr, fault_addr))
+               return false;
+       regs->ip = ex_fixup_addr(fixup);
+       return true;
+}
+EXPORT_SYMBOL(ex_handler_uaccess);
+
 __visible bool ex_handler_ext(const struct exception_table_entry *fixup,
-                             struct pt_regs *regs, int trapnr)
+                             struct pt_regs *regs, int trapnr,
+                             unsigned long error_code,
+                             unsigned long fault_addr)
 {
+       if (bogus_uaccess(regs, trapnr, fault_addr))
+               return false;
        /* Special hack for uaccess_err */
        current->thread.uaccess_err = 1;
        regs->ip = ex_fixup_addr(fixup);
@@ -119,7 +198,9 @@ __visible bool ex_handler_ext(const struct exception_table_entry *fixup,
 EXPORT_SYMBOL(ex_handler_ext);
 
 __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
-                                      struct pt_regs *regs, int trapnr)
+                                      struct pt_regs *regs, int trapnr,
+                                      unsigned long error_code,
+                                      unsigned long fault_addr)
 {
        if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n",
                         (unsigned int)regs->cx, regs->ip, (void *)regs->ip))
@@ -134,7 +215,9 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup
 EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
 
 __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
-                                      struct pt_regs *regs, int trapnr)
+                                      struct pt_regs *regs, int trapnr,
+                                      unsigned long error_code,
+                                      unsigned long fault_addr)
 {
        if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n",
                         (unsigned int)regs->cx, (unsigned int)regs->dx,
@@ -148,12 +231,14 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup
 EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
 
+/*
+ * Exception-table handler that resets %fs before applying the default fixup.
+ *
+ * Loads 0 into %fs; on CPUs flagged X86_BUG_NULL_SEG it loads __USER_DS
+ * first. It then returns whatever ex_handler_default() returns, forwarding
+ * all five arguments unchanged.
+ */
 __visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
-                                  struct pt_regs *regs, int trapnr)
+                                  struct pt_regs *regs, int trapnr,
+                                  unsigned long error_code,
+                                  unsigned long fault_addr)
 {
        if (static_cpu_has(X86_BUG_NULL_SEG))
                asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
        asm volatile ("mov %0, %%fs" : : "rm" (0));
-       return ex_handler_default(fixup, regs, trapnr);
+       return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr);
 }
 EXPORT_SYMBOL(ex_handler_clear_fs);
 
@@ -170,7 +255,8 @@ __visible bool ex_has_fault_handler(unsigned long ip)
        return handler == ex_handler_fault;
 }
 
-int fixup_exception(struct pt_regs *regs, int trapnr)
+int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
+                   unsigned long fault_addr)
 {
        const struct exception_table_entry *e;
        ex_handler_t handler;
@@ -194,7 +280,7 @@ int fixup_exception(struct pt_regs *regs, int trapnr)
                return 0;
 
        handler = ex_fixup_handler(e);
-       return handler(e, regs, trapnr);
+       return handler(e, regs, trapnr, error_code, fault_addr);
 }
 
 extern unsigned int early_recursion_flag;
@@ -230,9 +316,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
         * result in a hard-to-debug panic.
         *
         * Keep in mind that not all vectors actually get here.  Early
-        * fage faults, for example, are special.
+        * page faults, for example, are special.
         */
-       if (fixup_exception(regs, trapnr))
+       if (fixup_exception(regs, trapnr, regs->orig_ax, 0))
                return;
 
        if (fixup_bug(regs, trapnr))
index a5b9ddb..0d45f6d 100644 (file)
@@ -46,17 +46,19 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
 
+/*
+ * Give a running kprobe the chance to handle a page fault.
+ *
+ * Returns 0 without consulting kprobes when kprobes are not built in, when
+ * the fault came from user mode, when the context is preemptible, or when
+ * no kprobe is running. Otherwise returns the result of
+ * kprobe_fault_handler() for X86_TRAP_PF.
+ */
 static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
 {
-       int ret = 0;
-
-       /* kprobe_running() needs smp_processor_id() */
-       if (kprobes_built_in() && !user_mode(regs)) {
-               preempt_disable();
-               if (kprobe_running() && kprobe_fault_handler(regs, 14))
-                       ret = 1;
-               preempt_enable();
-       }
-
-       return ret;
+       if (!kprobes_built_in())
+               return 0;
+       if (user_mode(regs))
+               return 0;
+       /*
+        * To be potentially processing a kprobe fault and to be allowed to call
+        * kprobe_running(), we have to be non-preemptible.
+        */
+       if (preemptible())
+               return 0;
+       if (!kprobe_running())
+               return 0;
+       return kprobe_fault_handler(regs, X86_TRAP_PF);
 }
 
 /*
@@ -711,7 +713,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
        int sig;
 
        /* Are we prepared to handle this kernel fault? */
-       if (fixup_exception(regs, X86_TRAP_PF)) {
+       if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) {
                /*
                 * Any interrupt that takes a fault gets the fixup. This makes
                 * the below recursive fault logic only apply to a faults from
index 3a6c8eb..0b08067 100644 (file)
@@ -196,6 +196,7 @@ static const char *rel_type(unsigned type)
 #if ELF_BITS == 64
                REL_TYPE(R_X86_64_NONE),
                REL_TYPE(R_X86_64_64),
+               REL_TYPE(R_X86_64_PC64),
                REL_TYPE(R_X86_64_PC32),
                REL_TYPE(R_X86_64_GOT32),
                REL_TYPE(R_X86_64_PLT32),
@@ -782,6 +783,15 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
                        add_reloc(&relocs32neg, offset);
                break;
 
+       case R_X86_64_PC64:
+               /*
+                * Only used by jump labels
+                */
+               if (is_percpu_sym(sym, symname))
+                       die("Invalid R_X86_64_PC64 relocation against per-CPU symbol %s\n",
+                           symname);
+               break;
+
        case R_X86_64_32:
        case R_X86_64_32S:
        case R_X86_64_64:
index 5481972..413f351 100644 (file)
@@ -116,8 +116,7 @@ do {                                                                \
 #define R_X86_64_PC16          13      /* 16 bit sign extended pc relative */
 #define R_X86_64_8             14      /* Direct 8 bit sign extended  */
 #define R_X86_64_PC8           15      /* 8 bit sign extended pc relative */
-
-#define R_X86_64_NUM           16
+#define R_X86_64_PC64          24      /* Place relative 64-bit signed */
 
 /*
  * This is used to ensure we don't load something for the wrong architecture.
index 9190759..8dff506 100644 (file)
@@ -35,8 +35,8 @@ sed-y = -e ':a; s/\*(\([^)]*\)\.text\.unlikely/*(\1.literal.unlikely .{text}.unl
        -e 's/\.{text}/.text/g'
 
 quiet_cmd__cpp_lds_S = LDS     $@
-cmd__cpp_lds_S = $(CPP) $(cpp_flags) -P -C -Uxtensa -D__ASSEMBLY__ $<    \
-                 | sed $(sed-y) >$@
+cmd__cpp_lds_S = $(CPP) $(cpp_flags) -P -C -Uxtensa -D__ASSEMBLY__ \
+                -DLINKER_SCRIPT $< | sed $(sed-y) >$@
 
 $(obj)/vmlinux.lds: $(src)/vmlinux.lds.S FORCE
        $(call if_changed_dep,_cpp_lds_S)
index 2154d1b..5a75559 100644 (file)
@@ -183,6 +183,7 @@ static const struct crashtype crashtypes[] = {
        CRASHTYPE(USERCOPY_STACK_FRAME_FROM),
        CRASHTYPE(USERCOPY_STACK_BEYOND),
        CRASHTYPE(USERCOPY_KERNEL),
+       CRASHTYPE(USERCOPY_KERNEL_DS),
 };
 
 
index 9e513dc..07db641 100644 (file)
@@ -82,5 +82,6 @@ void lkdtm_USERCOPY_STACK_FRAME_TO(void);
 void lkdtm_USERCOPY_STACK_FRAME_FROM(void);
 void lkdtm_USERCOPY_STACK_BEYOND(void);
 void lkdtm_USERCOPY_KERNEL(void);
+void lkdtm_USERCOPY_KERNEL_DS(void);
 
 #endif
index 9725aed..389475b 100644 (file)
@@ -322,6 +322,19 @@ free_user:
        vm_munmap(user_addr, PAGE_SIZE);
 }
 
+/*
+ * Attempt copy_to_user() with an ERR_PTR() value as the destination while
+ * the address limit is switched to KERNEL_DS. The previous address limit
+ * is restored before returning.
+ */
+void lkdtm_USERCOPY_KERNEL_DS(void)
+{
+       char __user *user_ptr = (char __user *)ERR_PTR(-EINVAL);
+       mm_segment_t old_fs = get_fs();
+       char buf[10] = {0};
+
+       pr_info("attempting copy_to_user on unmapped kernel address\n");
+       set_fs(KERNEL_DS);
+       if (copy_to_user(user_ptr, buf, sizeof(buf)))
+               pr_info("copy_to_user on unmapped kernel address failed\n");
+       set_fs(old_fs);
+}
+
 void __init lkdtm_usercopy_init(void)
 {
        /* Prepare cache that lacks SLAB_USERCOPY flag. */
index 9918655..d86830c 100644 (file)
@@ -2642,6 +2642,7 @@ static long exact_copy_from_user(void *to, const void __user * from,
        if (!access_ok(VERIFY_READ, from, n))
                return n;
 
+       current->kernel_uaccess_faults_ok++;
        while (n) {
                if (__get_user(c, f)) {
                        memset(t, 0, n);
@@ -2651,6 +2652,7 @@ static long exact_copy_from_user(void *to, const void __user * from,
                f++;
                n--;
        }
+       current->kernel_uaccess_faults_ok--;
        return n;
 }
 
index 20561a6..cdafa5e 100644 (file)
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
 
-#ifdef CONFIG_BUG
-
-#ifdef CONFIG_GENERIC_BUG
 struct bug_entry {
+#ifdef CONFIG_GENERIC_BUG
 #ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
        unsigned long   bug_addr;
 #else
@@ -35,8 +33,10 @@ struct bug_entry {
        unsigned short  line;
 #endif
        unsigned short  flags;
-};
 #endif /* CONFIG_GENERIC_BUG */
+};
+
+#ifdef CONFIG_BUG
 
 /*
  * Don't use BUG() or BUG_ON() unless there's really no way out; one
index 0f7062b..36254d2 100644 (file)
@@ -71,8 +71,8 @@ static inline int queued_write_trylock(struct qrwlock *lock)
        if (unlikely(cnts))
                return 0;
 
-       return likely(atomic_cmpxchg_acquire(&lock->cnts,
-                                            cnts, cnts | _QW_LOCKED) == cnts);
+       return likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts,
+                               _QW_LOCKED));
 }
 /**
  * queued_read_lock - acquire read lock of a queue rwlock
@@ -96,8 +96,9 @@ static inline void queued_read_lock(struct qrwlock *lock)
  */
 static inline void queued_write_lock(struct qrwlock *lock)
 {
+       u32 cnts = 0;
        /* Optimize for the unfair lock case where the fair flag is 0. */
-       if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
+       if (likely(atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)))
                return;
 
        queued_write_lock_slowpath(lock);
index 9cc4575..7541fa7 100644 (file)
@@ -66,10 +66,12 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
  */
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
-       if (!atomic_read(&lock->val) &&
-          (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
-               return 1;
-       return 0;
+       u32 val = atomic_read(&lock->val);
+
+       if (unlikely(val))
+               return 0;
+
+       return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL));
 }
 
 extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
@@ -80,11 +82,11 @@ extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
  */
 static __always_inline void queued_spin_lock(struct qspinlock *lock)
 {
-       u32 val;
+       u32 val = 0;
 
-       val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
-       if (likely(val == 0))
+       if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
                return;
+
        queued_spin_lock_slowpath(lock, val);
 }
 
index d7701d4..d0bcea7 100644 (file)
        STRUCT_ALIGN();                                                 \
        *(__tracepoints)                                                \
        /* implement dynamic printk debug */                            \
-       . = ALIGN(8);                                                   \
-       __start___jump_table = .;                                       \
-       KEEP(*(__jump_table))                                           \
-       __stop___jump_table = .;                                        \
        . = ALIGN(8);                                                   \
        __start___verbose = .;                                          \
        KEEP(*(__verbose))                                              \
        . = __start_init_task + THREAD_SIZE;                            \
        __end_init_task = .;
 
+#define JUMP_TABLE_DATA                                                        \
+       . = ALIGN(8);                                                   \
+       __start___jump_table = .;                                       \
+       KEEP(*(__jump_table))                                           \
+       __stop___jump_table = .;
+
 /*
  * Allow architectures to handle ro_after_init data on their
  * own by defining an empty RO_AFTER_INIT_DATA.
 #define RO_AFTER_INIT_DATA                                             \
        __start_ro_after_init = .;                                      \
        *(.data..ro_after_init)                                         \
+       JUMP_TABLE_DATA                                                 \
        __end_ro_after_init = .;
 #endif
 
index 681d866..1921545 100644 (file)
@@ -99,22 +99,13 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
  * unique, to convince GCC not to merge duplicate inline asm statements.
  */
 #define annotate_reachable() ({                                                \
-       asm volatile("%c0:\n\t"                                         \
-                    ".pushsection .discard.reachable\n\t"              \
-                    ".long %c0b - .\n\t"                               \
-                    ".popsection\n\t" : : "i" (__COUNTER__));          \
+       asm volatile("ANNOTATE_REACHABLE counter=%c0"                   \
+                    : : "i" (__COUNTER__));                            \
 })
 #define annotate_unreachable() ({                                      \
-       asm volatile("%c0:\n\t"                                         \
-                    ".pushsection .discard.unreachable\n\t"            \
-                    ".long %c0b - .\n\t"                               \
-                    ".popsection\n\t" : : "i" (__COUNTER__));          \
+       asm volatile("ANNOTATE_UNREACHABLE counter=%c0"                 \
+                    : : "i" (__COUNTER__));                            \
 })
-#define ASM_UNREACHABLE                                                        \
-       "999:\n\t"                                                      \
-       ".pushsection .discard.unreachable\n\t"                         \
-       ".long 999b - .\n\t"                                            \
-       ".popsection\n\t"
 #else
 #define annotate_reachable()
 #define annotate_unreachable()
@@ -299,6 +290,45 @@ static inline void *offset_to_ptr(const int *off)
        return (void *)((unsigned long)off + *off);
 }
 
+#else /* __ASSEMBLY__ */
+
+#ifdef __KERNEL__
+#ifndef LINKER_SCRIPT
+
+#ifdef CONFIG_STACK_VALIDATION
+.macro ANNOTATE_UNREACHABLE counter:req
+\counter:
+       .pushsection .discard.unreachable
+       .long \counter\()b -.
+       .popsection
+.endm
+
+.macro ANNOTATE_REACHABLE counter:req
+\counter:
+       .pushsection .discard.reachable
+       .long \counter\()b -.
+       .popsection
+.endm
+
+.macro ASM_UNREACHABLE
+999:
+       .pushsection .discard.unreachable
+       .long 999b - .
+       .popsection
+.endm
+#else /* CONFIG_STACK_VALIDATION */
+.macro ANNOTATE_UNREACHABLE counter:req
+.endm
+
+.macro ANNOTATE_REACHABLE counter:req
+.endm
+
+.macro ASM_UNREACHABLE
+.endm
+#endif /* CONFIG_STACK_VALIDATION */
+
+#endif /* LINKER_SCRIPT */
+#endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
 #ifndef __optimize
index 120225e..257ab3c 100644 (file)
@@ -8,8 +8,8 @@
 
 struct task_struct;
 
-extern int debug_locks;
-extern int debug_locks_silent;
+extern int debug_locks __read_mostly;
+extern int debug_locks_silent __read_mostly;
 
 
 static inline int __debug_locks_off(void)
index 1a0b6f1..5df6a62 100644 (file)
@@ -119,6 +119,68 @@ struct static_key {
 
 #ifdef HAVE_JUMP_LABEL
 #include <asm/jump_label.h>
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE
+
+/*
+ * Place-relative jump table entry (CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE).
+ *
+ * code and target hold signed 32-bit offsets from the address of their own
+ * field. key holds an offset from the address of the key field; its two
+ * low bits are masked off by jump_entry_key().
+ */
+struct jump_entry {
+       s32 code;
+       s32 target;
+       long key;       // key may be far away from the core kernel under KASLR
+};
+
+/* Absolute address encoded by entry->code (field address + stored offset). */
+static inline unsigned long jump_entry_code(const struct jump_entry *entry)
+{
+       return (unsigned long)&entry->code + entry->code;
+}
+
+/* Absolute address encoded by entry->target (field address + stored offset). */
+static inline unsigned long jump_entry_target(const struct jump_entry *entry)
+{
+       return (unsigned long)&entry->target + entry->target;
+}
+
+/*
+ * static_key referenced by the entry: the stored offset, with the two low
+ * bits cleared, added to the address of the key field.
+ */
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
+{
+       long offset = entry->key & ~3L;
+
+       return (struct static_key *)((unsigned long)&entry->key + offset);
+}
+
+#else
+
+/*
+ * Accessors for the non-relative layout: the fields are returned as stored.
+ * NOTE(review): struct jump_entry is not defined in this branch; presumably
+ * it comes from <asm/jump_label.h> -- confirm.
+ */
+static inline unsigned long jump_entry_code(const struct jump_entry *entry)
+{
+       return entry->code;
+}
+
+static inline unsigned long jump_entry_target(const struct jump_entry *entry)
+{
+       return entry->target;
+}
+
+/* The two low bits of entry->key are cleared before use as a pointer. */
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
+{
+       return (struct static_key *)((unsigned long)entry->key & ~3UL);
+}
+
+#endif
+
+/* Bit 0 of entry->key: the entry's branch flag. */
+static inline bool jump_entry_is_branch(const struct jump_entry *entry)
+{
+       return (unsigned long)entry->key & 1UL;
+}
+
+/* Bit 1 of entry->key: set once jump_entry_set_init() has marked the entry. */
+static inline bool jump_entry_is_init(const struct jump_entry *entry)
+{
+       return (unsigned long)entry->key & 2UL;
+}
+
+/* Mark the entry as init by setting bit 1 of entry->key; never cleared here. */
+static inline void jump_entry_set_init(struct jump_entry *entry)
+{
+       entry->key |= 2;
+}
+
+#endif
 #endif
 
 #ifndef __ASSEMBLY__
@@ -151,7 +213,6 @@ extern struct jump_entry __start___jump_table[];
 extern struct jump_entry __stop___jump_table[];
 
 extern void jump_label_init(void);
-extern void jump_label_invalidate_initmem(void);
 extern void jump_label_lock(void);
 extern void jump_label_unlock(void);
 extern void arch_jump_label_transform(struct jump_entry *entry,
@@ -199,8 +260,6 @@ static __always_inline void jump_label_init(void)
        static_key_initialized = true;
 }
 
-static inline void jump_label_invalidate_initmem(void) {}
-
 static __always_inline bool static_key_false(struct static_key *key)
 {
        if (unlikely(static_key_count(key) > 0))
index b0d0b51..1fd82ff 100644 (file)
@@ -99,13 +99,8 @@ struct lock_class {
         */
        unsigned int                    version;
 
-       /*
-        * Statistics counter:
-        */
-       unsigned long                   ops;
-
-       const char                      *name;
        int                             name_version;
+       const char                      *name;
 
 #ifdef CONFIG_LOCK_STAT
        unsigned long                   contention_point[LOCKSTAT_POINTS];
index ab93b6e..67dbb57 100644 (file)
@@ -45,10 +45,10 @@ struct rw_semaphore {
 };
 
 /*
- * Setting bit 0 of the owner field with other non-zero bits will indicate
+ * Setting bit 1 of the owner field but not bit 0 will indicate
  * that the rwsem is writer-owned with an unknown owner.
  */
-#define RWSEM_OWNER_UNKNOWN    ((struct task_struct *)-1L)
+#define RWSEM_OWNER_UNKNOWN    ((struct task_struct *)-2L)
 
 extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
 extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
index 004ca21..58e2af8 100644 (file)
@@ -735,6 +735,12 @@ struct task_struct {
        unsigned                        use_memdelay:1;
 #endif
 
+       /*
+        * May usercopy functions fault on kernel addresses?
+        * This is not just a single bit because this can potentially nest.
+        */
+       unsigned int                    kernel_uaccess_faults_ok;
+
        unsigned long                   atomic_flags; /* Flags requiring atomic access. */
 
        struct restart_block            restart_block;
index 78b714a..1c3f902 100644 (file)
@@ -1069,7 +1069,6 @@ static int __ref kernel_init(void *unused)
        /* need to finish all async __init code before freeing the memory */
        async_synchronize_full();
        ftrace_free_init_mem();
-       jump_label_invalidate_initmem();
        free_initmem();
        mark_readonly();
 
index 0097ace..be4859f 100644 (file)
@@ -315,6 +315,16 @@ void lockdep_assert_cpus_held(void)
        percpu_rwsem_assert_held(&cpu_hotplug_lock);
 }
 
+/*
+ * Record an acquisition of cpu_hotplug_lock for the current context.
+ * Only the lock's dep_map is passed to rwsem_acquire(); the percpu rwsem
+ * itself is not operated on here.
+ */
+static void lockdep_acquire_cpus_lock(void)
+{
+       rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_);
+}
+
+/* Counterpart of lockdep_acquire_cpus_lock(): record the release. */
+static void lockdep_release_cpus_lock(void)
+{
+       rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, 1, _THIS_IP_);
+}
+
 /*
  * Wait for currently running CPU hotplug operations to complete (if any) and
  * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
@@ -344,6 +354,17 @@ void cpu_hotplug_enable(void)
        cpu_maps_update_done();
 }
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
+
+#else
+
+static void lockdep_acquire_cpus_lock(void)
+{
+}
+
+static void lockdep_release_cpus_lock(void)
+{
+}
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 #ifdef CONFIG_HOTPLUG_SMT
@@ -616,6 +637,12 @@ static void cpuhp_thread_fun(unsigned int cpu)
         */
        smp_mb();
 
+       /*
+        * The BP holds the hotplug lock, but we're now running on the AP,
+        * ensure that anybody asserting the lock is held, will actually find
+        * it so.
+        */
+       lockdep_acquire_cpus_lock();
        cpuhp_lock_acquire(bringup);
 
        if (st->single) {
@@ -661,6 +688,7 @@ static void cpuhp_thread_fun(unsigned int cpu)
        }
 
        cpuhp_lock_release(bringup);
+       lockdep_release_cpus_lock();
 
        if (!st->should_run)
                complete_ap_thread(st, bringup);
index 11fc3bb..3e2de8f 100644 (file)
@@ -1365,9 +1365,9 @@ static void __unqueue_futex(struct futex_q *q)
 {
        struct futex_hash_bucket *hb;
 
-       if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
-           || WARN_ON(plist_node_empty(&q->list)))
+       if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
                return;
+       lockdep_assert_held(q->lock_ptr);
 
        hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
        plist_del(&q->list, &hb->chain);
index 2e62503..b28028b 100644 (file)
@@ -38,23 +38,43 @@ static int jump_label_cmp(const void *a, const void *b)
        const struct jump_entry *jea = a;
        const struct jump_entry *jeb = b;
 
-       if (jea->key < jeb->key)
+       if (jump_entry_key(jea) < jump_entry_key(jeb))
                return -1;
 
-       if (jea->key > jeb->key)
+       if (jump_entry_key(jea) > jump_entry_key(jeb))
                return 1;
 
        return 0;
 }
 
+/*
+ * Swap callback for sort(), installed by jump_label_sort_entries() only when
+ * CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE is enabled.
+ *
+ * Exchanges the contents of the two entries and adjusts every field by the
+ * byte distance between the slots. The value moved from b to a has delta
+ * subtracted; the value moved from a to b has delta added. The size
+ * argument is unused.
+ */
+static void jump_label_swap(void *a, void *b, int size)
+{
+       long delta = (unsigned long)a - (unsigned long)b;
+       struct jump_entry *jea = a;
+       struct jump_entry *jeb = b;
+       struct jump_entry tmp = *jea;
+
+       jea->code       = jeb->code - delta;
+       jea->target     = jeb->target - delta;
+       jea->key        = jeb->key - delta;
+
+       jeb->code       = tmp.code + delta;
+       jeb->target     = tmp.target + delta;
+       jeb->key        = tmp.key + delta;
+}
+
+/*
+ * Sort the jump entries in [start, stop) with jump_label_cmp().
+ * jump_label_swap() is passed as the swap callback when
+ * CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE is enabled; otherwise NULL is passed.
+ */
 static void
 jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
 {
        unsigned long size;
+       void *swapfn = NULL;
+
+       if (IS_ENABLED(CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE))
+               swapfn = jump_label_swap;
 
        size = (((unsigned long)stop - (unsigned long)start)
                                        / sizeof(struct jump_entry));
-       sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL);
+       sort(start, size, sizeof(struct jump_entry), jump_label_cmp, swapfn);
 }
 
 static void jump_label_update(struct static_key *key);
@@ -85,6 +105,7 @@ void static_key_slow_inc_cpuslocked(struct static_key *key)
        int v, v1;
 
        STATIC_KEY_CHECK_USE(key);
+       lockdep_assert_cpus_held();
 
        /*
         * Careful if we get concurrent static_key_slow_inc() calls;
@@ -130,6 +151,7 @@ EXPORT_SYMBOL_GPL(static_key_slow_inc);
 void static_key_enable_cpuslocked(struct static_key *key)
 {
        STATIC_KEY_CHECK_USE(key);
+       lockdep_assert_cpus_held();
 
        if (atomic_read(&key->enabled) > 0) {
                WARN_ON_ONCE(atomic_read(&key->enabled) != 1);
@@ -160,6 +182,7 @@ EXPORT_SYMBOL_GPL(static_key_enable);
 void static_key_disable_cpuslocked(struct static_key *key)
 {
        STATIC_KEY_CHECK_USE(key);
+       lockdep_assert_cpus_held();
 
        if (atomic_read(&key->enabled) != 1) {
                WARN_ON_ONCE(atomic_read(&key->enabled) != 0);
@@ -185,6 +208,8 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key,
                                           unsigned long rate_limit,
                                           struct delayed_work *work)
 {
+       lockdep_assert_cpus_held();
+
        /*
         * The negative count check is valid even when a negative
         * key->enabled is in use by static_key_slow_inc(); a
@@ -261,8 +286,8 @@ EXPORT_SYMBOL_GPL(jump_label_rate_limit);
 
 static int addr_conflict(struct jump_entry *entry, void *start, void *end)
 {
-       if (entry->code <= (unsigned long)end &&
-               entry->code + JUMP_LABEL_NOP_SIZE > (unsigned long)start)
+       if (jump_entry_code(entry) <= (unsigned long)end &&
+           jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE > (unsigned long)start)
                return 1;
 
        return 0;
@@ -321,16 +346,6 @@ static inline void static_key_set_linked(struct static_key *key)
        key->type |= JUMP_TYPE_LINKED;
 }
 
-static inline struct static_key *jump_entry_key(struct jump_entry *entry)
-{
-       return (struct static_key *)((unsigned long)entry->key & ~1UL);
-}
-
-static bool jump_entry_branch(struct jump_entry *entry)
-{
-       return (unsigned long)entry->key & 1UL;
-}
-
 /***
  * A 'struct static_key' uses a union such that it either points directly
  * to a table of 'struct jump_entry' or to a linked list of modules which in
@@ -355,7 +370,7 @@ static enum jump_label_type jump_label_type(struct jump_entry *entry)
 {
        struct static_key *key = jump_entry_key(entry);
        bool enabled = static_key_enabled(key);
-       bool branch = jump_entry_branch(entry);
+       bool branch = jump_entry_is_branch(entry);
 
        /* See the comment in linux/jump_label.h */
        return enabled ^ branch;
@@ -363,19 +378,20 @@ static enum jump_label_type jump_label_type(struct jump_entry *entry)
 
+/*
+ * Patch every consecutive entry in [entry, stop) that belongs to key.
+ * Entries whose code address is not kernel text trigger a one-time warning
+ * instead of being patched.
+ */
 static void __jump_label_update(struct static_key *key,
                                struct jump_entry *entry,
-                               struct jump_entry *stop)
+                               struct jump_entry *stop,
+                               bool init)
 {
        for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
                /*
                 * Entries flagged as init (jump_entry_is_init()) are only
                 * patched when the caller passes init == true; otherwise
                 * they are skipped.
                 */
-               if (entry->code) {
-                       if (kernel_text_address(entry->code))
+               if (init || !jump_entry_is_init(entry)) {
+                       if (kernel_text_address(jump_entry_code(entry)))
                                arch_jump_label_transform(entry, jump_label_type(entry));
                        else
                                WARN_ONCE(1, "can't patch jump_label at %pS",
-                                         (void *)(unsigned long)entry->code);
+                                         (void *)jump_entry_code(entry));
                }
        }
 }
@@ -410,6 +426,9 @@ void __init jump_label_init(void)
                if (jump_label_type(iter) == JUMP_LABEL_NOP)
                        arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
 
+               if (init_section_contains((void *)jump_entry_code(iter), 1))
+                       jump_entry_set_init(iter);
+
                iterk = jump_entry_key(iter);
                if (iterk == key)
                        continue;
@@ -422,26 +441,13 @@ void __init jump_label_init(void)
        cpus_read_unlock();
 }
 
-/* Disable any jump label entries in __init/__exit code */
-void __init jump_label_invalidate_initmem(void)
-{
-       struct jump_entry *iter_start = __start___jump_table;
-       struct jump_entry *iter_stop = __stop___jump_table;
-       struct jump_entry *iter;
-
-       for (iter = iter_start; iter < iter_stop; iter++) {
-               if (init_section_contains((void *)(unsigned long)iter->code, 1))
-                       iter->code = 0;
-       }
-}
-
 #ifdef CONFIG_MODULES
 
 static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
 {
        struct static_key *key = jump_entry_key(entry);
        bool type = static_key_type(key);
-       bool branch = jump_entry_branch(entry);
+       bool branch = jump_entry_is_branch(entry);
 
        /* See the comment in linux/jump_label.h */
        return type ^ branch;
@@ -455,7 +461,7 @@ struct static_key_mod {
 
 static inline struct static_key_mod *static_key_mod(struct static_key *key)
 {
-       WARN_ON_ONCE(!(key->type & JUMP_TYPE_LINKED));
+       WARN_ON_ONCE(!static_key_linked(key));
        return (struct static_key_mod *)(key->type & ~JUMP_TYPE_MASK);
 }
 
@@ -514,7 +520,8 @@ static void __jump_label_mod_update(struct static_key *key)
                        stop = __stop___jump_table;
                else
                        stop = m->jump_entries + m->num_jump_entries;
-               __jump_label_update(key, mod->entries, stop);
+               __jump_label_update(key, mod->entries, stop,
+                                   m && m->state == MODULE_STATE_COMING);
        }
 }
 
@@ -560,12 +567,15 @@ static int jump_label_add_module(struct module *mod)
        for (iter = iter_start; iter < iter_stop; iter++) {
                struct static_key *iterk;
 
+               if (within_module_init(jump_entry_code(iter), mod))
+                       jump_entry_set_init(iter);
+
                iterk = jump_entry_key(iter);
                if (iterk == key)
                        continue;
 
                key = iterk;
-               if (within_module(iter->key, mod)) {
+               if (within_module((unsigned long)key, mod)) {
                        static_key_set_entries(key, iter);
                        continue;
                }
@@ -595,7 +605,7 @@ static int jump_label_add_module(struct module *mod)
 
                /* Only update if we've changed from our initial state */
                if (jump_label_type(iter) != jump_label_init_type(iter))
-                       __jump_label_update(key, iter, iter_stop);
+                       __jump_label_update(key, iter, iter_stop, true);
        }
 
        return 0;
@@ -615,7 +625,7 @@ static void jump_label_del_module(struct module *mod)
 
                key = jump_entry_key(iter);
 
-               if (within_module(iter->key, mod))
+               if (within_module((unsigned long)key, mod))
                        continue;
 
                /* No memory during module load */
@@ -651,19 +661,6 @@ static void jump_label_del_module(struct module *mod)
        }
 }
 
-/* Disable any jump label entries in module init code */
-static void jump_label_invalidate_module_init(struct module *mod)
-{
-       struct jump_entry *iter_start = mod->jump_entries;
-       struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
-       struct jump_entry *iter;
-
-       for (iter = iter_start; iter < iter_stop; iter++) {
-               if (within_module_init(iter->code, mod))
-                       iter->code = 0;
-       }
-}
-
 static int
 jump_label_module_notify(struct notifier_block *self, unsigned long val,
                         void *data)
@@ -685,9 +682,6 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
        case MODULE_STATE_GOING:
                jump_label_del_module(mod);
                break;
-       case MODULE_STATE_LIVE:
-               jump_label_invalidate_module_init(mod);
-               break;
        }
 
        jump_label_unlock();
@@ -757,7 +751,8 @@ static void jump_label_update(struct static_key *key)
        entry = static_key_entries(key);
        /* if there are no users, entry can be NULL */
        if (entry)
-               __jump_label_update(key, entry, stop);
+               __jump_label_update(key, entry, stop,
+                                   system_state < SYSTEM_RUNNING);
 }
 
 #ifdef CONFIG_STATIC_KEYS_SELFTEST
index dd13f86..1efada2 100644 (file)
@@ -138,7 +138,7 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
  * get freed - this significantly simplifies the debugging code.
  */
 unsigned long nr_lock_classes;
-static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
+struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
 
 static inline struct lock_class *hlock_class(struct held_lock *hlock)
 {
@@ -1391,7 +1391,9 @@ static void print_lock_class_header(struct lock_class *class, int depth)
 
        printk("%*s->", depth, "");
        print_lock_name(class);
-       printk(KERN_CONT " ops: %lu", class->ops);
+#ifdef CONFIG_DEBUG_LOCKDEP
+       printk(KERN_CONT " ops: %lu", debug_class_ops_read(class));
+#endif
        printk(KERN_CONT " {\n");
 
        for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
@@ -2147,76 +2149,6 @@ static int check_no_collision(struct task_struct *curr,
        return 1;
 }
 
-/*
- * This is for building a chain between just two different classes,
- * instead of adding a new hlock upon current, which is done by
- * add_chain_cache().
- *
- * This can be called in any context with two classes, while
- * add_chain_cache() must be done within the lock owener's context
- * since it uses hlock which might be racy in another context.
- */
-static inline int add_chain_cache_classes(unsigned int prev,
-                                         unsigned int next,
-                                         unsigned int irq_context,
-                                         u64 chain_key)
-{
-       struct hlist_head *hash_head = chainhashentry(chain_key);
-       struct lock_chain *chain;
-
-       /*
-        * Allocate a new chain entry from the static array, and add
-        * it to the hash:
-        */
-
-       /*
-        * We might need to take the graph lock, ensure we've got IRQs
-        * disabled to make this an IRQ-safe lock.. for recursion reasons
-        * lockdep won't complain about its own locking errors.
-        */
-       if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
-               return 0;
-
-       if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
-               if (!debug_locks_off_graph_unlock())
-                       return 0;
-
-               print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
-               dump_stack();
-               return 0;
-       }
-
-       chain = lock_chains + nr_lock_chains++;
-       chain->chain_key = chain_key;
-       chain->irq_context = irq_context;
-       chain->depth = 2;
-       if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
-               chain->base = nr_chain_hlocks;
-               nr_chain_hlocks += chain->depth;
-               chain_hlocks[chain->base] = prev - 1;
-               chain_hlocks[chain->base + 1] = next -1;
-       }
-#ifdef CONFIG_DEBUG_LOCKDEP
-       /*
-        * Important for check_no_collision().
-        */
-       else {
-               if (!debug_locks_off_graph_unlock())
-                       return 0;
-
-               print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
-               dump_stack();
-               return 0;
-       }
-#endif
-
-       hlist_add_head_rcu(&chain->entry, hash_head);
-       debug_atomic_inc(chain_lookup_misses);
-       inc_chains();
-
-       return 1;
-}
-
 /*
  * Adds a dependency chain into chain hashtable. And must be called with
  * graph_lock held.
@@ -3262,6 +3194,10 @@ static int __lock_is_held(const struct lockdep_map *lock, int read);
 /*
  * This gets called for every mutex_lock*()/spin_lock*() operation.
  * We maintain the dependency maps and validate the locking attempt:
+ *
+ * The callers must make sure that IRQs are disabled before calling it,
+ * otherwise we could get an interrupt which would want to take locks,
+ * which would end up in lockdep again.
  */
 static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
                          int trylock, int read, int check, int hardirqs_off,
@@ -3279,14 +3215,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
        if (unlikely(!debug_locks))
                return 0;
 
-       /*
-        * Lockdep should run with IRQs disabled, otherwise we could
-        * get an interrupt which would want to take locks, which would
-        * end up in lockdep and have you got a head-ache already?
-        */
-       if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
-               return 0;
-
        if (!prove_locking || lock->key == &__lockdep_no_validate__)
                check = 0;
 
@@ -3300,7 +3228,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
                if (!class)
                        return 0;
        }
-       atomic_inc((atomic_t *)&class->ops);
+
+       debug_class_ops_inc(class);
+
        if (very_verbose(class)) {
                printk("\nacquire class [%px] %s", class->key, class->name);
                if (class->name_version > 1)
@@ -3543,6 +3473,9 @@ static int reacquire_held_locks(struct task_struct *curr, unsigned int depth,
 {
        struct held_lock *hlock;
 
+       if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+               return 0;
+
        for (hlock = curr->held_locks + idx; idx < depth; idx++, hlock++) {
                if (!__lock_acquire(hlock->instance,
                                    hlock_class(hlock)->subclass,
@@ -3696,6 +3629,13 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
        curr->lockdep_depth = i;
        curr->curr_chain_key = hlock->prev_chain_key;
 
+       /*
+        * The most likely case is when the unlock is on the innermost
+        * lock. In this case, we are done!
+        */
+       if (i == depth-1)
+               return 1;
+
        if (reacquire_held_locks(curr, depth, i + 1))
                return 0;
 
@@ -3703,10 +3643,14 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
         * We had N bottles of beer on the wall, we drank one, but now
         * there's not N-1 bottles of beer left on the wall...
         */
-       if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
-               return 0;
+       DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth-1);
 
-       return 1;
+       /*
+        * Since reacquire_held_locks() would have called check_chain_key()
+        * indirectly via __lock_acquire(), we don't need to do it again
+        * on return.
+        */
+       return 0;
 }
 
 static int __lock_is_held(const struct lockdep_map *lock, int read)
@@ -4122,7 +4066,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
 {
        unsigned long flags;
 
-       if (unlikely(!lock_stat))
+       if (unlikely(!lock_stat || !debug_locks))
                return;
 
        if (unlikely(current->lockdep_recursion))
@@ -4142,7 +4086,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
        unsigned long flags;
 
-       if (unlikely(!lock_stat))
+       if (unlikely(!lock_stat || !debug_locks))
                return;
 
        if (unlikely(current->lockdep_recursion))
index d459d62..88c847a 100644 (file)
@@ -152,9 +152,15 @@ struct lockdep_stats {
        int     nr_find_usage_forwards_recursions;
        int     nr_find_usage_backwards_checks;
        int     nr_find_usage_backwards_recursions;
+
+       /*
+        * Per lock class locking operation stat counts
+        */
+       unsigned long lock_class_ops[MAX_LOCKDEP_KEYS];
 };
 
 DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
+extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
 
 #define __debug_atomic_inc(ptr)                                        \
        this_cpu_inc(lockdep_stats.ptr);
@@ -179,9 +185,30 @@ DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
        }                                                               \
        __total;                                                        \
 })
+
+static inline void debug_class_ops_inc(struct lock_class *class)
+{
+       int idx;
+
+       idx = class - lock_classes;
+       __debug_atomic_inc(lock_class_ops[idx]);
+}
+
+static inline unsigned long debug_class_ops_read(struct lock_class *class)
+{
+       int idx, cpu;
+       unsigned long ops = 0;
+
+       idx = class - lock_classes;
+       for_each_possible_cpu(cpu)
+               ops += per_cpu(lockdep_stats.lock_class_ops[idx], cpu);
+       return ops;
+}
+
 #else
 # define __debug_atomic_inc(ptr)       do { } while (0)
 # define debug_atomic_inc(ptr)         do { } while (0)
 # define debug_atomic_dec(ptr)         do { } while (0)
 # define debug_atomic_read(ptr)                0
+# define debug_class_ops_inc(ptr)      do { } while (0)
 #endif
index 3dd980d..3d31f9b 100644 (file)
@@ -68,7 +68,7 @@ static int l_show(struct seq_file *m, void *v)
 
        seq_printf(m, "%p", class->key);
 #ifdef CONFIG_DEBUG_LOCKDEP
-       seq_printf(m, " OPS:%8ld", class->ops);
+       seq_printf(m, " OPS:%8ld", debug_class_ops_read(class));
 #endif
 #ifdef CONFIG_PROVE_LOCKING
        seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class));
index bfaeb05..8a8c3c2 100644 (file)
  */
 
 #include "mcs_spinlock.h"
+#define MAX_NODES      4
 
+/*
+ * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
+ * size and four of them will fit nicely in one 64-byte cacheline. For
+ * pvqspinlock, however, we need more space for extra data. To accommodate
+ * that, we insert two more long words to pad it up to 32 bytes. IOW, only
+ * two of them can fit in a cacheline in this case. That is OK as it is rare
+ * to have more than 2 levels of slowpath nesting in actual use. We don't
+ * want to penalize pvqspinlocks to optimize for a rare case in native
+ * qspinlocks.
+ */
+struct qnode {
+       struct mcs_spinlock mcs;
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
-#define MAX_NODES      8
-#else
-#define MAX_NODES      4
+       long reserved[2];
 #endif
+};
 
 /*
  * The pending bit spinning loop count.
  *
  * PV doubles the storage and uses the second cacheline for PV state.
  */
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
+static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]);
 
 /*
  * We must be able to distinguish between no-tail and the tail at 0:0,
@@ -126,7 +138,13 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
        int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
        int idx = (tail &  _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
 
-       return per_cpu_ptr(&mcs_nodes[idx], cpu);
+       return per_cpu_ptr(&qnodes[idx].mcs, cpu);
+}
+
+static inline __pure
+struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
+{
+       return &((struct qnode *)base + idx)->mcs;
 }
 
 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
@@ -231,6 +249,20 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 }
 #endif /* _Q_PENDING_BITS == 8 */
 
+/**
+ * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
+ * @lock : Pointer to queued spinlock structure
+ * Return: The previous lock value
+ *
+ * *,*,* -> *,1,*
+ */
+#ifndef queued_fetch_set_pending_acquire
+static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
+{
+       return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
+}
+#endif
+
 /**
  * set_locked - Set the lock bit and own the lock
  * @lock: Pointer to queued spinlock structure
@@ -326,43 +358,48 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
        /*
         * trylock || pending
         *
-        * 0,0,0 -> 0,0,1 ; trylock
-        * 0,0,1 -> 0,1,1 ; pending
+        * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
         */
-       val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
-       if (!(val & ~_Q_LOCKED_MASK)) {
-               /*
-                * We're pending, wait for the owner to go away.
-                *
-                * *,1,1 -> *,1,0
-                *
-                * this wait loop must be a load-acquire such that we match the
-                * store-release that clears the locked bit and create lock
-                * sequentiality; this is because not all
-                * clear_pending_set_locked() implementations imply full
-                * barriers.
-                */
-               if (val & _Q_LOCKED_MASK) {
-                       atomic_cond_read_acquire(&lock->val,
-                                                !(VAL & _Q_LOCKED_MASK));
-               }
+       val = queued_fetch_set_pending_acquire(lock);
 
-               /*
-                * take ownership and clear the pending bit.
-                *
-                * *,1,0 -> *,0,1
-                */
-               clear_pending_set_locked(lock);
-               qstat_inc(qstat_lock_pending, true);
-               return;
+       /*
+        * If we observe contention, there is a concurrent locker.
+        *
+        * Undo and queue; our setting of PENDING might have made the
+        * n,0,0 -> 0,0,0 transition fail and it will now be waiting
+        * on @next to become !NULL.
+        */
+       if (unlikely(val & ~_Q_LOCKED_MASK)) {
+
+               /* Undo PENDING if we set it. */
+               if (!(val & _Q_PENDING_MASK))
+                       clear_pending(lock);
+
+               goto queue;
        }
 
        /*
-        * If pending was clear but there are waiters in the queue, then
-        * we need to undo our setting of pending before we queue ourselves.
+        * We're pending, wait for the owner to go away.
+        *
+        * 0,1,1 -> 0,1,0
+        *
+        * this wait loop must be a load-acquire such that we match the
+        * store-release that clears the locked bit and create lock
+        * sequentiality; this is because not all
+        * clear_pending_set_locked() implementations imply full
+        * barriers.
+        */
+       if (val & _Q_LOCKED_MASK)
+               atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK));
+
+       /*
+        * take ownership and clear the pending bit.
+        *
+        * 0,1,0 -> 0,0,1
         */
-       if (!(val & _Q_PENDING_MASK))
-               clear_pending(lock);
+       clear_pending_set_locked(lock);
+       qstat_inc(qstat_lock_pending, true);
+       return;
 
        /*
         * End of pending bit optimistic spinning and beginning of MCS
@@ -371,11 +408,16 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 queue:
        qstat_inc(qstat_lock_slowpath, true);
 pv_queue:
-       node = this_cpu_ptr(&mcs_nodes[0]);
+       node = this_cpu_ptr(&qnodes[0].mcs);
        idx = node->count++;
        tail = encode_tail(smp_processor_id(), idx);
 
-       node += idx;
+       node = grab_mcs_node(node, idx);
+
+       /*
+        * Keep counts of non-zero index values:
+        */
+       qstat_inc(qstat_lock_idx1 + idx - 1, idx);
 
        /*
         * Ensure that we increment the head node->count before initialising
@@ -476,16 +518,25 @@ locked:
         */
 
        /*
-        * In the PV case we might already have _Q_LOCKED_VAL set.
+        * In the PV case we might already have _Q_LOCKED_VAL set, because
+        * of lock stealing; therefore we must also allow:
+        *
+        * n,0,1 -> 0,0,1
         *
-        * The atomic_cond_read_acquire() call above has provided the
-        * necessary acquire semantics required for locking.
+        * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
+        *       above wait condition, therefore any concurrent setting of
+        *       PENDING will make the uncontended transition fail.
         */
-       if (((val & _Q_TAIL_MASK) == tail) &&
-           atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
-               goto release; /* No contention */
+       if ((val & _Q_TAIL_MASK) == tail) {
+               if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+                       goto release; /* No contention */
+       }
 
-       /* Either somebody is queued behind us or _Q_PENDING_VAL is set */
+       /*
+        * Either somebody is queued behind us or _Q_PENDING_VAL got set
+        * which will then detect the remaining tail and queue behind us
+        * ensuring we'll see a @next.
+        */
        set_locked(lock);
 
        /*
@@ -501,7 +552,7 @@ release:
        /*
         * release the node
         */
-       __this_cpu_dec(mcs_nodes[0].count);
+       __this_cpu_dec(qnodes[0].mcs.count);
 }
 EXPORT_SYMBOL(queued_spin_lock_slowpath);
 
index 5a0cf5f..0130e48 100644 (file)
@@ -49,8 +49,6 @@ enum vcpu_state {
 
 struct pv_node {
        struct mcs_spinlock     mcs;
-       struct mcs_spinlock     __res[3];
-
        int                     cpu;
        u8                      state;
 };
@@ -281,7 +279,7 @@ static void pv_init_node(struct mcs_spinlock *node)
 {
        struct pv_node *pn = (struct pv_node *)node;
 
-       BUILD_BUG_ON(sizeof(struct pv_node) > 5*sizeof(struct mcs_spinlock));
+       BUILD_BUG_ON(sizeof(struct pv_node) > sizeof(struct qnode));
 
        pn->cpu = smp_processor_id();
        pn->state = vcpu_running;
index 6bd78c0..42d3d8d 100644 (file)
@@ -55,6 +55,9 @@ enum qlock_stats {
        qstat_pv_wait_node,
        qstat_lock_pending,
        qstat_lock_slowpath,
+       qstat_lock_idx1,
+       qstat_lock_idx2,
+       qstat_lock_idx3,
        qstat_num,      /* Total number of statistical counters */
        qstat_reset_cnts = qstat_num,
 };
@@ -82,6 +85,9 @@ static const char * const qstat_names[qstat_num + 1] = {
        [qstat_pv_wait_node]       = "pv_wait_node",
        [qstat_lock_pending]       = "lock_pending",
        [qstat_lock_slowpath]      = "lock_slowpath",
+       [qstat_lock_idx1]          = "lock_index1",
+       [qstat_lock_idx2]          = "lock_index2",
+       [qstat_lock_idx3]          = "lock_index3",
        [qstat_reset_cnts]         = "reset_counters",
 };
 
index 2823d41..581edcc 100644 (file)
@@ -1485,9 +1485,9 @@ void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
        __rt_mutex_lock(lock, subclass);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
-#endif
 
-#ifndef CONFIG_DEBUG_LOCK_ALLOC
+#else /* !CONFIG_DEBUG_LOCK_ALLOC */
+
 /**
  * rt_mutex_lock - lock a rt_mutex
  *
index 3064c50..09b1800 100644 (file)
@@ -180,7 +180,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
                 * but it gives the spinners an early indication that the
                 * readers now have the lock.
                 */
-               rwsem_set_reader_owned(sem);
+               __rwsem_set_reader_owned(sem, waiter->task);
        }
 
        /*
@@ -233,8 +233,19 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
        waiter.type = RWSEM_WAITING_FOR_READ;
 
        raw_spin_lock_irq(&sem->wait_lock);
-       if (list_empty(&sem->wait_list))
+       if (list_empty(&sem->wait_list)) {
+               /*
+                * In case the wait queue is empty and the lock isn't owned
+                * by a writer, this reader can exit the slowpath and return
+                * immediately as its RWSEM_ACTIVE_READ_BIAS has already
+                * been set in the count.
+                */
+               if (atomic_long_read(&sem->count) >= 0) {
+                       raw_spin_unlock_irq(&sem->wait_lock);
+                       return sem;
+               }
                adjustment += RWSEM_WAITING_BIAS;
+       }
        list_add_tail(&waiter.list, &sem->wait_list);
 
        /* we're now waiting on the lock, but no longer actively locking */
index 776308d..e586f0d 100644 (file)
@@ -117,8 +117,9 @@ EXPORT_SYMBOL(down_write_trylock);
 void up_read(struct rw_semaphore *sem)
 {
        rwsem_release(&sem->dep_map, 1, _RET_IP_);
-       DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED);
+       DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));
 
+       rwsem_clear_reader_owned(sem);
        __up_read(sem);
 }
 
@@ -181,7 +182,7 @@ void down_read_non_owner(struct rw_semaphore *sem)
        might_sleep();
 
        __down_read(sem);
-       rwsem_set_reader_owned(sem);
+       __rwsem_set_reader_owned(sem, NULL);
 }
 
 EXPORT_SYMBOL(down_read_non_owner);
@@ -215,7 +216,7 @@ EXPORT_SYMBOL(down_write_killable_nested);
 
 void up_read_non_owner(struct rw_semaphore *sem)
 {
-       DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED);
+       DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));
        __up_read(sem);
 }
 
index b9d0e72..bad2bca 100644 (file)
@@ -1,24 +1,30 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * The owner field of the rw_semaphore structure will be set to
- * RWSEM_READER_OWNED when a reader grabs the lock. A writer will clear
- * the owner field when it unlocks. A reader, on the other hand, will
- * not touch the owner field when it unlocks.
+ * The least significant 2 bits of the owner value have the following
+ * meanings when set.
+ *  - RWSEM_READER_OWNED (bit 0): The rwsem is owned by readers
+ *  - RWSEM_ANONYMOUSLY_OWNED (bit 1): The rwsem is anonymously owned,
+ *    i.e. the owner(s) cannot be readily determined. It can be reader
+ *    owned or the owning writer is indeterminate.
  *
- * In essence, the owner field now has the following 4 states:
- *  1) 0
- *     - lock is free or the owner hasn't set the field yet
- *  2) RWSEM_READER_OWNED
- *     - lock is currently or previously owned by readers (lock is free
- *       or not set by owner yet)
- *  3) RWSEM_ANONYMOUSLY_OWNED bit set with some other bits set as well
- *     - lock is owned by an anonymous writer, so spinning on the lock
- *       owner should be disabled.
- *  4) Other non-zero value
- *     - a writer owns the lock and other writers can spin on the lock owner.
+ * When a writer acquires a rwsem, it puts its task_struct pointer
+ * into the owner field. It is cleared after an unlock.
+ *
+ * When a reader acquires a rwsem, it will also put its task_struct
+ * pointer into the owner field with both the RWSEM_READER_OWNED and
+ * RWSEM_ANONYMOUSLY_OWNED bits set. On unlock, the owner field will
+ * largely be left untouched. So for a free or reader-owned rwsem,
+ * the owner value may contain information about the last reader that
+ * acquires the rwsem. The anonymous bit is set because that particular
+ * reader may or may not still own the lock.
+ *
+ * That information may be helpful in debugging cases where the system
+ * seems to hang on a reader owned rwsem especially if only one reader
+ * is involved. Ideally we would like to track all the readers that own
+ * a rwsem, but the overhead is simply too big.
  */
-#define RWSEM_ANONYMOUSLY_OWNED        (1UL << 0)
-#define RWSEM_READER_OWNED     ((struct task_struct *)RWSEM_ANONYMOUSLY_OWNED)
+#define RWSEM_READER_OWNED     (1UL << 0)
+#define RWSEM_ANONYMOUSLY_OWNED        (1UL << 1)
 
 #ifdef CONFIG_DEBUG_RWSEMS
 # define DEBUG_RWSEMS_WARN_ON(c)       DEBUG_LOCKS_WARN_ON(c)
@@ -44,15 +50,26 @@ static inline void rwsem_clear_owner(struct rw_semaphore *sem)
        WRITE_ONCE(sem->owner, NULL);
 }
 
+/*
+ * The task_struct pointer of the last owning reader will be left in
+ * the owner field.
+ *
+ * Note that the owner value just indicates the task has owned the rwsem
+ * previously, it may not be the real owner or one of the real owners
+ * anymore when that field is examined, so take it with a grain of salt.
+ */
+static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
+                                           struct task_struct *owner)
+{
+       unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED
+                                                | RWSEM_ANONYMOUSLY_OWNED;
+
+       WRITE_ONCE(sem->owner, (struct task_struct *)val);
+}
+
 static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
 {
-       /*
-        * We check the owner value first to make sure that we will only
-        * do a write to the rwsem cacheline when it is really necessary
-        * to minimize cacheline contention.
-        */
-       if (READ_ONCE(sem->owner) != RWSEM_READER_OWNED)
-               WRITE_ONCE(sem->owner, RWSEM_READER_OWNED);
+       __rwsem_set_reader_owned(sem, current);
 }
 
 /*
@@ -72,6 +89,25 @@ static inline bool rwsem_has_anonymous_owner(struct task_struct *owner)
 {
        return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED;
 }
+
+#ifdef CONFIG_DEBUG_RWSEMS
+/*
+ * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
+ * is a task pointer in owner of a reader-owned rwsem, it will be the
+ * real owner or one of the real owners. The only exception is when the
+ * unlock is done by up_read_non_owner().
+ */
+#define rwsem_clear_reader_owned rwsem_clear_reader_owned
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
+{
+       unsigned long val = (unsigned long)current | RWSEM_READER_OWNED
+                                                  | RWSEM_ANONYMOUSLY_OWNED;
+       if (READ_ONCE(sem->owner) == (struct task_struct *)val)
+               cmpxchg_relaxed((unsigned long *)&sem->owner, val,
+                               RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED);
+}
+#endif
+
 #else
 static inline void rwsem_set_owner(struct rw_semaphore *sem)
 {
@@ -81,7 +117,18 @@ static inline void rwsem_clear_owner(struct rw_semaphore *sem)
 {
 }
 
+static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
+                                          struct task_struct *owner)
+{
+}
+
 static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
 {
 }
 #endif
+
+#ifndef rwsem_clear_reader_owned
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
+{
+}
+#endif
index 6746c85..49a4058 100644 (file)
@@ -3315,6 +3315,15 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
         * Note: ro_after_init sections also have SHF_{WRITE,ALLOC} set.
         */
        ndx = find_sec(info, ".data..ro_after_init");
+       if (ndx)
+               info->sechdrs[ndx].sh_flags |= SHF_RO_AFTER_INIT;
+       /*
+        * Mark the __jump_table section as ro_after_init as well: these data
+        * structures are never modified, with the exception of entries that
+        * refer to code in the __init section, which are annotated as such
+        * at module load time.
+        */
+       ndx = find_sec(info, "__jump_table");
        if (ndx)
                info->sechdrs[ndx].sh_flags |= SHF_RO_AFTER_INIT;
 
index 96c4c63..ce51749 100644 (file)
@@ -21,7 +21,7 @@
  * that would just muddy the log. So we report the first one and
  * shut up after that.
  */
-int debug_locks = 1;
+int debug_locks __read_mostly = 1;
 EXPORT_SYMBOL_GPL(debug_locks);
 
 /*
@@ -29,7 +29,7 @@ EXPORT_SYMBOL_GPL(debug_locks);
  * 'silent failure': nothing is printed to the console when
  * a locking bug is detected.
  */
-int debug_locks_silent;
+int debug_locks_silent __read_mostly;
 EXPORT_SYMBOL_GPL(debug_locks_silent);
 
 /*
@@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent);
  */
 int debug_locks_off(void)
 {
-       if (__debug_locks_off()) {
+       if (debug_locks && __debug_locks_off()) {
                if (!debug_locks_silent) {
                        console_verbose();
                        return 1;
index ec00be5..f341663 100644 (file)
@@ -30,8 +30,10 @@ long __probe_kernel_read(void *dst, const void *src, size_t size)
 
        set_fs(KERNEL_DS);
        pagefault_disable();
+       current->kernel_uaccess_faults_ok++;
        ret = __copy_from_user_inatomic(dst,
                        (__force const void __user *)src, size);
+       current->kernel_uaccess_faults_ok--;
        pagefault_enable();
        set_fs(old_fs);
 
@@ -58,7 +60,9 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
 
        set_fs(KERNEL_DS);
        pagefault_disable();
+       current->kernel_uaccess_faults_ok++;
        ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
+       current->kernel_uaccess_faults_ok--;
        pagefault_enable();
        set_fs(old_fs);
 
@@ -94,11 +98,13 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
 
        set_fs(KERNEL_DS);
        pagefault_disable();
+       current->kernel_uaccess_faults_ok++;
 
        do {
                ret = __get_user(*dst++, (const char __user __force *)src++);
        } while (dst[-1] && ret == 0 && src - unsafe_addr < count);
 
+       current->kernel_uaccess_faults_ok--;
        dst[-1] = '\0';
        pagefault_enable();
        set_fs(old_fs);
index ce53639..8aeb60e 100644 (file)
@@ -115,7 +115,9 @@ __cc-option = $(call try-run,\
 
 # Do not attempt to build with gcc plugins during cc-option tests.
 # (And this uses delayed resolution so the flags will be up to date.)
-CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
+# In addition, do not include the asm macros which are built later.
+CC_OPTION_FILTERED = $(GCC_PLUGINS_CFLAGS) $(ASM_MACRO_FLAGS)
+CC_OPTION_CFLAGS = $(filter-out $(CC_OPTION_FILTERED),$(KBUILD_CFLAGS))
 
 # cc-option
 # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
index 42c5d50..a5b4af4 100644 (file)
@@ -4,6 +4,8 @@ OBJECT_FILES_NON_STANDARD := y
 hostprogs-y    := modpost mk_elfconfig
 always         := $(hostprogs-y) empty.o
 
+CFLAGS_REMOVE_empty.o := $(ASM_MACRO_FLAGS)
+
 modpost-objs   := modpost.o file2alias.o sumversion.o
 
 devicetable-offsets-file := devicetable-offsets.h
index 0cbd1ef..35bff92 100644 (file)
@@ -28,7 +28,8 @@ Explanation of the Linux-Kernel Memory Consistency Model
   20. THE HAPPENS-BEFORE RELATION: hb
   21. THE PROPAGATES-BEFORE RELATION: pb
   22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
-  23. ODDS AND ENDS
+  23. LOCKING
+  24. ODDS AND ENDS
 
 
 
@@ -1067,28 +1068,6 @@ allowing out-of-order writes like this to occur.  The model avoided
 violating the write-write coherence rule by requiring the CPU not to
 send the W write to the memory subsystem at all!)
 
-There is one last example of preserved program order in the LKMM: when
-a load-acquire reads from an earlier store-release.  For example:
-
-       smp_store_release(&x, 123);
-       r1 = smp_load_acquire(&x);
-
-If the smp_load_acquire() ends up obtaining the 123 value that was
-stored by the smp_store_release(), the LKMM says that the load must be
-executed after the store; the store cannot be forwarded to the load.
-This requirement does not arise from the operational model, but it
-yields correct predictions on all architectures supported by the Linux
-kernel, although for differing reasons.
-
-On some architectures, including x86 and ARMv8, it is true that the
-store cannot be forwarded to the load.  On others, including PowerPC
-and ARMv7, smp_store_release() generates object code that starts with
-a fence and smp_load_acquire() generates object code that ends with a
-fence.  The upshot is that even though the store may be forwarded to
-the load, it is still true that any instruction preceding the store
-will be executed before the load or any following instructions, and
-the store will be executed before any instruction following the load.
-
 
 AND THEN THERE WAS ALPHA
 ------------------------
@@ -1766,6 +1745,147 @@ before it does, and the critical section in P2 both starts after P1's
 grace period does and ends after it does.
 
 
+LOCKING
+-------
+
+The LKMM includes locking.  In fact, there is special code for locking
+in the formal model, added in order to make tools run faster.
+However, this special code is intended to be more or less equivalent
+to concepts we have already covered.  A spinlock_t variable is treated
+the same as an int, and spin_lock(&s) is treated almost the same as:
+
+       while (cmpxchg_acquire(&s, 0, 1) != 0)
+               cpu_relax();
+
+This waits until s is equal to 0 and then atomically sets it to 1,
+and the read part of the cmpxchg operation acts as an acquire fence.
+An alternate way to express the same thing would be:
+
+       r = xchg_acquire(&s, 1);
+
+along with a requirement that at the end, r = 0.  Similarly,
+spin_trylock(&s) is treated almost the same as:
+
+       return !cmpxchg_acquire(&s, 0, 1);
+
+which atomically sets s to 1 if it is currently equal to 0 and returns
+true if it succeeds (the read part of the cmpxchg operation acts as an
+acquire fence only if the operation is successful).  spin_unlock(&s)
+is treated almost the same as:
+
+       smp_store_release(&s, 0);
+
+The "almost" qualifiers above need some explanation.  In the LKMM, the
+store-release in a spin_unlock() and the load-acquire which forms the
+first half of the atomic rmw update in a spin_lock() or a successful
+spin_trylock() -- we can call these things lock-releases and
+lock-acquires -- have two properties beyond those of ordinary releases
+and acquires.
+
+First, when a lock-acquire reads from a lock-release, the LKMM
+requires that every instruction po-before the lock-release must
+execute before any instruction po-after the lock-acquire.  This would
+naturally hold if the release and acquire operations were on different
+CPUs, but the LKMM says it holds even when they are on the same CPU.
+For example:
+
+       int x, y;
+       spinlock_t s;
+
+       P0()
+       {
+               int r1, r2;
+
+               spin_lock(&s);
+               r1 = READ_ONCE(x);
+               spin_unlock(&s);
+               spin_lock(&s);
+               r2 = READ_ONCE(y);
+               spin_unlock(&s);
+       }
+
+       P1()
+       {
+               WRITE_ONCE(y, 1);
+               smp_wmb();
+               WRITE_ONCE(x, 1);
+       }
+
+Here the second spin_lock() reads from the first spin_unlock(), and
+therefore the load of x must execute before the load of y.  Thus we
+cannot have r1 = 1 and r2 = 0 at the end (this is an instance of the
+MP pattern).
+
+This requirement does not apply to ordinary release and acquire
+fences, only to lock-related operations.  For instance, suppose P0()
+in the example had been written as:
+
+       P0()
+       {
+               int r1, r2, r3;
+
+               r1 = READ_ONCE(x);
+               smp_store_release(&s, 1);
+               r3 = smp_load_acquire(&s);
+               r2 = READ_ONCE(y);
+       }
+
+Then the CPU would be allowed to forward the s = 1 value from the
+smp_store_release() to the smp_load_acquire(), executing the
+instructions in the following order:
+
+               r3 = smp_load_acquire(&s);      // Obtains r3 = 1
+               r2 = READ_ONCE(y);
+               r1 = READ_ONCE(x);
+               smp_store_release(&s, 1);       // Value is forwarded
+
+and thus it could load y before x, obtaining r2 = 0 and r1 = 1.
+
+Second, when a lock-acquire reads from a lock-release, and some other
+stores W and W' occur po-before the lock-release and po-after the
+lock-acquire respectively, the LKMM requires that W must propagate to
+each CPU before W' does.  For example, consider:
+
+       int x, y;
+       spinlock_t s;
+
+       P0()
+       {
+               spin_lock(&s);
+               WRITE_ONCE(x, 1);
+               spin_unlock(&s);
+       }
+
+       P1()
+       {
+               int r1;
+
+               spin_lock(&s);
+               r1 = READ_ONCE(x);
+               WRITE_ONCE(y, 1);
+               spin_unlock(&s);
+       }
+
+       P2()
+       {
+               int r2, r3;
+
+               r2 = READ_ONCE(y);
+               smp_rmb();
+               r3 = READ_ONCE(x);
+       }
+
+If r1 = 1 at the end then the spin_lock() in P1 must have read from
+the spin_unlock() in P0.  Hence the store to x must propagate to P2
+before the store to y does, so we cannot have r2 = 1 and r3 = 0.
+
+These two special requirements for lock-release and lock-acquire do
+not arise from the operational model.  Nevertheless, kernel developers
+have come to expect and rely on them because they do hold on all
+architectures supported by the Linux kernel, albeit for various
+differing reasons.
+
+
 ODDS AND ENDS
 -------------
 
@@ -1831,26 +1951,6 @@ they behave as follows:
        events and the events preceding them against all po-later
        events.
 
-The LKMM includes locking.  In fact, there is special code for locking
-in the formal model, added in order to make tools run faster.
-However, this special code is intended to be exactly equivalent to
-concepts we have already covered.  A spinlock_t variable is treated
-the same as an int, and spin_lock(&s) is treated the same as:
-
-       while (cmpxchg_acquire(&s, 0, 1) != 0)
-               cpu_relax();
-
-which waits until s is equal to 0 and then atomically sets it to 1,
-and where the read part of the atomic update is also an acquire fence.
-An alternate way to express the same thing would be:
-
-       r = xchg_acquire(&s, 1);
-
-along with a requirement that at the end, r = 0.  spin_unlock(&s) is
-treated the same as:
-
-       smp_store_release(&s, 0);
-
 Interestingly, RCU and locking each introduce the possibility of
 deadlock.  When faced with code sequences such as:
 
index af72700..7fe8d7a 100644 (file)
@@ -311,7 +311,7 @@ The smp_wmb() macro orders prior stores against later stores, and the
 smp_rmb() macro orders prior loads against later loads.  Therefore, if
 the final value of r0 is 1, the final value of r1 must also be 1.
 
-The the xlog_state_switch_iclogs() function in fs/xfs/xfs_log.c contains
+The xlog_state_switch_iclogs() function in fs/xfs/xfs_log.c contains
 the following write-side code fragment:
 
        log->l_curr_block -= log->l_logBBsize;
index ee987ce..acf9077 100644 (file)
@@ -171,6 +171,12 @@ The Linux-kernel memory model has the following limitations:
        particular, the "THE PROGRAM ORDER RELATION: po AND po-loc"
        and "A WARNING" sections).
 
+       Note that this limitation in turn limits LKMM's ability to
+       accurately model address, control, and data dependencies.
+       For example, if the compiler can deduce the value of some variable
+       carrying a dependency, then the compiler can break that dependency
+       by substituting a constant of that value.
+
 2.     Multiple access sizes for a single variable are not supported,
        and neither are misaligned or partially overlapping accesses.
 
@@ -190,6 +196,36 @@ The Linux-kernel memory model has the following limitations:
        However, a substantial amount of support is provided for these
        operations, as shown in the linux-kernel.def file.
 
+       a.      When rcu_assign_pointer() is passed NULL, the Linux
+               kernel provides no ordering, but LKMM models this
+               case as a store release.
+
+       b.      The "unless" RMW operations are not currently modeled:
+               atomic_long_add_unless(), atomic_add_unless(),
+               atomic_inc_unless_negative(), and
+               atomic_dec_unless_positive().  These can be emulated
+               in litmus tests, for example, by using atomic_cmpxchg().
+
+       c.      The call_rcu() function is not modeled.  It can be
+               emulated in litmus tests by adding another process that
+               invokes synchronize_rcu() and the body of the callback
+               function, with (for example) a release-acquire from
+               the site of the emulated call_rcu() to the beginning
+               of the additional process.
+
+       d.      The rcu_barrier() function is not modeled.  It can be
+               emulated in litmus tests emulating call_rcu() via
+               (for example) a release-acquire from the end of each
+               additional call_rcu() process to the site of the
+               emulated rcu_barrier().
+
+       e.      Sleepable RCU (SRCU) is not modeled.  It can be
+               emulated, but perhaps not simply.
+
+       f.      Reader-writer locking is not modeled.  It can be
+               emulated in litmus tests using atomic read-modify-write
+               operations.
+
 The "herd7" tool has some additional limitations of its own, apart from
 the memory model:
 
@@ -204,3 +240,6 @@ the memory model:
 Some of these limitations may be overcome in the future, but others are
 more likely to be addressed by incorporating the Linux-kernel memory model
 into other tools.
+
+Finally, please note that LKMM is subject to change as hardware, use cases,
+and compilers evolve.
index 59b5cbe..882fc33 100644 (file)
@@ -38,7 +38,7 @@ let strong-fence = mb | gp
 (* Release Acquire *)
 let acq-po = [Acquire] ; po ; [M]
 let po-rel = [M] ; po ; [Release]
-let rfi-rel-acq = [Release] ; rfi ; [Acquire]
+let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po
 
 (**********************************)
 (* Fundamental coherence ordering *)
@@ -60,13 +60,13 @@ let dep = addr | data
 let rwdep = (dep | ctrl) ; [W]
 let overwrite = co | fr
 let to-w = rwdep | (overwrite & int)
-let to-r = addr | (dep ; rfi) | rfi-rel-acq
+let to-r = addr | (dep ; rfi)
 let fence = strong-fence | wmb | po-rel | rmb | acq-po
-let ppo = to-r | to-w | fence
+let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int)
 
 (* Propagation: Ordering from release operations and strong fences. *)
 let A-cumul(r) = rfe? ; r
-let cumul-fence = A-cumul(strong-fence | po-rel) | wmb
+let cumul-fence = A-cumul(strong-fence | po-rel) | wmb | po-unlock-rf-lock-po
 let prop = (overwrite & ext)? ; cumul-fence* ; rfe?
 
 (*
index 0f749e4..094d58d 100644 (file)
@@ -1,11 +1,10 @@
 C ISA2+pooncelock+pooncelock+pombonce
 
 (*
- * Result: Sometimes
+ * Result: Never
  *
- * This test shows that the ordering provided by a lock-protected S
- * litmus test (P0() and P1()) are not visible to external process P2().
- * This is likely to change soon.
+ * This test shows that write-write ordering provided by locks
+ * (in P0() and P1()) is visible to external process P2().
  *)
 
 {}
index 4581ec2..5ee08f1 100644 (file)
@@ -1,4 +1,6 @@
-This directory contains the following litmus tests:
+============
+LITMUS TESTS
+============
 
 CoRR+poonceonce+Once.litmus
        Test of read-read coherence, that is, whether or not two
@@ -36,7 +38,7 @@ IRIW+poonceonces+OnceOnce.litmus
 ISA2+pooncelock+pooncelock+pombonce.litmus
        Tests whether the ordering provided by a lock-protected S
        litmus test is visible to an external process whose accesses are
-       separated by smp_mb().  This addition of an external process to
+       separated by smp_mb().  This addition of an external process to
        S is otherwise known as ISA2.
 
 ISA2+poonceonces.litmus
@@ -151,3 +153,101 @@ Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
 A great many more litmus tests are available here:
 
        https://github.com/paulmckrcu/litmus
+
+==================
+LITMUS TEST NAMING
+==================
+
+Litmus tests are usually named based on their contents, which means that
+looking at the name tells you what the litmus test does.  The naming
+scheme covers litmus tests having a single cycle that passes through
+each process exactly once, so litmus tests not fitting this description
+are named on an ad-hoc basis.
+
+The structure of a litmus-test name is the litmus-test class, a plus
+sign ("+"), and one string for each process, separated by plus signs.
+The end of the name is ".litmus".
+
+The litmus-test classes may be found in the infamous test6.pdf:
+https://www.cl.cam.ac.uk/~pes20/ppc-supplemental/test6.pdf
+Each class defines the pattern of accesses and of the variables accessed.
+For example, if one process writes to a pair of variables, and
+the other process reads from these same variables, the corresponding
+litmus-test class is "MP" (message passing), which may be found on the
+left-hand end of the second row of tests on page one of test6.pdf.
+
+The strings used to identify the actions carried out by each process are
+complex due to a desire to have short(er) names.  Thus, there is a tool to
+generate these strings from a given litmus test's actions.  For example,
+consider the processes from SB+rfionceonce-poonceonces.litmus:
+
+       P0(int *x, int *y)
+       {
+               int r1;
+               int r2;
+
+               WRITE_ONCE(*x, 1);
+               r1 = READ_ONCE(*x);
+               r2 = READ_ONCE(*y);
+       }
+
+       P1(int *x, int *y)
+       {
+               int r3;
+               int r4;
+
+               WRITE_ONCE(*y, 1);
+               r3 = READ_ONCE(*y);
+               r4 = READ_ONCE(*x);
+       }
+
+The next step is to construct a space-separated list of descriptors,
+interleaving descriptions of the relation between a pair of consecutive
+accesses with descriptions of the second access in the pair.
+
+P0()'s WRITE_ONCE() is read by its first READ_ONCE(), which is a
+reads-from link (rf) and internal to the P0() process.  This is
+"rfi", which is an abbreviation for "reads-from internal".  Because
+some of the tools string these abbreviations together with space
+characters separating processes, the first character is capitalized,
+resulting in "Rfi".
+
+P0()'s second access is a READ_ONCE(), as opposed to (for example)
+smp_load_acquire(), so next is "Once".  Thus far, we have "Rfi Once".
+
+P0()'s third access is also a READ_ONCE(), but to y rather than x.
+This is related to P0()'s second access by program order ("po"),
+to a different variable ("d"), and both accesses are reads ("RR").
+The resulting descriptor is "PodRR".  Because P0()'s third access is
+READ_ONCE(), we add another "Once" descriptor.
+
+A from-read ("fr") relation links P0()'s third access to P1()'s first
+access; it crosses processes, so it is external ("fre"), giving "Fre".
+P1()'s first access is WRITE_ONCE(), which as before gives the descriptor
+"Once".  The string so far is thus "Rfi Once PodRR Once Fre Once".
+
+The remainder of P1() is similar to P0(), which means we add
+"Rfi Once PodRR Once".  Another fre links P1()'s last access to
+P0()'s first access, which is WRITE_ONCE(), so we add "Fre Once".
+The full string is thus:
+
+       Rfi Once PodRR Once Fre Once Rfi Once PodRR Once Fre Once
+
+This string can be given to the "norm7" and "classify7" tools to
+produce the name:
+
+       $ norm7 -bell linux-kernel.bell \
+               Rfi Once PodRR Once Fre Once Rfi Once PodRR Once Fre Once | \
+         sed -e 's/:.*//g'
+       SB+rfionceonce-poonceonces
+
+Adding the ".litmus" suffix: SB+rfionceonce-poonceonces.litmus
+
+The descriptors that describe connections between consecutive accesses
+within the cycle through a given litmus test can be provided by the herd
+tool (Rfi, Po, Fre, and so on) or by the linux-kernel.bell file (Once,
+Release, Acquire, and so on).
+
+To see the full list of descriptors, execute the following command:
+
+       $ diyone7 -bell linux-kernel.bell -show edges
index 84f001d..50af4e1 100644 (file)
@@ -30,9 +30,9 @@
 #define EX_ORIG_OFFSET         0
 #define EX_NEW_OFFSET          4
 
-#define JUMP_ENTRY_SIZE                24
+#define JUMP_ENTRY_SIZE                16
 #define JUMP_ORIG_OFFSET       0
-#define JUMP_NEW_OFFSET                8
+#define JUMP_NEW_OFFSET                4
 
 #define ALT_ENTRY_SIZE         13
 #define ALT_ORIG_OFFSET                0