Lines Matching +full:ipa +full:- +full:reg (arch/x86/net/bpf_jit_comp.c)
1 // SPDX-License-Identifier: GPL-2.0-only
5 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
6 * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
16 #include <asm/nospec-branch.h>
17 #include <asm/text-patching.h>
18 #include <asm/asm-prototypes.h>
52 return value <= 127 && value >= -128; in is_imm8()
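
A minimal standalone sketch of the decision this predicate drives: use the one-byte ModRM displacement when the offset fits a signed byte, else fall back to a four-byte disp32. The emit_disp() helper and its buffer are illustrative, not kernel API; the BPF_ST/BPF_STX cases further down make the same choice between EMIT2(..., insn->off) and EMIT1_off32(..., insn->off).

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

static bool is_imm8(int value)
{
        return value <= 127 && value >= -128;
}

/* Append a displacement in the shortest encoding it fits. */
static int emit_disp(uint8_t *buf, int32_t disp)
{
        if (is_imm8(disp)) {
                buf[0] = (uint8_t)disp;         /* disp8, ModRM mod=01 */
                return 1;
        }
        memcpy(buf, &disp, 4);                  /* disp32, ModRM mod=10 */
        return 4;
}
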
106 * The following table maps BPF registers to x86-64 registers.
108 * x86-64 register R12 is unused, since if used as base address
112 * x86-64 register R9 is not used by BPF programs, but can be used by BPF
113 * trampoline. x86-64 register R10 is used for blinding (if enabled).
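
The reg2hex[] mapping these comments describe falls outside the matched lines; it is reconstructed here for reference from the surrounding predicates (AUX_REG and X86_REG_R9 are JIT-internal register indices defined earlier in the file):

static const int reg2hex[] = {
        [BPF_REG_0] = 0,  /* RAX */
        [BPF_REG_1] = 7,  /* RDI */
        [BPF_REG_2] = 6,  /* RSI */
        [BPF_REG_3] = 2,  /* RDX */
        [BPF_REG_4] = 1,  /* RCX */
        [BPF_REG_5] = 0,  /* R8 */
        [BPF_REG_6] = 3,  /* RBX, callee saved */
        [BPF_REG_7] = 5,  /* R13, callee saved */
        [BPF_REG_8] = 6,  /* R14, callee saved */
        [BPF_REG_9] = 7,  /* R15, callee saved */
        [BPF_REG_FP] = 5, /* RBP, readonly */
        [X86_REG_R9] = 1, /* R9, 6th function argument */
        [BPF_REG_AX] = 2, /* R10, blinding temp register */
        [AUX_REG] = 3,    /* R11, JIT temp register */
};
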
146 * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15
150 static bool is_ereg(u32 reg) in is_ereg()
152 return (1 << reg) & (BIT(BPF_REG_5) | in is_ereg()
162 * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64
163 * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte
166 static bool is_ereg_8l(u32 reg) in is_ereg_8l()
168 return is_ereg(reg) || in is_ereg_8l()
169 (1 << reg) & (BIT(BPF_REG_1) | in is_ereg_8l()
174 static bool is_axreg(u32 reg) in is_axreg()
176 return reg == BPF_REG_0; in is_axreg()
179 /* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */
180 static u8 add_1mod(u8 byte, u32 reg) in add_1mod()
182 if (is_ereg(reg)) in add_1mod()
196 /* Encode 'dst_reg' register into x86-64 opcode 'byte' */
202 /* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */
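
An illustrative sketch of how these helpers compose into one instruction; emit_mov_reg64() is not a kernel helper, and add_2mod() is assumed here to be the two-register REX counterpart of add_1mod() above. For dst_reg = R13 and src_reg = RDI this produces 49 89 fd: REX.W|REX.B, opcode 0x89, ModRM 0xfd (mod=11, reg=src, rm=dst).

static int emit_mov_reg64(u8 *buf, u32 dst_reg, u32 src_reg)
{
        buf[0] = add_2mod(0x48, dst_reg, src_reg); /* REX.W plus R/B bits */
        buf[1] = 0x89;                             /* MOV r/m64, r64 */
        buf[2] = add_2reg(0xC0, dst_reg, src_reg); /* register-direct ModRM */
        return 3;
}
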
260 * Emit x86-64 prologue code for BPF program.
297 offset = func - (ip + X86_PATCH_SIZE); in emit_patch()
300 return -ERANGE; in emit_patch()
347 ret = -EBUSY; in __bpf_arch_text_poke()
370 return -EINVAL; in bpf_arch_text_poke()
395 * if (index >= array->map.max_entries)
399 * prog = array->ptrs[index];
402 * goto *(prog->bpf_func + prologue_size);
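
The elided lines of this comment spell out the rest of the sequence; in C the emitted checks behave roughly like:

        if (index >= array->map.max_entries)
                goto out;
        if (tail_call_cnt > MAX_TAIL_CALL_CNT)
                goto out;
        tail_call_cnt++;
        prog = array->ptrs[index];
        if (prog == NULL)
                goto out;
        goto *(prog->bpf_func + prologue_size);
out:
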
408 int tcc_off = -4 - round_up(stack_depth, 8); in emit_bpf_tail_call_indirect()
432 * rdi - pointer to ctx in emit_bpf_tail_call_indirect()
433 * rsi - pointer to bpf_array in emit_bpf_tail_call_indirect()
434 * rdx - index in bpf_array in emit_bpf_tail_call_indirect()
438 * if (index >= array->map.max_entries) in emit_bpf_tail_call_indirect()
451 EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ in emit_bpf_tail_call_indirect()
456 EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ in emit_bpf_tail_call_indirect()
458 /* prog = array->ptrs[index]; */ in emit_bpf_tail_call_indirect()
479 /* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */ in emit_bpf_tail_call_indirect()
487 * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET in emit_bpf_tail_call_indirect()
499 int tcc_off = -4 - round_up(stack_depth, 8); in emit_bpf_tail_call_direct()
515 * - nop5/ jmpq $off in emit_bpf_tail_call_direct()
516 * - pop callee regs in emit_bpf_tail_call_direct()
517 * - sub rsp, $val if depth > 0 in emit_bpf_tail_call_direct()
518 * - pop rax in emit_bpf_tail_call_direct()
530 EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ in emit_bpf_tail_call_direct()
534 EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ in emit_bpf_tail_call_direct()
536 poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE); in emit_bpf_tail_call_direct()
537 poke->adj_off = X86_TAIL_CALL_OFFSET; in emit_bpf_tail_call_direct()
538 poke->tailcall_target = image + (addr - X86_PATCH_SIZE); in emit_bpf_tail_call_direct()
539 poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE; in emit_bpf_tail_call_direct()
541 emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE, in emit_bpf_tail_call_direct()
542 poke->tailcall_bypass); in emit_bpf_tail_call_direct()
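
Net shape of the patched call site this leaves behind (a sketch; exact offsets vary with stack depth and which callee-saved registers are in use):

        jmp  out                ; tailcall_bypass, patched to nop5 once
                                ; the map slot holds a program
        pop  <callee-saved regs>
        pop  rax
        add  rsp, round_up(stack_depth, 8)
        nop5                    ; tailcall_target, patched to
                                ; "jmp prog->bpf_func + adj_off"
out:

bpf_tail_call_direct_fixup() below installs the target jump first and only then nops out the bypass, so a half-patched site still falls through safely.
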
565 for (i = 0; i < prog->aux->size_poke_tab; i++) { in bpf_tail_call_direct_fixup()
566 poke = &prog->aux->poke_tab[i]; in bpf_tail_call_direct_fixup()
567 WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable)); in bpf_tail_call_direct_fixup()
569 if (poke->reason != BPF_POKE_REASON_TAIL_CALL) in bpf_tail_call_direct_fixup()
572 array = container_of(poke->tail_call.map, struct bpf_array, map); in bpf_tail_call_direct_fixup()
573 mutex_lock(&array->aux->poke_mutex); in bpf_tail_call_direct_fixup()
574 target = array->ptrs[poke->tail_call.key]; in bpf_tail_call_direct_fixup()
577 * and still not locked as read-only. Once poke in bpf_tail_call_direct_fixup()
578 * location is active (poke->tailcall_target_stable), in bpf_tail_call_direct_fixup()
580 * still on the read-write image until we finally in bpf_tail_call_direct_fixup()
581 * locked it as read-only. Both modifications on in bpf_tail_call_direct_fixup()
585 ret = __bpf_arch_text_poke(poke->tailcall_target, in bpf_tail_call_direct_fixup()
587 (u8 *)target->bpf_func + in bpf_tail_call_direct_fixup()
588 poke->adj_off, false); in bpf_tail_call_direct_fixup()
590 ret = __bpf_arch_text_poke(poke->tailcall_bypass, in bpf_tail_call_direct_fixup()
592 (u8 *)poke->tailcall_target + in bpf_tail_call_direct_fixup()
596 WRITE_ONCE(poke->tailcall_target_stable, true); in bpf_tail_call_direct_fixup()
597 mutex_unlock(&array->aux->poke_mutex); in bpf_tail_call_direct_fixup()
610 * (which zero-extends imm32) to save 2 bytes. in emit_mov_imm32()
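
The two encodings this optimization chooses between, written out for dst = RAX (the byte counts are what "save 2 bytes" refers to):

        b8 xx xx xx xx          mov eax, imm32   (5 bytes, zero-extends)
        48 c7 c0 xx xx xx xx    mov rax, imm32   (7 bytes, sign-extends)

For a non-negative immediate both leave the same 64-bit value in the register, so the shorter form is safe.
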
712 * If insn->off == 0 we can save one extra byte, but in emit_ldx()
765 u32 reg = x->fixup >> 8; in ex_handler_bpf()
768 *(unsigned long *)((void *)regs + reg) = 0; in ex_handler_bpf()
769 regs->ip += x->fixup & 0xff; in ex_handler_bpf()
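
A note on the packing being unpacked here (it is built by the BPF_PROBE_MEM case in do_jit() further down): bits 0-7 of x->fixup hold the length of the faulting load, so regs->ip can be advanced past it, and bits 8 and up hold the pt_regs byte offset of the destination register, which is zeroed so the program sees 0 instead of the kernel oopsing.
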
779 if (insn->code == (BPF_JMP | BPF_TAIL_CALL)) in detect_reg_usage()
781 if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6) in detect_reg_usage()
783 if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7) in detect_reg_usage()
785 if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8) in detect_reg_usage()
787 if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9) in detect_reg_usage()
795 bool tail_call_reachable = bpf_prog->aux->tail_call_reachable; in do_jit()
796 struct bpf_insn *insn = bpf_prog->insnsi; in do_jit()
798 int insn_cnt = bpf_prog->len; in do_jit()
812 emit_prologue(&prog, bpf_prog->aux->stack_depth, in do_jit()
814 bpf_prog->aux->func_idx != 0); in do_jit()
816 addrs[0] = prog - temp; in do_jit()
819 const s32 imm32 = insn->imm; in do_jit()
820 u32 dst_reg = insn->dst_reg; in do_jit()
821 u32 src_reg = insn->src_reg; in do_jit()
828 switch (insn->code) { in do_jit()
840 switch (BPF_OP(insn->code)) { in do_jit()
847 if (BPF_CLASS(insn->code) == BPF_ALU64) in do_jit()
857 BPF_CLASS(insn->code) == BPF_ALU64, in do_jit()
864 if (BPF_CLASS(insn->code) == BPF_ALU64) in do_jit()
881 if (BPF_CLASS(insn->code) == BPF_ALU64) in do_jit()
890 switch (BPF_OP(insn->code)) { in do_jit()
923 emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64, in do_jit()
945 if (BPF_SRC(insn->code) == BPF_X) in do_jit()
961 if (BPF_CLASS(insn->code) == BPF_ALU64) in do_jit()
968 if (BPF_OP(insn->code) == BPF_MOD) in do_jit()
987 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; in do_jit()
997 if (BPF_SRC(insn->code) == BPF_X) in do_jit()
1025 if (BPF_CLASS(insn->code) == BPF_ALU64) in do_jit()
1030 switch (BPF_OP(insn->code)) { in do_jit()
1064 if (BPF_CLASS(insn->code) == BPF_ALU64) in do_jit()
1069 switch (BPF_OP(insn->code)) { in do_jit()
1079 if (insn->dst_reg == BPF_REG_4) in do_jit()
1081 EMIT_mov(insn->dst_reg, AUX_REG); in do_jit()
1120 * Emit 'movzwl eax, ax' to zero extend 16-bit in do_jit()
1130 /* Emit 'mov eax, eax' to clear upper 32-bits */ in do_jit()
1163 st: if (is_imm8(insn->off)) in do_jit()
1164 EMIT2(add_1reg(0x40, dst_reg), insn->off); in do_jit()
1166 EMIT1_off32(add_1reg(0x80, dst_reg), insn->off); in do_jit()
1168 EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code))); in do_jit()
1176 emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); in do_jit()
1188 emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); in do_jit()
1189 if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { in do_jit()
1194 if (!bpf_prog->aux->extable) in do_jit()
1197 if (excnt >= bpf_prog->aux->num_exentries) { in do_jit()
1199 return -EFAULT; in do_jit()
1201 ex = &bpf_prog->aux->extable[excnt++]; in do_jit()
1203 delta = _insn - (u8 *)&ex->insn; in do_jit()
1205 pr_err("extable->insn doesn't fit into 32-bit\n"); in do_jit()
1206 return -EFAULT; in do_jit()
1208 ex->insn = delta; in do_jit()
1210 delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler; in do_jit()
1212 pr_err("extable->handler doesn't fit into 32-bit\n"); in do_jit()
1213 return -EFAULT; in do_jit()
1215 ex->handler = delta; in do_jit()
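
Both deltas are 32-bit self-relative offsets, the same scheme the kernel uses for its regular exception tables: the absolute address is recovered by adding the field's own address back in. A sketch of the read side (ex_to_addr() is illustrative; the kernel's extable helpers do the equivalent):

static inline unsigned long ex_to_addr(const int *field)
{
        return (unsigned long)field + *field;
}

This is also why the code above must bail out if either delta does not fit in 32 bits.
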
1219 return -EFAULT; in do_jit()
1224 * pt_regs->ip to jump over this x86 instruction in do_jit()
1229 ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8); in do_jit()
1243 xadd: if (is_imm8(insn->off)) in do_jit()
1244 EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); in do_jit()
1247 insn->off); in do_jit()
1255 -(bpf_prog->aux->stack_depth + 8)); in do_jit()
1256 if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7)) in do_jit()
1257 return -EINVAL; in do_jit()
1259 if (!imm32 || emit_call(&prog, func, image + addrs[i - 1])) in do_jit()
1260 return -EINVAL; in do_jit()
1266 emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1], in do_jit()
1269 bpf_prog->aux->stack_depth); in do_jit()
1273 bpf_prog->aux->stack_depth); in do_jit()
1298 if (BPF_CLASS(insn->code) == BPF_JMP) in do_jit()
1308 if (BPF_CLASS(insn->code) == BPF_JMP) in do_jit()
1318 if (BPF_CLASS(insn->code) == BPF_JMP) in do_jit()
1347 if (BPF_CLASS(insn->code) == BPF_JMP) in do_jit()
1356 if (BPF_CLASS(insn->code) == BPF_JMP) in do_jit()
1367 switch (BPF_OP(insn->code)) { in do_jit()
1408 return -EFAULT; in do_jit()
1410 jmp_offset = addrs[i + insn->off] - addrs[i]; in do_jit()
1417 return -EFAULT; in do_jit()
1423 if (insn->off == -1) in do_jit()
1424 /* -1 jmp instructions will always jump in do_jit()
1430 jmp_offset = -2; in do_jit()
1432 jmp_offset = addrs[i + insn->off] - addrs[i]; in do_jit()
1444 return -EFAULT; in do_jit()
1450 jmp_offset = ctx->cleanup_addr - addrs[i]; in do_jit()
1455 ctx->cleanup_addr = proglen; in do_jit()
1463 * By design x86-64 JIT should support all BPF instructions. in do_jit()
1468 pr_err("bpf_jit: unknown opcode %02x\n", insn->code); in do_jit()
1469 return -EINVAL; in do_jit()
1472 ilen = prog - temp; in do_jit()
1475 return -EFAULT; in do_jit()
1481 return -EFAULT; in do_jit()
1490 if (image && excnt != bpf_prog->aux->num_exentries) { in do_jit()
1492 return -EFAULT; in do_jit()
1503 * mov QWORD PTR [rbp-0x10],rdi in save_regs()
1504 * mov QWORD PTR [rbp-0x8],rsi in save_regs()
1507 emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]), in save_regs()
1510 -(stack_size - i * 8)); in save_regs()
1520 * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10] in restore_regs()
1521 * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8] in restore_regs()
1524 emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]), in restore_regs()
1527 -(stack_size - i * 8)); in restore_regs()
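
Resulting layout for a two-argument function, matching the comments above; argument i lives at rbp - (stack_size - i * 8), which is exactly the offset the emit_stx()/emit_ldx() calls encode:

        rbp - 16: arg 0 (rdi)
        rbp -  8: arg 1 (rsi)
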
1536 if (p->aux->sleepable) { in invoke_bpf_prog()
1538 return -EINVAL; in invoke_bpf_prog()
1541 return -EINVAL; in invoke_bpf_prog()
1546 /* arg1: lea rdi, [rbp - stack_size] */ in invoke_bpf_prog()
1547 EMIT4(0x48, 0x8D, 0x7D, -stack_size); in invoke_bpf_prog()
1548 /* arg2: progs[i]->insnsi for interpreter */ in invoke_bpf_prog()
1549 if (!p->jited) in invoke_bpf_prog()
1551 (long) p->insnsi >> 32, in invoke_bpf_prog()
1552 (u32) (long) p->insnsi); in invoke_bpf_prog()
1554 if (emit_call(&prog, p->bpf_func, prog)) in invoke_bpf_prog()
1555 return -EINVAL; in invoke_bpf_prog()
1562 emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); in invoke_bpf_prog()
1564 if (p->aux->sleepable) { in invoke_bpf_prog()
1566 return -EINVAL; in invoke_bpf_prog()
1571 /* arg2: mov rsi, rbx <- start time in nsec */ in invoke_bpf_prog()
1574 return -EINVAL; in invoke_bpf_prog()
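
Per-program sequence this emits, in C-like form (a sketch; the sleepable variants of the enter/exit calls skip the start-time bookkeeping):

        start = __bpf_prog_enter();
        ret = p->bpf_func(ctx_on_stack, p->insnsi);
        if (mod_ret)
                *(u64 *)(rbp - 8) = ret;   /* visible to later progs */
        __bpf_prog_exit(p, start);
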
1595 len -= noplen; in emit_nops()
1607 emit_nops(&prog, target - prog); in emit_align()
1618 offset = func - (ip + 2 + 4); in emit_cond_near_jump()
1621 return -EINVAL; in emit_cond_near_jump()
1634 for (i = 0; i < tp->nr_progs; i++) { in invoke_bpf()
1635 if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false)) in invoke_bpf()
1636 return -EINVAL; in invoke_bpf()
1653 emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); in invoke_bpf_mod_ret()
1654 for (i = 0; i < tp->nr_progs; i++) { in invoke_bpf_mod_ret()
1655 if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true)) in invoke_bpf_mod_ret()
1656 return -EINVAL; in invoke_bpf_mod_ret()
1658 /* mod_ret prog stored return value into [rbp - 8]. Emit: in invoke_bpf_mod_ret()
1659 * if (*(u64 *)(rbp - 8) != 0) in invoke_bpf_mod_ret()
1662 /* cmp QWORD PTR [rbp - 0x8], 0x0 */ in invoke_bpf_mod_ret()
1687 * mov qword ptr [rbp - 16], rdi // save skb pointer to stack
1688 * mov qword ptr [rbp - 8], rsi // save dev pointer to stack
1691 * lea rdi, [rbp - 16] // R1==ctx of bpf prog
1696 * mov rdi, qword ptr [rbp - 16] // restore skb pointer from stack
1697 * mov rsi, qword ptr [rbp - 8] // restore dev pointer from stack
1712 * mov qword ptr [rbp - 24], rdi // save skb pointer to stack
1713 * mov qword ptr [rbp - 16], rsi // save dev pointer to stack
1716 * lea rdi, [rbp - 24] // R1==ctx of bpf prog
1721 * mov rdi, qword ptr [rbp - 24] // restore skb pointer from stack
1722 * mov rsi, qword ptr [rbp - 16] // restore dev pointer from stack
1724 * mov qword ptr [rbp - 8], rax // save return value
1727 * lea rdi, [rbp - 24] // R1==ctx of bpf prog
1732 * mov rax, qword ptr [rbp - 8] // restore eth_type_trans's return value
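
In C terms the trampoline assembled by this function behaves roughly like the sketch below, where ctx is the block of arguments saved on the stack and the original function is only called when BPF_TRAMP_F_CALL_ORIG is set:

        trampoline(arg1, ..., argN):
                save args to the stack
                for each fentry prog p:
                        p(ctx)
                for each fmod_ret prog p:
                        *(u64 *)(rbp - 8) = p(ctx)
                        if (*(u64 *)(rbp - 8) != 0)
                                goto skip_orig
                restore args; ret = original(arg1, ..., argN)
                *(u64 *)(rbp - 8) = ret
        skip_orig:
                for each fexit prog p:
                        p(ctx)                  /* sees args and ret */
                return *(u64 *)(rbp - 8)
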
1743 int ret, i, cnt = 0, nr_args = m->nr_args; in arch_prepare_bpf_trampoline()
1751 /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ in arch_prepare_bpf_trampoline()
1753 return -ENOTSUPP; in arch_prepare_bpf_trampoline()
1757 return -EINVAL; in arch_prepare_bpf_trampoline()
1777 if (fentry->nr_progs) in arch_prepare_bpf_trampoline()
1779 return -EINVAL; in arch_prepare_bpf_trampoline()
1781 if (fmod_ret->nr_progs) { in arch_prepare_bpf_trampoline()
1782 branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *), in arch_prepare_bpf_trampoline()
1785 return -ENOMEM; in arch_prepare_bpf_trampoline()
1789 ret = -EINVAL; in arch_prepare_bpf_trampoline()
1795 if (fentry->nr_progs || fmod_ret->nr_progs) in arch_prepare_bpf_trampoline()
1800 ret = -EINVAL; in arch_prepare_bpf_trampoline()
1804 emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); in arch_prepare_bpf_trampoline()
1807 if (fmod_ret->nr_progs) { in arch_prepare_bpf_trampoline()
1808 /* From Intel 64 and IA-32 Architectures Optimization in arch_prepare_bpf_trampoline()
1810 * Coding Rule 11: All branch targets should be 16-byte in arch_prepare_bpf_trampoline()
1817 for (i = 0; i < fmod_ret->nr_progs; i++) in arch_prepare_bpf_trampoline()
1822 if (fexit->nr_progs) in arch_prepare_bpf_trampoline()
1824 ret = -EINVAL; in arch_prepare_bpf_trampoline()
1837 emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); in arch_prepare_bpf_trampoline()
1846 if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { in arch_prepare_bpf_trampoline()
1847 ret = -EFAULT; in arch_prepare_bpf_trampoline()
1850 ret = prog - (u8 *)image; in arch_prepare_bpf_trampoline()
1889 return -1; in emit_bpf_dispatcher()
1909 pivot = (b - a) / 2; in emit_bpf_dispatcher()
1912 return -1; in emit_bpf_dispatcher()
1929 /* From Intel 64 and IA-32 Architectures Optimization in emit_bpf_dispatcher()
1931 * Coding Rule 11: All branch targets should be 16-byte in emit_bpf_dispatcher()
1935 jg_offset = prog - jg_reloc; in emit_bpf_dispatcher()
1936 emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes); in emit_bpf_dispatcher()
1949 const s64 *ipa = a; in cmp_ips()
1952 if (*ipa > *ipb) in cmp_ips()
1954 if (*ipa < *ipb) in cmp_ips()
1955 return -1; in cmp_ips()
1964 return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs); in arch_prepare_bpf_dispatcher()
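
Shape of the generated dispatcher (a sketch): funcs[] is sorted by address via cmp_ips() above, and the recursion lays out a balanced binary search over the target addresses held in rdx, the dispatcher's third argument:

        cmp rdx, <pivot address>     ; jg  -> recurse into upper half
        ...                          ; fall through -> lower half
        cmp rdx, <leaf address>      ; je  -> direct jmp to that program

So dispatching to one of N attached programs costs O(log N) compares plus a direct jump instead of a retpolined indirect call.
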
1989 if (!prog->jit_requested) in bpf_int_jit_compile()
2004 jit_data = prog->aux->jit_data; in bpf_int_jit_compile()
2011 prog->aux->jit_data = jit_data; in bpf_int_jit_compile()
2013 addrs = jit_data->addrs; in bpf_int_jit_compile()
2015 ctx = jit_data->ctx; in bpf_int_jit_compile()
2016 oldproglen = jit_data->proglen; in bpf_int_jit_compile()
2017 image = jit_data->image; in bpf_int_jit_compile()
2018 header = jit_data->header; in bpf_int_jit_compile()
2022 addrs = kmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL); in bpf_int_jit_compile()
2032 for (proglen = 0, i = 0; i <= prog->len; i++) { in bpf_int_jit_compile()
2071 u32 extable_size = prog->aux->num_exentries * in bpf_int_jit_compile()
2081 prog->aux->extable = (void *) image + roundup(proglen, align); in bpf_int_jit_compile()
2088 bpf_jit_dump(prog->len, proglen, pass + 1, image); in bpf_int_jit_compile()
2091 if (!prog->is_func || extra_pass) { in bpf_int_jit_compile()
2095 jit_data->addrs = addrs; in bpf_int_jit_compile()
2096 jit_data->ctx = ctx; in bpf_int_jit_compile()
2097 jit_data->proglen = proglen; in bpf_int_jit_compile()
2098 jit_data->image = image; in bpf_int_jit_compile()
2099 jit_data->header = header; in bpf_int_jit_compile()
2101 prog->bpf_func = (void *)image; in bpf_int_jit_compile()
2102 prog->jited = 1; in bpf_int_jit_compile()
2103 prog->jited_len = proglen; in bpf_int_jit_compile()
2108 if (!image || !prog->is_func || extra_pass) { in bpf_int_jit_compile()
2114 prog->aux->jit_data = NULL; in bpf_int_jit_compile()