1  // SPDX-License-Identifier: GPL-2.0-only
2  /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3   * Copyright (c) 2016 Facebook
4   * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5   */
6  #include <uapi/linux/btf.h>
7  #include <linux/bpf-cgroup.h>
8  #include <linux/kernel.h>
9  #include <linux/types.h>
10  #include <linux/slab.h>
11  #include <linux/bpf.h>
12  #include <linux/btf.h>
13  #include <linux/bpf_verifier.h>
14  #include <linux/filter.h>
15  #include <net/netlink.h>
16  #include <linux/file.h>
17  #include <linux/vmalloc.h>
18  #include <linux/stringify.h>
19  #include <linux/bsearch.h>
20  #include <linux/sort.h>
21  #include <linux/perf_event.h>
22  #include <linux/ctype.h>
23  #include <linux/error-injection.h>
24  #include <linux/bpf_lsm.h>
25  #include <linux/btf_ids.h>
26  #include <linux/poison.h>
27  #include <linux/module.h>
28  #include <linux/cpumask.h>
29  #include <net/xdp.h>
30  
31  #include "disasm.h"
32  
33  static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
34  #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
35  	[_id] = & _name ## _verifier_ops,
36  #define BPF_MAP_TYPE(_id, _ops)
37  #define BPF_LINK_TYPE(_id, _name)
38  #include <linux/bpf_types.h>
39  #undef BPF_PROG_TYPE
40  #undef BPF_MAP_TYPE
41  #undef BPF_LINK_TYPE
42  };
43  
44  /* bpf_check() is a static code analyzer that walks eBPF program
45   * instruction by instruction and updates register/stack state.
46   * All paths of conditional branches are analyzed until 'bpf_exit' insn.
47   *
48   * The first pass is depth-first-search to check that the program is a DAG.
49   * It rejects the following programs:
50   * - larger than BPF_MAXINSNS insns
51   * - if loop is present (detected via back-edge)
52   * - unreachable insns exist (shouldn't be a forest. program = one function)
53   * - out of bounds or malformed jumps
54   * The second pass is all possible path descent from the 1st insn.
55   * Since it's analyzing all paths through the program, the length of the
56   * analysis is limited to 64k insn, which may be hit even if total number of
57   * insn is less than 4K, but there are too many branches that change stack/regs.
58   * Number of 'branches to be analyzed' is limited to 1k
59   *
60   * On entry to each instruction, each register has a type, and the instruction
61   * changes the types of the registers depending on instruction semantics.
62   * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
63   * copied to R1.
64   *
65   * All registers are 64-bit.
66   * R0 - return register
67   * R1-R5 argument passing registers
68   * R6-R9 callee saved registers
69   * R10 - frame pointer read-only
70   *
71   * At the start of BPF program the register R1 contains a pointer to bpf_context
72   * and has type PTR_TO_CTX.
73   *
74   * Verifier tracks arithmetic operations on pointers in case:
75   *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
76   *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
77   * 1st insn copies R10 (which has FRAME_PTR) type into R1
78   * and 2nd arithmetic instruction is pattern matched to recognize
79   * that it wants to construct a pointer to some element within stack.
80   * So after 2nd insn, the register R1 has type PTR_TO_STACK
81   * (and -20 constant is saved for further stack bounds checking).
82   * Meaning that this reg is a pointer to stack plus known immediate constant.
83   *
84   * Most of the time the registers have SCALAR_VALUE type, which
85   * means the register has some value, but it's not a valid pointer.
86   * (like pointer plus pointer becomes SCALAR_VALUE type)
87   *
88   * When verifier sees load or store instructions the type of base register
89   * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
90   * four pointer types recognized by check_mem_access() function.
91   *
92   * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
93   * and the range of [ptr, ptr + map's value_size) is accessible.
94   *
95   * registers used to pass values to function calls are checked against
96   * function argument constraints.
97   *
98   * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
99   * It means that the register type passed to this function must be
100   * PTR_TO_STACK and it will be used inside the function as
101   * 'pointer to map element key'
102   *
103   * For example the argument constraints for bpf_map_lookup_elem():
104   *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
105   *   .arg1_type = ARG_CONST_MAP_PTR,
106   *   .arg2_type = ARG_PTR_TO_MAP_KEY,
107   *
108   * ret_type says that this function returns 'pointer to map elem value or null'
109   * function expects 1st argument to be a const pointer to 'struct bpf_map' and
110   * 2nd argument should be a pointer to stack, which will be used inside
111   * the helper function as a pointer to map element key.
112   *
113   * On the kernel side the helper function looks like:
114   * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
115   * {
116   *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
117   *    void *key = (void *) (unsigned long) r2;
118   *    void *value;
119   *
120   *    here kernel can access 'key' and 'map' pointers safely, knowing that
121   *    [key, key + map->key_size) bytes are valid and were initialized on
122   *    the stack of eBPF program.
123   * }
124   *
125   * Corresponding eBPF program may look like:
126   *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
127   *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
128   *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
129   *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
130   * here verifier looks at prototype of map_lookup_elem() and sees:
131   * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
132   * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
133   *
134   * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
135   * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
136   * and were initialized prior to this call.
137   * If it's ok, then verifier allows this BPF_CALL insn and looks at
138   * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
139   * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
140   * returns either pointer to map value or NULL.
141   *
142   * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
143   * insn, the register holding that pointer in the true branch changes state to
144   * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
145   * branch. See check_cond_jmp_op().
146   *
147   * After the call R0 is set to return type of the function and registers R1-R5
148   * are set to NOT_INIT to indicate that they are no longer readable.
149   *
150   * The following reference types represent a potential reference to a kernel
151   * resource which, after first being allocated, must be checked and freed by
152   * the BPF program:
153   * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
154   *
155   * When the verifier sees a helper call return a reference type, it allocates a
156   * pointer id for the reference and stores it in the current function state.
157   * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
158   * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
159   * passes through a NULL-check conditional. For the branch wherein the state is
160   * changed to CONST_IMM, the verifier releases the reference.
161   *
162   * For each helper function that allocates a reference, such as
163   * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
164   * bpf_sk_release(). When a reference type passes into the release function,
165   * the verifier also releases the reference. If any unchecked or unreleased
166   * reference remains at the end of the program, the verifier rejects it.
167   */
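/* To make the PTR_TO_MAP_VALUE_OR_NULL handling described above concrete,
 * here is a minimal illustrative sketch (not part of the verifier itself) of
 * a program it would accept, assuming a map with a 4-byte key and a value of
 * at least 8 bytes; 'map_fd' is a placeholder for a real map file descriptor:
 *
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
 *    BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),            // initialize key on stack
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),         // R0 is PTR_TO_MAP_VALUE_OR_NULL
 *    BPF_MOV64_IMM(BPF_REG_1, 1),
 *    BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),  // R0 is PTR_TO_MAP_VALUE here
 *    BPF_MOV64_IMM(BPF_REG_0, 0),
 *    BPF_EXIT_INSN(),
 *
 * Dropping the BPF_JMP_IMM() NULL check makes check_mem_access() reject the
 * store through R0, because R0 may still be NULL at that point.
 */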
168  
169  /* verifier_state + insn_idx are pushed to stack when branch is encountered */
170  struct bpf_verifier_stack_elem {
171  	/* verifier state is 'st'
172  	 * before processing instruction 'insn_idx'
173  	 * and after processing instruction 'prev_insn_idx'
174  	 */
175  	struct bpf_verifier_state st;
176  	int insn_idx;
177  	int prev_insn_idx;
178  	struct bpf_verifier_stack_elem *next;
179  	/* length of verifier log at the time this state was pushed on stack */
180  	u32 log_pos;
181  };
182  
183  #define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
184  #define BPF_COMPLEXITY_LIMIT_STATES	64
185  
186  #define BPF_MAP_KEY_POISON	(1ULL << 63)
187  #define BPF_MAP_KEY_SEEN	(1ULL << 62)
188  
189  #define BPF_MAP_PTR_UNPRIV	1UL
190  #define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
191  					  POISON_POINTER_DELTA))
192  #define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
193  
194  static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
195  static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
196  static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
197  static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
198  static int ref_set_non_owning(struct bpf_verifier_env *env,
199  			      struct bpf_reg_state *reg);
200  static void specialize_kfunc(struct bpf_verifier_env *env,
201  			     u32 func_id, u16 offset, unsigned long *addr);
202  static bool is_trusted_reg(const struct bpf_reg_state *reg);
203  
204  static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
205  {
206  	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
207  }
208  
209  static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
210  {
211  	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
212  }
213  
214  static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
215  			      const struct bpf_map *map, bool unpriv)
216  {
217  	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
218  	unpriv |= bpf_map_ptr_unpriv(aux);
219  	aux->map_ptr_state = (unsigned long)map |
220  			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
221  }
222  
223  static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
224  {
225  	return aux->map_key_state & BPF_MAP_KEY_POISON;
226  }
227  
228  static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
229  {
230  	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
231  }
232  
233  static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
234  {
235  	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
236  }
237  
238  static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
239  {
240  	bool poisoned = bpf_map_key_poisoned(aux);
241  
242  	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
243  			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
244  }
245  
246  static bool bpf_helper_call(const struct bpf_insn *insn)
247  {
248  	return insn->code == (BPF_JMP | BPF_CALL) &&
249  	       insn->src_reg == 0;
250  }
251  
252  static bool bpf_pseudo_call(const struct bpf_insn *insn)
253  {
254  	return insn->code == (BPF_JMP | BPF_CALL) &&
255  	       insn->src_reg == BPF_PSEUDO_CALL;
256  }
257  
258  static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
259  {
260  	return insn->code == (BPF_JMP | BPF_CALL) &&
261  	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
262  }
263  
264  struct bpf_call_arg_meta {
265  	struct bpf_map *map_ptr;
266  	bool raw_mode;
267  	bool pkt_access;
268  	u8 release_regno;
269  	int regno;
270  	int access_size;
271  	int mem_size;
272  	u64 msize_max_value;
273  	int ref_obj_id;
274  	int dynptr_id;
275  	int map_uid;
276  	int func_id;
277  	struct btf *btf;
278  	u32 btf_id;
279  	struct btf *ret_btf;
280  	u32 ret_btf_id;
281  	u32 subprogno;
282  	struct btf_field *kptr_field;
283  };
284  
285  struct bpf_kfunc_call_arg_meta {
286  	/* In parameters */
287  	struct btf *btf;
288  	u32 func_id;
289  	u32 kfunc_flags;
290  	const struct btf_type *func_proto;
291  	const char *func_name;
292  	/* Out parameters */
293  	u32 ref_obj_id;
294  	u8 release_regno;
295  	bool r0_rdonly;
296  	u32 ret_btf_id;
297  	u64 r0_size;
298  	u32 subprogno;
299  	struct {
300  		u64 value;
301  		bool found;
302  	} arg_constant;
303  
304  	/* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
305  	 * generally to pass info about user-defined local kptr types to later
306  	 * verification logic
307  	 *   bpf_obj_drop
308  	 *     Record the local kptr type to be drop'd
309  	 *   bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
310  	 *     Record the local kptr type to be refcount_incr'd and use
311  	 *     arg_owning_ref to determine whether refcount_acquire should be
312  	 *     fallible
313  	 */
314  	struct btf *arg_btf;
315  	u32 arg_btf_id;
316  	bool arg_owning_ref;
317  
318  	struct {
319  		struct btf_field *field;
320  	} arg_list_head;
321  	struct {
322  		struct btf_field *field;
323  	} arg_rbtree_root;
324  	struct {
325  		enum bpf_dynptr_type type;
326  		u32 id;
327  		u32 ref_obj_id;
328  	} initialized_dynptr;
329  	struct {
330  		u8 spi;
331  		u8 frameno;
332  	} iter;
333  	u64 mem_size;
334  };
335  
336  struct btf *btf_vmlinux;
337  
338  static DEFINE_MUTEX(bpf_verifier_lock);
339  
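/* Return the bpf_line_info covering instruction 'insn_off', i.e. the last
 * entry whose insn_off is <= the requested offset, or NULL if the program
 * has no line info or the offset is out of range.
 */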
340  static const struct bpf_line_info *
341  find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
342  {
343  	const struct bpf_line_info *linfo;
344  	const struct bpf_prog *prog;
345  	u32 i, nr_linfo;
346  
347  	prog = env->prog;
348  	nr_linfo = prog->aux->nr_linfo;
349  
350  	if (!nr_linfo || insn_off >= prog->len)
351  		return NULL;
352  
353  	linfo = prog->aux->linfo;
354  	for (i = 1; i < nr_linfo; i++)
355  		if (insn_off < linfo[i].insn_off)
356  			break;
357  
358  	return &linfo[i - 1];
359  }
360  
361  __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
362  {
363  	struct bpf_verifier_env *env = private_data;
364  	va_list args;
365  
366  	if (!bpf_verifier_log_needed(&env->log))
367  		return;
368  
369  	va_start(args, fmt);
370  	bpf_verifier_vlog(&env->log, fmt, args);
371  	va_end(args);
372  }
373  
374  static const char *ltrim(const char *s)
375  {
376  	while (isspace(*s))
377  		s++;
378  
379  	return s;
380  }
381  
382  __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
383  					 u32 insn_off,
384  					 const char *prefix_fmt, ...)
385  {
386  	const struct bpf_line_info *linfo;
387  
388  	if (!bpf_verifier_log_needed(&env->log))
389  		return;
390  
391  	linfo = find_linfo(env, insn_off);
392  	if (!linfo || linfo == env->prev_linfo)
393  		return;
394  
395  	if (prefix_fmt) {
396  		va_list args;
397  
398  		va_start(args, prefix_fmt);
399  		bpf_verifier_vlog(&env->log, prefix_fmt, args);
400  		va_end(args);
401  	}
402  
403  	verbose(env, "%s\n",
404  		ltrim(btf_name_by_offset(env->prog->aux->btf,
405  					 linfo->line_off)));
406  
407  	env->prev_linfo = linfo;
408  }
409  
410  static void verbose_invalid_scalar(struct bpf_verifier_env *env,
411  				   struct bpf_reg_state *reg,
412  				   struct tnum *range, const char *ctx,
413  				   const char *reg_name)
414  {
415  	char tn_buf[48];
416  
417  	verbose(env, "At %s the register %s ", ctx, reg_name);
418  	if (!tnum_is_unknown(reg->var_off)) {
419  		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
420  		verbose(env, "has value %s", tn_buf);
421  	} else {
422  		verbose(env, "has unknown scalar value");
423  	}
424  	tnum_strn(tn_buf, sizeof(tn_buf), *range);
425  	verbose(env, " should have been in %s\n", tn_buf);
426  }
427  
428  static bool type_is_pkt_pointer(enum bpf_reg_type type)
429  {
430  	type = base_type(type);
431  	return type == PTR_TO_PACKET ||
432  	       type == PTR_TO_PACKET_META;
433  }
434  
435  static bool type_is_sk_pointer(enum bpf_reg_type type)
436  {
437  	return type == PTR_TO_SOCKET ||
438  		type == PTR_TO_SOCK_COMMON ||
439  		type == PTR_TO_TCP_SOCK ||
440  		type == PTR_TO_XDP_SOCK;
441  }
442  
443  static bool type_may_be_null(u32 type)
444  {
445  	return type & PTR_MAYBE_NULL;
446  }
447  
448  static bool reg_not_null(const struct bpf_reg_state *reg)
449  {
450  	enum bpf_reg_type type;
451  
452  	type = reg->type;
453  	if (type_may_be_null(type))
454  		return false;
455  
456  	type = base_type(type);
457  	return type == PTR_TO_SOCKET ||
458  		type == PTR_TO_TCP_SOCK ||
459  		type == PTR_TO_MAP_VALUE ||
460  		type == PTR_TO_MAP_KEY ||
461  		type == PTR_TO_SOCK_COMMON ||
462  		(type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
463  		type == PTR_TO_MEM;
464  }
465  
466  static bool type_is_ptr_alloc_obj(u32 type)
467  {
468  	return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
469  }
470  
471  static bool type_is_non_owning_ref(u32 type)
472  {
473  	return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF;
474  }
475  
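/* Return the btf_record describing the special fields (spin_lock, kptrs,
 * list_head, rb_root, ...) of the object 'reg' points to: the map's record
 * for PTR_TO_MAP_VALUE, the struct_meta record for allocated objects, or
 * NULL when there is none.
 */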
476  static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
477  {
478  	struct btf_record *rec = NULL;
479  	struct btf_struct_meta *meta;
480  
481  	if (reg->type == PTR_TO_MAP_VALUE) {
482  		rec = reg->map_ptr->record;
483  	} else if (type_is_ptr_alloc_obj(reg->type)) {
484  		meta = btf_find_struct_meta(reg->btf, reg->btf_id);
485  		if (meta)
486  			rec = meta->record;
487  	}
488  	return rec;
489  }
490  
491  static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog)
492  {
493  	struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
494  
495  	return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
496  }
497  
498  static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
499  {
500  	return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
501  }
502  
503  static bool type_is_rdonly_mem(u32 type)
504  {
505  	return type & MEM_RDONLY;
506  }
507  
508  static bool is_acquire_function(enum bpf_func_id func_id,
509  				const struct bpf_map *map)
510  {
511  	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
512  
513  	if (func_id == BPF_FUNC_sk_lookup_tcp ||
514  	    func_id == BPF_FUNC_sk_lookup_udp ||
515  	    func_id == BPF_FUNC_skc_lookup_tcp ||
516  	    func_id == BPF_FUNC_ringbuf_reserve ||
517  	    func_id == BPF_FUNC_kptr_xchg)
518  		return true;
519  
520  	if (func_id == BPF_FUNC_map_lookup_elem &&
521  	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
522  	     map_type == BPF_MAP_TYPE_SOCKHASH))
523  		return true;
524  
525  	return false;
526  }
527  
528  static bool is_ptr_cast_function(enum bpf_func_id func_id)
529  {
530  	return func_id == BPF_FUNC_tcp_sock ||
531  		func_id == BPF_FUNC_sk_fullsock ||
532  		func_id == BPF_FUNC_skc_to_tcp_sock ||
533  		func_id == BPF_FUNC_skc_to_tcp6_sock ||
534  		func_id == BPF_FUNC_skc_to_udp6_sock ||
535  		func_id == BPF_FUNC_skc_to_mptcp_sock ||
536  		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
537  		func_id == BPF_FUNC_skc_to_tcp_request_sock;
538  }
539  
540  static bool is_dynptr_ref_function(enum bpf_func_id func_id)
541  {
542  	return func_id == BPF_FUNC_dynptr_data;
543  }
544  
545  static bool is_callback_calling_kfunc(u32 btf_id);
546  
547  static bool is_callback_calling_function(enum bpf_func_id func_id)
548  {
549  	return func_id == BPF_FUNC_for_each_map_elem ||
550  	       func_id == BPF_FUNC_timer_set_callback ||
551  	       func_id == BPF_FUNC_find_vma ||
552  	       func_id == BPF_FUNC_loop ||
553  	       func_id == BPF_FUNC_user_ringbuf_drain;
554  }
555  
556  static bool is_async_callback_calling_function(enum bpf_func_id func_id)
557  {
558  	return func_id == BPF_FUNC_timer_set_callback;
559  }
560  
561  static bool is_storage_get_function(enum bpf_func_id func_id)
562  {
563  	return func_id == BPF_FUNC_sk_storage_get ||
564  	       func_id == BPF_FUNC_inode_storage_get ||
565  	       func_id == BPF_FUNC_task_storage_get ||
566  	       func_id == BPF_FUNC_cgrp_storage_get;
567  }
568  
569  static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
570  					const struct bpf_map *map)
571  {
572  	int ref_obj_uses = 0;
573  
574  	if (is_ptr_cast_function(func_id))
575  		ref_obj_uses++;
576  	if (is_acquire_function(func_id, map))
577  		ref_obj_uses++;
578  	if (is_dynptr_ref_function(func_id))
579  		ref_obj_uses++;
580  
581  	return ref_obj_uses > 1;
582  }
583  
584  static bool is_cmpxchg_insn(const struct bpf_insn *insn)
585  {
586  	return BPF_CLASS(insn->code) == BPF_STX &&
587  	       BPF_MODE(insn->code) == BPF_ATOMIC &&
588  	       insn->imm == BPF_CMPXCHG;
589  }
590  
591  /* string representation of 'enum bpf_reg_type'
592   *
593   * Note that reg_type_str() cannot appear more than once in a single verbose()
594   * statement, since it formats into the shared env->tmp_str_buf.
595   */
596  static const char *reg_type_str(struct bpf_verifier_env *env,
597  				enum bpf_reg_type type)
598  {
599  	char postfix[16] = {0}, prefix[64] = {0};
600  	static const char * const str[] = {
601  		[NOT_INIT]		= "?",
602  		[SCALAR_VALUE]		= "scalar",
603  		[PTR_TO_CTX]		= "ctx",
604  		[CONST_PTR_TO_MAP]	= "map_ptr",
605  		[PTR_TO_MAP_VALUE]	= "map_value",
606  		[PTR_TO_STACK]		= "fp",
607  		[PTR_TO_PACKET]		= "pkt",
608  		[PTR_TO_PACKET_META]	= "pkt_meta",
609  		[PTR_TO_PACKET_END]	= "pkt_end",
610  		[PTR_TO_FLOW_KEYS]	= "flow_keys",
611  		[PTR_TO_SOCKET]		= "sock",
612  		[PTR_TO_SOCK_COMMON]	= "sock_common",
613  		[PTR_TO_TCP_SOCK]	= "tcp_sock",
614  		[PTR_TO_TP_BUFFER]	= "tp_buffer",
615  		[PTR_TO_XDP_SOCK]	= "xdp_sock",
616  		[PTR_TO_BTF_ID]		= "ptr_",
617  		[PTR_TO_MEM]		= "mem",
618  		[PTR_TO_BUF]		= "buf",
619  		[PTR_TO_FUNC]		= "func",
620  		[PTR_TO_MAP_KEY]	= "map_key",
621  		[CONST_PTR_TO_DYNPTR]	= "dynptr_ptr",
622  	};
623  
624  	if (type & PTR_MAYBE_NULL) {
625  		if (base_type(type) == PTR_TO_BTF_ID)
626  			strncpy(postfix, "or_null_", 16);
627  		else
628  			strncpy(postfix, "_or_null", 16);
629  	}
630  
631  	snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
632  		 type & MEM_RDONLY ? "rdonly_" : "",
633  		 type & MEM_RINGBUF ? "ringbuf_" : "",
634  		 type & MEM_USER ? "user_" : "",
635  		 type & MEM_PERCPU ? "percpu_" : "",
636  		 type & MEM_RCU ? "rcu_" : "",
637  		 type & PTR_UNTRUSTED ? "untrusted_" : "",
638  		 type & PTR_TRUSTED ? "trusted_" : ""
639  	);
640  
641  	snprintf(env->tmp_str_buf, TMP_STR_BUF_LEN, "%s%s%s",
642  		 prefix, str[base_type(type)], postfix);
643  	return env->tmp_str_buf;
644  }
645  
646  static char slot_type_char[] = {
647  	[STACK_INVALID]	= '?',
648  	[STACK_SPILL]	= 'r',
649  	[STACK_MISC]	= 'm',
650  	[STACK_ZERO]	= '0',
651  	[STACK_DYNPTR]	= 'd',
652  	[STACK_ITER]	= 'i',
653  };
654  
655  static void print_liveness(struct bpf_verifier_env *env,
656  			   enum bpf_reg_liveness live)
657  {
658  	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
659  	    verbose(env, "_");
660  	if (live & REG_LIVE_READ)
661  		verbose(env, "r");
662  	if (live & REG_LIVE_WRITTEN)
663  		verbose(env, "w");
664  	if (live & REG_LIVE_DONE)
665  		verbose(env, "D");
666  }
667  
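/* Map a negative stack offset to its stack slot index ("spi"); e.g. with
 * BPF_REG_SIZE == 8, off == -8 maps to spi 0 and off == -16 to spi 1.
 */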
668  static int __get_spi(s32 off)
669  {
670  	return (-off - 1) / BPF_REG_SIZE;
671  }
672  
673  static struct bpf_func_state *func(struct bpf_verifier_env *env,
674  				   const struct bpf_reg_state *reg)
675  {
676  	struct bpf_verifier_state *cur = env->cur_state;
677  
678  	return cur->frame[reg->frameno];
679  }
680  
681  static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
682  {
683         int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
684  
685         /* We need to check that slots between [spi - nr_slots + 1, spi] are
686  	* within [0, allocated_stack).
687  	*
688  	* Please note that the spi grows downwards. For example, a dynptr
689  	* takes the size of two stack slots; the first slot will be at
690  	* spi and the second slot will be at spi - 1.
691  	*/
692         return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
693  }
694  
695  static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
696  			          const char *obj_kind, int nr_slots)
697  {
698  	int off, spi;
699  
700  	if (!tnum_is_const(reg->var_off)) {
701  		verbose(env, "%s has to be at a constant offset\n", obj_kind);
702  		return -EINVAL;
703  	}
704  
705  	off = reg->off + reg->var_off.value;
706  	if (off % BPF_REG_SIZE) {
707  		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
708  		return -EINVAL;
709  	}
710  
711  	spi = __get_spi(off);
712  	if (spi + 1 < nr_slots) {
713  		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
714  		return -EINVAL;
715  	}
716  
717  	if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
718  		return -ERANGE;
719  	return spi;
720  }
721  
722  static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
723  {
724  	return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
725  }
726  
727  static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
728  {
729  	return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
730  }
731  
732  static const char *btf_type_name(const struct btf *btf, u32 id)
733  {
734  	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
735  }
736  
737  static const char *dynptr_type_str(enum bpf_dynptr_type type)
738  {
739  	switch (type) {
740  	case BPF_DYNPTR_TYPE_LOCAL:
741  		return "local";
742  	case BPF_DYNPTR_TYPE_RINGBUF:
743  		return "ringbuf";
744  	case BPF_DYNPTR_TYPE_SKB:
745  		return "skb";
746  	case BPF_DYNPTR_TYPE_XDP:
747  		return "xdp";
748  	case BPF_DYNPTR_TYPE_INVALID:
749  		return "<invalid>";
750  	default:
751  		WARN_ONCE(1, "unknown dynptr type %d\n", type);
752  		return "<unknown>";
753  	}
754  }
755  
756  static const char *iter_type_str(const struct btf *btf, u32 btf_id)
757  {
758  	if (!btf || btf_id == 0)
759  		return "<invalid>";
760  
761  	/* we already validated that type is valid and has conforming name */
762  	return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1;
763  }
764  
765  static const char *iter_state_str(enum bpf_iter_state state)
766  {
767  	switch (state) {
768  	case BPF_ITER_STATE_ACTIVE:
769  		return "active";
770  	case BPF_ITER_STATE_DRAINED:
771  		return "drained";
772  	case BPF_ITER_STATE_INVALID:
773  		return "<invalid>";
774  	default:
775  		WARN_ONCE(1, "unknown iter state %d\n", state);
776  		return "<unknown>";
777  	}
778  }
779  
780  static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
781  {
782  	env->scratched_regs |= 1U << regno;
783  }
784  
785  static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
786  {
787  	env->scratched_stack_slots |= 1ULL << spi;
788  }
789  
790  static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
791  {
792  	return (env->scratched_regs >> regno) & 1;
793  }
794  
795  static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
796  {
797  	return (env->scratched_stack_slots >> regno) & 1;
798  }
799  
800  static bool verifier_state_scratched(const struct bpf_verifier_env *env)
801  {
802  	return env->scratched_regs || env->scratched_stack_slots;
803  }
804  
805  static void mark_verifier_state_clean(struct bpf_verifier_env *env)
806  {
807  	env->scratched_regs = 0U;
808  	env->scratched_stack_slots = 0ULL;
809  }
810  
811  /* Used for printing the entire verifier state. */
812  static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
813  {
814  	env->scratched_regs = ~0U;
815  	env->scratched_stack_slots = ~0ULL;
816  }
817  
818  static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
819  {
820  	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
821  	case DYNPTR_TYPE_LOCAL:
822  		return BPF_DYNPTR_TYPE_LOCAL;
823  	case DYNPTR_TYPE_RINGBUF:
824  		return BPF_DYNPTR_TYPE_RINGBUF;
825  	case DYNPTR_TYPE_SKB:
826  		return BPF_DYNPTR_TYPE_SKB;
827  	case DYNPTR_TYPE_XDP:
828  		return BPF_DYNPTR_TYPE_XDP;
829  	default:
830  		return BPF_DYNPTR_TYPE_INVALID;
831  	}
832  }
833  
834  static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
835  {
836  	switch (type) {
837  	case BPF_DYNPTR_TYPE_LOCAL:
838  		return DYNPTR_TYPE_LOCAL;
839  	case BPF_DYNPTR_TYPE_RINGBUF:
840  		return DYNPTR_TYPE_RINGBUF;
841  	case BPF_DYNPTR_TYPE_SKB:
842  		return DYNPTR_TYPE_SKB;
843  	case BPF_DYNPTR_TYPE_XDP:
844  		return DYNPTR_TYPE_XDP;
845  	default:
846  		return 0;
847  	}
848  }
849  
850  static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
851  {
852  	return type == BPF_DYNPTR_TYPE_RINGBUF;
853  }
854  
855  static void __mark_dynptr_reg(struct bpf_reg_state *reg,
856  			      enum bpf_dynptr_type type,
857  			      bool first_slot, int dynptr_id);
858  
859  static void __mark_reg_not_init(const struct bpf_verifier_env *env,
860  				struct bpf_reg_state *reg);
861  
862  static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
863  				   struct bpf_reg_state *sreg1,
864  				   struct bpf_reg_state *sreg2,
865  				   enum bpf_dynptr_type type)
866  {
867  	int id = ++env->id_gen;
868  
869  	__mark_dynptr_reg(sreg1, type, true, id);
870  	__mark_dynptr_reg(sreg2, type, false, id);
871  }
872  
873  static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
874  			       struct bpf_reg_state *reg,
875  			       enum bpf_dynptr_type type)
876  {
877  	__mark_dynptr_reg(reg, type, true, ++env->id_gen);
878  }
879  
880  static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
881  				        struct bpf_func_state *state, int spi);
882  
883  static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
884  				   enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
885  {
886  	struct bpf_func_state *state = func(env, reg);
887  	enum bpf_dynptr_type type;
888  	int spi, i, err;
889  
890  	spi = dynptr_get_spi(env, reg);
891  	if (spi < 0)
892  		return spi;
893  
894  	/* We cannot assume both spi and spi - 1 belong to the same dynptr,
895  	 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
896  	 * to ensure that for the following example:
897  	 *	[d1][d1][d2][d2]
898  	 * spi    3   2   1   0
899  	 * So marking spi = 2 should lead to destruction of both d1 and d2. In
900  	 * case they do belong to same dynptr, second call won't see slot_type
901  	 * as STACK_DYNPTR and will simply skip destruction.
902  	 */
903  	err = destroy_if_dynptr_stack_slot(env, state, spi);
904  	if (err)
905  		return err;
906  	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
907  	if (err)
908  		return err;
909  
910  	for (i = 0; i < BPF_REG_SIZE; i++) {
911  		state->stack[spi].slot_type[i] = STACK_DYNPTR;
912  		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
913  	}
914  
915  	type = arg_to_dynptr_type(arg_type);
916  	if (type == BPF_DYNPTR_TYPE_INVALID)
917  		return -EINVAL;
918  
919  	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
920  			       &state->stack[spi - 1].spilled_ptr, type);
921  
922  	if (dynptr_type_refcounted(type)) {
923  		/* The id is used to track proper releasing */
924  		int id;
925  
926  		if (clone_ref_obj_id)
927  			id = clone_ref_obj_id;
928  		else
929  			id = acquire_reference_state(env, insn_idx);
930  
931  		if (id < 0)
932  			return id;
933  
934  		state->stack[spi].spilled_ptr.ref_obj_id = id;
935  		state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
936  	}
937  
938  	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
939  	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
940  
941  	return 0;
942  }
943  
944  static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi)
945  {
946  	int i;
947  
948  	for (i = 0; i < BPF_REG_SIZE; i++) {
949  		state->stack[spi].slot_type[i] = STACK_INVALID;
950  		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
951  	}
952  
953  	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
954  	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
955  
956  	/* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
957  	 *
958  	 * While we don't allow reading STACK_INVALID, it is still possible to
959  	 * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
960  	 * helpers or insns can do partial read of that part without failing,
961  	 * but check_stack_range_initialized, check_stack_read_var_off, and
962  	 * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
963  	 * the slot conservatively. Hence we need to prevent those liveness
964  	 * marking walks.
965  	 *
966  	 * This was not a problem before because STACK_INVALID is only set by
967  	 * default (where the default reg state has its reg->parent as NULL), or
968  	 * in clean_live_states after REG_LIVE_DONE (at which point
969  	 * mark_reg_read won't walk reg->parent chain), but not randomly during
970  	 * verifier state exploration (like we did above). Hence, for our case
971  	 * parentage chain will still be live (i.e. reg->parent may be
972  	 * non-NULL), while earlier reg->parent was NULL, so we need
973  	 * REG_LIVE_WRITTEN to screen off read marker propagation when it is
974  	 * done later on reads or by mark_dynptr_read as well, to avoid
975  	 * unnecessarily marking registers in the verifier state.
976  	 */
977  	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
978  	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
979  }
980  
981  static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
982  {
983  	struct bpf_func_state *state = func(env, reg);
984  	int spi, ref_obj_id, i;
985  
986  	spi = dynptr_get_spi(env, reg);
987  	if (spi < 0)
988  		return spi;
989  
990  	if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
991  		invalidate_dynptr(env, state, spi);
992  		return 0;
993  	}
994  
995  	ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;
996  
997  	/* If the dynptr has a ref_obj_id, then we need to invalidate
998  	 * two things:
999  	 *
1000  	 * 1) Any dynptrs with a matching ref_obj_id (clones)
1001  	 * 2) Any slices derived from this dynptr.
1002  	 */
1003  
1004  	/* Invalidate any slices associated with this dynptr */
1005  	WARN_ON_ONCE(release_reference(env, ref_obj_id));
1006  
1007  	/* Invalidate any dynptr clones */
1008  	for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) {
1009  		if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id)
1010  			continue;
1011  
1012  		/* it should always be the case that if the ref obj id
1013  		 * matches then the stack slot also belongs to a
1014  		 * dynptr
1015  		 */
1016  		if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
1017  			verbose(env, "verifier internal error: misconfigured ref_obj_id\n");
1018  			return -EFAULT;
1019  		}
1020  		if (state->stack[i].spilled_ptr.dynptr.first_slot)
1021  			invalidate_dynptr(env, state, i);
1022  	}
1023  
1024  	return 0;
1025  }
1026  
1027  static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1028  			       struct bpf_reg_state *reg);
1029  
1030  static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1031  {
1032  	if (!env->allow_ptr_leaks)
1033  		__mark_reg_not_init(env, reg);
1034  	else
1035  		__mark_reg_unknown(env, reg);
1036  }
1037  
1038  static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
1039  				        struct bpf_func_state *state, int spi)
1040  {
1041  	struct bpf_func_state *fstate;
1042  	struct bpf_reg_state *dreg;
1043  	int i, dynptr_id;
1044  
1045  	/* We always ensure that STACK_DYNPTR is never set partially,
1046  	 * hence just checking for slot_type[0] is enough. This is
1047  	 * different for STACK_SPILL, where it may be only set for
1048  	 * 1 byte, so code has to use is_spilled_reg.
1049  	 */
1050  	if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
1051  		return 0;
1052  
1053  	/* Reposition spi to first slot */
1054  	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
1055  		spi = spi + 1;
1056  
1057  	if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
1058  		verbose(env, "cannot overwrite referenced dynptr\n");
1059  		return -EINVAL;
1060  	}
1061  
1062  	mark_stack_slot_scratched(env, spi);
1063  	mark_stack_slot_scratched(env, spi - 1);
1064  
1065  	/* Writing partially to one dynptr stack slot destroys both. */
1066  	for (i = 0; i < BPF_REG_SIZE; i++) {
1067  		state->stack[spi].slot_type[i] = STACK_INVALID;
1068  		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
1069  	}
1070  
1071  	dynptr_id = state->stack[spi].spilled_ptr.id;
1072  	/* Invalidate any slices associated with this dynptr */
1073  	bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
1074  		/* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
1075  		if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
1076  			continue;
1077  		if (dreg->dynptr_id == dynptr_id)
1078  			mark_reg_invalid(env, dreg);
1079  	}));
1080  
1081  	/* Do not release reference state, we are destroying dynptr on stack,
1082  	 * not using some helper to release it. Just reset register.
1083  	 */
1084  	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
1085  	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
1086  
1087  	/* Same reason as unmark_stack_slots_dynptr above */
1088  	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1089  	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
1090  
1091  	return 0;
1092  }
1093  
1094  static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1095  {
1096  	int spi;
1097  
1098  	if (reg->type == CONST_PTR_TO_DYNPTR)
1099  		return false;
1100  
1101  	spi = dynptr_get_spi(env, reg);
1102  
1103  	/* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
1104  	 * error because this just means the stack state hasn't been updated yet.
1105  	 * We will do check_mem_access to check and update stack bounds later.
1106  	 */
1107  	if (spi < 0 && spi != -ERANGE)
1108  		return false;
1109  
1110  	/* We don't need to check if the stack slots are marked by previous
1111  	 * dynptr initializations because we allow overwriting existing unreferenced
1112  	 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
1113  	 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
1114  	 * touching are completely destructed before we reinitialize them for a new
1115  	 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
1116  	 * instead of delaying it until the end where the user will get "Unreleased
1117  	 * reference" error.
1118  	 */
1119  	return true;
1120  }
1121  
1122  static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1123  {
1124  	struct bpf_func_state *state = func(env, reg);
1125  	int i, spi;
1126  
1127  	/* This already represents first slot of initialized bpf_dynptr.
1128  	 *
1129  	 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
1130  	 * check_func_arg_reg_off's logic, so we don't need to check its
1131  	 * offset and alignment.
1132  	 */
1133  	if (reg->type == CONST_PTR_TO_DYNPTR)
1134  		return true;
1135  
1136  	spi = dynptr_get_spi(env, reg);
1137  	if (spi < 0)
1138  		return false;
1139  	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
1140  		return false;
1141  
1142  	for (i = 0; i < BPF_REG_SIZE; i++) {
1143  		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
1144  		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
1145  			return false;
1146  	}
1147  
1148  	return true;
1149  }
1150  
1151  static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1152  				    enum bpf_arg_type arg_type)
1153  {
1154  	struct bpf_func_state *state = func(env, reg);
1155  	enum bpf_dynptr_type dynptr_type;
1156  	int spi;
1157  
1158  	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
1159  	if (arg_type == ARG_PTR_TO_DYNPTR)
1160  		return true;
1161  
1162  	dynptr_type = arg_to_dynptr_type(arg_type);
1163  	if (reg->type == CONST_PTR_TO_DYNPTR) {
1164  		return reg->dynptr.type == dynptr_type;
1165  	} else {
1166  		spi = dynptr_get_spi(env, reg);
1167  		if (spi < 0)
1168  			return false;
1169  		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
1170  	}
1171  }
1172  
1173  static void __mark_reg_known_zero(struct bpf_reg_state *reg);
1174  
1175  static int mark_stack_slots_iter(struct bpf_verifier_env *env,
1176  				 struct bpf_reg_state *reg, int insn_idx,
1177  				 struct btf *btf, u32 btf_id, int nr_slots)
1178  {
1179  	struct bpf_func_state *state = func(env, reg);
1180  	int spi, i, j, id;
1181  
1182  	spi = iter_get_spi(env, reg, nr_slots);
1183  	if (spi < 0)
1184  		return spi;
1185  
1186  	id = acquire_reference_state(env, insn_idx);
1187  	if (id < 0)
1188  		return id;
1189  
1190  	for (i = 0; i < nr_slots; i++) {
1191  		struct bpf_stack_state *slot = &state->stack[spi - i];
1192  		struct bpf_reg_state *st = &slot->spilled_ptr;
1193  
1194  		__mark_reg_known_zero(st);
1195  		st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1196  		st->live |= REG_LIVE_WRITTEN;
1197  		st->ref_obj_id = i == 0 ? id : 0;
1198  		st->iter.btf = btf;
1199  		st->iter.btf_id = btf_id;
1200  		st->iter.state = BPF_ITER_STATE_ACTIVE;
1201  		st->iter.depth = 0;
1202  
1203  		for (j = 0; j < BPF_REG_SIZE; j++)
1204  			slot->slot_type[j] = STACK_ITER;
1205  
1206  		mark_stack_slot_scratched(env, spi - i);
1207  	}
1208  
1209  	return 0;
1210  }
1211  
1212  static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
1213  				   struct bpf_reg_state *reg, int nr_slots)
1214  {
1215  	struct bpf_func_state *state = func(env, reg);
1216  	int spi, i, j;
1217  
1218  	spi = iter_get_spi(env, reg, nr_slots);
1219  	if (spi < 0)
1220  		return spi;
1221  
1222  	for (i = 0; i < nr_slots; i++) {
1223  		struct bpf_stack_state *slot = &state->stack[spi - i];
1224  		struct bpf_reg_state *st = &slot->spilled_ptr;
1225  
1226  		if (i == 0)
1227  			WARN_ON_ONCE(release_reference(env, st->ref_obj_id));
1228  
1229  		__mark_reg_not_init(env, st);
1230  
1231  		/* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
1232  		st->live |= REG_LIVE_WRITTEN;
1233  
1234  		for (j = 0; j < BPF_REG_SIZE; j++)
1235  			slot->slot_type[j] = STACK_INVALID;
1236  
1237  		mark_stack_slot_scratched(env, spi - i);
1238  	}
1239  
1240  	return 0;
1241  }
1242  
1243  static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1244  				     struct bpf_reg_state *reg, int nr_slots)
1245  {
1246  	struct bpf_func_state *state = func(env, reg);
1247  	int spi, i, j;
1248  
1249  	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1250  	 * will do check_mem_access to check and update stack bounds later, so
1251  	 * return true for that case.
1252  	 */
1253  	spi = iter_get_spi(env, reg, nr_slots);
1254  	if (spi == -ERANGE)
1255  		return true;
1256  	if (spi < 0)
1257  		return false;
1258  
1259  	for (i = 0; i < nr_slots; i++) {
1260  		struct bpf_stack_state *slot = &state->stack[spi - i];
1261  
1262  		for (j = 0; j < BPF_REG_SIZE; j++)
1263  			if (slot->slot_type[j] == STACK_ITER)
1264  				return false;
1265  	}
1266  
1267  	return true;
1268  }
1269  
1270  static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1271  				   struct btf *btf, u32 btf_id, int nr_slots)
1272  {
1273  	struct bpf_func_state *state = func(env, reg);
1274  	int spi, i, j;
1275  
1276  	spi = iter_get_spi(env, reg, nr_slots);
1277  	if (spi < 0)
1278  		return false;
1279  
1280  	for (i = 0; i < nr_slots; i++) {
1281  		struct bpf_stack_state *slot = &state->stack[spi - i];
1282  		struct bpf_reg_state *st = &slot->spilled_ptr;
1283  
1284  		/* only main (first) slot has ref_obj_id set */
1285  		if (i == 0 && !st->ref_obj_id)
1286  			return false;
1287  		if (i != 0 && st->ref_obj_id)
1288  			return false;
1289  		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
1290  			return false;
1291  
1292  		for (j = 0; j < BPF_REG_SIZE; j++)
1293  			if (slot->slot_type[j] != STACK_ITER)
1294  				return false;
1295  	}
1296  
1297  	return true;
1298  }
1299  
1300  /* Check if given stack slot is "special":
1301   *   - spilled register state (STACK_SPILL);
1302   *   - dynptr state (STACK_DYNPTR);
1303   *   - iter state (STACK_ITER).
1304   */
1305  static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1306  {
1307  	enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1308  
1309  	switch (type) {
1310  	case STACK_SPILL:
1311  	case STACK_DYNPTR:
1312  	case STACK_ITER:
1313  		return true;
1314  	case STACK_INVALID:
1315  	case STACK_MISC:
1316  	case STACK_ZERO:
1317  		return false;
1318  	default:
1319  		WARN_ONCE(1, "unknown stack slot type %d\n", type);
1320  		return true;
1321  	}
1322  }
1323  
1324  /* The reg state of a pointer or a bounded scalar was saved when
1325   * it was spilled to the stack.
1326   */
1327  static bool is_spilled_reg(const struct bpf_stack_state *stack)
1328  {
1329  	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
1330  }
1331  
1332  static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
1333  {
1334  	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL &&
1335  	       stack->spilled_ptr.type == SCALAR_VALUE;
1336  }
1337  
1338  static void scrub_spilled_slot(u8 *stype)
1339  {
1340  	if (*stype != STACK_INVALID)
1341  		*stype = STACK_MISC;
1342  }
1343  
1344  static void print_verifier_state(struct bpf_verifier_env *env,
1345  				 const struct bpf_func_state *state,
1346  				 bool print_all)
1347  {
1348  	const struct bpf_reg_state *reg;
1349  	enum bpf_reg_type t;
1350  	int i;
1351  
1352  	if (state->frameno)
1353  		verbose(env, " frame%d:", state->frameno);
1354  	for (i = 0; i < MAX_BPF_REG; i++) {
1355  		reg = &state->regs[i];
1356  		t = reg->type;
1357  		if (t == NOT_INIT)
1358  			continue;
1359  		if (!print_all && !reg_scratched(env, i))
1360  			continue;
1361  		verbose(env, " R%d", i);
1362  		print_liveness(env, reg->live);
1363  		verbose(env, "=");
1364  		if (t == SCALAR_VALUE && reg->precise)
1365  			verbose(env, "P");
1366  		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
1367  		    tnum_is_const(reg->var_off)) {
1368  			/* reg->off should be 0 for SCALAR_VALUE */
1369  			verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
1370  			verbose(env, "%lld", reg->var_off.value + reg->off);
1371  		} else {
1372  			const char *sep = "";
1373  
1374  			verbose(env, "%s", reg_type_str(env, t));
1375  			if (base_type(t) == PTR_TO_BTF_ID)
1376  				verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id));
1377  			verbose(env, "(");
1378  /*
1379   * _a stands for append, was shortened to avoid multiline statements below.
1380   * This macro is used to output a comma separated list of attributes.
1381   */
1382  #define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
1383  
1384  			if (reg->id)
1385  				verbose_a("id=%d", reg->id);
1386  			if (reg->ref_obj_id)
1387  				verbose_a("ref_obj_id=%d", reg->ref_obj_id);
1388  			if (type_is_non_owning_ref(reg->type))
1389  				verbose_a("%s", "non_own_ref");
1390  			if (t != SCALAR_VALUE)
1391  				verbose_a("off=%d", reg->off);
1392  			if (type_is_pkt_pointer(t))
1393  				verbose_a("r=%d", reg->range);
1394  			else if (base_type(t) == CONST_PTR_TO_MAP ||
1395  				 base_type(t) == PTR_TO_MAP_KEY ||
1396  				 base_type(t) == PTR_TO_MAP_VALUE)
1397  				verbose_a("ks=%d,vs=%d",
1398  					  reg->map_ptr->key_size,
1399  					  reg->map_ptr->value_size);
1400  			if (tnum_is_const(reg->var_off)) {
1401  				/* Typically an immediate SCALAR_VALUE, but
1402  				 * could be a pointer whose offset is too big
1403  				 * for reg->off
1404  				 */
1405  				verbose_a("imm=%llx", reg->var_off.value);
1406  			} else {
1407  				if (reg->smin_value != reg->umin_value &&
1408  				    reg->smin_value != S64_MIN)
1409  					verbose_a("smin=%lld", (long long)reg->smin_value);
1410  				if (reg->smax_value != reg->umax_value &&
1411  				    reg->smax_value != S64_MAX)
1412  					verbose_a("smax=%lld", (long long)reg->smax_value);
1413  				if (reg->umin_value != 0)
1414  					verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
1415  				if (reg->umax_value != U64_MAX)
1416  					verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
1417  				if (!tnum_is_unknown(reg->var_off)) {
1418  					char tn_buf[48];
1419  
1420  					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1421  					verbose_a("var_off=%s", tn_buf);
1422  				}
1423  				if (reg->s32_min_value != reg->smin_value &&
1424  				    reg->s32_min_value != S32_MIN)
1425  					verbose_a("s32_min=%d", (int)(reg->s32_min_value));
1426  				if (reg->s32_max_value != reg->smax_value &&
1427  				    reg->s32_max_value != S32_MAX)
1428  					verbose_a("s32_max=%d", (int)(reg->s32_max_value));
1429  				if (reg->u32_min_value != reg->umin_value &&
1430  				    reg->u32_min_value != U32_MIN)
1431  					verbose_a("u32_min=%d", (int)(reg->u32_min_value));
1432  				if (reg->u32_max_value != reg->umax_value &&
1433  				    reg->u32_max_value != U32_MAX)
1434  					verbose_a("u32_max=%d", (int)(reg->u32_max_value));
1435  			}
1436  #undef verbose_a
1437  
1438  			verbose(env, ")");
1439  		}
1440  	}
1441  	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
1442  		char types_buf[BPF_REG_SIZE + 1];
1443  		bool valid = false;
1444  		int j;
1445  
1446  		for (j = 0; j < BPF_REG_SIZE; j++) {
1447  			if (state->stack[i].slot_type[j] != STACK_INVALID)
1448  				valid = true;
1449  			types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
1450  		}
1451  		types_buf[BPF_REG_SIZE] = 0;
1452  		if (!valid)
1453  			continue;
1454  		if (!print_all && !stack_slot_scratched(env, i))
1455  			continue;
1456  		switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) {
1457  		case STACK_SPILL:
1458  			reg = &state->stack[i].spilled_ptr;
1459  			t = reg->type;
1460  
1461  			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1462  			print_liveness(env, reg->live);
1463  			verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
1464  			if (t == SCALAR_VALUE && reg->precise)
1465  				verbose(env, "P");
1466  			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
1467  				verbose(env, "%lld", reg->var_off.value + reg->off);
1468  			break;
1469  		case STACK_DYNPTR:
1470  			i += BPF_DYNPTR_NR_SLOTS - 1;
1471  			reg = &state->stack[i].spilled_ptr;
1472  
1473  			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1474  			print_liveness(env, reg->live);
1475  			verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type));
1476  			if (reg->ref_obj_id)
1477  				verbose(env, "(ref_id=%d)", reg->ref_obj_id);
1478  			break;
1479  		case STACK_ITER:
1480  			/* only main slot has ref_obj_id set; skip others */
1481  			reg = &state->stack[i].spilled_ptr;
1482  			if (!reg->ref_obj_id)
1483  				continue;
1484  
1485  			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1486  			print_liveness(env, reg->live);
1487  			verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)",
1488  				iter_type_str(reg->iter.btf, reg->iter.btf_id),
1489  				reg->ref_obj_id, iter_state_str(reg->iter.state),
1490  				reg->iter.depth);
1491  			break;
1492  		case STACK_MISC:
1493  		case STACK_ZERO:
1494  		default:
1495  			reg = &state->stack[i].spilled_ptr;
1496  
1497  			for (j = 0; j < BPF_REG_SIZE; j++)
1498  				types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
1499  			types_buf[BPF_REG_SIZE] = 0;
1500  
1501  			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1502  			print_liveness(env, reg->live);
1503  			verbose(env, "=%s", types_buf);
1504  			break;
1505  		}
1506  	}
1507  	if (state->acquired_refs && state->refs[0].id) {
1508  		verbose(env, " refs=%d", state->refs[0].id);
1509  		for (i = 1; i < state->acquired_refs; i++)
1510  			if (state->refs[i].id)
1511  				verbose(env, ",%d", state->refs[i].id);
1512  	}
1513  	if (state->in_callback_fn)
1514  		verbose(env, " cb");
1515  	if (state->in_async_callback_fn)
1516  		verbose(env, " async_cb");
1517  	verbose(env, "\n");
1518  	mark_verifier_state_clean(env);
1519  }
1520  
1521  static inline u32 vlog_alignment(u32 pos)
1522  {
1523  	return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
1524  			BPF_LOG_MIN_ALIGNMENT) - pos - 1;
1525  }
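/* A worked example (macro values here are assumed for illustration): if the
 * previous state line ended at pos == 13 and BPF_LOG_MIN_ALIGNMENT/
 * BPF_LOG_ALIGNMENT were 8/40, the result would be
 * round_up(max(13 + 4, 40), 8) - 13 - 1 == 26 spaces of padding, so the ';'
 * printed via the "%*c;" format in print_insn_state() below lands on the
 * next alignment boundary.
 */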
1526  
1527  static void print_insn_state(struct bpf_verifier_env *env,
1528  			     const struct bpf_func_state *state)
1529  {
1530  	if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) {
1531  		/* remove new line character */
1532  		bpf_vlog_reset(&env->log, env->prev_log_pos - 1);
1533  		verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' ');
1534  	} else {
1535  		verbose(env, "%d:", env->insn_idx);
1536  	}
1537  	print_verifier_state(env, state, false);
1538  }
1539  
1540  /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1541   * small to hold src. This is different from krealloc since we don't want to preserve
1542   * the contents of dst.
1543   *
1544   * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1545   * not be allocated.
1546   */
1547  static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
1548  {
1549  	size_t alloc_bytes;
1550  	void *orig = dst;
1551  	size_t bytes;
1552  
1553  	if (ZERO_OR_NULL_PTR(src))
1554  		goto out;
1555  
1556  	if (unlikely(check_mul_overflow(n, size, &bytes)))
1557  		return NULL;
1558  
1559  	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1560  	dst = krealloc(orig, alloc_bytes, flags);
1561  	if (!dst) {
1562  		kfree(orig);
1563  		return NULL;
1564  	}
1565  
1566  	memcpy(dst, src, bytes);
1567  out:
1568  	return dst ? dst : ZERO_SIZE_PTR;
1569  }
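/* Usage sketch (mirrors copy_reference_state() below): callers pass an
 * element count and element size, e.g.
 *
 *	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
 *			       sizeof(*src->refs), GFP_KERNEL);
 *
 * A NULL return means the allocation failed; ZERO_SIZE_PTR is returned when
 * there was nothing to copy and dst was NULL to begin with.
 */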
1570  
1571  /* resize an array from old_n items to new_n items. the array is reallocated if it's too
1572   * small to hold new_n items. new items are zeroed out if the array grows.
1573   *
1574   * Contrary to krealloc_array, does not free arr if new_n is zero.
1575   */
1576  static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1577  {
1578  	size_t alloc_size;
1579  	void *new_arr;
1580  
1581  	if (!new_n || old_n == new_n)
1582  		goto out;
1583  
1584  	alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
1585  	new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
1586  	if (!new_arr) {
1587  		kfree(arr);
1588  		return NULL;
1589  	}
1590  	arr = new_arr;
1591  
1592  	if (new_n > old_n)
1593  		memset(arr + old_n * size, 0, (new_n - old_n) * size);
1594  
1595  out:
1596  	return arr ? arr : ZERO_SIZE_PTR;
1597  }
1598  
1599  static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1600  {
1601  	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1602  			       sizeof(struct bpf_reference_state), GFP_KERNEL);
1603  	if (!dst->refs)
1604  		return -ENOMEM;
1605  
1606  	dst->acquired_refs = src->acquired_refs;
1607  	return 0;
1608  }
1609  
1610  static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1611  {
1612  	size_t n = src->allocated_stack / BPF_REG_SIZE;
1613  
1614  	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1615  				GFP_KERNEL);
1616  	if (!dst->stack)
1617  		return -ENOMEM;
1618  
1619  	dst->allocated_stack = src->allocated_stack;
1620  	return 0;
1621  }
1622  
1623  static int resize_reference_state(struct bpf_func_state *state, size_t n)
1624  {
1625  	state->refs = realloc_array(state->refs, state->acquired_refs, n,
1626  				    sizeof(struct bpf_reference_state));
1627  	if (!state->refs)
1628  		return -ENOMEM;
1629  
1630  	state->acquired_refs = n;
1631  	return 0;
1632  }
1633  
1634  static int grow_stack_state(struct bpf_func_state *state, int size)
1635  {
1636  	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
1637  
1638  	if (old_n >= n)
1639  		return 0;
1640  
1641  	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1642  	if (!state->stack)
1643  		return -ENOMEM;
1644  
1645  	state->allocated_stack = size;
1646  	return 0;
1647  }
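/* Example: grow_stack_state(state, 64) guarantees at least 64 / BPF_REG_SIZE
 * zero-initialized stack slots; requests not exceeding the current
 * allocated_stack are a no-op.
 */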
1648  
1649  /* Acquire a pointer id from the env and update the state->refs to include
1650   * this new pointer reference.
1651   * On success, returns a valid pointer id to associate with the register.
1652   * On failure, returns a negative errno.
1653   */
1654  static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1655  {
1656  	struct bpf_func_state *state = cur_func(env);
1657  	int new_ofs = state->acquired_refs;
1658  	int id, err;
1659  
1660  	err = resize_reference_state(state, state->acquired_refs + 1);
1661  	if (err)
1662  		return err;
1663  	id = ++env->id_gen;
1664  	state->refs[new_ofs].id = id;
1665  	state->refs[new_ofs].insn_idx = insn_idx;
1666  	state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
1667  
1668  	return id;
1669  }
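/* Typical usage sketch (simplified from the helper/kfunc call paths later in
 * this file): the returned id is stored in the register holding the acquired
 * pointer, e.g.
 *
 *	id = acquire_reference_state(env, insn_idx);
 *	if (id < 0)
 *		return id;
 *	regs[BPF_REG_0].ref_obj_id = id;
 *
 * so that a later release can find and drop the matching entry.
 */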
1670  
1671  /* release function corresponding to acquire_reference_state(). Idempotent. */
1672  static int release_reference_state(struct bpf_func_state *state, int ptr_id)
1673  {
1674  	int i, last_idx;
1675  
1676  	last_idx = state->acquired_refs - 1;
1677  	for (i = 0; i < state->acquired_refs; i++) {
1678  		if (state->refs[i].id == ptr_id) {
1679  			/* Cannot release caller references in callbacks */
1680  			if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
1681  				return -EINVAL;
1682  			if (last_idx && i != last_idx)
1683  				memcpy(&state->refs[i], &state->refs[last_idx],
1684  				       sizeof(*state->refs));
1685  			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1686  			state->acquired_refs--;
1687  			return 0;
1688  		}
1689  	}
1690  	return -EINVAL;
1691  }
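/* Illustrative example of the swap-with-last removal above: with
 * acquired_refs == 3 and ids {5, 7, 9}, releasing id 5 copies the last entry
 * over slot 0, zeroes the last slot and decrements the count, leaving {9, 7}.
 * The order of the remaining references is not preserved.
 */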
1692  
1693  static void free_func_state(struct bpf_func_state *state)
1694  {
1695  	if (!state)
1696  		return;
1697  	kfree(state->refs);
1698  	kfree(state->stack);
1699  	kfree(state);
1700  }
1701  
1702  static void clear_jmp_history(struct bpf_verifier_state *state)
1703  {
1704  	kfree(state->jmp_history);
1705  	state->jmp_history = NULL;
1706  	state->jmp_history_cnt = 0;
1707  }
1708  
1709  static void free_verifier_state(struct bpf_verifier_state *state,
1710  				bool free_self)
1711  {
1712  	int i;
1713  
1714  	for (i = 0; i <= state->curframe; i++) {
1715  		free_func_state(state->frame[i]);
1716  		state->frame[i] = NULL;
1717  	}
1718  	clear_jmp_history(state);
1719  	if (free_self)
1720  		kfree(state);
1721  }
1722  
1723  /* copy verifier state from src to dst growing dst stack space
1724   * when necessary to accommodate larger src stack
1725   */
1726  static int copy_func_state(struct bpf_func_state *dst,
1727  			   const struct bpf_func_state *src)
1728  {
1729  	int err;
1730  
1731  	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
1732  	err = copy_reference_state(dst, src);
1733  	if (err)
1734  		return err;
1735  	return copy_stack_state(dst, src);
1736  }
1737  
1738  static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1739  			       const struct bpf_verifier_state *src)
1740  {
1741  	struct bpf_func_state *dst;
1742  	int i, err;
1743  
1744  	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
1745  					    src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
1746  					    GFP_USER);
1747  	if (!dst_state->jmp_history)
1748  		return -ENOMEM;
1749  	dst_state->jmp_history_cnt = src->jmp_history_cnt;
1750  
1751  	/* if dst has more stack frames than src, free them */
1752  	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1753  		free_func_state(dst_state->frame[i]);
1754  		dst_state->frame[i] = NULL;
1755  	}
1756  	dst_state->speculative = src->speculative;
1757  	dst_state->active_rcu_lock = src->active_rcu_lock;
1758  	dst_state->curframe = src->curframe;
1759  	dst_state->active_lock.ptr = src->active_lock.ptr;
1760  	dst_state->active_lock.id = src->active_lock.id;
1761  	dst_state->branches = src->branches;
1762  	dst_state->parent = src->parent;
1763  	dst_state->first_insn_idx = src->first_insn_idx;
1764  	dst_state->last_insn_idx = src->last_insn_idx;
1765  	for (i = 0; i <= src->curframe; i++) {
1766  		dst = dst_state->frame[i];
1767  		if (!dst) {
1768  			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1769  			if (!dst)
1770  				return -ENOMEM;
1771  			dst_state->frame[i] = dst;
1772  		}
1773  		err = copy_func_state(dst, src->frame[i]);
1774  		if (err)
1775  			return err;
1776  	}
1777  	return 0;
1778  }
1779  
1780  static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1781  {
1782  	while (st) {
1783  		u32 br = --st->branches;
1784  
1785  		/* WARN_ON(br > 1) technically makes sense here,
1786  		 * but see the comment in push_stack(); hence only warn on underflow:
1787  		 */
1788  		WARN_ONCE((int)br < 0,
1789  			  "BUG update_branch_counts:branches_to_explore=%d\n",
1790  			  br);
1791  		if (br)
1792  			break;
1793  		st = st->parent;
1794  	}
1795  }
1796  
1797  static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1798  		     int *insn_idx, bool pop_log)
1799  {
1800  	struct bpf_verifier_state *cur = env->cur_state;
1801  	struct bpf_verifier_stack_elem *elem, *head = env->head;
1802  	int err;
1803  
1804  	if (env->head == NULL)
1805  		return -ENOENT;
1806  
1807  	if (cur) {
1808  		err = copy_verifier_state(cur, &head->st);
1809  		if (err)
1810  			return err;
1811  	}
1812  	if (pop_log)
1813  		bpf_vlog_reset(&env->log, head->log_pos);
1814  	if (insn_idx)
1815  		*insn_idx = head->insn_idx;
1816  	if (prev_insn_idx)
1817  		*prev_insn_idx = head->prev_insn_idx;
1818  	elem = head->next;
1819  	free_verifier_state(&head->st, false);
1820  	kfree(head);
1821  	env->head = elem;
1822  	env->stack_size--;
1823  	return 0;
1824  }
1825  
1826  static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1827  					     int insn_idx, int prev_insn_idx,
1828  					     bool speculative)
1829  {
1830  	struct bpf_verifier_state *cur = env->cur_state;
1831  	struct bpf_verifier_stack_elem *elem;
1832  	int err;
1833  
1834  	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1835  	if (!elem)
1836  		goto err;
1837  
1838  	elem->insn_idx = insn_idx;
1839  	elem->prev_insn_idx = prev_insn_idx;
1840  	elem->next = env->head;
1841  	elem->log_pos = env->log.end_pos;
1842  	env->head = elem;
1843  	env->stack_size++;
1844  	err = copy_verifier_state(&elem->st, cur);
1845  	if (err)
1846  		goto err;
1847  	elem->st.speculative |= speculative;
1848  	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1849  		verbose(env, "The sequence of %d jumps is too complex.\n",
1850  			env->stack_size);
1851  		goto err;
1852  	}
1853  	if (elem->st.parent) {
1854  		++elem->st.parent->branches;
1855  		/* WARN_ON(branches > 2) technically makes sense here,
1856  		 * but
1857  		 * 1. speculative states will bump 'branches' for non-branch
1858  		 * instructions
1859  		 * 2. is_state_visited() heuristics may decide not to create
1860  		 * a new state for a sequence of branches and all such current
1861  		 * and cloned states will be pointing to a single parent state
1862  		 * which might have large 'branches' count.
1863  		 */
1864  	}
1865  	return &elem->st;
1866  err:
1867  	free_verifier_state(env->cur_state, true);
1868  	env->cur_state = NULL;
1869  	/* pop all elements and return */
1870  	while (!pop_stack(env, NULL, NULL, false));
1871  	return NULL;
1872  }
1873  
1874  #define CALLER_SAVED_REGS 6
1875  static const int caller_saved[CALLER_SAVED_REGS] = {
1876  	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1877  };
1878  
1879  /* This helper doesn't clear reg->id */
1880  static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1881  {
1882  	reg->var_off = tnum_const(imm);
1883  	reg->smin_value = (s64)imm;
1884  	reg->smax_value = (s64)imm;
1885  	reg->umin_value = imm;
1886  	reg->umax_value = imm;
1887  
1888  	reg->s32_min_value = (s32)imm;
1889  	reg->s32_max_value = (s32)imm;
1890  	reg->u32_min_value = (u32)imm;
1891  	reg->u32_max_value = (u32)imm;
1892  }
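/* Example: ___mark_reg_known(reg, 0x1ffffffff) sets all four 64-bit bounds to
 * 0x1ffffffff, while the 32-bit sub-bounds track the truncated low word:
 * the s32 bounds become -1 and the u32 bounds become U32_MAX.
 */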
1893  
1894  /* Mark the unknown part of a register (variable offset or scalar value) as
1895   * known to have the value @imm.
1896   */
1897  static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1898  {
1899  	/* Clear off and union(map_ptr, range) */
1900  	memset(((u8 *)reg) + sizeof(reg->type), 0,
1901  	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1902  	reg->id = 0;
1903  	reg->ref_obj_id = 0;
1904  	___mark_reg_known(reg, imm);
1905  }
1906  
1907  static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1908  {
1909  	reg->var_off = tnum_const_subreg(reg->var_off, imm);
1910  	reg->s32_min_value = (s32)imm;
1911  	reg->s32_max_value = (s32)imm;
1912  	reg->u32_min_value = (u32)imm;
1913  	reg->u32_max_value = (u32)imm;
1914  }
1915  
1916  /* Mark the 'variable offset' part of a register as zero.  This should be
1917   * used only on registers holding a pointer type.
1918   */
1919  static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1920  {
1921  	__mark_reg_known(reg, 0);
1922  }
1923  
1924  static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1925  {
1926  	__mark_reg_known(reg, 0);
1927  	reg->type = SCALAR_VALUE;
1928  }
1929  
1930  static void mark_reg_known_zero(struct bpf_verifier_env *env,
1931  				struct bpf_reg_state *regs, u32 regno)
1932  {
1933  	if (WARN_ON(regno >= MAX_BPF_REG)) {
1934  		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1935  		/* Something bad happened, let's kill all regs */
1936  		for (regno = 0; regno < MAX_BPF_REG; regno++)
1937  			__mark_reg_not_init(env, regs + regno);
1938  		return;
1939  	}
1940  	__mark_reg_known_zero(regs + regno);
1941  }
1942  
1943  static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
1944  			      bool first_slot, int dynptr_id)
1945  {
1946  	/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
1947  	 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
1948  	 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
1949  	 */
1950  	__mark_reg_known_zero(reg);
1951  	reg->type = CONST_PTR_TO_DYNPTR;
1952  	/* Give each dynptr a unique id to uniquely associate slices to it. */
1953  	reg->id = dynptr_id;
1954  	reg->dynptr.type = type;
1955  	reg->dynptr.first_slot = first_slot;
1956  }
1957  
1958  static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1959  {
1960  	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1961  		const struct bpf_map *map = reg->map_ptr;
1962  
1963  		if (map->inner_map_meta) {
1964  			reg->type = CONST_PTR_TO_MAP;
1965  			reg->map_ptr = map->inner_map_meta;
1966  			/* transfer reg's id, which is unique for every map_lookup_elem,
1967  			 * as the UID of the inner map.
1968  			 */
1969  			if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
1970  				reg->map_uid = reg->id;
1971  		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1972  			reg->type = PTR_TO_XDP_SOCK;
1973  		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1974  			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1975  			reg->type = PTR_TO_SOCKET;
1976  		} else {
1977  			reg->type = PTR_TO_MAP_VALUE;
1978  		}
1979  		return;
1980  	}
1981  
1982  	reg->type &= ~PTR_MAYBE_NULL;
1983  }
1984  
1985  static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
1986  				struct btf_field_graph_root *ds_head)
1987  {
1988  	__mark_reg_known_zero(&regs[regno]);
1989  	regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
1990  	regs[regno].btf = ds_head->btf;
1991  	regs[regno].btf_id = ds_head->value_btf_id;
1992  	regs[regno].off = ds_head->node_offset;
1993  }
1994  
1995  static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1996  {
1997  	return type_is_pkt_pointer(reg->type);
1998  }
1999  
2000  static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
2001  {
2002  	return reg_is_pkt_pointer(reg) ||
2003  	       reg->type == PTR_TO_PACKET_END;
2004  }
2005  
2006  static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
2007  {
2008  	return base_type(reg->type) == PTR_TO_MEM &&
2009  		(reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
2010  }
2011  
2012  /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
2013  static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
2014  				    enum bpf_reg_type which)
2015  {
2016  	/* The register can already have a range from prior markings.
2017  	 * This is fine as long as it hasn't been advanced from its
2018  	 * origin.
2019  	 */
2020  	return reg->type == which &&
2021  	       reg->id == 0 &&
2022  	       reg->off == 0 &&
2023  	       tnum_equals_const(reg->var_off, 0);
2024  }
2025  
2026  /* Reset the min/max bounds of a register */
2027  static void __mark_reg_unbounded(struct bpf_reg_state *reg)
2028  {
2029  	reg->smin_value = S64_MIN;
2030  	reg->smax_value = S64_MAX;
2031  	reg->umin_value = 0;
2032  	reg->umax_value = U64_MAX;
2033  
2034  	reg->s32_min_value = S32_MIN;
2035  	reg->s32_max_value = S32_MAX;
2036  	reg->u32_min_value = 0;
2037  	reg->u32_max_value = U32_MAX;
2038  }
2039  
2040  static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
2041  {
2042  	reg->smin_value = S64_MIN;
2043  	reg->smax_value = S64_MAX;
2044  	reg->umin_value = 0;
2045  	reg->umax_value = U64_MAX;
2046  }
2047  
2048  static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
2049  {
2050  	reg->s32_min_value = S32_MIN;
2051  	reg->s32_max_value = S32_MAX;
2052  	reg->u32_min_value = 0;
2053  	reg->u32_max_value = U32_MAX;
2054  }
2055  
2056  static void __update_reg32_bounds(struct bpf_reg_state *reg)
2057  {
2058  	struct tnum var32_off = tnum_subreg(reg->var_off);
2059  
2060  	/* min signed is max(sign bit) | min(other bits) */
2061  	reg->s32_min_value = max_t(s32, reg->s32_min_value,
2062  			var32_off.value | (var32_off.mask & S32_MIN));
2063  	/* max signed is min(sign bit) | max(other bits) */
2064  	reg->s32_max_value = min_t(s32, reg->s32_max_value,
2065  			var32_off.value | (var32_off.mask & S32_MAX));
2066  	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
2067  	reg->u32_max_value = min(reg->u32_max_value,
2068  				 (u32)(var32_off.value | var32_off.mask));
2069  }
2070  
2071  static void __update_reg64_bounds(struct bpf_reg_state *reg)
2072  {
2073  	/* min signed is max(sign bit) | min(other bits) */
2074  	reg->smin_value = max_t(s64, reg->smin_value,
2075  				reg->var_off.value | (reg->var_off.mask & S64_MIN));
2076  	/* max signed is min(sign bit) | max(other bits) */
2077  	reg->smax_value = min_t(s64, reg->smax_value,
2078  				reg->var_off.value | (reg->var_off.mask & S64_MAX));
2079  	reg->umin_value = max(reg->umin_value, reg->var_off.value);
2080  	reg->umax_value = min(reg->umax_value,
2081  			      reg->var_off.value | reg->var_off.mask);
2082  }
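/* Worked example for the tnum-to-bounds derivation above: with
 * var_off = (value 0x0, mask 0xff), i.e. only the low 8 bits unknown, the
 * sign bit is known clear, so smin is raised to at least 0 and smax is
 * lowered to at most 0xff; similarly umax is lowered to at most
 * (value | mask) == 0xff.
 */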
2083  
2084  static void __update_reg_bounds(struct bpf_reg_state *reg)
2085  {
2086  	__update_reg32_bounds(reg);
2087  	__update_reg64_bounds(reg);
2088  }
2089  
2090  /* Uses signed min/max values to inform unsigned, and vice-versa */
2091  static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
2092  {
2093  	/* Learn sign from signed bounds.
2094  	 * If we cannot cross the sign boundary, then signed and unsigned bounds
2095  	 * are the same, so combine.  This works even in the negative case, e.g.
2096  	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2097  	 */
2098  	if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
2099  		reg->s32_min_value = reg->u32_min_value =
2100  			max_t(u32, reg->s32_min_value, reg->u32_min_value);
2101  		reg->s32_max_value = reg->u32_max_value =
2102  			min_t(u32, reg->s32_max_value, reg->u32_max_value);
2103  		return;
2104  	}
2105  	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
2106  	 * boundary, so we must be careful.
2107  	 */
2108  	if ((s32)reg->u32_max_value >= 0) {
2109  		/* Positive.  We can't learn anything from the smin, but smax
2110  		 * is positive, hence safe.
2111  		 */
2112  		reg->s32_min_value = reg->u32_min_value;
2113  		reg->s32_max_value = reg->u32_max_value =
2114  			min_t(u32, reg->s32_max_value, reg->u32_max_value);
2115  	} else if ((s32)reg->u32_min_value < 0) {
2116  		/* Negative.  We can't learn anything from the smax, but smin
2117  		 * is negative, hence safe.
2118  		 */
2119  		reg->s32_min_value = reg->u32_min_value =
2120  			max_t(u32, reg->s32_min_value, reg->u32_min_value);
2121  		reg->s32_max_value = reg->u32_max_value;
2122  	}
2123  }
2124  
2125  static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
2126  {
2127  	/* Learn sign from signed bounds.
2128  	 * If we cannot cross the sign boundary, then signed and unsigned bounds
2129  	 * are the same, so combine.  This works even in the negative case, e.g.
2130  	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2131  	 */
2132  	if (reg->smin_value >= 0 || reg->smax_value < 0) {
2133  		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
2134  							  reg->umin_value);
2135  		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
2136  							  reg->umax_value);
2137  		return;
2138  	}
2139  	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
2140  	 * boundary, so we must be careful.
2141  	 */
2142  	if ((s64)reg->umax_value >= 0) {
2143  		/* Positive.  We can't learn anything from the smin, but smax
2144  		 * is positive, hence safe.
2145  		 */
2146  		reg->smin_value = reg->umin_value;
2147  		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
2148  							  reg->umax_value);
2149  	} else if ((s64)reg->umin_value < 0) {
2150  		/* Negative.  We can't learn anything from the smax, but smin
2151  		 * is negative, hence safe.
2152  		 */
2153  		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
2154  							  reg->umin_value);
2155  		reg->smax_value = reg->umax_value;
2156  	}
2157  }
2158  
2159  static void __reg_deduce_bounds(struct bpf_reg_state *reg)
2160  {
2161  	__reg32_deduce_bounds(reg);
2162  	__reg64_deduce_bounds(reg);
2163  }
2164  
2165  /* Attempts to improve var_off based on unsigned min/max information */
2166  static void __reg_bound_offset(struct bpf_reg_state *reg)
2167  {
2168  	struct tnum var64_off = tnum_intersect(reg->var_off,
2169  					       tnum_range(reg->umin_value,
2170  							  reg->umax_value));
2171  	struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
2172  					       tnum_range(reg->u32_min_value,
2173  							  reg->u32_max_value));
2174  
2175  	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
2176  }
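/* Example of the refinement above: if the u32 bounds have collapsed to the
 * single value 16, tnum_range(16, 16) is the constant tnum 16, and
 * intersecting it with the current subreg tnum leaves the low 32 bits of
 * var_off fully known as well.
 */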
2177  
2178  static void reg_bounds_sync(struct bpf_reg_state *reg)
2179  {
2180  	/* We might have learned new bounds from the var_off. */
2181  	__update_reg_bounds(reg);
2182  	/* We might have learned something about the sign bit. */
2183  	__reg_deduce_bounds(reg);
2184  	/* We might have learned some bits from the bounds. */
2185  	__reg_bound_offset(reg);
2186  	/* Intersecting with the old var_off might have improved our bounds
2187  	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
2188  	 * then new var_off is (0; 0x7f...fc) which improves our umax.
2189  	 */
2190  	__update_reg_bounds(reg);
2191  }
2192  
2193  static bool __reg32_bound_s64(s32 a)
2194  {
2195  	return a >= 0 && a <= S32_MAX;
2196  }
2197  
2198  static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
2199  {
2200  	reg->umin_value = reg->u32_min_value;
2201  	reg->umax_value = reg->u32_max_value;
2202  
2203  	/* Attempt to pull the 32-bit signed bounds into the 64-bit bounds, but
2204  	 * they must be non-negative; otherwise fall back to worst-case bounds
2205  	 * and refine them later from the tnum.
2206  	 */
2207  	if (__reg32_bound_s64(reg->s32_min_value) &&
2208  	    __reg32_bound_s64(reg->s32_max_value)) {
2209  		reg->smin_value = reg->s32_min_value;
2210  		reg->smax_value = reg->s32_max_value;
2211  	} else {
2212  		reg->smin_value = 0;
2213  		reg->smax_value = U32_MAX;
2214  	}
2215  }
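/* Example: a register whose upper 32 bits are known zero and whose 32-bit
 * bounds are [1, 10] (signed and unsigned) gets 64-bit bounds [1, 10] as
 * well. If the s32 bounds straddle the sign boundary, e.g. [-5, 5], only
 * [0, U32_MAX] can be assumed for the signed 64-bit bounds here; they are
 * refined afterwards via reg_bounds_sync() in the caller.
 */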
2216  
2217  static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
2218  {
2219  	/* Special case when the 64-bit register has its upper 32 bits known
2220  	 * to be zero. This typically happens after a zext or a <<32, >>32
2221  	 * sequence and allows us to use the 32-bit bounds directly.
2222  	 */
2223  	if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
2224  		__reg_assign_32_into_64(reg);
2225  	} else {
2226  		/* Otherwise the best we can do is push the lower 32-bit known
2227  		 * and unknown bits into the register (var_off set from jmp
2228  		 * logic) and then learn as much as possible from the 64-bit
2229  		 * tnum's known and unknown bits. The previous smin/smax bounds
2230  		 * are invalid here because of the jmp32 compare, so mark them
2231  		 * unbounded so they do not impact the tnum bounds calculation.
2232  		 */
2233  		__mark_reg64_unbounded(reg);
2234  	}
2235  	reg_bounds_sync(reg);
2236  }
2237  
2238  static bool __reg64_bound_s32(s64 a)
2239  {
2240  	return a >= S32_MIN && a <= S32_MAX;
2241  }
2242  
2243  static bool __reg64_bound_u32(u64 a)
2244  {
2245  	return a >= U32_MIN && a <= U32_MAX;
2246  }
2247  
2248  static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
2249  {
2250  	__mark_reg32_unbounded(reg);
2251  	if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
2252  		reg->s32_min_value = (s32)reg->smin_value;
2253  		reg->s32_max_value = (s32)reg->smax_value;
2254  	}
2255  	if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
2256  		reg->u32_min_value = (u32)reg->umin_value;
2257  		reg->u32_max_value = (u32)reg->umax_value;
2258  	}
2259  	reg_bounds_sync(reg);
2260  }
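/* Example: 64-bit signed bounds [-5, 5] fit in s32, so the 32-bit signed
 * bounds become [-5, 5] too; unsigned 64-bit bounds such as [0, U64_MAX] do
 * not fit in u32, so the u32 bounds keep the worst-case values set by
 * __mark_reg32_unbounded() above, subject to later refinement in
 * reg_bounds_sync().
 */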
2261  
2262  /* Mark a register as having a completely unknown (scalar) value. */
2263  static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2264  			       struct bpf_reg_state *reg)
2265  {
2266  	/*
2267  	 * Clear type, off, and union(map_ptr, range) and
2268  	 * padding between 'type' and union
2269  	 */
2270  	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
2271  	reg->type = SCALAR_VALUE;
2272  	reg->id = 0;
2273  	reg->ref_obj_id = 0;
2274  	reg->var_off = tnum_unknown;
2275  	reg->frameno = 0;
2276  	reg->precise = !env->bpf_capable;
2277  	__mark_reg_unbounded(reg);
2278  }
2279  
2280  static void mark_reg_unknown(struct bpf_verifier_env *env,
2281  			     struct bpf_reg_state *regs, u32 regno)
2282  {
2283  	if (WARN_ON(regno >= MAX_BPF_REG)) {
2284  		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
2285  		/* Something bad happened, let's kill all regs except FP */
2286  		for (regno = 0; regno < BPF_REG_FP; regno++)
2287  			__mark_reg_not_init(env, regs + regno);
2288  		return;
2289  	}
2290  	__mark_reg_unknown(env, regs + regno);
2291  }
2292  
2293  static void __mark_reg_not_init(const struct bpf_verifier_env *env,
2294  				struct bpf_reg_state *reg)
2295  {
2296  	__mark_reg_unknown(env, reg);
2297  	reg->type = NOT_INIT;
2298  }
2299  
2300  static void mark_reg_not_init(struct bpf_verifier_env *env,
2301  			      struct bpf_reg_state *regs, u32 regno)
2302  {
2303  	if (WARN_ON(regno >= MAX_BPF_REG)) {
2304  		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
2305  		/* Something bad happened, let's kill all regs except FP */
2306  		for (regno = 0; regno < BPF_REG_FP; regno++)
2307  			__mark_reg_not_init(env, regs + regno);
2308  		return;
2309  	}
2310  	__mark_reg_not_init(env, regs + regno);
2311  }
2312  
2313  static void mark_btf_ld_reg(struct bpf_verifier_env *env,
2314  			    struct bpf_reg_state *regs, u32 regno,
2315  			    enum bpf_reg_type reg_type,
2316  			    struct btf *btf, u32 btf_id,
2317  			    enum bpf_type_flag flag)
2318  {
2319  	if (reg_type == SCALAR_VALUE) {
2320  		mark_reg_unknown(env, regs, regno);
2321  		return;
2322  	}
2323  	mark_reg_known_zero(env, regs, regno);
2324  	regs[regno].type = PTR_TO_BTF_ID | flag;
2325  	regs[regno].btf = btf;
2326  	regs[regno].btf_id = btf_id;
2327  }
2328  
2329  #define DEF_NOT_SUBREG	(0)
2330  static void init_reg_state(struct bpf_verifier_env *env,
2331  			   struct bpf_func_state *state)
2332  {
2333  	struct bpf_reg_state *regs = state->regs;
2334  	int i;
2335  
2336  	for (i = 0; i < MAX_BPF_REG; i++) {
2337  		mark_reg_not_init(env, regs, i);
2338  		regs[i].live = REG_LIVE_NONE;
2339  		regs[i].parent = NULL;
2340  		regs[i].subreg_def = DEF_NOT_SUBREG;
2341  	}
2342  
2343  	/* frame pointer */
2344  	regs[BPF_REG_FP].type = PTR_TO_STACK;
2345  	mark_reg_known_zero(env, regs, BPF_REG_FP);
2346  	regs[BPF_REG_FP].frameno = state->frameno;
2347  }
2348  
2349  #define BPF_MAIN_FUNC (-1)
2350  static void init_func_state(struct bpf_verifier_env *env,
2351  			    struct bpf_func_state *state,
2352  			    int callsite, int frameno, int subprogno)
2353  {
2354  	state->callsite = callsite;
2355  	state->frameno = frameno;
2356  	state->subprogno = subprogno;
2357  	state->callback_ret_range = tnum_range(0, 0);
2358  	init_reg_state(env, state);
2359  	mark_verifier_state_scratched(env);
2360  }
2361  
2362  /* Similar to push_stack(), but for async callbacks */
2363  static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
2364  						int insn_idx, int prev_insn_idx,
2365  						int subprog)
2366  {
2367  	struct bpf_verifier_stack_elem *elem;
2368  	struct bpf_func_state *frame;
2369  
2370  	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
2371  	if (!elem)
2372  		goto err;
2373  
2374  	elem->insn_idx = insn_idx;
2375  	elem->prev_insn_idx = prev_insn_idx;
2376  	elem->next = env->head;
2377  	elem->log_pos = env->log.end_pos;
2378  	env->head = elem;
2379  	env->stack_size++;
2380  	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2381  		verbose(env,
2382  			"The sequence of %d jumps is too complex for async cb.\n",
2383  			env->stack_size);
2384  		goto err;
2385  	}
2386  	/* Unlike push_stack() do not copy_verifier_state().
2387  	 * The caller state doesn't matter.
2388  	 * This is an async callback. It starts on a fresh stack.
2389  	 * Initialize it similar to do_check_common().
2390  	 */
2391  	elem->st.branches = 1;
2392  	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
2393  	if (!frame)
2394  		goto err;
2395  	init_func_state(env, frame,
2396  			BPF_MAIN_FUNC /* callsite */,
2397  			0 /* frameno within this callchain */,
2398  			subprog /* subprog number within this prog */);
2399  	elem->st.frame[0] = frame;
2400  	return &elem->st;
2401  err:
2402  	free_verifier_state(env->cur_state, true);
2403  	env->cur_state = NULL;
2404  	/* pop all elements and return */
2405  	while (!pop_stack(env, NULL, NULL, false));
2406  	return NULL;
2407  }
2408  
2409  
2410  enum reg_arg_type {
2411  	SRC_OP,		/* register is used as source operand */
2412  	DST_OP,		/* register is used as destination operand */
2413  	DST_OP_NO_MARK	/* same as above, check only, don't mark */
2414  };
2415  
2416  static int cmp_subprogs(const void *a, const void *b)
2417  {
2418  	return ((struct bpf_subprog_info *)a)->start -
2419  	       ((struct bpf_subprog_info *)b)->start;
2420  }
2421  
2422  static int find_subprog(struct bpf_verifier_env *env, int off)
2423  {
2424  	struct bpf_subprog_info *p;
2425  
2426  	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
2427  		    sizeof(env->subprog_info[0]), cmp_subprogs);
2428  	if (!p)
2429  		return -ENOENT;
2430  	return p - env->subprog_info;
2431  
2432  }
2433  
2434  static int add_subprog(struct bpf_verifier_env *env, int off)
2435  {
2436  	int insn_cnt = env->prog->len;
2437  	int ret;
2438  
2439  	if (off >= insn_cnt || off < 0) {
2440  		verbose(env, "call to invalid destination\n");
2441  		return -EINVAL;
2442  	}
2443  	ret = find_subprog(env, off);
2444  	if (ret >= 0)
2445  		return ret;
2446  	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2447  		verbose(env, "too many subprograms\n");
2448  		return -E2BIG;
2449  	}
2450  	/* determine subprog starts. The end is one before the next starts */
2451  	env->subprog_info[env->subprog_cnt++].start = off;
2452  	sort(env->subprog_info, env->subprog_cnt,
2453  	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
2454  	return env->subprog_cnt - 1;
2455  }
2456  
2457  #define MAX_KFUNC_DESCS 256
2458  #define MAX_KFUNC_BTFS	256
2459  
2460  struct bpf_kfunc_desc {
2461  	struct btf_func_model func_model;
2462  	u32 func_id;
2463  	s32 imm;
2464  	u16 offset;
2465  	unsigned long addr;
2466  };
2467  
2468  struct bpf_kfunc_btf {
2469  	struct btf *btf;
2470  	struct module *module;
2471  	u16 offset;
2472  };
2473  
2474  struct bpf_kfunc_desc_tab {
2475  	/* Sorted by func_id (BTF ID) and offset (fd_array offset) during
2476  	 * verification. JITs do lookups by bpf_insn, where func_id may not be
2477  	 * available, therefore at the end of verification do_misc_fixups()
2478  	 * sorts this by imm and offset.
2479  	 */
2480  	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
2481  	u32 nr_descs;
2482  };
2483  
2484  struct bpf_kfunc_btf_tab {
2485  	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
2486  	u32 nr_descs;
2487  };
2488  
2489  static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
2490  {
2491  	const struct bpf_kfunc_desc *d0 = a;
2492  	const struct bpf_kfunc_desc *d1 = b;
2493  
2494  	/* func_id is not greater than BTF_MAX_TYPE */
2495  	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2496  }
2497  
2498  static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2499  {
2500  	const struct bpf_kfunc_btf *d0 = a;
2501  	const struct bpf_kfunc_btf *d1 = b;
2502  
2503  	return d0->offset - d1->offset;
2504  }
2505  
2506  static const struct bpf_kfunc_desc *
2507  find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2508  {
2509  	struct bpf_kfunc_desc desc = {
2510  		.func_id = func_id,
2511  		.offset = offset,
2512  	};
2513  	struct bpf_kfunc_desc_tab *tab;
2514  
2515  	tab = prog->aux->kfunc_tab;
2516  	return bsearch(&desc, tab->descs, tab->nr_descs,
2517  		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
2518  }
2519  
2520  int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
2521  		       u16 btf_fd_idx, u8 **func_addr)
2522  {
2523  	const struct bpf_kfunc_desc *desc;
2524  
2525  	desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
2526  	if (!desc)
2527  		return -EFAULT;
2528  
2529  	*func_addr = (u8 *)desc->addr;
2530  	return 0;
2531  }
2532  
2533  static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
2534  					 s16 offset)
2535  {
2536  	struct bpf_kfunc_btf kf_btf = { .offset = offset };
2537  	struct bpf_kfunc_btf_tab *tab;
2538  	struct bpf_kfunc_btf *b;
2539  	struct module *mod;
2540  	struct btf *btf;
2541  	int btf_fd;
2542  
2543  	tab = env->prog->aux->kfunc_btf_tab;
2544  	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
2545  		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
2546  	if (!b) {
2547  		if (tab->nr_descs == MAX_KFUNC_BTFS) {
2548  			verbose(env, "too many different module BTFs\n");
2549  			return ERR_PTR(-E2BIG);
2550  		}
2551  
2552  		if (bpfptr_is_null(env->fd_array)) {
2553  			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
2554  			return ERR_PTR(-EPROTO);
2555  		}
2556  
2557  		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
2558  					    offset * sizeof(btf_fd),
2559  					    sizeof(btf_fd)))
2560  			return ERR_PTR(-EFAULT);
2561  
2562  		btf = btf_get_by_fd(btf_fd);
2563  		if (IS_ERR(btf)) {
2564  			verbose(env, "invalid module BTF fd specified\n");
2565  			return btf;
2566  		}
2567  
2568  		if (!btf_is_module(btf)) {
2569  			verbose(env, "BTF fd for kfunc is not a module BTF\n");
2570  			btf_put(btf);
2571  			return ERR_PTR(-EINVAL);
2572  		}
2573  
2574  		mod = btf_try_get_module(btf);
2575  		if (!mod) {
2576  			btf_put(btf);
2577  			return ERR_PTR(-ENXIO);
2578  		}
2579  
2580  		b = &tab->descs[tab->nr_descs++];
2581  		b->btf = btf;
2582  		b->module = mod;
2583  		b->offset = offset;
2584  
2585  		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2586  		     kfunc_btf_cmp_by_off, NULL);
2587  	}
2588  	return b->btf;
2589  }
2590  
2591  void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2592  {
2593  	if (!tab)
2594  		return;
2595  
2596  	while (tab->nr_descs--) {
2597  		module_put(tab->descs[tab->nr_descs].module);
2598  		btf_put(tab->descs[tab->nr_descs].btf);
2599  	}
2600  	kfree(tab);
2601  }
2602  
2603  static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2604  {
2605  	if (offset) {
2606  		if (offset < 0) {
2607  			/* In the future, negative offsets may be allowed to extend
2608  			 * the range of fd indexes into fd_array, interpreted as u16.
2609  			 */
2610  			verbose(env, "negative offset disallowed for kernel module function call\n");
2611  			return ERR_PTR(-EINVAL);
2612  		}
2613  
2614  		return __find_kfunc_desc_btf(env, offset);
2615  	}
2616  	return btf_vmlinux ?: ERR_PTR(-ENOENT);
2617  }
2618  
2619  static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
2620  {
2621  	const struct btf_type *func, *func_proto;
2622  	struct bpf_kfunc_btf_tab *btf_tab;
2623  	struct bpf_kfunc_desc_tab *tab;
2624  	struct bpf_prog_aux *prog_aux;
2625  	struct bpf_kfunc_desc *desc;
2626  	const char *func_name;
2627  	struct btf *desc_btf;
2628  	unsigned long call_imm;
2629  	unsigned long addr;
2630  	int err;
2631  
2632  	prog_aux = env->prog->aux;
2633  	tab = prog_aux->kfunc_tab;
2634  	btf_tab = prog_aux->kfunc_btf_tab;
2635  	if (!tab) {
2636  		if (!btf_vmlinux) {
2637  			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2638  			return -ENOTSUPP;
2639  		}
2640  
2641  		if (!env->prog->jit_requested) {
2642  			verbose(env, "JIT is required for calling kernel function\n");
2643  			return -ENOTSUPP;
2644  		}
2645  
2646  		if (!bpf_jit_supports_kfunc_call()) {
2647  			verbose(env, "JIT does not support calling kernel function\n");
2648  			return -ENOTSUPP;
2649  		}
2650  
2651  		if (!env->prog->gpl_compatible) {
2652  			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
2653  			return -EINVAL;
2654  		}
2655  
2656  		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
2657  		if (!tab)
2658  			return -ENOMEM;
2659  		prog_aux->kfunc_tab = tab;
2660  	}
2661  
2662  	/* func_id == 0 is always invalid, but instead of returning an error, be
2663  	 * conservative and wait until the dead code elimination pass before
2664  	 * returning an error, so that BPF programs loaded from userspace may
2665  	 * contain invalid calls that end up being pruned out. It is also
2666  	 * required that offset be untouched for such calls.
2667  	 */
2668  	if (!func_id && !offset)
2669  		return 0;
2670  
2671  	if (!btf_tab && offset) {
2672  		btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
2673  		if (!btf_tab)
2674  			return -ENOMEM;
2675  		prog_aux->kfunc_btf_tab = btf_tab;
2676  	}
2677  
2678  	desc_btf = find_kfunc_desc_btf(env, offset);
2679  	if (IS_ERR(desc_btf)) {
2680  		verbose(env, "failed to find BTF for kernel function\n");
2681  		return PTR_ERR(desc_btf);
2682  	}
2683  
2684  	if (find_kfunc_desc(env->prog, func_id, offset))
2685  		return 0;
2686  
2687  	if (tab->nr_descs == MAX_KFUNC_DESCS) {
2688  		verbose(env, "too many different kernel function calls\n");
2689  		return -E2BIG;
2690  	}
2691  
2692  	func = btf_type_by_id(desc_btf, func_id);
2693  	if (!func || !btf_type_is_func(func)) {
2694  		verbose(env, "kernel btf_id %u is not a function\n",
2695  			func_id);
2696  		return -EINVAL;
2697  	}
2698  	func_proto = btf_type_by_id(desc_btf, func->type);
2699  	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
2700  		verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
2701  			func_id);
2702  		return -EINVAL;
2703  	}
2704  
2705  	func_name = btf_name_by_offset(desc_btf, func->name_off);
2706  	addr = kallsyms_lookup_name(func_name);
2707  	if (!addr) {
2708  		verbose(env, "cannot find address for kernel function %s\n",
2709  			func_name);
2710  		return -EINVAL;
2711  	}
2712  	specialize_kfunc(env, func_id, offset, &addr);
2713  
2714  	if (bpf_jit_supports_far_kfunc_call()) {
2715  		call_imm = func_id;
2716  	} else {
2717  		call_imm = BPF_CALL_IMM(addr);
2718  		/* Check whether the relative offset overflows desc->imm */
2719  		if ((unsigned long)(s32)call_imm != call_imm) {
2720  			verbose(env, "address of kernel function %s is out of range\n",
2721  				func_name);
2722  			return -EINVAL;
2723  		}
2724  	}
2725  
2726  	if (bpf_dev_bound_kfunc_id(func_id)) {
2727  		err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
2728  		if (err)
2729  			return err;
2730  	}
2731  
2732  	desc = &tab->descs[tab->nr_descs++];
2733  	desc->func_id = func_id;
2734  	desc->imm = call_imm;
2735  	desc->offset = offset;
2736  	desc->addr = addr;
2737  	err = btf_distill_func_proto(&env->log, desc_btf,
2738  				     func_proto, func_name,
2739  				     &desc->func_model);
2740  	if (!err)
2741  		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2742  		     kfunc_desc_cmp_by_id_off, NULL);
2743  	return err;
2744  }
2745  
2746  static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
2747  {
2748  	const struct bpf_kfunc_desc *d0 = a;
2749  	const struct bpf_kfunc_desc *d1 = b;
2750  
2751  	if (d0->imm != d1->imm)
2752  		return d0->imm < d1->imm ? -1 : 1;
2753  	if (d0->offset != d1->offset)
2754  		return d0->offset < d1->offset ? -1 : 1;
2755  	return 0;
2756  }
2757  
2758  static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog)
2759  {
2760  	struct bpf_kfunc_desc_tab *tab;
2761  
2762  	tab = prog->aux->kfunc_tab;
2763  	if (!tab)
2764  		return;
2765  
2766  	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2767  	     kfunc_desc_cmp_by_imm_off, NULL);
2768  }
2769  
2770  bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2771  {
2772  	return !!prog->aux->kfunc_tab;
2773  }
2774  
2775  const struct btf_func_model *
2776  bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
2777  			 const struct bpf_insn *insn)
2778  {
2779  	const struct bpf_kfunc_desc desc = {
2780  		.imm = insn->imm,
2781  		.offset = insn->off,
2782  	};
2783  	const struct bpf_kfunc_desc *res;
2784  	struct bpf_kfunc_desc_tab *tab;
2785  
2786  	tab = prog->aux->kfunc_tab;
2787  	res = bsearch(&desc, tab->descs, tab->nr_descs,
2788  		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
2789  
2790  	return res ? &res->func_model : NULL;
2791  }
2792  
2793  static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
2794  {
2795  	struct bpf_subprog_info *subprog = env->subprog_info;
2796  	struct bpf_insn *insn = env->prog->insnsi;
2797  	int i, ret, insn_cnt = env->prog->len;
2798  
2799  	/* Add entry function. */
2800  	ret = add_subprog(env, 0);
2801  	if (ret)
2802  		return ret;
2803  
2804  	for (i = 0; i < insn_cnt; i++, insn++) {
2805  		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2806  		    !bpf_pseudo_kfunc_call(insn))
2807  			continue;
2808  
2809  		if (!env->bpf_capable) {
2810  			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
2811  			return -EPERM;
2812  		}
2813  
2814  		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
2815  			ret = add_subprog(env, i + insn->imm + 1);
2816  		else
2817  			ret = add_kfunc_call(env, insn->imm, insn->off);
2818  
2819  		if (ret < 0)
2820  			return ret;
2821  	}
2822  
2823  	/* Add a fake 'exit' subprog to simplify subprog iteration
2824  	 * logic. 'subprog_cnt' should not be increased.
2825  	 */
2826  	subprog[env->subprog_cnt].start = insn_cnt;
2827  
2828  	if (env->log.level & BPF_LOG_LEVEL2)
2829  		for (i = 0; i < env->subprog_cnt; i++)
2830  			verbose(env, "func#%d @%d\n", i, subprog[i].start);
2831  
2832  	return 0;
2833  }
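/* Subprogram layout sketch: for a 20-insn program with subprog starts
 * {0, 5, 12}, the fake 'exit' entry added above yields
 * subprog_info[].start == {0, 5, 12, 20}, so subprog i spans instructions
 * [start[i], start[i + 1]) and check_subprogs() below can verify that no
 * jump crosses a subprog boundary.
 */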
2834  
2835  static int check_subprogs(struct bpf_verifier_env *env)
2836  {
2837  	int i, subprog_start, subprog_end, off, cur_subprog = 0;
2838  	struct bpf_subprog_info *subprog = env->subprog_info;
2839  	struct bpf_insn *insn = env->prog->insnsi;
2840  	int insn_cnt = env->prog->len;
2841  
2842  	/* now check that all jumps are within the same subprog */
2843  	subprog_start = subprog[cur_subprog].start;
2844  	subprog_end = subprog[cur_subprog + 1].start;
2845  	for (i = 0; i < insn_cnt; i++) {
2846  		u8 code = insn[i].code;
2847  
2848  		if (code == (BPF_JMP | BPF_CALL) &&
2849  		    insn[i].src_reg == 0 &&
2850  		    insn[i].imm == BPF_FUNC_tail_call)
2851  			subprog[cur_subprog].has_tail_call = true;
2852  		if (BPF_CLASS(code) == BPF_LD &&
2853  		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2854  			subprog[cur_subprog].has_ld_abs = true;
2855  		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2856  			goto next;
2857  		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
2858  			goto next;
2859  		if (code == (BPF_JMP32 | BPF_JA))
2860  			off = i + insn[i].imm + 1;
2861  		else
2862  			off = i + insn[i].off + 1;
2863  		if (off < subprog_start || off >= subprog_end) {
2864  			verbose(env, "jump out of range from insn %d to %d\n", i, off);
2865  			return -EINVAL;
2866  		}
2867  next:
2868  		if (i == subprog_end - 1) {
2869  			/* to avoid fall-through from one subprog into another
2870  			 * the last insn of the subprog should be either exit
2871  			 * or unconditional jump back
2872  			 */
2873  			if (code != (BPF_JMP | BPF_EXIT) &&
2874  			    code != (BPF_JMP32 | BPF_JA) &&
2875  			    code != (BPF_JMP | BPF_JA)) {
2876  				verbose(env, "last insn is not an exit or jmp\n");
2877  				return -EINVAL;
2878  			}
2879  			subprog_start = subprog_end;
2880  			cur_subprog++;
2881  			if (cur_subprog < env->subprog_cnt)
2882  				subprog_end = subprog[cur_subprog + 1].start;
2883  		}
2884  	}
2885  	return 0;
2886  }
2887  
2888  /* Parentage chain of this register (or stack slot) should take care of all
2889   * issues like callee-saved registers, stack slot allocation time, etc.
2890   */
2891  static int mark_reg_read(struct bpf_verifier_env *env,
2892  			 const struct bpf_reg_state *state,
2893  			 struct bpf_reg_state *parent, u8 flag)
2894  {
2895  	bool writes = parent == state->parent; /* Observe write marks */
2896  	int cnt = 0;
2897  
2898  	while (parent) {
2899  		/* if read wasn't screened by an earlier write ... */
2900  		if (writes && state->live & REG_LIVE_WRITTEN)
2901  			break;
2902  		if (parent->live & REG_LIVE_DONE) {
2903  			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
2904  				reg_type_str(env, parent->type),
2905  				parent->var_off.value, parent->off);
2906  			return -EFAULT;
2907  		}
2908  		/* The first condition is more likely to be true than the
2909  		 * second, so check it first.
2910  		 */
2911  		if ((parent->live & REG_LIVE_READ) == flag ||
2912  		    parent->live & REG_LIVE_READ64)
2913  			/* The parentage chain never changes and
2914  			 * this parent was already marked as LIVE_READ.
2915  			 * There is no need to keep walking the chain again and
2916  			 * keep re-marking all parents as LIVE_READ.
2917  			 * This case happens when the same register is read
2918  			 * multiple times without writes into it in-between.
2919  			 * Also, if parent has the stronger REG_LIVE_READ64 set,
2920  			 * then no need to set the weak REG_LIVE_READ32.
2921  			 */
2922  			break;
2923  		/* ... then we depend on parent's value */
2924  		parent->live |= flag;
2925  		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
2926  		if (flag == REG_LIVE_READ64)
2927  			parent->live &= ~REG_LIVE_READ32;
2928  		state = parent;
2929  		parent = state->parent;
2930  		writes = true;
2931  		cnt++;
2932  	}
2933  
2934  	if (env->longest_mark_read_walk < cnt)
2935  		env->longest_mark_read_walk = cnt;
2936  	return 0;
2937  }
2938  
2939  static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
2940  {
2941  	struct bpf_func_state *state = func(env, reg);
2942  	int spi, ret;
2943  
2944  	/* For CONST_PTR_TO_DYNPTR, it must have already been done by
2945  	 * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
2946  	 * check_kfunc_call.
2947  	 */
2948  	if (reg->type == CONST_PTR_TO_DYNPTR)
2949  		return 0;
2950  	spi = dynptr_get_spi(env, reg);
2951  	if (spi < 0)
2952  		return spi;
2953  	/* Caller ensures dynptr is valid and initialized, which means spi is in
2954  	 * bounds and spi is the first dynptr slot. Simply mark stack slot as
2955  	 * read.
2956  	 */
2957  	ret = mark_reg_read(env, &state->stack[spi].spilled_ptr,
2958  			    state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64);
2959  	if (ret)
2960  		return ret;
2961  	return mark_reg_read(env, &state->stack[spi - 1].spilled_ptr,
2962  			     state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64);
2963  }
2964  
2965  static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
2966  			  int spi, int nr_slots)
2967  {
2968  	struct bpf_func_state *state = func(env, reg);
2969  	int err, i;
2970  
2971  	for (i = 0; i < nr_slots; i++) {
2972  		struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr;
2973  
2974  		err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64);
2975  		if (err)
2976  			return err;
2977  
2978  		mark_stack_slot_scratched(env, spi - i);
2979  	}
2980  
2981  	return 0;
2982  }
2983  
2984  /* This function is supposed to be used by the following 32-bit optimization
2985   * code only. It returns TRUE if the source or destination register operates
2986   * on 64 bits, otherwise FALSE.
2987   */
2988  static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
2989  		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
2990  {
2991  	u8 code, class, op;
2992  
2993  	code = insn->code;
2994  	class = BPF_CLASS(code);
2995  	op = BPF_OP(code);
2996  	if (class == BPF_JMP) {
2997  		/* BPF_EXIT for "main" will reach here. Return TRUE
2998  		 * conservatively.
2999  		 */
3000  		if (op == BPF_EXIT)
3001  			return true;
3002  		if (op == BPF_CALL) {
3003  			/* BPF to BPF calls reach here because caller-saved
3004  			 * clobbers are marked with DST_OP_NO_MARK, for which we
3005  			 * don't care about the register def since they are
3006  			 * already marked as NOT_INIT anyway.
3007  			 */
3008  			if (insn->src_reg == BPF_PSEUDO_CALL)
3009  				return false;
3010  			/* Helper call will reach here because of arg type
3011  			 * check, conservatively return TRUE.
3012  			 */
3013  			if (t == SRC_OP)
3014  				return true;
3015  
3016  			return false;
3017  		}
3018  	}
3019  
3020  	if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
3021  		return false;
3022  
3023  	if (class == BPF_ALU64 || class == BPF_JMP ||
3024  	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
3025  		return true;
3026  
3027  	if (class == BPF_ALU || class == BPF_JMP32)
3028  		return false;
3029  
3030  	if (class == BPF_LDX) {
3031  		if (t != SRC_OP)
3032  			return BPF_SIZE(code) == BPF_DW;
3033  		/* LDX source must be ptr. */
3034  		return true;
3035  	}
3036  
3037  	if (class == BPF_STX) {
3038  		/* BPF_STX (including atomic variants) has multiple source
3039  		 * operands, one of which is a ptr. Check whether the caller is
3040  		 * asking about it.
3041  		 */
3042  		if (t == SRC_OP && reg->type != SCALAR_VALUE)
3043  			return true;
3044  		return BPF_SIZE(code) == BPF_DW;
3045  	}
3046  
3047  	if (class == BPF_LD) {
3048  		u8 mode = BPF_MODE(code);
3049  
3050  		/* LD_IMM64 */
3051  		if (mode == BPF_IMM)
3052  			return true;
3053  
3054  		/* Both LD_IND and LD_ABS return 32-bit data. */
3055  		if (t != SRC_OP)
3056  			return false;
3057  
3058  		/* Implicit ctx ptr. */
3059  		if (regno == BPF_REG_6)
3060  			return true;
3061  
3062  		/* Explicit source could be any width. */
3063  		return true;
3064  	}
3065  
3066  	if (class == BPF_ST)
3067  		/* The only source register for BPF_ST is a ptr. */
3068  		return true;
3069  
3070  	/* Conservatively return true at default. */
3071  	return true;
3072  }
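
/* A few illustrative cases of the classification above (not exhaustive):
 *
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1)       -> true: full 64-bit use of r1
 *   BPF_MOV32_IMM(BPF_REG_1, 1)                -> false: only the low 32-bit
 *                                                 subregister of r1 is written
 *   BPF_LDX_MEM(BPF_DW, BPF_REG_2, ...)        -> true for the destination:
 *                                                 an 8-byte load fills all
 *                                                 64 bits of r2
 */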
3073  
3074  /* Return the regno defined by the insn, or -1. */
3075  static int insn_def_regno(const struct bpf_insn *insn)
3076  {
3077  	switch (BPF_CLASS(insn->code)) {
3078  	case BPF_JMP:
3079  	case BPF_JMP32:
3080  	case BPF_ST:
3081  		return -1;
3082  	case BPF_STX:
3083  		if (BPF_MODE(insn->code) == BPF_ATOMIC &&
3084  		    (insn->imm & BPF_FETCH)) {
3085  			if (insn->imm == BPF_CMPXCHG)
3086  				return BPF_REG_0;
3087  			else
3088  				return insn->src_reg;
3089  		} else {
3090  			return -1;
3091  		}
3092  	default:
3093  		return insn->dst_reg;
3094  	}
3095  }
3096  
3097  /* Return TRUE if INSN has defined any 32-bit value explicitly. */
3098  static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
3099  {
3100  	int dst_reg = insn_def_regno(insn);
3101  
3102  	if (dst_reg == -1)
3103  		return false;
3104  
3105  	return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
3106  }
3107  
3108  static void mark_insn_zext(struct bpf_verifier_env *env,
3109  			   struct bpf_reg_state *reg)
3110  {
3111  	s32 def_idx = reg->subreg_def;
3112  
3113  	if (def_idx == DEF_NOT_SUBREG)
3114  		return;
3115  
3116  	env->insn_aux_data[def_idx - 1].zext_dst = true;
3117  	/* The dst will be zero extended, so won't be sub-register anymore. */
3118  	reg->subreg_def = DEF_NOT_SUBREG;
3119  }
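
/* Example: if insn 11 was "w1 = w2" (32-bit ALU MOV), r1's subreg_def was set
 * to 12 when that write was verified. A later 64-bit read of r1 lands here and
 * sets insn_aux_data[11].zext_dst, telling the later zero-extension patching
 * pass that the upper 32 bits of r1 must be cleared right after insn 11.
 */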
3120  
3121  static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
3122  			 enum reg_arg_type t)
3123  {
3124  	struct bpf_verifier_state *vstate = env->cur_state;
3125  	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3126  	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
3127  	struct bpf_reg_state *reg, *regs = state->regs;
3128  	bool rw64;
3129  
3130  	if (regno >= MAX_BPF_REG) {
3131  		verbose(env, "R%d is invalid\n", regno);
3132  		return -EINVAL;
3133  	}
3134  
3135  	mark_reg_scratched(env, regno);
3136  
3137  	reg = &regs[regno];
3138  	rw64 = is_reg64(env, insn, regno, reg, t);
3139  	if (t == SRC_OP) {
3140  		/* check whether register used as source operand can be read */
3141  		if (reg->type == NOT_INIT) {
3142  			verbose(env, "R%d !read_ok\n", regno);
3143  			return -EACCES;
3144  		}
3145  		/* We don't need to worry about FP liveness because it's read-only */
3146  		if (regno == BPF_REG_FP)
3147  			return 0;
3148  
3149  		if (rw64)
3150  			mark_insn_zext(env, reg);
3151  
3152  		return mark_reg_read(env, reg, reg->parent,
3153  				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
3154  	} else {
3155  		/* check whether register used as dest operand can be written to */
3156  		if (regno == BPF_REG_FP) {
3157  			verbose(env, "frame pointer is read only\n");
3158  			return -EACCES;
3159  		}
3160  		reg->live |= REG_LIVE_WRITTEN;
3161  		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
3162  		if (t == DST_OP)
3163  			mark_reg_unknown(env, regs, regno);
3164  	}
3165  	return 0;
3166  }
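
/* Example of how the width marking plays out. For "w2 = w3"
 * (BPF_ALU | BPF_MOV | BPF_X) is_reg64() returns false for both operands, so
 * r3 is marked with the weaker REG_LIVE_READ32 and r2 gets
 * subreg_def = insn_idx + 1, recording that only its low 32 bits are defined.
 * For the 64-bit "r2 = r3" both registers are treated as full width: r3 gets
 * REG_LIVE_READ64 and r2's subreg_def is reset to DEF_NOT_SUBREG.
 */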
3167  
3168  static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
3169  {
3170  	env->insn_aux_data[idx].jmp_point = true;
3171  }
3172  
3173  static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
3174  {
3175  	return env->insn_aux_data[insn_idx].jmp_point;
3176  }
3177  
3178  /* for any branch, call, exit record the history of jmps in the given state */
3179  static int push_jmp_history(struct bpf_verifier_env *env,
3180  			    struct bpf_verifier_state *cur)
3181  {
3182  	u32 cnt = cur->jmp_history_cnt;
3183  	struct bpf_idx_pair *p;
3184  	size_t alloc_size;
3185  
3186  	if (!is_jmp_point(env, env->insn_idx))
3187  		return 0;
3188  
3189  	cnt++;
3190  	alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
3191  	p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
3192  	if (!p)
3193  		return -ENOMEM;
3194  	p[cnt - 1].idx = env->insn_idx;
3195  	p[cnt - 1].prev_idx = env->prev_insn_idx;
3196  	cur->jmp_history = p;
3197  	cur->jmp_history_cnt = cnt;
3198  	return 0;
3199  }
3200  
3201  /* Backtrack one insn at a time. If idx is not at the top of the recorded
3202   * history, then the previous instruction came from straight-line execution.
3203   */
3204  static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
3205  			     u32 *history)
3206  {
3207  	u32 cnt = *history;
3208  
3209  	if (cnt && st->jmp_history[cnt - 1].idx == i) {
3210  		i = st->jmp_history[cnt - 1].prev_idx;
3211  		(*history)--;
3212  	} else {
3213  		i--;
3214  	}
3215  	return i;
3216  }
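
/* Example: with jmp_history = [{idx = 12, prev_idx = 4}] and *history == 1,
 * asking for the predecessor of insn 12 returns 4 (the recorded jump source)
 * and consumes that history entry, while asking for the predecessor of
 * insn 9 simply returns 8, i.e. straight-line execution is assumed.
 */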
3217  
3218  static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
3219  {
3220  	const struct btf_type *func;
3221  	struct btf *desc_btf;
3222  
3223  	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
3224  		return NULL;
3225  
3226  	desc_btf = find_kfunc_desc_btf(data, insn->off);
3227  	if (IS_ERR(desc_btf))
3228  		return "<error>";
3229  
3230  	func = btf_type_by_id(desc_btf, insn->imm);
3231  	return btf_name_by_offset(desc_btf, func->name_off);
3232  }
3233  
3234  static inline void bt_init(struct backtrack_state *bt, u32 frame)
3235  {
3236  	bt->frame = frame;
3237  }
3238  
3239  static inline void bt_reset(struct backtrack_state *bt)
3240  {
3241  	struct bpf_verifier_env *env = bt->env;
3242  
3243  	memset(bt, 0, sizeof(*bt));
3244  	bt->env = env;
3245  }
3246  
3247  static inline u32 bt_empty(struct backtrack_state *bt)
3248  {
3249  	u64 mask = 0;
3250  	int i;
3251  
3252  	for (i = 0; i <= bt->frame; i++)
3253  		mask |= bt->reg_masks[i] | bt->stack_masks[i];
3254  
3255  	return mask == 0;
3256  }
3257  
3258  static inline int bt_subprog_enter(struct backtrack_state *bt)
3259  {
3260  	if (bt->frame == MAX_CALL_FRAMES - 1) {
3261  		verbose(bt->env, "BUG subprog enter from frame %d\n", bt->frame);
3262  		WARN_ONCE(1, "verifier backtracking bug");
3263  		return -EFAULT;
3264  	}
3265  	bt->frame++;
3266  	return 0;
3267  }
3268  
3269  static inline int bt_subprog_exit(struct backtrack_state *bt)
3270  {
3271  	if (bt->frame == 0) {
3272  		verbose(bt->env, "BUG subprog exit from frame 0\n");
3273  		WARN_ONCE(1, "verifier backtracking bug");
3274  		return -EFAULT;
3275  	}
3276  	bt->frame--;
3277  	return 0;
3278  }
3279  
3280  static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
3281  {
3282  	bt->reg_masks[frame] |= 1 << reg;
3283  }
3284  
3285  static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
3286  {
3287  	bt->reg_masks[frame] &= ~(1 << reg);
3288  }
3289  
3290  static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
3291  {
3292  	bt_set_frame_reg(bt, bt->frame, reg);
3293  }
3294  
3295  static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
3296  {
3297  	bt_clear_frame_reg(bt, bt->frame, reg);
3298  }
3299  
3300  static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
3301  {
3302  	bt->stack_masks[frame] |= 1ull << slot;
3303  }
3304  
3305  static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
3306  {
3307  	bt->stack_masks[frame] &= ~(1ull << slot);
3308  }
3309  
3310  static inline void bt_set_slot(struct backtrack_state *bt, u32 slot)
3311  {
3312  	bt_set_frame_slot(bt, bt->frame, slot);
3313  }
3314  
3315  static inline void bt_clear_slot(struct backtrack_state *bt, u32 slot)
3316  {
3317  	bt_clear_frame_slot(bt, bt->frame, slot);
3318  }
3319  
3320  static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
3321  {
3322  	return bt->reg_masks[frame];
3323  }
3324  
3325  static inline u32 bt_reg_mask(struct backtrack_state *bt)
3326  {
3327  	return bt->reg_masks[bt->frame];
3328  }
3329  
3330  static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
3331  {
3332  	return bt->stack_masks[frame];
3333  }
3334  
3335  static inline u64 bt_stack_mask(struct backtrack_state *bt)
3336  {
3337  	return bt->stack_masks[bt->frame];
3338  }
3339  
3340  static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
3341  {
3342  	return bt->reg_masks[bt->frame] & (1 << reg);
3343  }
3344  
3345  static inline bool bt_is_slot_set(struct backtrack_state *bt, u32 slot)
3346  {
3347  	return bt->stack_masks[bt->frame] & (1ull << slot);
3348  }
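
/* Usage sketch for the bt_*() helpers above: they only manipulate per-frame
 * bitmasks. E.g. bt_set_frame_reg(bt, 0, BPF_REG_6) sets bit 6 in
 * bt->reg_masks[0], and bt_set_slot(bt, 3) sets bit 3 in the current frame's
 * stack mask, i.e. stack slot fp-32. Register masks are 32-bit (one bit per
 * r0..r10), while stack masks are 64-bit, covering up to 64 slots (512 bytes)
 * of stack.
 */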
3349  
3350  /* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
3351  static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
3352  {
3353  	DECLARE_BITMAP(mask, 64);
3354  	bool first = true;
3355  	int i, n;
3356  
3357  	buf[0] = '\0';
3358  
3359  	bitmap_from_u64(mask, reg_mask);
3360  	for_each_set_bit(i, mask, 32) {
3361  		n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
3362  		first = false;
3363  		buf += n;
3364  		buf_sz -= n;
3365  		if (buf_sz < 0)
3366  			break;
3367  	}
3368  }
3369  /* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
3370  static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
3371  {
3372  	DECLARE_BITMAP(mask, 64);
3373  	bool first = true;
3374  	int i, n;
3375  
3376  	buf[0] = '\0';
3377  
3378  	bitmap_from_u64(mask, stack_mask);
3379  	for_each_set_bit(i, mask, 64) {
3380  		n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
3381  		first = false;
3382  		buf += n;
3383  		buf_sz -= n;
3384  		if (buf_sz < 0)
3385  			break;
3386  	}
3387  }
3388  
3389  /* For given verifier state backtrack_insn() is called from the last insn to
3390   * the first insn. Its purpose is to compute a bitmask of registers and
3391   * stack slots that need precision in the parent verifier state.
3392   *
3393   * @idx is an index of the instruction we are currently processing;
3394   * @subseq_idx is an index of the subsequent instruction that:
3395   *   - *would be* executed next, if jump history is viewed in forward order;
3396   *   - *was* processed previously during backtracking.
3397   */
3398  static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
3399  			  struct backtrack_state *bt)
3400  {
3401  	const struct bpf_insn_cbs cbs = {
3402  		.cb_call	= disasm_kfunc_name,
3403  		.cb_print	= verbose,
3404  		.private_data	= env,
3405  	};
3406  	struct bpf_insn *insn = env->prog->insnsi + idx;
3407  	u8 class = BPF_CLASS(insn->code);
3408  	u8 opcode = BPF_OP(insn->code);
3409  	u8 mode = BPF_MODE(insn->code);
3410  	u32 dreg = insn->dst_reg;
3411  	u32 sreg = insn->src_reg;
3412  	u32 spi, i;
3413  
3414  	if (insn->code == 0)
3415  		return 0;
3416  	if (env->log.level & BPF_LOG_LEVEL2) {
3417  		fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
3418  		verbose(env, "mark_precise: frame%d: regs=%s ",
3419  			bt->frame, env->tmp_str_buf);
3420  		fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
3421  		verbose(env, "stack=%s before ", env->tmp_str_buf);
3422  		verbose(env, "%d: ", idx);
3423  		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
3424  	}
3425  
3426  	if (class == BPF_ALU || class == BPF_ALU64) {
3427  		if (!bt_is_reg_set(bt, dreg))
3428  			return 0;
3429  		if (opcode == BPF_MOV) {
3430  			if (BPF_SRC(insn->code) == BPF_X) {
3431  				/* dreg = sreg or dreg = (s8, s16, s32)sreg
3432  				 * dreg needs precision after this insn
3433  				 * sreg needs precision before this insn
3434  				 */
3435  				bt_clear_reg(bt, dreg);
3436  				bt_set_reg(bt, sreg);
3437  			} else {
3438  				/* dreg = K
3439  				 * dreg needs precision after this insn.
3440  				 * Corresponding register is already marked
3441  				 * as precise=true in this verifier state.
3442  				 * No further markings in parent are necessary
3443  				 */
3444  				bt_clear_reg(bt, dreg);
3445  			}
3446  		} else {
3447  			if (BPF_SRC(insn->code) == BPF_X) {
3448  				/* dreg += sreg
3449  				 * both dreg and sreg need precision
3450  				 * before this insn
3451  				 */
3452  				bt_set_reg(bt, sreg);
3453  			} /* else dreg += K
3454  			   * dreg still needs precision before this insn
3455  			   */
3456  		}
3457  	} else if (class == BPF_LDX) {
3458  		if (!bt_is_reg_set(bt, dreg))
3459  			return 0;
3460  		bt_clear_reg(bt, dreg);
3461  
3462  		/* scalars can only be spilled into stack w/o losing precision.
3463  		 * Load from any other memory can be zero extended.
3464  		 * The desire to keep that precision is already indicated
3465  		 * by 'precise' mark in corresponding register of this state.
3466  		 * No further tracking necessary.
3467  		 */
3468  		if (insn->src_reg != BPF_REG_FP)
3469  			return 0;
3470  
3471  		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
3472  		 * that [fp - off] slot contains scalar that needs to be
3473  		 * tracked with precision
3474  		 */
3475  		spi = (-insn->off - 1) / BPF_REG_SIZE;
3476  		if (spi >= 64) {
3477  			verbose(env, "BUG spi %d\n", spi);
3478  			WARN_ONCE(1, "verifier backtracking bug");
3479  			return -EFAULT;
3480  		}
3481  		bt_set_slot(bt, spi);
3482  	} else if (class == BPF_STX || class == BPF_ST) {
3483  		if (bt_is_reg_set(bt, dreg))
3484  			/* stx & st shouldn't be using _scalar_ dst_reg
3485  			 * to access memory. It means backtracking
3486  			 * encountered a case of pointer subtraction.
3487  			 */
3488  			return -ENOTSUPP;
3489  		/* scalars can only be spilled into stack */
3490  		if (insn->dst_reg != BPF_REG_FP)
3491  			return 0;
3492  		spi = (-insn->off - 1) / BPF_REG_SIZE;
3493  		if (spi >= 64) {
3494  			verbose(env, "BUG spi %d\n", spi);
3495  			WARN_ONCE(1, "verifier backtracking bug");
3496  			return -EFAULT;
3497  		}
3498  		if (!bt_is_slot_set(bt, spi))
3499  			return 0;
3500  		bt_clear_slot(bt, spi);
3501  		if (class == BPF_STX)
3502  			bt_set_reg(bt, sreg);
3503  	} else if (class == BPF_JMP || class == BPF_JMP32) {
3504  		if (bpf_pseudo_call(insn)) {
3505  			int subprog_insn_idx, subprog;
3506  
3507  			subprog_insn_idx = idx + insn->imm + 1;
3508  			subprog = find_subprog(env, subprog_insn_idx);
3509  			if (subprog < 0)
3510  				return -EFAULT;
3511  
3512  			if (subprog_is_global(env, subprog)) {
3513  				/* check that jump history doesn't have any
3514  				 * extra instructions from subprog; the next
3515  				 * instruction after call to global subprog
3516  				 * should be literally next instruction in
3517  				 * caller program
3518  				 */
3519  				WARN_ONCE(idx + 1 != subseq_idx, "verifier backtracking bug");
3520  				/* r1-r5 are invalidated after a subprog call,
3521  				 * so for a global func call they shouldn't be
3522  				 * set anymore
3523  				 */
3524  				if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3525  					verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3526  					WARN_ONCE(1, "verifier backtracking bug");
3527  					return -EFAULT;
3528  				}
3529  				/* global subprog always sets R0 */
3530  				bt_clear_reg(bt, BPF_REG_0);
3531  				return 0;
3532  			} else {
3533  				/* static subprog call instruction, which
3534  				 * means that we are exiting the current subprog,
3535  				 * so only r1-r5 could still be requested as
3536  				 * precise, r0 and r6-r10 or any stack slot in
3537  				 * the current frame should be zero by now
3538  				 */
3539  				if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
3540  					verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3541  					WARN_ONCE(1, "verifier backtracking bug");
3542  					return -EFAULT;
3543  				}
3544  				/* we don't track register spills perfectly,
3545  				 * so fall back to force-precise instead of failing */
3546  				if (bt_stack_mask(bt) != 0)
3547  					return -ENOTSUPP;
3548  				/* propagate r1-r5 to the caller */
3549  				for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
3550  					if (bt_is_reg_set(bt, i)) {
3551  						bt_clear_reg(bt, i);
3552  						bt_set_frame_reg(bt, bt->frame - 1, i);
3553  					}
3554  				}
3555  				if (bt_subprog_exit(bt))
3556  					return -EFAULT;
3557  				return 0;
3558  			}
3559  		} else if ((bpf_helper_call(insn) &&
3560  			    is_callback_calling_function(insn->imm) &&
3561  			    !is_async_callback_calling_function(insn->imm)) ||
3562  			   (bpf_pseudo_kfunc_call(insn) && is_callback_calling_kfunc(insn->imm))) {
3563  			/* callback-calling helper or kfunc call, which means
3564  			 * we are exiting from subprog, but unlike the subprog
3565  			 * call handling above, we shouldn't propagate
3566  			 * precision of r1-r5 (if any requested), as they are
3567  			 * not actually arguments passed directly to callback
3568  			 * subprogs
3569  			 */
3570  			if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
3571  				verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3572  				WARN_ONCE(1, "verifier backtracking bug");
3573  				return -EFAULT;
3574  			}
3575  			if (bt_stack_mask(bt) != 0)
3576  				return -ENOTSUPP;
3577  			/* clear r1-r5 in callback subprog's mask */
3578  			for (i = BPF_REG_1; i <= BPF_REG_5; i++)
3579  				bt_clear_reg(bt, i);
3580  			if (bt_subprog_exit(bt))
3581  				return -EFAULT;
3582  			return 0;
3583  		} else if (opcode == BPF_CALL) {
3584  			/* kfunc with imm==0 is invalid and fixup_kfunc_call will
3585  			 * catch this error later. Make backtracking conservative
3586  			 * with ENOTSUPP.
3587  			 */
3588  			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
3589  				return -ENOTSUPP;
3590  			/* regular helper call sets R0 */
3591  			bt_clear_reg(bt, BPF_REG_0);
3592  			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3593  				/* if backtracking was looking for registers R1-R5,
3594  				 * they should have been found already.
3595  				 */
3596  				verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3597  				WARN_ONCE(1, "verifier backtracking bug");
3598  				return -EFAULT;
3599  			}
3600  		} else if (opcode == BPF_EXIT) {
3601  			bool r0_precise;
3602  
3603  			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3604  				/* if backtracking was looking for registers R1-R5,
3605  				 * they should have been found already.
3606  				 */
3607  				verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3608  				WARN_ONCE(1, "verifier backtracking bug");
3609  				return -EFAULT;
3610  			}
3611  
3612  			/* BPF_EXIT in subprog or callback always returns
3613  			 * right after the call instruction, so by checking
3614  			 * whether the instruction at subseq_idx-1 is a subprog
3615  			 * call or not we can distinguish an actual exit from
3616  			 * a *subprog* from an exit from a *callback*. In the
3617  			 * former case, we need to propagate r0 precision, if
3618  			 * necessary. In the latter case, we never do that.
3619  			 */
3620  			r0_precise = subseq_idx - 1 >= 0 &&
3621  				     bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
3622  				     bt_is_reg_set(bt, BPF_REG_0);
3623  
3624  			bt_clear_reg(bt, BPF_REG_0);
3625  			if (bt_subprog_enter(bt))
3626  				return -EFAULT;
3627  
3628  			if (r0_precise)
3629  				bt_set_reg(bt, BPF_REG_0);
3630  			/* r6-r9 and stack slots will stay set in caller frame
3631  			 * bitmasks until we return back from callee(s)
3632  			 */
3633  			return 0;
3634  		} else if (BPF_SRC(insn->code) == BPF_X) {
3635  			if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
3636  				return 0;
3637  			/* dreg <cond> sreg
3638  			 * Both dreg and sreg need precision before
3639  			 * this insn. If only sreg was marked precise
3640  			 * before it would be equally necessary to
3641  			 * propagate it to dreg.
3642  			 */
3643  			bt_set_reg(bt, dreg);
3644  			bt_set_reg(bt, sreg);
3645  			 /* else dreg <cond> K
3646  			  * Only dreg still needs precision before
3647  			  * this insn, so for the K-based conditional
3648  			  * there is nothing new to be marked.
3649  			  */
3650  		}
3651  	} else if (class == BPF_LD) {
3652  		if (!bt_is_reg_set(bt, dreg))
3653  			return 0;
3654  		bt_clear_reg(bt, dreg);
3655  		/* It's ld_imm64 or ld_abs or ld_ind.
3656  		 * For ld_imm64 no further tracking of precision
3657  		 * into parent is necessary
3658  		 */
3659  		if (mode == BPF_IND || mode == BPF_ABS)
3660  			/* to be analyzed */
3661  			return -ENOTSUPP;
3662  	}
3663  	return 0;
3664  }
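
/* A short walk-through of the rules above (illustrative only). Suppose
 * precision of r6 is requested at insn 8:
 *
 *   5: r6 = *(u64 *)(r10 -8)
 *   6: r6 += 1
 *   7: r3 = r10
 *   8: r3 += r6
 *
 * Backtracking starts with reg_mask = {r6}. Insn 7 defines r3, which is not
 * in the mask, so nothing changes. Insn 6 is "dreg += K", so r6 stays
 * requested. Insn 5 is a fill from fp-8 into r6: r6 is cleared from the
 * register mask and stack slot fp-8 is added to the stack mask instead, to be
 * resolved further up in this state or in its parents.
 */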
3665  
3666  /* the scalar precision tracking algorithm:
3667   * . at the start all registers have precise=false.
3668   * . scalar ranges are tracked as normal through alu and jmp insns.
3669   * . once precise value of the scalar register is used in:
3670   *   .  ptr + scalar alu
3671   *   . if (scalar cond K|scalar)
3672   *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
3673   *   backtrack through the verifier states and mark all registers and
3674   *   stack slots with spilled constants that these scalar registers
3675   *   should be precise.
3676   * . during state pruning two registers (or spilled stack slots)
3677   *   are equivalent if both are not precise.
3678   *
3679   * Note the verifier cannot simply walk register parentage chain,
3680   * since many different registers and stack slots could have been
3681   * used to compute single precise scalar.
3682   *
3683   * The approach of starting with precise=true for all registers and then
3684   * backtracking to mark a register as not precise when the verifier detects
3685   * that the program doesn't care about the specific value (e.g., when a helper
3686   * takes a register as an ARG_ANYTHING parameter) is not safe.
3687   *
3688   * It's ok to walk single parentage chain of the verifier states.
3689   * It's possible that this backtracking will go all the way till 1st insn.
3690   * All other branches will be explored for needing precision later.
3691   *
3692   * The backtracking needs to deal with cases like:
3693   *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
3694   * r9 -= r8
3695   * r5 = r9
3696   * if r5 > 0x79f goto pc+7
3697   *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
3698   * r5 += 1
3699   * ...
3700   * call bpf_perf_event_output#25
3701   *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
3702   *
3703   * and this case:
3704   * r6 = 1
3705   * call foo // uses callee's r6 inside to compute r0
3706   * r0 += r6
3707   * if r0 == 0 goto
3708   *
3709   * to track above reg_mask/stack_mask needs to be independent for each frame.
3710   *
3711   * Also if parent's curframe > frame where backtracking started,
3712   * the verifier needs to mark registers in both frames, otherwise callees
3713   * may incorrectly prune callers. This is similar to
3714   * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
3715   *
3716   * For now backtracking falls back to conservative marking.
3717   */
3718  static void mark_all_scalars_precise(struct bpf_verifier_env *env,
3719  				     struct bpf_verifier_state *st)
3720  {
3721  	struct bpf_func_state *func;
3722  	struct bpf_reg_state *reg;
3723  	int i, j;
3724  
3725  	if (env->log.level & BPF_LOG_LEVEL2) {
3726  		verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
3727  			st->curframe);
3728  	}
3729  
3730  	/* big hammer: mark all scalars precise in this path.
3731  	 * pop_stack may still get !precise scalars.
3732  	 * We also skip current state and go straight to first parent state,
3733  	 * because precision markings in current non-checkpointed state are
3734  	 * not needed. See why in the comment in __mark_chain_precision below.
3735  	 */
3736  	for (st = st->parent; st; st = st->parent) {
3737  		for (i = 0; i <= st->curframe; i++) {
3738  			func = st->frame[i];
3739  			for (j = 0; j < BPF_REG_FP; j++) {
3740  				reg = &func->regs[j];
3741  				if (reg->type != SCALAR_VALUE || reg->precise)
3742  					continue;
3743  				reg->precise = true;
3744  				if (env->log.level & BPF_LOG_LEVEL2) {
3745  					verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
3746  						i, j);
3747  				}
3748  			}
3749  			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
3750  				if (!is_spilled_reg(&func->stack[j]))
3751  					continue;
3752  				reg = &func->stack[j].spilled_ptr;
3753  				if (reg->type != SCALAR_VALUE || reg->precise)
3754  					continue;
3755  				reg->precise = true;
3756  				if (env->log.level & BPF_LOG_LEVEL2) {
3757  					verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
3758  						i, -(j + 1) * 8);
3759  				}
3760  			}
3761  		}
3762  	}
3763  }
3764  
3765  static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
3766  {
3767  	struct bpf_func_state *func;
3768  	struct bpf_reg_state *reg;
3769  	int i, j;
3770  
3771  	for (i = 0; i <= st->curframe; i++) {
3772  		func = st->frame[i];
3773  		for (j = 0; j < BPF_REG_FP; j++) {
3774  			reg = &func->regs[j];
3775  			if (reg->type != SCALAR_VALUE)
3776  				continue;
3777  			reg->precise = false;
3778  		}
3779  		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
3780  			if (!is_spilled_reg(&func->stack[j]))
3781  				continue;
3782  			reg = &func->stack[j].spilled_ptr;
3783  			if (reg->type != SCALAR_VALUE)
3784  				continue;
3785  			reg->precise = false;
3786  		}
3787  	}
3788  }
3789  
3790  static bool idset_contains(struct bpf_idset *s, u32 id)
3791  {
3792  	u32 i;
3793  
3794  	for (i = 0; i < s->count; ++i)
3795  		if (s->ids[i] == id)
3796  			return true;
3797  
3798  	return false;
3799  }
3800  
3801  static int idset_push(struct bpf_idset *s, u32 id)
3802  {
3803  	if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids)))
3804  		return -EFAULT;
3805  	s->ids[s->count++] = id;
3806  	return 0;
3807  }
3808  
3809  static void idset_reset(struct bpf_idset *s)
3810  {
3811  	s->count = 0;
3812  }
3813  
3814  /* Collect a set of IDs for all registers currently marked as precise in env->bt.
3815   * Mark all registers with these IDs as precise.
3816   */
3817  static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
3818  {
3819  	struct bpf_idset *precise_ids = &env->idset_scratch;
3820  	struct backtrack_state *bt = &env->bt;
3821  	struct bpf_func_state *func;
3822  	struct bpf_reg_state *reg;
3823  	DECLARE_BITMAP(mask, 64);
3824  	int i, fr;
3825  
3826  	idset_reset(precise_ids);
3827  
3828  	for (fr = bt->frame; fr >= 0; fr--) {
3829  		func = st->frame[fr];
3830  
3831  		bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
3832  		for_each_set_bit(i, mask, 32) {
3833  			reg = &func->regs[i];
3834  			if (!reg->id || reg->type != SCALAR_VALUE)
3835  				continue;
3836  			if (idset_push(precise_ids, reg->id))
3837  				return -EFAULT;
3838  		}
3839  
3840  		bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
3841  		for_each_set_bit(i, mask, 64) {
3842  			if (i >= func->allocated_stack / BPF_REG_SIZE)
3843  				break;
3844  			if (!is_spilled_scalar_reg(&func->stack[i]))
3845  				continue;
3846  			reg = &func->stack[i].spilled_ptr;
3847  			if (!reg->id)
3848  				continue;
3849  			if (idset_push(precise_ids, reg->id))
3850  				return -EFAULT;
3851  		}
3852  	}
3853  
3854  	for (fr = 0; fr <= st->curframe; ++fr) {
3855  		func = st->frame[fr];
3856  
3857  		for (i = BPF_REG_0; i < BPF_REG_10; ++i) {
3858  			reg = &func->regs[i];
3859  			if (!reg->id)
3860  				continue;
3861  			if (!idset_contains(precise_ids, reg->id))
3862  				continue;
3863  			bt_set_frame_reg(bt, fr, i);
3864  		}
3865  		for (i = 0; i < func->allocated_stack / BPF_REG_SIZE; ++i) {
3866  			if (!is_spilled_scalar_reg(&func->stack[i]))
3867  				continue;
3868  			reg = &func->stack[i].spilled_ptr;
3869  			if (!reg->id)
3870  				continue;
3871  			if (!idset_contains(precise_ids, reg->id))
3872  				continue;
3873  			bt_set_frame_slot(bt, fr, i);
3874  		}
3875  	}
3876  
3877  	return 0;
3878  }
3879  
3880  /*
3881   * __mark_chain_precision() backtracks BPF program instruction sequence and
3882   * chain of verifier states making sure that register *regno* (if regno >= 0)
3883   * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
3884   * SCALARS, as well as any other registers and slots that contribute to
3885   * a tracked state of given registers/stack slots, depending on specific BPF
3886   * assembly instructions (see backtrack_insn() for exact instruction handling
3887   * logic). This backtracking relies on recorded jmp_history and is able to
3888   * traverse entire chain of parent states. This process ends only when all the
3889   * necessary registers/slots and their transitive dependencies are marked as
3890   * precise.
3891   *
3892   * One important and subtle aspect is that precise marks *do not matter* in
3893   * the currently verified state (current state). It is important to understand
3894   * why this is the case.
3895   *
3896   * First, note that current state is the state that is not yet "checkpointed",
3897   * i.e., it is not yet put into env->explored_states, and it has no children
3898   * states as well. It's ephemeral, and can end up either a) being discarded if
3899   * compatible explored state is found at some point or BPF_EXIT instruction is
3900   * reached or b) checkpointed and put into env->explored_states, branching out
3901   * into one or more children states.
3902   *
3903   * In the former case, precise markings in current state are completely
3904   * ignored by state comparison code (see regsafe() for details). Only
3905   * checkpointed ("old") state precise markings are important, and if old
3906   * state's register/slot is precise, regsafe() assumes current state's
3907   * register/slot as precise and checks value ranges exactly and precisely. If
3908   * states turn out to be compatible, current state's necessary precise
3909   * markings and any required parent states' precise markings are enforced
3910   * after the fact with propagate_precision() logic. But it's
3911   * important to realize that in this case, even after marking current state
3912   * registers/slots as precise, we immediately discard current state. So what
3913   * actually matters is any of the precise markings propagated into current
3914   * state's parent states, which are always checkpointed (due to b) case above).
3915   * As such, for scenario a) it doesn't matter if current state has precise
3916   * markings set or not.
3917   *
3918   * Now, for the scenario b), checkpointing and forking into child(ren)
3919   * state(s). Note that before current state gets to checkpointing step, any
3920   * processed instruction always assumes precise SCALAR register/slot
3921   * knowledge: if precise value or range is useful to prune jump branch, BPF
3922   * verifier takes this opportunity enthusiastically. Similarly, when
3923   * register's value is used to calculate offset or memory address, exact
3924   * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
3925   * what we mentioned above about state comparison ignoring precise markings
3926   * during state comparison, BPF verifier ignores and also assumes precise
3927   * markings *at will* during instruction verification process. But as verifier
3928   * assumes precision, it also propagates any precision dependencies across
3929   * parent states, which are not yet finalized, so can be further restricted
3930   * based on new knowledge gained from restrictions enforced by their children
3931   * states. This is so that once those parent states are finalized, i.e., when
3932   * they have no more active children state, state comparison logic in
3933   * is_state_visited() would enforce strict and precise SCALAR ranges, if
3934   * required for correctness.
3935   *
3936   * To build a bit more intuition, note also that once a state is checkpointed,
3937   * the path we took to get to that state is not important. This is crucial
3938   * property for state pruning. When state is checkpointed and finalized at
3939   * some instruction index, it can be correctly and safely used to "short
3940   * circuit" any *compatible* state that reaches exactly the same instruction
3941   * index. I.e., if we jumped to that instruction from a completely different
3942   * code path than original finalized state was derived from, it doesn't
3943   * matter, current state can be discarded because from that instruction
3944   * forward having a compatible state will ensure we will safely reach the
3945   * exit. States describe preconditions for further exploration, but completely
3946   * forget the history of how we got here.
3947   *
3948   * This also means that even if we needed precise SCALAR range to get to
3949   * finalized state, but from that point forward *that same* SCALAR register is
3950   * never used in a precise context (i.e., its precise value is not needed for
3951   * correctness), it's correct and safe to mark such register as "imprecise"
3952   * (i.e., precise marking set to false). This is what we rely on when we do
3953   * not set precise marking in current state. If no child state requires
3954   * precision for any given SCALAR register, it's safe to dictate that it can
3955   * be imprecise. If any child state does require this register to be precise,
3956   * we'll mark it precise later retroactively during precise markings
3957   * propagation from child state to parent states.
3958   *
3959   * Skipping precise marking setting in current state is a mild version of
3960   * relying on the above observation. But we can utilize this property even
3961   * more aggressively by proactively forgetting any precise marking in the
3962   * current state (which we inherited from the parent state), right before we
3963   * checkpoint it and branch off into new child state. This is done by
3964   * mark_all_scalars_imprecise() to hopefully get more permissive and generic
3965   * finalized states which help in short circuiting more future states.
3966   */
3967  static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
3968  {
3969  	struct backtrack_state *bt = &env->bt;
3970  	struct bpf_verifier_state *st = env->cur_state;
3971  	int first_idx = st->first_insn_idx;
3972  	int last_idx = env->insn_idx;
3973  	int subseq_idx = -1;
3974  	struct bpf_func_state *func;
3975  	struct bpf_reg_state *reg;
3976  	bool skip_first = true;
3977  	int i, fr, err;
3978  
3979  	if (!env->bpf_capable)
3980  		return 0;
3981  
3982  	/* set frame number from which we are starting to backtrack */
3983  	bt_init(bt, env->cur_state->curframe);
3984  
3985  	/* Do sanity checks against current state of register and/or stack
3986  	 * slot, but don't set precise flag in current state, as precision
3987  	 * tracking in the current state is unnecessary.
3988  	 */
3989  	func = st->frame[bt->frame];
3990  	if (regno >= 0) {
3991  		reg = &func->regs[regno];
3992  		if (reg->type != SCALAR_VALUE) {
3993  			WARN_ONCE(1, "backtracing misuse");
3994  			return -EFAULT;
3995  		}
3996  		bt_set_reg(bt, regno);
3997  	}
3998  
3999  	if (bt_empty(bt))
4000  		return 0;
4001  
4002  	for (;;) {
4003  		DECLARE_BITMAP(mask, 64);
4004  		u32 history = st->jmp_history_cnt;
4005  
4006  		if (env->log.level & BPF_LOG_LEVEL2) {
4007  			verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
4008  				bt->frame, last_idx, first_idx, subseq_idx);
4009  		}
4010  
4011  		/* If some register with scalar ID is marked as precise,
4012  		 * make sure that all registers sharing this ID are also precise.
4013  		 * This is needed to estimate effect of find_equal_scalars().
4014  		 * Do this at the last instruction of each state;
4015  		 * bpf_reg_state::id fields are valid for these instructions.
4016  		 *
4017  		 * Allows to track precision in situation like below:
4018  		 *
4019  		 *     r2 = unknown value
4020  		 *     ...
4021  		 *   --- state #0 ---
4022  		 *     ...
4023  		 *     r1 = r2                 // r1 and r2 now share the same ID
4024  		 *     ...
4025  		 *   --- state #1 {r1.id = A, r2.id = A} ---
4026  		 *     ...
4027  		 *     if (r2 > 10) goto exit; // find_equal_scalars() assigns range to r1
4028  		 *     ...
4029  		 *   --- state #2 {r1.id = A, r2.id = A} ---
4030  		 *     r3 = r10
4031  		 *     r3 += r1                // need to mark both r1 and r2
4032  		 */
4033  		if (mark_precise_scalar_ids(env, st))
4034  			return -EFAULT;
4035  
4036  		if (last_idx < 0) {
4037  			/* we are at the entry into subprog, which
4038  			 * is expected for global funcs, but only if
4039  			 * requested precise registers are R1-R5
4040  			 * (which are global func's input arguments)
4041  			 */
4042  			if (st->curframe == 0 &&
4043  			    st->frame[0]->subprogno > 0 &&
4044  			    st->frame[0]->callsite == BPF_MAIN_FUNC &&
4045  			    bt_stack_mask(bt) == 0 &&
4046  			    (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
4047  				bitmap_from_u64(mask, bt_reg_mask(bt));
4048  				for_each_set_bit(i, mask, 32) {
4049  					reg = &st->frame[0]->regs[i];
4050  					bt_clear_reg(bt, i);
4051  					if (reg->type == SCALAR_VALUE)
4052  						reg->precise = true;
4053  				}
4054  				return 0;
4055  			}
4056  
4057  			verbose(env, "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n",
4058  				st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
4059  			WARN_ONCE(1, "verifier backtracking bug");
4060  			return -EFAULT;
4061  		}
4062  
4063  		for (i = last_idx;;) {
4064  			if (skip_first) {
4065  				err = 0;
4066  				skip_first = false;
4067  			} else {
4068  				err = backtrack_insn(env, i, subseq_idx, bt);
4069  			}
4070  			if (err == -ENOTSUPP) {
4071  				mark_all_scalars_precise(env, env->cur_state);
4072  				bt_reset(bt);
4073  				return 0;
4074  			} else if (err) {
4075  				return err;
4076  			}
4077  			if (bt_empty(bt))
4078  				/* Found assignment(s) into tracked register in this state.
4079  				 * Since this state is already marked, just return.
4080  				 * Nothing to be tracked further in the parent state.
4081  				 */
4082  				return 0;
4083  			if (i == first_idx)
4084  				break;
4085  			subseq_idx = i;
4086  			i = get_prev_insn_idx(st, i, &history);
4087  			if (i >= env->prog->len) {
4088  				/* This can happen if backtracking reached insn 0
4089  				 * and there are still reg_mask or stack_mask
4090  				 * to backtrack.
4091  				 * It means the backtracking missed the spot where
4092  				 * particular register was initialized with a constant.
4093  				 */
4094  				verbose(env, "BUG backtracking idx %d\n", i);
4095  				WARN_ONCE(1, "verifier backtracking bug");
4096  				return -EFAULT;
4097  			}
4098  		}
4099  		st = st->parent;
4100  		if (!st)
4101  			break;
4102  
4103  		for (fr = bt->frame; fr >= 0; fr--) {
4104  			func = st->frame[fr];
4105  			bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
4106  			for_each_set_bit(i, mask, 32) {
4107  				reg = &func->regs[i];
4108  				if (reg->type != SCALAR_VALUE) {
4109  					bt_clear_frame_reg(bt, fr, i);
4110  					continue;
4111  				}
4112  				if (reg->precise)
4113  					bt_clear_frame_reg(bt, fr, i);
4114  				else
4115  					reg->precise = true;
4116  			}
4117  
4118  			bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
4119  			for_each_set_bit(i, mask, 64) {
4120  				if (i >= func->allocated_stack / BPF_REG_SIZE) {
4121  					/* the sequence of instructions:
4122  					 * 2: (bf) r3 = r10
4123  					 * 3: (7b) *(u64 *)(r3 -8) = r0
4124  					 * 4: (79) r4 = *(u64 *)(r10 -8)
4125  					 * doesn't contain jmps. It's backtracked
4126  					 * as a single block.
4127  					 * During backtracking insn 3 is not recognized as
4128  					 * stack access, so at the end of backtracking
4129  					 * stack slot fp-8 is still marked in stack_mask.
4130  					 * However the parent state may not have accessed
4131  					 * fp-8 and it's "unallocated" stack space.
4132  					 * In such case fallback to conservative.
4133  					 */
4134  					mark_all_scalars_precise(env, env->cur_state);
4135  					bt_reset(bt);
4136  					return 0;
4137  				}
4138  
4139  				if (!is_spilled_scalar_reg(&func->stack[i])) {
4140  					bt_clear_frame_slot(bt, fr, i);
4141  					continue;
4142  				}
4143  				reg = &func->stack[i].spilled_ptr;
4144  				if (reg->precise)
4145  					bt_clear_frame_slot(bt, fr, i);
4146  				else
4147  					reg->precise = true;
4148  			}
4149  			if (env->log.level & BPF_LOG_LEVEL2) {
4150  				fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
4151  					     bt_frame_reg_mask(bt, fr));
4152  				verbose(env, "mark_precise: frame%d: parent state regs=%s ",
4153  					fr, env->tmp_str_buf);
4154  				fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
4155  					       bt_frame_stack_mask(bt, fr));
4156  				verbose(env, "stack=%s: ", env->tmp_str_buf);
4157  				print_verifier_state(env, func, true);
4158  			}
4159  		}
4160  
4161  		if (bt_empty(bt))
4162  			return 0;
4163  
4164  		subseq_idx = first_idx;
4165  		last_idx = st->last_insn_idx;
4166  		first_idx = st->first_insn_idx;
4167  	}
4168  
4169  	/* if we still have requested precise regs or slots, we missed
4170  	 * something (e.g., stack access through non-r10 register), so
4171  	 * fall back to marking all precise
4172  	 */
4173  	if (!bt_empty(bt)) {
4174  		mark_all_scalars_precise(env, env->cur_state);
4175  		bt_reset(bt);
4176  	}
4177  
4178  	return 0;
4179  }
4180  
4181  int mark_chain_precision(struct bpf_verifier_env *env, int regno)
4182  {
4183  	return __mark_chain_precision(env, regno);
4184  }
4185  
4186  /* mark_chain_precision_batch() assumes that env->bt is set in the caller to
4187   * desired reg and stack masks across all relevant frames
4188   */
4189  static int mark_chain_precision_batch(struct bpf_verifier_env *env)
4190  {
4191  	return __mark_chain_precision(env, -1);
4192  }
4193  
4194  static bool is_spillable_regtype(enum bpf_reg_type type)
4195  {
4196  	switch (base_type(type)) {
4197  	case PTR_TO_MAP_VALUE:
4198  	case PTR_TO_STACK:
4199  	case PTR_TO_CTX:
4200  	case PTR_TO_PACKET:
4201  	case PTR_TO_PACKET_META:
4202  	case PTR_TO_PACKET_END:
4203  	case PTR_TO_FLOW_KEYS:
4204  	case CONST_PTR_TO_MAP:
4205  	case PTR_TO_SOCKET:
4206  	case PTR_TO_SOCK_COMMON:
4207  	case PTR_TO_TCP_SOCK:
4208  	case PTR_TO_XDP_SOCK:
4209  	case PTR_TO_BTF_ID:
4210  	case PTR_TO_BUF:
4211  	case PTR_TO_MEM:
4212  	case PTR_TO_FUNC:
4213  	case PTR_TO_MAP_KEY:
4214  		return true;
4215  	default:
4216  		return false;
4217  	}
4218  }
4219  
4220  /* Does this register contain a constant zero? */
4221  static bool register_is_null(struct bpf_reg_state *reg)
4222  {
4223  	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
4224  }
4225  
4226  static bool register_is_const(struct bpf_reg_state *reg)
4227  {
4228  	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
4229  }
4230  
4231  static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
4232  {
4233  	return tnum_is_unknown(reg->var_off) &&
4234  	       reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
4235  	       reg->umin_value == 0 && reg->umax_value == U64_MAX &&
4236  	       reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
4237  	       reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
4238  }
4239  
4240  static bool register_is_bounded(struct bpf_reg_state *reg)
4241  {
4242  	return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
4243  }
4244  
4245  static bool __is_pointer_value(bool allow_ptr_leaks,
4246  			       const struct bpf_reg_state *reg)
4247  {
4248  	if (allow_ptr_leaks)
4249  		return false;
4250  
4251  	return reg->type != SCALAR_VALUE;
4252  }
4253  
4254  /* Copy src state preserving dst->parent and dst->live fields */
4255  static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
4256  {
4257  	struct bpf_reg_state *parent = dst->parent;
4258  	enum bpf_reg_liveness live = dst->live;
4259  
4260  	*dst = *src;
4261  	dst->parent = parent;
4262  	dst->live = live;
4263  }
4264  
4265  static void save_register_state(struct bpf_func_state *state,
4266  				int spi, struct bpf_reg_state *reg,
4267  				int size)
4268  {
4269  	int i;
4270  
4271  	copy_register_state(&state->stack[spi].spilled_ptr, reg);
4272  	if (size == BPF_REG_SIZE)
4273  		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
4274  
4275  	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
4276  		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
4277  
4278  	/* size < 8 bytes spill */
4279  	for (; i; i--)
4280  		scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
4281  }
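
/* Example: an 8-byte spill of a bounded scalar marks the slot REG_LIVE_WRITTEN
 * and sets all eight slot_type bytes to STACK_SPILL. A 4-byte spill sets only
 * slot_type[4..7] to STACK_SPILL, scrubs the remaining low bytes and
 * deliberately does not set REG_LIVE_WRITTEN, so that read propagation from
 * the parent state is not cut short by a partial overwrite.
 */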
4282  
4283  static bool is_bpf_st_mem(struct bpf_insn *insn)
4284  {
4285  	return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
4286  }
4287  
4288  /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
4289   * stack boundary and alignment are checked in check_mem_access()
4290   */
4291  static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
4292  				       /* stack frame we're writing to */
4293  				       struct bpf_func_state *state,
4294  				       int off, int size, int value_regno,
4295  				       int insn_idx)
4296  {
4297  	struct bpf_func_state *cur; /* state of the current function */
4298  	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
4299  	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4300  	struct bpf_reg_state *reg = NULL;
4301  	u32 dst_reg = insn->dst_reg;
4302  
4303  	err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
4304  	if (err)
4305  		return err;
4306  	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
4307  	 * so it's aligned access and [off, off + size) are within stack limits
4308  	 */
4309  	if (!env->allow_ptr_leaks &&
4310  	    state->stack[spi].slot_type[0] == STACK_SPILL &&
4311  	    size != BPF_REG_SIZE) {
4312  		verbose(env, "attempt to corrupt spilled pointer on stack\n");
4313  		return -EACCES;
4314  	}
4315  
4316  	cur = env->cur_state->frame[env->cur_state->curframe];
4317  	if (value_regno >= 0)
4318  		reg = &cur->regs[value_regno];
4319  	if (!env->bypass_spec_v4) {
4320  		bool sanitize = reg && is_spillable_regtype(reg->type);
4321  
4322  		for (i = 0; i < size; i++) {
4323  			u8 type = state->stack[spi].slot_type[i];
4324  
4325  			if (type != STACK_MISC && type != STACK_ZERO) {
4326  				sanitize = true;
4327  				break;
4328  			}
4329  		}
4330  
4331  		if (sanitize)
4332  			env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
4333  	}
4334  
4335  	err = destroy_if_dynptr_stack_slot(env, state, spi);
4336  	if (err)
4337  		return err;
4338  
4339  	mark_stack_slot_scratched(env, spi);
4340  	if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
4341  	    !register_is_null(reg) && env->bpf_capable) {
4342  		if (dst_reg != BPF_REG_FP) {
4343  			/* The backtracking logic can only recognize explicit
4344  			 * stack slot addresses like [fp - 8]. A spill of a
4345  			 * scalar via a different register has to be conservative.
4346  			 * Backtrack from here and mark all registers as precise
4347  			 * that contributed into 'reg' being a constant.
4348  			 */
4349  			err = mark_chain_precision(env, value_regno);
4350  			if (err)
4351  				return err;
4352  		}
4353  		save_register_state(state, spi, reg, size);
4354  		/* Break the relation on a narrowing spill. */
4355  		if (fls64(reg->umax_value) > BITS_PER_BYTE * size)
4356  			state->stack[spi].spilled_ptr.id = 0;
4357  	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
4358  		   insn->imm != 0 && env->bpf_capable) {
4359  		struct bpf_reg_state fake_reg = {};
4360  
4361  		__mark_reg_known(&fake_reg, (u32)insn->imm);
4362  		fake_reg.type = SCALAR_VALUE;
4363  		save_register_state(state, spi, &fake_reg, size);
4364  	} else if (reg && is_spillable_regtype(reg->type)) {
4365  		/* register containing pointer is being spilled into stack */
4366  		if (size != BPF_REG_SIZE) {
4367  			verbose_linfo(env, insn_idx, "; ");
4368  			verbose(env, "invalid size of register spill\n");
4369  			return -EACCES;
4370  		}
4371  		if (state != cur && reg->type == PTR_TO_STACK) {
4372  			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
4373  			return -EINVAL;
4374  		}
4375  		save_register_state(state, spi, reg, size);
4376  	} else {
4377  		u8 type = STACK_MISC;
4378  
4379  		/* regular write of data into stack destroys any spilled ptr */
4380  		state->stack[spi].spilled_ptr.type = NOT_INIT;
4381  		/* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
4382  		if (is_stack_slot_special(&state->stack[spi]))
4383  			for (i = 0; i < BPF_REG_SIZE; i++)
4384  				scrub_spilled_slot(&state->stack[spi].slot_type[i]);
4385  
4386  		/* only mark the slot as written if all 8 bytes were written
4387  		 * otherwise read propagation may incorrectly stop too soon
4388  		 * when stack slots are partially written.
4389  		 * This heuristic means that read propagation will be
4390  		 * conservative, since it will add reg_live_read marks
4391  		 * to stack slots all the way to the first state when a program
4392  		 * writes+reads less than 8 bytes
4393  		 */
4394  		if (size == BPF_REG_SIZE)
4395  			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
4396  
4397  		/* when we zero initialize stack slots mark them as such */
4398  		if ((reg && register_is_null(reg)) ||
4399  		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
4400  			/* backtracking doesn't work for STACK_ZERO yet. */
4401  			err = mark_chain_precision(env, value_regno);
4402  			if (err)
4403  				return err;
4404  			type = STACK_ZERO;
4405  		}
4406  
4407  		/* Mark slots affected by this stack write. */
4408  		for (i = 0; i < size; i++)
4409  			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
4410  				type;
4411  	}
4412  	return 0;
4413  }
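
/* Two illustrative outcomes of the checks above (a sketch, not exhaustive):
 *
 *   *(u64 *)(r10 -8) = r6   // r6 is a known-bounded scalar: its full state
 *                           // is preserved in the slot via
 *                           // save_register_state(), so a later fill can
 *                           // restore its bounds.
 *   *(u32 *)(r10 -8) = r1   // r1 is PTR_TO_CTX: pointers may only be spilled
 *                           // with full 8-byte stores, so this is rejected
 *                           // with "invalid size of register spill".
 */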
4414  
4415  /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
4416   * known to contain a variable offset.
4417   * This function checks whether the write is permitted and conservatively
4418   * tracks the effects of the write, considering that each stack slot in the
4419   * dynamic range is potentially written to.
4420   *
4421   * 'off' includes 'regno->off'.
4422   * 'value_regno' can be -1, meaning that an unknown value is being written to
4423   * the stack.
4424   *
4425   * Spilled pointers in range are not marked as written because we don't know
4426   * what's going to be actually written. This means that read propagation for
4427   * future reads cannot be terminated by this write.
4428   *
4429   * For privileged programs, uninitialized stack slots are considered
4430   * initialized by this write (even though we don't know exactly what offsets
4431   * are going to be written to). The idea is that we don't want the verifier to
4432   * reject future reads that access slots written to through variable offsets.
4433   */
4434  static int check_stack_write_var_off(struct bpf_verifier_env *env,
4435  				     /* func where register points to */
4436  				     struct bpf_func_state *state,
4437  				     int ptr_regno, int off, int size,
4438  				     int value_regno, int insn_idx)
4439  {
4440  	struct bpf_func_state *cur; /* state of the current function */
4441  	int min_off, max_off;
4442  	int i, err;
4443  	struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
4444  	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4445  	bool writing_zero = false;
4446  	/* set if the fact that we're writing a zero is used to let any
4447  	 * stack slots remain STACK_ZERO
4448  	 */
4449  	bool zero_used = false;
4450  
4451  	cur = env->cur_state->frame[env->cur_state->curframe];
4452  	ptr_reg = &cur->regs[ptr_regno];
4453  	min_off = ptr_reg->smin_value + off;
4454  	max_off = ptr_reg->smax_value + off + size;
4455  	if (value_regno >= 0)
4456  		value_reg = &cur->regs[value_regno];
4457  	if ((value_reg && register_is_null(value_reg)) ||
4458  	    (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
4459  		writing_zero = true;
4460  
4461  	err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
4462  	if (err)
4463  		return err;
4464  
4465  	for (i = min_off; i < max_off; i++) {
4466  		int spi;
4467  
4468  		spi = __get_spi(i);
4469  		err = destroy_if_dynptr_stack_slot(env, state, spi);
4470  		if (err)
4471  			return err;
4472  	}
4473  
4474  	/* Variable offset writes destroy any spilled pointers in range. */
4475  	for (i = min_off; i < max_off; i++) {
4476  		u8 new_type, *stype;
4477  		int slot, spi;
4478  
4479  		slot = -i - 1;
4480  		spi = slot / BPF_REG_SIZE;
4481  		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4482  		mark_stack_slot_scratched(env, spi);
4483  
4484  		if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
4485  			/* Reject the write if range we may write to has not
4486  			 * been initialized beforehand. If we didn't reject
4487  			 * here, the ptr status would be erased below (even
4488  			 * though not all slots are actually overwritten),
4489  			 * possibly opening the door to leaks.
4490  			 *
4491  			 * We do however catch STACK_INVALID case below, and
4492  			 * only allow reading possibly uninitialized memory
4493  			 * later for CAP_PERFMON, as the write may not happen to
4494  			 * that slot.
4495  			 */
4496  			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
4497  				insn_idx, i);
4498  			return -EINVAL;
4499  		}
4500  
4501  		/* Erase all spilled pointers. */
4502  		state->stack[spi].spilled_ptr.type = NOT_INIT;
4503  
4504  		/* Update the slot type. */
4505  		new_type = STACK_MISC;
4506  		if (writing_zero && *stype == STACK_ZERO) {
4507  			new_type = STACK_ZERO;
4508  			zero_used = true;
4509  		}
4510  		/* If the slot is STACK_INVALID, we check whether it's OK to
4511  		 * pretend that it will be initialized by this write. The slot
4512  		 * might not actually be written to, and so if we mark it as
4513  		 * initialized future reads might leak uninitialized memory.
4514  		 * For privileged programs, we will accept such reads to slots
4515  		 * that may or may not be written because, if we were to reject
4516  		 * them, the error would be too confusing.
4517  		 */
4518  		if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
4519  			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
4520  					insn_idx, i);
4521  			return -EINVAL;
4522  		}
4523  		*stype = new_type;
4524  	}
4525  	if (zero_used) {
4526  		/* backtracking doesn't work for STACK_ZERO yet. */
4527  		err = mark_chain_precision(env, value_regno);
4528  		if (err)
4529  			return err;
4530  	}
4531  	return 0;
4532  }
4533  
4534  /* When register 'dst_regno' is assigned some values from stack[min_off,
4535   * max_off), we set the register's type according to the types of the
4536   * respective stack slots. If all the stack values are known to be zeros, then
4537   * so is the destination reg. Otherwise, the register is considered to be
4538   * SCALAR. This function does not deal with register filling; the caller must
4539   * ensure that all spilled registers in the stack range have been marked as
4540   * read.
4541   */
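/* For example, if every byte in [min_off, max_off) is STACK_ZERO, the
 * destination register becomes a known zero and is marked precise; if even
 * one byte is STACK_MISC, it becomes an unknown SCALAR_VALUE.
 */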
4542  static void mark_reg_stack_read(struct bpf_verifier_env *env,
4543  				/* func where src register points to */
4544  				struct bpf_func_state *ptr_state,
4545  				int min_off, int max_off, int dst_regno)
4546  {
4547  	struct bpf_verifier_state *vstate = env->cur_state;
4548  	struct bpf_func_state *state = vstate->frame[vstate->curframe];
4549  	int i, slot, spi;
4550  	u8 *stype;
4551  	int zeros = 0;
4552  
4553  	for (i = min_off; i < max_off; i++) {
4554  		slot = -i - 1;
4555  		spi = slot / BPF_REG_SIZE;
4556  		mark_stack_slot_scratched(env, spi);
4557  		stype = ptr_state->stack[spi].slot_type;
4558  		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
4559  			break;
4560  		zeros++;
4561  	}
4562  	if (zeros == max_off - min_off) {
4563  		/* any access_size read into a register is zero extended,
4564  		 * so the whole register == const_zero
4565  		 */
4566  		__mark_reg_const_zero(&state->regs[dst_regno]);
4567  		/* backtracking doesn't support STACK_ZERO yet,
4568  		 * so mark it precise here, so that later
4569  		 * backtracking can stop here.
4570  		 * Backtracking may not need this if this register
4571  		 * doesn't participate in pointer adjustment.
4572  		 * Forward propagation of precise flag is not
4573  		 * necessary either. This mark is only to stop
4574  		 * backtracking. Any register that contributed
4575  		 * to const 0 was marked precise before spill.
4576  		 */
4577  		state->regs[dst_regno].precise = true;
4578  	} else {
4579  		/* have read misc data from the stack */
4580  		mark_reg_unknown(env, state->regs, dst_regno);
4581  	}
4582  	state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4583  }
4584  
4585  /* Read the stack at 'off' and put the results into the register indicated by
4586   * 'dst_regno'. It handles reg filling if the addressed stack slot is a
4587   * spilled reg.
4588   *
4589   * 'dst_regno' can be -1, meaning that the read value is not going to a
4590   * register.
4591   *
4592   * The access is assumed to be within the current stack bounds.
4593   */
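/* For example, an aligned 8-byte read of a fully spilled register restores
 * that register's state into 'dst_regno'; a narrower read of a spilled
 * pointer is rejected ("invalid size of register fill"), and a read of a
 * spilled pointer with dst_regno == -1 is refused for unprivileged programs
 * to avoid leaking the pointer.
 */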
4594  static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
4595  				      /* func where src register points to */
4596  				      struct bpf_func_state *reg_state,
4597  				      int off, int size, int dst_regno)
4598  {
4599  	struct bpf_verifier_state *vstate = env->cur_state;
4600  	struct bpf_func_state *state = vstate->frame[vstate->curframe];
4601  	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
4602  	struct bpf_reg_state *reg;
4603  	u8 *stype, type;
4604  
4605  	stype = reg_state->stack[spi].slot_type;
4606  	reg = &reg_state->stack[spi].spilled_ptr;
4607  
4608  	mark_stack_slot_scratched(env, spi);
4609  
4610  	if (is_spilled_reg(&reg_state->stack[spi])) {
4611  		u8 spill_size = 1;
4612  
4613  		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
4614  			spill_size++;
4615  
4616  		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
4617  			if (reg->type != SCALAR_VALUE) {
4618  				verbose_linfo(env, env->insn_idx, "; ");
4619  				verbose(env, "invalid size of register fill\n");
4620  				return -EACCES;
4621  			}
4622  
4623  			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
4624  			if (dst_regno < 0)
4625  				return 0;
4626  
4627  			if (!(off % BPF_REG_SIZE) && size == spill_size) {
4628  				/* The earlier check_reg_arg() has decided the
4629  				 * subreg_def for this insn.  Save it first.
4630  				 */
4631  				s32 subreg_def = state->regs[dst_regno].subreg_def;
4632  
4633  				copy_register_state(&state->regs[dst_regno], reg);
4634  				state->regs[dst_regno].subreg_def = subreg_def;
4635  			} else {
4636  				for (i = 0; i < size; i++) {
4637  					type = stype[(slot - i) % BPF_REG_SIZE];
4638  					if (type == STACK_SPILL)
4639  						continue;
4640  					if (type == STACK_MISC)
4641  						continue;
4642  					if (type == STACK_INVALID && env->allow_uninit_stack)
4643  						continue;
4644  					verbose(env, "invalid read from stack off %d+%d size %d\n",
4645  						off, i, size);
4646  					return -EACCES;
4647  				}
4648  				mark_reg_unknown(env, state->regs, dst_regno);
4649  			}
4650  			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4651  			return 0;
4652  		}
4653  
4654  		if (dst_regno >= 0) {
4655  			/* restore register state from stack */
4656  			copy_register_state(&state->regs[dst_regno], reg);
4657  			/* mark reg as written since spilled pointer state likely
4658  			 * has its liveness marks cleared by is_state_visited()
4659  			 * which resets stack/reg liveness for state transitions
4660  			 */
4661  			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4662  		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
4663  			/* If dst_regno==-1, the caller is asking us whether
4664  			 * it is acceptable to use this value as a SCALAR_VALUE
4665  			 * (e.g. for XADD).
4666  			 * We must not allow unprivileged callers to do that
4667  			 * with spilled pointers.
4668  			 */
4669  			verbose(env, "leaking pointer from stack off %d\n",
4670  				off);
4671  			return -EACCES;
4672  		}
4673  		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
4674  	} else {
4675  		for (i = 0; i < size; i++) {
4676  			type = stype[(slot - i) % BPF_REG_SIZE];
4677  			if (type == STACK_MISC)
4678  				continue;
4679  			if (type == STACK_ZERO)
4680  				continue;
4681  			if (type == STACK_INVALID && env->allow_uninit_stack)
4682  				continue;
4683  			verbose(env, "invalid read from stack off %d+%d size %d\n",
4684  				off, i, size);
4685  			return -EACCES;
4686  		}
4687  		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
4688  		if (dst_regno >= 0)
4689  			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
4690  	}
4691  	return 0;
4692  }
4693  
4694  enum bpf_access_src {
4695  	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
4696  	ACCESS_HELPER = 2,  /* the access is performed by a helper */
4697  };
4698  
4699  static int check_stack_range_initialized(struct bpf_verifier_env *env,
4700  					 int regno, int off, int access_size,
4701  					 bool zero_size_allowed,
4702  					 enum bpf_access_src type,
4703  					 struct bpf_call_arg_meta *meta);
4704  
4705  static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
4706  {
4707  	return cur_regs(env) + regno;
4708  }
4709  
4710  /* Read the stack at 'ptr_regno + off' and put the result into the register
4711   * 'dst_regno'.
4712   * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
4713   * but not its variable offset.
4714   * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
4715   *
4716   * As opposed to check_stack_read_fixed_off, this function doesn't deal with
4717   * filling registers (i.e. reads of spilled register cannot be detected when
4718   * the offset is not fixed). We conservatively mark 'dst_regno' as containing
4719   * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
4720   * offset; for a fixed offset check_stack_read_fixed_off should be used
4721   * instead.
4722   */
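/* Illustrative pattern (assuming r1 is a bounded scalar):
 *
 *    r2 = r10
 *    r2 += r1
 *    r3 = *(u64 *)(r2 - 16)
 *
 * r3 is conservatively marked as an unknown SCALAR_VALUE (or a known zero if
 * every byte in the range is STACK_ZERO); register filling is never
 * performed for such reads.
 */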
4723  static int check_stack_read_var_off(struct bpf_verifier_env *env,
4724  				    int ptr_regno, int off, int size, int dst_regno)
4725  {
4726  	/* The state of the source register. */
4727  	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4728  	struct bpf_func_state *ptr_state = func(env, reg);
4729  	int err;
4730  	int min_off, max_off;
4731  
4732  	/* Note that we pass a NULL meta, so raw access will not be permitted.
4733  	 */
4734  	err = check_stack_range_initialized(env, ptr_regno, off, size,
4735  					    false, ACCESS_DIRECT, NULL);
4736  	if (err)
4737  		return err;
4738  
4739  	min_off = reg->smin_value + off;
4740  	max_off = reg->smax_value + off;
4741  	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
4742  	return 0;
4743  }
4744  
4745  /* check_stack_read dispatches to check_stack_read_fixed_off or
4746   * check_stack_read_var_off.
4747   *
4748   * The caller must ensure that the offset falls within the allocated stack
4749   * bounds.
4750   *
4751   * 'dst_regno' is a register which will receive the value from the stack. It
4752   * can be -1, meaning that the read value is not going to a register.
4753   */
4754  static int check_stack_read(struct bpf_verifier_env *env,
4755  			    int ptr_regno, int off, int size,
4756  			    int dst_regno)
4757  {
4758  	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4759  	struct bpf_func_state *state = func(env, reg);
4760  	int err;
4761  	/* Some accesses are only permitted with a static offset. */
4762  	bool var_off = !tnum_is_const(reg->var_off);
4763  
4764  	/* The offset is required to be static when reads don't go to a
4765  	 * register, in order to not leak pointers (see
4766  	 * check_stack_read_fixed_off).
4767  	 */
4768  	if (dst_regno < 0 && var_off) {
4769  		char tn_buf[48];
4770  
4771  		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4772  		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
4773  			tn_buf, off, size);
4774  		return -EACCES;
4775  	}
4776  	/* Variable offset is prohibited for unprivileged mode for simplicity
4777  	 * since it requires corresponding support in Spectre masking for stack
4778  	 * ALU. See also retrieve_ptr_limit(). The check in
4779  	 * check_stack_access_for_ptr_arithmetic() called by
4780  	 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
4781  	 * with variable offsets, therefore no check is required here. Further,
4782  	 * just checking it here would be insufficient as speculative stack
4783  	 * writes could still lead to unsafe speculative behaviour.
4784  	 */
4785  	if (!var_off) {
4786  		off += reg->var_off.value;
4787  		err = check_stack_read_fixed_off(env, state, off, size,
4788  						 dst_regno);
4789  	} else {
4790  		/* Variable offset stack reads need more conservative handling
4791  		 * than fixed offset ones. Note that dst_regno >= 0 on this
4792  		 * branch.
4793  		 */
4794  		err = check_stack_read_var_off(env, ptr_regno, off, size,
4795  					       dst_regno);
4796  	}
4797  	return err;
4798  }
4799  
4800  
4801  /* check_stack_write dispatches to check_stack_write_fixed_off or
4802   * check_stack_write_var_off.
4803   *
4804   * 'ptr_regno' is the register used as a pointer into the stack.
4805   * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
4806   * 'value_regno' is the register whose value we're writing to the stack. It can
4807   * be -1, meaning that we're not writing from a register.
4808   *
4809   * The caller must ensure that the offset falls within the maximum stack size.
4810   */
4811  static int check_stack_write(struct bpf_verifier_env *env,
4812  			     int ptr_regno, int off, int size,
4813  			     int value_regno, int insn_idx)
4814  {
4815  	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4816  	struct bpf_func_state *state = func(env, reg);
4817  	int err;
4818  
4819  	if (tnum_is_const(reg->var_off)) {
4820  		off += reg->var_off.value;
4821  		err = check_stack_write_fixed_off(env, state, off, size,
4822  						  value_regno, insn_idx);
4823  	} else {
4824  		/* Variable offset stack reads need more conservative handling
4825  		 * than fixed offset ones.
4826  		 */
4827  		err = check_stack_write_var_off(env, state,
4828  						ptr_regno, off, size,
4829  						value_regno, insn_idx);
4830  	}
4831  	return err;
4832  }
4833  
4834  static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
4835  				 int off, int size, enum bpf_access_type type)
4836  {
4837  	struct bpf_reg_state *regs = cur_regs(env);
4838  	struct bpf_map *map = regs[regno].map_ptr;
4839  	u32 cap = bpf_map_flags_to_cap(map);
4840  
4841  	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
4842  		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
4843  			map->value_size, off, size);
4844  		return -EACCES;
4845  	}
4846  
4847  	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
4848  		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
4849  			map->value_size, off, size);
4850  		return -EACCES;
4851  	}
4852  
4853  	return 0;
4854  }
4855  
4856  /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
4857  static int __check_mem_access(struct bpf_verifier_env *env, int regno,
4858  			      int off, int size, u32 mem_size,
4859  			      bool zero_size_allowed)
4860  {
4861  	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
4862  	struct bpf_reg_state *reg;
4863  
4864  	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
4865  		return 0;
4866  
4867  	reg = &cur_regs(env)[regno];
4868  	switch (reg->type) {
4869  	case PTR_TO_MAP_KEY:
4870  		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
4871  			mem_size, off, size);
4872  		break;
4873  	case PTR_TO_MAP_VALUE:
4874  		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
4875  			mem_size, off, size);
4876  		break;
4877  	case PTR_TO_PACKET:
4878  	case PTR_TO_PACKET_META:
4879  	case PTR_TO_PACKET_END:
4880  		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
4881  			off, size, regno, reg->id, off, mem_size);
4882  		break;
4883  	case PTR_TO_MEM:
4884  	default:
4885  		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
4886  			mem_size, off, size);
4887  	}
4888  
4889  	return -EACCES;
4890  }
4891  
4892  /* check read/write into a memory region with possible variable offset */
4893  static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
4894  				   int off, int size, u32 mem_size,
4895  				   bool zero_size_allowed)
4896  {
4897  	struct bpf_verifier_state *vstate = env->cur_state;
4898  	struct bpf_func_state *state = vstate->frame[vstate->curframe];
4899  	struct bpf_reg_state *reg = &state->regs[regno];
4900  	int err;
4901  
4902  	/* We may have adjusted the register pointing to memory region, so we
4903  	 * need to try adding each of min_value and max_value to off
4904  	 * to make sure our theoretical access will be safe.
4905  	 *
4906  	 * The minimum value is only important with signed
4907  	 * comparisons where we can't assume the floor of a
4908  	 * value is 0.  If we are using signed variables for our
4909  	 * index'es we need to make sure that whatever we use
4910  	 * indexes we need to make sure that whatever we use
4911  	 */
4912  	if (reg->smin_value < 0 &&
4913  	    (reg->smin_value == S64_MIN ||
4914  	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
4915  	      reg->smin_value + off < 0)) {
4916  		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4917  			regno);
4918  		return -EACCES;
4919  	}
4920  	err = __check_mem_access(env, regno, reg->smin_value + off, size,
4921  				 mem_size, zero_size_allowed);
4922  	if (err) {
4923  		verbose(env, "R%d min value is outside of the allowed memory range\n",
4924  			regno);
4925  		return err;
4926  	}
4927  
4928  	/* If we haven't set a max value then we need to bail since we can't be
4929  	 * sure we won't do bad things.
4930  	 * If reg->umax_value + off could overflow, treat that as unbounded too.
4931  	 */
4932  	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
4933  		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
4934  			regno);
4935  		return -EACCES;
4936  	}
4937  	err = __check_mem_access(env, regno, reg->umax_value + off, size,
4938  				 mem_size, zero_size_allowed);
4939  	if (err) {
4940  		verbose(env, "R%d max value is outside of the allowed memory range\n",
4941  			regno);
4942  		return err;
4943  	}
4944  
4945  	return 0;
4946  }
4947  
4948  static int __check_ptr_off_reg(struct bpf_verifier_env *env,
4949  			       const struct bpf_reg_state *reg, int regno,
4950  			       bool fixed_off_ok)
4951  {
4952  	/* Access to this pointer-typed register or passing it to a helper
4953  	 * is only allowed in its original, unmodified form.
4954  	 */
4955  
4956  	if (reg->off < 0) {
4957  		verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
4958  			reg_type_str(env, reg->type), regno, reg->off);
4959  		return -EACCES;
4960  	}
4961  
4962  	if (!fixed_off_ok && reg->off) {
4963  		verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
4964  			reg_type_str(env, reg->type), regno, reg->off);
4965  		return -EACCES;
4966  	}
4967  
4968  	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4969  		char tn_buf[48];
4970  
4971  		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4972  		verbose(env, "variable %s access var_off=%s disallowed\n",
4973  			reg_type_str(env, reg->type), tn_buf);
4974  		return -EACCES;
4975  	}
4976  
4977  	return 0;
4978  }
4979  
4980  int check_ptr_off_reg(struct bpf_verifier_env *env,
4981  		      const struct bpf_reg_state *reg, int regno)
4982  {
4983  	return __check_ptr_off_reg(env, reg, regno, false);
4984  }
4985  
4986  static int map_kptr_match_type(struct bpf_verifier_env *env,
4987  			       struct btf_field *kptr_field,
4988  			       struct bpf_reg_state *reg, u32 regno)
4989  {
4990  	const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
4991  	int perm_flags;
4992  	const char *reg_name = "";
4993  
4994  	if (btf_is_kernel(reg->btf)) {
4995  		perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
4996  
4997  		/* Only unreferenced case accepts untrusted pointers */
4998  		if (kptr_field->type == BPF_KPTR_UNREF)
4999  			perm_flags |= PTR_UNTRUSTED;
5000  	} else {
5001  		perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
5002  	}
5003  
5004  	if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
5005  		goto bad_type;
5006  
5007  	/* We need to verify reg->type and reg->btf, before accessing reg->btf */
5008  	reg_name = btf_type_name(reg->btf, reg->btf_id);
5009  
5010  	/* For ref_ptr case, release function check should ensure we get one
5011  	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
5012  	 * normal store of unreferenced kptr, we must ensure var_off is zero.
5013  	 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
5014  	 * reg->off and reg->ref_obj_id are not needed here.
5015  	 */
5016  	if (__check_ptr_off_reg(env, reg, regno, true))
5017  		return -EACCES;
5018  
5019  	/* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
5020  	 * we also need to take into account the reg->off.
5021  	 *
5022  	 * We want to support cases like:
5023  	 *
5024  	 * struct foo {
5025  	 *         struct bar br;
5026  	 *         struct baz bz;
5027  	 * };
5028  	 *
5029  	 * struct foo *v;
5030  	 * v = func();	      // PTR_TO_BTF_ID
5031  	 * val->foo = v;      // reg->off is zero, btf and btf_id match type
5032  	 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
5033  	 *                    // first member type of struct after comparison fails
5034  	 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
5035  	 *                    // to match type
5036  	 *
5037  	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
5038  	 * is zero. We must also ensure that btf_struct_ids_match does not walk
5039  	 * the struct to match type against first member of struct, i.e. reject
5040  	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
5041  	 * strict mode to true for type match.
5042  	 */
5043  	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5044  				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
5045  				  kptr_field->type == BPF_KPTR_REF))
5046  		goto bad_type;
5047  	return 0;
5048  bad_type:
5049  	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
5050  		reg_type_str(env, reg->type), reg_name);
5051  	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
5052  	if (kptr_field->type == BPF_KPTR_UNREF)
5053  		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
5054  			targ_name);
5055  	else
5056  		verbose(env, "\n");
5057  	return -EINVAL;
5058  }
5059  
5060  /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
5061   * can dereference RCU protected pointers and the result is PTR_TRUSTED.
5062   */
5063  static bool in_rcu_cs(struct bpf_verifier_env *env)
5064  {
5065  	return env->cur_state->active_rcu_lock ||
5066  	       env->cur_state->active_lock.ptr ||
5067  	       !env->prog->aux->sleepable;
5068  }
5069  
5070  /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
5071  BTF_SET_START(rcu_protected_types)
5072  BTF_ID(struct, prog_test_ref_kfunc)
5073  BTF_ID(struct, cgroup)
5074  BTF_ID(struct, bpf_cpumask)
5075  BTF_ID(struct, task_struct)
5076  BTF_SET_END(rcu_protected_types)
5077  
5078  static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
5079  {
5080  	if (!btf_is_kernel(btf))
5081  		return false;
5082  	return btf_id_set_contains(&rcu_protected_types, btf_id);
5083  }
5084  
5085  static bool rcu_safe_kptr(const struct btf_field *field)
5086  {
5087  	const struct btf_field_kptr *kptr = &field->kptr;
5088  
5089  	return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id);
5090  }
5091  
5092  static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
5093  				 int value_regno, int insn_idx,
5094  				 struct btf_field *kptr_field)
5095  {
5096  	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
5097  	int class = BPF_CLASS(insn->code);
5098  	struct bpf_reg_state *val_reg;
5099  
5100  	/* Things we already checked for in check_map_access and caller:
5101  	 *  - Reject cases where variable offset may touch kptr
5102  	 *  - size of access (must be BPF_DW)
5103  	 *  - tnum_is_const(reg->var_off)
5104  	 *  - kptr_field->offset == off + reg->var_off.value
5105  	 */
5106  	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
5107  	if (BPF_MODE(insn->code) != BPF_MEM) {
5108  		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
5109  		return -EACCES;
5110  	}
5111  
5112  	/* We only allow loading referenced kptr, since it will be marked as
5113  	 * untrusted, similar to unreferenced kptr.
5114  	 */
5115  	if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) {
5116  		verbose(env, "store to referenced kptr disallowed\n");
5117  		return -EACCES;
5118  	}
5119  
5120  	if (class == BPF_LDX) {
5121  		val_reg = reg_state(env, value_regno);
5122  		/* We can simply mark the value_regno receiving the pointer
5123  		 * value from map as PTR_TO_BTF_ID, with the correct type.
5124  		 */
5125  		mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
5126  				kptr_field->kptr.btf_id,
5127  				rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ?
5128  				PTR_MAYBE_NULL | MEM_RCU :
5129  				PTR_MAYBE_NULL | PTR_UNTRUSTED);
5130  		/* For mark_ptr_or_null_reg */
5131  		val_reg->id = ++env->id_gen;
5132  	} else if (class == BPF_STX) {
5133  		val_reg = reg_state(env, value_regno);
5134  		if (!register_is_null(val_reg) &&
5135  		    map_kptr_match_type(env, kptr_field, val_reg, value_regno))
5136  			return -EACCES;
5137  	} else if (class == BPF_ST) {
5138  		if (insn->imm) {
5139  			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
5140  				kptr_field->offset);
5141  			return -EACCES;
5142  		}
5143  	} else {
5144  		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
5145  		return -EACCES;
5146  	}
5147  	return 0;
5148  }
5149  
5150  /* check read/write into a map element with possible variable offset */
5151  static int check_map_access(struct bpf_verifier_env *env, u32 regno,
5152  			    int off, int size, bool zero_size_allowed,
5153  			    enum bpf_access_src src)
5154  {
5155  	struct bpf_verifier_state *vstate = env->cur_state;
5156  	struct bpf_func_state *state = vstate->frame[vstate->curframe];
5157  	struct bpf_reg_state *reg = &state->regs[regno];
5158  	struct bpf_map *map = reg->map_ptr;
5159  	struct btf_record *rec;
5160  	int err, i;
5161  
5162  	err = check_mem_region_access(env, regno, off, size, map->value_size,
5163  				      zero_size_allowed);
5164  	if (err)
5165  		return err;
5166  
5167  	if (IS_ERR_OR_NULL(map->record))
5168  		return 0;
5169  	rec = map->record;
5170  	for (i = 0; i < rec->cnt; i++) {
5171  		struct btf_field *field = &rec->fields[i];
5172  		u32 p = field->offset;
5173  
5174  		/* If any part of a field can be touched by load/store, reject
5175  		 * this program. To check that [x1, x2) overlaps with [y1, y2),
5176  		 * it is sufficient to check x1 < y2 && y1 < x2.
5177  		 */
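		/* e.g. a field occupying [8, 16) overlaps an access spanning
		 * [12, 20) because 8 < 20 && 12 < 16.
		 */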
5178  		if (reg->smin_value + off < p + btf_field_type_size(field->type) &&
5179  		    p < reg->umax_value + off + size) {
5180  			switch (field->type) {
5181  			case BPF_KPTR_UNREF:
5182  			case BPF_KPTR_REF:
5183  				if (src != ACCESS_DIRECT) {
5184  					verbose(env, "kptr cannot be accessed indirectly by helper\n");
5185  					return -EACCES;
5186  				}
5187  				if (!tnum_is_const(reg->var_off)) {
5188  					verbose(env, "kptr access cannot have variable offset\n");
5189  					return -EACCES;
5190  				}
5191  				if (p != off + reg->var_off.value) {
5192  					verbose(env, "kptr access misaligned expected=%u off=%llu\n",
5193  						p, off + reg->var_off.value);
5194  					return -EACCES;
5195  				}
5196  				if (size != bpf_size_to_bytes(BPF_DW)) {
5197  					verbose(env, "kptr access size must be BPF_DW\n");
5198  					return -EACCES;
5199  				}
5200  				break;
5201  			default:
5202  				verbose(env, "%s cannot be accessed directly by load/store\n",
5203  					btf_field_type_name(field->type));
5204  				return -EACCES;
5205  			}
5206  		}
5207  	}
5208  	return 0;
5209  }
5210  
5211  #define MAX_PACKET_OFF 0xffff
5212  
5213  static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
5214  				       const struct bpf_call_arg_meta *meta,
5215  				       enum bpf_access_type t)
5216  {
5217  	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
5218  
5219  	switch (prog_type) {
5220  	/* Program types only with direct read access go here! */
5221  	case BPF_PROG_TYPE_LWT_IN:
5222  	case BPF_PROG_TYPE_LWT_OUT:
5223  	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
5224  	case BPF_PROG_TYPE_SK_REUSEPORT:
5225  	case BPF_PROG_TYPE_FLOW_DISSECTOR:
5226  	case BPF_PROG_TYPE_CGROUP_SKB:
5227  		if (t == BPF_WRITE)
5228  			return false;
5229  		fallthrough;
5230  
5231  	/* Program types with direct read + write access go here! */
5232  	case BPF_PROG_TYPE_SCHED_CLS:
5233  	case BPF_PROG_TYPE_SCHED_ACT:
5234  	case BPF_PROG_TYPE_XDP:
5235  	case BPF_PROG_TYPE_LWT_XMIT:
5236  	case BPF_PROG_TYPE_SK_SKB:
5237  	case BPF_PROG_TYPE_SK_MSG:
5238  		if (meta)
5239  			return meta->pkt_access;
5240  
5241  		env->seen_direct_write = true;
5242  		return true;
5243  
5244  	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
5245  		if (t == BPF_WRITE)
5246  			env->seen_direct_write = true;
5247  
5248  		return true;
5249  
5250  	default:
5251  		return false;
5252  	}
5253  }
5254  
5255  static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
5256  			       int size, bool zero_size_allowed)
5257  {
5258  	struct bpf_reg_state *regs = cur_regs(env);
5259  	struct bpf_reg_state *reg = &regs[regno];
5260  	int err;
5261  
5262  	/* We may have added a variable offset to the packet pointer; but any
5263  	 * reg->range we have comes after that.  We are only checking the fixed
5264  	 * offset.
5265  	 */
5266  
5267  	/* We don't allow negative numbers, because we aren't tracking enough
5268  	 * detail to prove they're safe.
5269  	 */
5270  	if (reg->smin_value < 0) {
5271  		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5272  			regno);
5273  		return -EACCES;
5274  	}
5275  
5276  	err = reg->range < 0 ? -EINVAL :
5277  	      __check_mem_access(env, regno, off, size, reg->range,
5278  				 zero_size_allowed);
5279  	if (err) {
5280  		verbose(env, "R%d offset is outside of the packet\n", regno);
5281  		return err;
5282  	}
5283  
5284  	/* __check_mem_access has made sure "off + size - 1" is within u16.
5285  	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
5286  	 * otherwise find_good_pkt_pointers would have refused to set range info
5287  	 * and __check_mem_access would have rejected this pkt access.
5288  	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
5289  	 */
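	/* e.g. off=12, size=2 and reg->umax_value=100 raise max_pkt_offset
	 * to at least 12 + 100 + 2 - 1 = 113.
	 */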
5290  	env->prog->aux->max_pkt_offset =
5291  		max_t(u32, env->prog->aux->max_pkt_offset,
5292  		      off + reg->umax_value + size - 1);
5293  
5294  	return err;
5295  }
5296  
5297  /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
5298  static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
5299  			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
5300  			    struct btf **btf, u32 *btf_id)
5301  {
5302  	struct bpf_insn_access_aux info = {
5303  		.reg_type = *reg_type,
5304  		.log = &env->log,
5305  	};
5306  
5307  	if (env->ops->is_valid_access &&
5308  	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
5309  		/* A non zero info.ctx_field_size indicates that this field is a
5310  		 * candidate for later verifier transformation to load the whole
5311  		 * field and then apply a mask when accessed with a narrower
5312  		 * access than actual ctx access size. A zero info.ctx_field_size
5313  		 * will only allow for whole field access and rejects any other
5314  		 * type of narrower access.
5315  		 */
5316  		*reg_type = info.reg_type;
5317  
5318  		if (base_type(*reg_type) == PTR_TO_BTF_ID) {
5319  			*btf = info.btf;
5320  			*btf_id = info.btf_id;
5321  		} else {
5322  			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
5323  		}
5324  		/* remember the offset of last byte accessed in ctx */
5325  		if (env->prog->aux->max_ctx_offset < off + size)
5326  			env->prog->aux->max_ctx_offset = off + size;
5327  		return 0;
5328  	}
5329  
5330  	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
5331  	return -EACCES;
5332  }
5333  
5334  static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
5335  				  int size)
5336  {
5337  	if (size < 0 || off < 0 ||
5338  	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
5339  		verbose(env, "invalid access to flow keys off=%d size=%d\n",
5340  			off, size);
5341  		return -EACCES;
5342  	}
5343  	return 0;
5344  }
5345  
5346  static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
5347  			     u32 regno, int off, int size,
5348  			     enum bpf_access_type t)
5349  {
5350  	struct bpf_reg_state *regs = cur_regs(env);
5351  	struct bpf_reg_state *reg = &regs[regno];
5352  	struct bpf_insn_access_aux info = {};
5353  	bool valid;
5354  
5355  	if (reg->smin_value < 0) {
5356  		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5357  			regno);
5358  		return -EACCES;
5359  	}
5360  
5361  	switch (reg->type) {
5362  	case PTR_TO_SOCK_COMMON:
5363  		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
5364  		break;
5365  	case PTR_TO_SOCKET:
5366  		valid = bpf_sock_is_valid_access(off, size, t, &info);
5367  		break;
5368  	case PTR_TO_TCP_SOCK:
5369  		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
5370  		break;
5371  	case PTR_TO_XDP_SOCK:
5372  		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
5373  		break;
5374  	default:
5375  		valid = false;
5376  	}
5377  
5378  
5379  	if (valid) {
5380  		env->insn_aux_data[insn_idx].ctx_field_size =
5381  			info.ctx_field_size;
5382  		return 0;
5383  	}
5384  
5385  	verbose(env, "R%d invalid %s access off=%d size=%d\n",
5386  		regno, reg_type_str(env, reg->type), off, size);
5387  
5388  	return -EACCES;
5389  }
5390  
5391  static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
5392  {
5393  	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
5394  }
5395  
5396  static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
5397  {
5398  	const struct bpf_reg_state *reg = reg_state(env, regno);
5399  
5400  	return reg->type == PTR_TO_CTX;
5401  }
5402  
5403  static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
5404  {
5405  	const struct bpf_reg_state *reg = reg_state(env, regno);
5406  
5407  	return type_is_sk_pointer(reg->type);
5408  }
5409  
5410  static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
5411  {
5412  	const struct bpf_reg_state *reg = reg_state(env, regno);
5413  
5414  	return type_is_pkt_pointer(reg->type);
5415  }
5416  
5417  static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
5418  {
5419  	const struct bpf_reg_state *reg = reg_state(env, regno);
5420  
5421  	/* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
5422  	return reg->type == PTR_TO_FLOW_KEYS;
5423  }
5424  
5425  static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
5426  #ifdef CONFIG_NET
5427  	[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
5428  	[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
5429  	[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
5430  #endif
5431  	[CONST_PTR_TO_MAP] = btf_bpf_map_id,
5432  };
5433  
5434  static bool is_trusted_reg(const struct bpf_reg_state *reg)
5435  {
5436  	/* A referenced register is always trusted. */
5437  	if (reg->ref_obj_id)
5438  		return true;
5439  
5440  	/* Types listed in the reg2btf_ids are always trusted */
5441  	if (reg2btf_ids[base_type(reg->type)])
5442  		return true;
5443  
5444  	/* If a register is not referenced, it is trusted if it has the
5445  	 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
5446  	 * other type modifiers may be safe, but we elect to take an opt-in
5447  	 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
5448  	 * not.
5449  	 *
5450  	 * Eventually, we should make PTR_TRUSTED the single source of truth
5451  	 * for whether a register is trusted.
5452  	 */
5453  	return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
5454  	       !bpf_type_has_unsafe_modifiers(reg->type);
5455  }
5456  
5457  static bool is_rcu_reg(const struct bpf_reg_state *reg)
5458  {
5459  	return reg->type & MEM_RCU;
5460  }
5461  
5462  static void clear_trusted_flags(enum bpf_type_flag *flag)
5463  {
5464  	*flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
5465  }
5466  
5467  static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
5468  				   const struct bpf_reg_state *reg,
5469  				   int off, int size, bool strict)
5470  {
5471  	struct tnum reg_off;
5472  	int ip_align;
5473  
5474  	/* Byte size accesses are always allowed. */
5475  	if (!strict || size == 1)
5476  		return 0;
5477  
5478  	/* For platforms that do not have a Kconfig enabling
5479  	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
5480  	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
5481  	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
5482  	 * to this code only in strict mode where we want to emulate
5483  	 * the NET_IP_ALIGN==2 checking.  Therefore use an
5484  	 * unconditional IP align value of '2'.
5485  	 */
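	/* e.g. with a constant var_off of 0 and reg->off == 0, a 4-byte
	 * access at off == 14 passes (2 + 14 == 16 is 4-byte aligned) while
	 * one at off == 12 is rejected (2 + 12 == 14 is not).
	 */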
5486  	ip_align = 2;
5487  
5488  	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
5489  	if (!tnum_is_aligned(reg_off, size)) {
5490  		char tn_buf[48];
5491  
5492  		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5493  		verbose(env,
5494  			"misaligned packet access off %d+%s+%d+%d size %d\n",
5495  			ip_align, tn_buf, reg->off, off, size);
5496  		return -EACCES;
5497  	}
5498  
5499  	return 0;
5500  }
5501  
5502  static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
5503  				       const struct bpf_reg_state *reg,
5504  				       const char *pointer_desc,
5505  				       int off, int size, bool strict)
5506  {
5507  	struct tnum reg_off;
5508  
5509  	/* Byte size accesses are always allowed. */
5510  	if (!strict || size == 1)
5511  		return 0;
5512  
5513  	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
5514  	if (!tnum_is_aligned(reg_off, size)) {
5515  		char tn_buf[48];
5516  
5517  		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5518  		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
5519  			pointer_desc, tn_buf, reg->off, off, size);
5520  		return -EACCES;
5521  	}
5522  
5523  	return 0;
5524  }
5525  
5526  static int check_ptr_alignment(struct bpf_verifier_env *env,
5527  			       const struct bpf_reg_state *reg, int off,
5528  			       int size, bool strict_alignment_once)
5529  {
5530  	bool strict = env->strict_alignment || strict_alignment_once;
5531  	const char *pointer_desc = "";
5532  
5533  	switch (reg->type) {
5534  	case PTR_TO_PACKET:
5535  	case PTR_TO_PACKET_META:
5536  		/* Special case, because of NET_IP_ALIGN. Given metadata sits
5537  		 * right in front, treat it the very same way.
5538  		 */
5539  		return check_pkt_ptr_alignment(env, reg, off, size, strict);
5540  	case PTR_TO_FLOW_KEYS:
5541  		pointer_desc = "flow keys ";
5542  		break;
5543  	case PTR_TO_MAP_KEY:
5544  		pointer_desc = "key ";
5545  		break;
5546  	case PTR_TO_MAP_VALUE:
5547  		pointer_desc = "value ";
5548  		break;
5549  	case PTR_TO_CTX:
5550  		pointer_desc = "context ";
5551  		break;
5552  	case PTR_TO_STACK:
5553  		pointer_desc = "stack ";
5554  		/* The stack spill tracking logic in check_stack_write_fixed_off()
5555  		 * and check_stack_read_fixed_off() relies on stack accesses being
5556  		 * aligned.
5557  		 */
5558  		strict = true;
5559  		break;
5560  	case PTR_TO_SOCKET:
5561  		pointer_desc = "sock ";
5562  		break;
5563  	case PTR_TO_SOCK_COMMON:
5564  		pointer_desc = "sock_common ";
5565  		break;
5566  	case PTR_TO_TCP_SOCK:
5567  		pointer_desc = "tcp_sock ";
5568  		break;
5569  	case PTR_TO_XDP_SOCK:
5570  		pointer_desc = "xdp_sock ";
5571  		break;
5572  	default:
5573  		break;
5574  	}
5575  	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
5576  					   strict);
5577  }
5578  
5579  static int update_stack_depth(struct bpf_verifier_env *env,
5580  			      const struct bpf_func_state *func,
5581  			      int off)
5582  {
5583  	u16 stack = env->subprog_info[func->subprogno].stack_depth;
5584  
5585  	if (stack >= -off)
5586  		return 0;
5587  
5588  	/* update known max for given subprogram */
5589  	env->subprog_info[func->subprogno].stack_depth = -off;
5590  	return 0;
5591  }
5592  
5593  /* starting from main bpf function walk all instructions of the function
5594   * and recursively walk all callees that given function can call.
5595   * Ignore jump and exit insns.
5596   * Since recursion is prevented by check_cfg() this algorithm
5597   * only needs a local stack of MAX_CALL_FRAMES to remember callsites
5598   */
5599  static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
5600  {
5601  	struct bpf_subprog_info *subprog = env->subprog_info;
5602  	struct bpf_insn *insn = env->prog->insnsi;
5603  	int depth = 0, frame = 0, i, subprog_end;
5604  	bool tail_call_reachable = false;
5605  	int ret_insn[MAX_CALL_FRAMES];
5606  	int ret_prog[MAX_CALL_FRAMES];
5607  	int j;
5608  
5609  	i = subprog[idx].start;
5610  process_func:
5611  	/* protect against potential stack overflow that might happen when
5612  	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
5613  	 * depth for such a case down to 256 so that the worst-case scenario
5614  	 * would result in 8k stack size (32 which is tailcall limit * 256 =
5615  	 * 8k).
5616  	 *
5617  	 * To get the idea what might happen, see an example:
5618  	 * func1 -> sub rsp, 128
5619  	 *  subfunc1 -> sub rsp, 256
5620  	 *  tailcall1 -> add rsp, 256
5621  	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
5622  	 *   subfunc2 -> sub rsp, 64
5623  	 *   subfunc22 -> sub rsp, 128
5624  	 *   tailcall2 -> add rsp, 128
5625  	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
5626  	 *
5627  	 * tailcall will unwind the current stack frame but it will not get rid
5628  	 * of caller's stack as shown on the example above.
5629  	 */
5630  	if (idx && subprog[idx].has_tail_call && depth >= 256) {
5631  		verbose(env,
5632  			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
5633  			depth);
5634  		return -EACCES;
5635  	}
5636  	/* round up to 32-bytes, since this is granularity
5637  	 * of interpreter stack size
5638  	 */
5639  	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
5640  	if (depth > MAX_BPF_STACK) {
5641  		verbose(env, "combined stack size of %d calls is %d. Too large\n",
5642  			frame + 1, depth);
5643  		return -EACCES;
5644  	}
5645  continue_func:
5646  	subprog_end = subprog[idx + 1].start;
5647  	for (; i < subprog_end; i++) {
5648  		int next_insn, sidx;
5649  
5650  		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
5651  			continue;
5652  		/* remember insn and function to return to */
5653  		ret_insn[frame] = i + 1;
5654  		ret_prog[frame] = idx;
5655  
5656  		/* find the callee */
5657  		next_insn = i + insn[i].imm + 1;
5658  		sidx = find_subprog(env, next_insn);
5659  		if (sidx < 0) {
5660  			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
5661  				  next_insn);
5662  			return -EFAULT;
5663  		}
5664  		if (subprog[sidx].is_async_cb) {
5665  			if (subprog[sidx].has_tail_call) {
5666  				verbose(env, "verifier bug. subprog has tail_call and async cb\n");
5667  				return -EFAULT;
5668  			}
5669  			/* async callbacks don't increase bpf prog stack size unless called directly */
5670  			if (!bpf_pseudo_call(insn + i))
5671  				continue;
5672  		}
5673  		i = next_insn;
5674  		idx = sidx;
5675  
5676  		if (subprog[idx].has_tail_call)
5677  			tail_call_reachable = true;
5678  
5679  		frame++;
5680  		if (frame >= MAX_CALL_FRAMES) {
5681  			verbose(env, "the call stack of %d frames is too deep !\n",
5682  				frame);
5683  			return -E2BIG;
5684  		}
5685  		goto process_func;
5686  	}
5687  	/* if tail call got detected across bpf2bpf calls then mark each of the
5688  	 * currently present subprog frames as tail call reachable subprogs;
5689  	 * this info will be utilized by JIT so that we will be preserving the
5690  	 * tail call counter throughout bpf2bpf calls combined with tailcalls
5691  	 */
5692  	if (tail_call_reachable)
5693  		for (j = 0; j < frame; j++)
5694  			subprog[ret_prog[j]].tail_call_reachable = true;
5695  	if (subprog[0].tail_call_reachable)
5696  		env->prog->aux->tail_call_reachable = true;
5697  
5698  	/* end of for() loop means the last insn of the 'subprog'
5699  	 * was reached. Doesn't matter whether it was JA or EXIT
5700  	 */
5701  	if (frame == 0)
5702  		return 0;
5703  	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
5704  	frame--;
5705  	i = ret_insn[frame];
5706  	idx = ret_prog[frame];
5707  	goto continue_func;
5708  }
5709  
5710  static int check_max_stack_depth(struct bpf_verifier_env *env)
5711  {
5712  	struct bpf_subprog_info *si = env->subprog_info;
5713  	int ret;
5714  
5715  	for (int i = 0; i < env->subprog_cnt; i++) {
5716  		if (!i || si[i].is_async_cb) {
5717  			ret = check_max_stack_depth_subprog(env, i);
5718  			if (ret < 0)
5719  				return ret;
5720  		}
5721  		continue;
5722  	}
5723  	return 0;
5724  }
5725  
5726  #ifndef CONFIG_BPF_JIT_ALWAYS_ON
5727  static int get_callee_stack_depth(struct bpf_verifier_env *env,
5728  				  const struct bpf_insn *insn, int idx)
5729  {
5730  	int start = idx + insn->imm + 1, subprog;
5731  
5732  	subprog = find_subprog(env, start);
5733  	if (subprog < 0) {
5734  		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
5735  			  start);
5736  		return -EFAULT;
5737  	}
5738  	return env->subprog_info[subprog].stack_depth;
5739  }
5740  #endif
5741  
5742  static int __check_buffer_access(struct bpf_verifier_env *env,
5743  				 const char *buf_info,
5744  				 const struct bpf_reg_state *reg,
5745  				 int regno, int off, int size)
5746  {
5747  	if (off < 0) {
5748  		verbose(env,
5749  			"R%d invalid %s buffer access: off=%d, size=%d\n",
5750  			regno, buf_info, off, size);
5751  		return -EACCES;
5752  	}
5753  	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
5754  		char tn_buf[48];
5755  
5756  		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5757  		verbose(env,
5758  			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
5759  			regno, off, tn_buf);
5760  		return -EACCES;
5761  	}
5762  
5763  	return 0;
5764  }
5765  
5766  static int check_tp_buffer_access(struct bpf_verifier_env *env,
5767  				  const struct bpf_reg_state *reg,
5768  				  int regno, int off, int size)
5769  {
5770  	int err;
5771  
5772  	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
5773  	if (err)
5774  		return err;
5775  
5776  	if (off + size > env->prog->aux->max_tp_access)
5777  		env->prog->aux->max_tp_access = off + size;
5778  
5779  	return 0;
5780  }
5781  
5782  static int check_buffer_access(struct bpf_verifier_env *env,
5783  			       const struct bpf_reg_state *reg,
5784  			       int regno, int off, int size,
5785  			       bool zero_size_allowed,
5786  			       u32 *max_access)
5787  {
5788  	const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
5789  	int err;
5790  
5791  	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
5792  	if (err)
5793  		return err;
5794  
5795  	if (off + size > *max_access)
5796  		*max_access = off + size;
5797  
5798  	return 0;
5799  }
5800  
5801  /* BPF architecture zero extends alu32 ops into 64-bit registers */
5802  static void zext_32_to_64(struct bpf_reg_state *reg)
5803  {
5804  	reg->var_off = tnum_subreg(reg->var_off);
5805  	__reg_assign_32_into_64(reg);
5806  }
5807  
5808  /* truncate register to smaller size (in bytes)
5809   * must be called with size < BPF_REG_SIZE
5810   */
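/* For instance, truncating a register known to be in [0x104, 0x10a] to one
 * byte keeps tight bounds [0x04, 0x0a] since the masked-out high bits agree,
 * whereas [0xfe, 0x105] collapses to the full [0x00, 0xff] range.
 */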
5811  static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
5812  {
5813  	u64 mask;
5814  
5815  	/* clear high bits in bit representation */
5816  	reg->var_off = tnum_cast(reg->var_off, size);
5817  
5818  	/* fix arithmetic bounds */
5819  	mask = ((u64)1 << (size * 8)) - 1;
5820  	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
5821  		reg->umin_value &= mask;
5822  		reg->umax_value &= mask;
5823  	} else {
5824  		reg->umin_value = 0;
5825  		reg->umax_value = mask;
5826  	}
5827  	reg->smin_value = reg->umin_value;
5828  	reg->smax_value = reg->umax_value;
5829  
5830  	/* If size is smaller than 32bit register the 32bit register
5831  	 * values are also truncated so we push 64-bit bounds into
5832  	 * 32-bit bounds. Above were truncated < 32-bits already.
5833  	 */
5834  	if (size >= 4)
5835  		return;
5836  	__reg_combine_64_into_32(reg);
5837  }
5838  
5839  static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
5840  {
5841  	if (size == 1) {
5842  		reg->smin_value = reg->s32_min_value = S8_MIN;
5843  		reg->smax_value = reg->s32_max_value = S8_MAX;
5844  	} else if (size == 2) {
5845  		reg->smin_value = reg->s32_min_value = S16_MIN;
5846  		reg->smax_value = reg->s32_max_value = S16_MAX;
5847  	} else {
5848  		/* size == 4 */
5849  		reg->smin_value = reg->s32_min_value = S32_MIN;
5850  		reg->smax_value = reg->s32_max_value = S32_MAX;
5851  	}
5852  	reg->umin_value = reg->u32_min_value = 0;
5853  	reg->umax_value = U64_MAX;
5854  	reg->u32_max_value = U32_MAX;
5855  	reg->var_off = tnum_unknown;
5856  }
5857  
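/* Worked example for the sign-extending coercion below: a byte-sized
 * (size == 1) register bounded to [0x80, 0x85] has identical upper bits;
 * the sign-extended candidates are (s8)0x80 == -128 and (s8)0x85 == -123,
 * both negative, so the signed bounds become [-128, -123].
 */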
5858  static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
5859  {
5860  	s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
5861  	u64 top_smax_value, top_smin_value;
5862  	u64 num_bits = size * 8;
5863  
5864  	if (tnum_is_const(reg->var_off)) {
5865  		u64_cval = reg->var_off.value;
5866  		if (size == 1)
5867  			reg->var_off = tnum_const((s8)u64_cval);
5868  		else if (size == 2)
5869  			reg->var_off = tnum_const((s16)u64_cval);
5870  		else
5871  			/* size == 4 */
5872  			reg->var_off = tnum_const((s32)u64_cval);
5873  
5874  		u64_cval = reg->var_off.value;
5875  		reg->smax_value = reg->smin_value = u64_cval;
5876  		reg->umax_value = reg->umin_value = u64_cval;
5877  		reg->s32_max_value = reg->s32_min_value = u64_cval;
5878  		reg->u32_max_value = reg->u32_min_value = u64_cval;
5879  		return;
5880  	}
5881  
5882  	top_smax_value = ((u64)reg->smax_value >> num_bits) << num_bits;
5883  	top_smin_value = ((u64)reg->smin_value >> num_bits) << num_bits;
5884  
5885  	if (top_smax_value != top_smin_value)
5886  		goto out;
5887  
5888  	/* find the s64_min and s64_max after sign extension */
5889  	if (size == 1) {
5890  		init_s64_max = (s8)reg->smax_value;
5891  		init_s64_min = (s8)reg->smin_value;
5892  	} else if (size == 2) {
5893  		init_s64_max = (s16)reg->smax_value;
5894  		init_s64_min = (s16)reg->smin_value;
5895  	} else {
5896  		init_s64_max = (s32)reg->smax_value;
5897  		init_s64_min = (s32)reg->smin_value;
5898  	}
5899  
5900  	s64_max = max(init_s64_max, init_s64_min);
5901  	s64_min = min(init_s64_max, init_s64_min);
5902  
5903  	/* both of s64_max/s64_min positive or negative */
5904  	if ((s64_max >= 0) == (s64_min >= 0)) {
5905  		reg->smin_value = reg->s32_min_value = s64_min;
5906  		reg->smax_value = reg->s32_max_value = s64_max;
5907  		reg->umin_value = reg->u32_min_value = s64_min;
5908  		reg->umax_value = reg->u32_max_value = s64_max;
5909  		reg->var_off = tnum_range(s64_min, s64_max);
5910  		return;
5911  	}
5912  
5913  out:
5914  	set_sext64_default_val(reg, size);
5915  }
5916  
5917  static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
5918  {
5919  	if (size == 1) {
5920  		reg->s32_min_value = S8_MIN;
5921  		reg->s32_max_value = S8_MAX;
5922  	} else {
5923  		/* size == 2 */
5924  		reg->s32_min_value = S16_MIN;
5925  		reg->s32_max_value = S16_MAX;
5926  	}
5927  	reg->u32_min_value = 0;
5928  	reg->u32_max_value = U32_MAX;
5929  }
5930  
5931  static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
5932  {
5933  	s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
5934  	u32 top_smax_value, top_smin_value;
5935  	u32 num_bits = size * 8;
5936  
5937  	if (tnum_is_const(reg->var_off)) {
5938  		u32_val = reg->var_off.value;
5939  		if (size == 1)
5940  			reg->var_off = tnum_const((s8)u32_val);
5941  		else
5942  			reg->var_off = tnum_const((s16)u32_val);
5943  
5944  		u32_val = reg->var_off.value;
5945  		reg->s32_min_value = reg->s32_max_value = u32_val;
5946  		reg->u32_min_value = reg->u32_max_value = u32_val;
5947  		return;
5948  	}
5949  
5950  	top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits;
5951  	top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits;
5952  
5953  	if (top_smax_value != top_smin_value)
5954  		goto out;
5955  
5956  	/* find the s32_min and s32_max after sign extension */
5957  	if (size == 1) {
5958  		init_s32_max = (s8)reg->s32_max_value;
5959  		init_s32_min = (s8)reg->s32_min_value;
5960  	} else {
5961  		/* size == 2 */
5962  		init_s32_max = (s16)reg->s32_max_value;
5963  		init_s32_min = (s16)reg->s32_min_value;
5964  	}
5965  	s32_max = max(init_s32_max, init_s32_min);
5966  	s32_min = min(init_s32_max, init_s32_min);
5967  
5968  	if ((s32_min >= 0) == (s32_max >= 0)) {
5969  		reg->s32_min_value = s32_min;
5970  		reg->s32_max_value = s32_max;
5971  		reg->u32_min_value = (u32)s32_min;
5972  		reg->u32_max_value = (u32)s32_max;
5973  		return;
5974  	}
5975  
5976  out:
5977  	set_sext32_default_val(reg, size);
5978  }
5979  
5980  static bool bpf_map_is_rdonly(const struct bpf_map *map)
5981  {
5982  	/* A map is considered read-only if the following conditions are true:
5983  	 *
5984  	 * 1) BPF program side cannot change any of the map content. The
5985  	 *    BPF_F_RDONLY_PROG flag is set at map creation time and stays
5986  	 *    in effect throughout the lifetime of the map.
5987  	 * 2) The map value(s) have been initialized from user space by a
5988  	 *    loader and then "frozen", such that no new map update/delete
5989  	 *    operations from syscall side are possible for the rest of
5990  	 *    the map's lifetime from that point onwards.
5991  	 * 3) Any parallel/pending map update/delete operations from syscall
5992  	 *    side have been completed. Only after that point, it's safe to
5993  	 *    assume that map value(s) are immutable.
5994  	 */
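	/* A rough user-space sketch of how such a map is typically produced
	 * (this is what libbpf does for frozen .rodata global data):
	 *
	 *	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, value_size, 1,
	 *			    &opts);	// opts.map_flags = BPF_F_RDONLY_PROG
	 *	bpf_map_update_elem(fd, &zero, init_data, BPF_ANY);
	 *	bpf_map_freeze(fd);		// BPF_MAP_FREEZE command
	 */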
5995  	return (map->map_flags & BPF_F_RDONLY_PROG) &&
5996  	       READ_ONCE(map->frozen) &&
5997  	       !bpf_map_write_active(map);
5998  }
5999  
6000  static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
6001  			       bool is_ldsx)
6002  {
6003  	void *ptr;
6004  	u64 addr;
6005  	int err;
6006  
6007  	err = map->ops->map_direct_value_addr(map, &addr, off);
6008  	if (err)
6009  		return err;
6010  	ptr = (void *)(long)addr + off;
6011  
6012  	switch (size) {
6013  	case sizeof(u8):
6014  		*val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr;
6015  		break;
6016  	case sizeof(u16):
6017  		*val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr;
6018  		break;
6019  	case sizeof(u32):
6020  		*val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr;
6021  		break;
6022  	case sizeof(u64):
6023  		*val = *(u64 *)ptr;
6024  		break;
6025  	default:
6026  		return -EINVAL;
6027  	}
6028  	return 0;
6029  }
6030  
6031  #define BTF_TYPE_SAFE_RCU(__type)  __PASTE(__type, __safe_rcu)
6032  #define BTF_TYPE_SAFE_RCU_OR_NULL(__type)  __PASTE(__type, __safe_rcu_or_null)
6033  #define BTF_TYPE_SAFE_TRUSTED(__type)  __PASTE(__type, __safe_trusted)
6034  
6035  /*
6036   * Allow-list a few fields as RCU-trusted or fully trusted.
6037   * This logic doesn't allow mixed tagging and will be removed once GCC supports
6038   * btf_type_tag.
6039   */
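/* For example, in a sleepable tracing program (with 'task' being a trusted
 * function argument)
 *
 *	bpf_rcu_read_lock();
 *	cpus = task->cpus_ptr;		// MEM_RCU due to the list below
 *	bpf_rcu_read_unlock();
 *
 * the dereference yields a trusted (MEM_RCU) pointer inside the RCU
 * section, while the same walk outside of it is marked PTR_UNTRUSTED.
 */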
6040  
6041  /* RCU trusted: these fields are trusted in RCU CS and never NULL */
6042  BTF_TYPE_SAFE_RCU(struct task_struct) {
6043  	const cpumask_t *cpus_ptr;
6044  	struct css_set __rcu *cgroups;
6045  	struct task_struct __rcu *real_parent;
6046  	struct task_struct *group_leader;
6047  };
6048  
6049  BTF_TYPE_SAFE_RCU(struct cgroup) {
6050  	/* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
6051  	struct kernfs_node *kn;
6052  };
6053  
6054  BTF_TYPE_SAFE_RCU(struct css_set) {
6055  	struct cgroup *dfl_cgrp;
6056  };
6057  
6058  /* RCU trusted: these fields are trusted in RCU CS and can be NULL */
6059  BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
6060  	struct file __rcu *exe_file;
6061  };
6062  
6063  /* skb->sk, req->sk are not RCU protected, but we mark them as such
6064   * because bpf prog accessible sockets are SOCK_RCU_FREE.
6065   */
6066  BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
6067  	struct sock *sk;
6068  };
6069  
6070  BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
6071  	struct sock *sk;
6072  };
6073  
6074  /* full trusted: these fields are trusted even outside of RCU CS and never NULL */
6075  BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
6076  	struct seq_file *seq;
6077  };
6078  
6079  BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
6080  	struct bpf_iter_meta *meta;
6081  	struct task_struct *task;
6082  };
6083  
6084  BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
6085  	struct file *file;
6086  };
6087  
6088  BTF_TYPE_SAFE_TRUSTED(struct file) {
6089  	struct inode *f_inode;
6090  };
6091  
6092  BTF_TYPE_SAFE_TRUSTED(struct dentry) {
6093  	/* no negative dentry-s in places where bpf can see it */
6094  	struct inode *d_inode;
6095  };
6096  
6097  BTF_TYPE_SAFE_TRUSTED(struct socket) {
6098  	struct sock *sk;
6099  };
6100  
6101  static bool type_is_rcu(struct bpf_verifier_env *env,
6102  			struct bpf_reg_state *reg,
6103  			const char *field_name, u32 btf_id)
6104  {
6105  	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
6106  	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
6107  	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
6108  
6109  	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
6110  }
6111  
6112  static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
6113  				struct bpf_reg_state *reg,
6114  				const char *field_name, u32 btf_id)
6115  {
6116  	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
6117  	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
6118  	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));
6119  
6120  	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
6121  }
6122  
6123  static bool type_is_trusted(struct bpf_verifier_env *env,
6124  			    struct bpf_reg_state *reg,
6125  			    const char *field_name, u32 btf_id)
6126  {
6127  	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
6128  	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
6129  	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
6130  	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
6131  	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
6132  	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
6133  
6134  	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
6135  }
6136  
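/* Check a load through a PTR_TO_BTF_ID pointer and decide which trust
 * flags the resulting pointer inherits. For example, walking
 *
 *	parent = task->real_parent;
 *
 * in a sleepable program yields MEM_RCU only between bpf_rcu_read_lock()
 * and bpf_rcu_read_unlock(); outside of that section the result is marked
 * PTR_UNTRUSTED (see the flag handling below).
 */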
6137  static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
6138  				   struct bpf_reg_state *regs,
6139  				   int regno, int off, int size,
6140  				   enum bpf_access_type atype,
6141  				   int value_regno)
6142  {
6143  	struct bpf_reg_state *reg = regs + regno;
6144  	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
6145  	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
6146  	const char *field_name = NULL;
6147  	enum bpf_type_flag flag = 0;
6148  	u32 btf_id = 0;
6149  	int ret;
6150  
6151  	if (!env->allow_ptr_leaks) {
6152  		verbose(env,
6153  			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
6154  			tname);
6155  		return -EPERM;
6156  	}
6157  	if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
6158  		verbose(env,
6159  			"Cannot access kernel 'struct %s' from non-GPL compatible program\n",
6160  			tname);
6161  		return -EINVAL;
6162  	}
6163  	if (off < 0) {
6164  		verbose(env,
6165  			"R%d is ptr_%s invalid negative access: off=%d\n",
6166  			regno, tname, off);
6167  		return -EACCES;
6168  	}
6169  	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
6170  		char tn_buf[48];
6171  
6172  		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6173  		verbose(env,
6174  			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
6175  			regno, tname, off, tn_buf);
6176  		return -EACCES;
6177  	}
6178  
6179  	if (reg->type & MEM_USER) {
6180  		verbose(env,
6181  			"R%d is ptr_%s access user memory: off=%d\n",
6182  			regno, tname, off);
6183  		return -EACCES;
6184  	}
6185  
6186  	if (reg->type & MEM_PERCPU) {
6187  		verbose(env,
6188  			"R%d is ptr_%s access percpu memory: off=%d\n",
6189  			regno, tname, off);
6190  		return -EACCES;
6191  	}
6192  
6193  	if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
6194  		if (!btf_is_kernel(reg->btf)) {
6195  			verbose(env, "verifier internal error: reg->btf must be kernel btf\n");
6196  			return -EFAULT;
6197  		}
6198  		ret = env->ops->btf_struct_access(&env->log, reg, off, size);
6199  	} else {
6200  		/* Writes are permitted with default btf_struct_access for
6201  		 * program allocated objects (which always have ref_obj_id > 0),
6202  		 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
6203  		 */
6204  		if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
6205  			verbose(env, "only read is supported\n");
6206  			return -EACCES;
6207  		}
6208  
6209  		if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
6210  		    !reg->ref_obj_id) {
6211  			verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
6212  			return -EFAULT;
6213  		}
6214  
6215  		ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
6216  	}
6217  
6218  	if (ret < 0)
6219  		return ret;
6220  
6221  	if (ret != PTR_TO_BTF_ID) {
6222  		/* just mark; */
6223  
6224  	} else if (type_flag(reg->type) & PTR_UNTRUSTED) {
6225  		/* If this is an untrusted pointer, all pointers formed by walking it
6226  		 * also inherit the untrusted flag.
6227  		 */
6228  		flag = PTR_UNTRUSTED;
6229  
6230  	} else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
6231  		/* By default any pointer obtained from walking a trusted pointer is no
6232  		 * longer trusted, unless the field being accessed has explicitly been
6233  		 * marked as inheriting its parent's state of trust (either full or RCU).
6234  		 * For example:
6235  		 * 'cgroups' pointer is untrusted if task->cgroups dereference
6236  		 * happened in a sleepable program outside of bpf_rcu_read_lock()
6237  		 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
6238  		 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
6239  		 *
6240  		 * A regular RCU-protected pointer with __rcu tag can also be deemed
6241  		 * trusted if we are in an RCU CS. Such pointer can be NULL.
6242  		 */
6243  		if (type_is_trusted(env, reg, field_name, btf_id)) {
6244  			flag |= PTR_TRUSTED;
6245  		} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
6246  			if (type_is_rcu(env, reg, field_name, btf_id)) {
6247  				/* ignore __rcu tag and mark it MEM_RCU */
6248  				flag |= MEM_RCU;
6249  			} else if (flag & MEM_RCU ||
6250  				   type_is_rcu_or_null(env, reg, field_name, btf_id)) {
6251  				/* __rcu tagged pointers can be NULL */
6252  				flag |= MEM_RCU | PTR_MAYBE_NULL;
6253  
6254  				/* We always trust them */
6255  				if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
6256  				    flag & PTR_UNTRUSTED)
6257  					flag &= ~PTR_UNTRUSTED;
6258  			} else if (flag & (MEM_PERCPU | MEM_USER)) {
6259  				/* keep as-is */
6260  			} else {
6261  				/* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
6262  				clear_trusted_flags(&flag);
6263  			}
6264  		} else {
6265  			/*
6266  			 * If not in RCU CS or MEM_RCU pointer can be NULL then
6267  			 * aggressively mark as untrusted otherwise such
6268  			 * pointers will be plain PTR_TO_BTF_ID without flags
6269  			 * and will be allowed to be passed into helpers for
6270  			 * compat reasons.
6271  			 */
6272  			flag = PTR_UNTRUSTED;
6273  		}
6274  	} else {
6275  		/* Old compat. Deprecated */
6276  		clear_trusted_flags(&flag);
6277  	}
6278  
6279  	if (atype == BPF_READ && value_regno >= 0)
6280  		mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
6281  
6282  	return 0;
6283  }
6284  
6285  static int check_ptr_to_map_access(struct bpf_verifier_env *env,
6286  				   struct bpf_reg_state *regs,
6287  				   int regno, int off, int size,
6288  				   enum bpf_access_type atype,
6289  				   int value_regno)
6290  {
6291  	struct bpf_reg_state *reg = regs + regno;
6292  	struct bpf_map *map = reg->map_ptr;
6293  	struct bpf_reg_state map_reg;
6294  	enum bpf_type_flag flag = 0;
6295  	const struct btf_type *t;
6296  	const char *tname;
6297  	u32 btf_id;
6298  	int ret;
6299  
6300  	if (!btf_vmlinux) {
6301  		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
6302  		return -ENOTSUPP;
6303  	}
6304  
6305  	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
6306  		verbose(env, "map_ptr access not supported for map type %d\n",
6307  			map->map_type);
6308  		return -ENOTSUPP;
6309  	}
6310  
6311  	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
6312  	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
6313  
6314  	if (!env->allow_ptr_leaks) {
6315  		verbose(env,
6316  			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
6317  			tname);
6318  		return -EPERM;
6319  	}
6320  
6321  	if (off < 0) {
6322  		verbose(env, "R%d is %s invalid negative access: off=%d\n",
6323  			regno, tname, off);
6324  		return -EACCES;
6325  	}
6326  
6327  	if (atype != BPF_READ) {
6328  		verbose(env, "only read from %s is supported\n", tname);
6329  		return -EACCES;
6330  	}
6331  
6332  	/* Simulate access to a PTR_TO_BTF_ID */
6333  	memset(&map_reg, 0, sizeof(map_reg));
6334  	mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0);
6335  	ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
6336  	if (ret < 0)
6337  		return ret;
6338  
6339  	if (value_regno >= 0)
6340  		mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
6341  
6342  	return 0;
6343  }
6344  
6345  /* Check that the stack access at the given offset is within bounds. The
6346   * maximum valid offset is -1.
6347   *
6348   * The minimum valid offset is -MAX_BPF_STACK for writes, and
6349   * -state->allocated_stack for reads.
6350   */
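/* E.g. for a write, off = -8 is accepted while off = 0 (at or above the
 * frame pointer) or off = -(MAX_BPF_STACK + 1) is rejected; reads must
 * additionally stay within the currently allocated stack.
 */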
6351  static int check_stack_slot_within_bounds(int off,
6352  					  struct bpf_func_state *state,
6353  					  enum bpf_access_type t)
6354  {
6355  	int min_valid_off;
6356  
6357  	if (t == BPF_WRITE)
6358  		min_valid_off = -MAX_BPF_STACK;
6359  	else
6360  		min_valid_off = -state->allocated_stack;
6361  
6362  	if (off < min_valid_off || off > -1)
6363  		return -EACCES;
6364  	return 0;
6365  }
6366  
6367  /* Check that the stack access at 'regno + off' falls within the maximum stack
6368   * bounds.
6369   *
6370   * 'off' includes `regno->offset`, but not its dynamic part (if any).
6371   */
6372  static int check_stack_access_within_bounds(
6373  		struct bpf_verifier_env *env,
6374  		int regno, int off, int access_size,
6375  		enum bpf_access_src src, enum bpf_access_type type)
6376  {
6377  	struct bpf_reg_state *regs = cur_regs(env);
6378  	struct bpf_reg_state *reg = regs + regno;
6379  	struct bpf_func_state *state = func(env, reg);
6380  	int min_off, max_off;
6381  	int err;
6382  	char *err_extra;
6383  
6384  	if (src == ACCESS_HELPER)
6385  		/* We don't know if helpers are reading or writing (or both). */
6386  		err_extra = " indirect access to";
6387  	else if (type == BPF_READ)
6388  		err_extra = " read from";
6389  	else
6390  		err_extra = " write to";
6391  
6392  	if (tnum_is_const(reg->var_off)) {
6393  		min_off = reg->var_off.value + off;
6394  		if (access_size > 0)
6395  			max_off = min_off + access_size - 1;
6396  		else
6397  			max_off = min_off;
6398  	} else {
6399  		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
6400  		    reg->smin_value <= -BPF_MAX_VAR_OFF) {
6401  			verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
6402  				err_extra, regno);
6403  			return -EACCES;
6404  		}
6405  		min_off = reg->smin_value + off;
6406  		if (access_size > 0)
6407  			max_off = reg->smax_value + off + access_size - 1;
6408  		else
6409  			max_off = min_off;
6410  	}
6411  
6412  	err = check_stack_slot_within_bounds(min_off, state, type);
6413  	if (!err)
6414  		err = check_stack_slot_within_bounds(max_off, state, type);
6415  
6416  	if (err) {
6417  		if (tnum_is_const(reg->var_off)) {
6418  			verbose(env, "invalid%s stack R%d off=%d size=%d\n",
6419  				err_extra, regno, off, access_size);
6420  		} else {
6421  			char tn_buf[48];
6422  
6423  			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6424  			verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
6425  				err_extra, regno, tn_buf, access_size);
6426  		}
6427  	}
6428  	return err;
6429  }
6430  
6431  /* check whether memory at (regno + off) is accessible for t = (read | write)
6432   * if t==write, value_regno is a register whose value is stored into memory
6433   * if t==read, value_regno is a register which will receive the value from memory
6434   * if t==write && value_regno==-1, some unknown value is stored into memory
6435   * if t==read && value_regno==-1, don't care what we read from memory
6436   */
6437  static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
6438  			    int off, int bpf_size, enum bpf_access_type t,
6439  			    int value_regno, bool strict_alignment_once, bool is_ldsx)
6440  {
6441  	struct bpf_reg_state *regs = cur_regs(env);
6442  	struct bpf_reg_state *reg = regs + regno;
6443  	struct bpf_func_state *state;
6444  	int size, err = 0;
6445  
6446  	size = bpf_size_to_bytes(bpf_size);
6447  	if (size < 0)
6448  		return size;
6449  
6450  	/* alignment checks will add in reg->off themselves */
6451  	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
6452  	if (err)
6453  		return err;
6454  
6455  	/* for access checks, reg->off is just part of off */
6456  	off += reg->off;
6457  
6458  	if (reg->type == PTR_TO_MAP_KEY) {
6459  		if (t == BPF_WRITE) {
6460  			verbose(env, "write to change key R%d not allowed\n", regno);
6461  			return -EACCES;
6462  		}
6463  
6464  		err = check_mem_region_access(env, regno, off, size,
6465  					      reg->map_ptr->key_size, false);
6466  		if (err)
6467  			return err;
6468  		if (value_regno >= 0)
6469  			mark_reg_unknown(env, regs, value_regno);
6470  	} else if (reg->type == PTR_TO_MAP_VALUE) {
6471  		struct btf_field *kptr_field = NULL;
6472  
6473  		if (t == BPF_WRITE && value_regno >= 0 &&
6474  		    is_pointer_value(env, value_regno)) {
6475  			verbose(env, "R%d leaks addr into map\n", value_regno);
6476  			return -EACCES;
6477  		}
6478  		err = check_map_access_type(env, regno, off, size, t);
6479  		if (err)
6480  			return err;
6481  		err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
6482  		if (err)
6483  			return err;
6484  		if (tnum_is_const(reg->var_off))
6485  			kptr_field = btf_record_find(reg->map_ptr->record,
6486  						     off + reg->var_off.value, BPF_KPTR);
6487  		if (kptr_field) {
6488  			err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
6489  		} else if (t == BPF_READ && value_regno >= 0) {
6490  			struct bpf_map *map = reg->map_ptr;
6491  
6492  			/* if map is read-only, track its contents as scalars */
6493  			if (tnum_is_const(reg->var_off) &&
6494  			    bpf_map_is_rdonly(map) &&
6495  			    map->ops->map_direct_value_addr) {
6496  				int map_off = off + reg->var_off.value;
6497  				u64 val = 0;
6498  
6499  				err = bpf_map_direct_read(map, map_off, size,
6500  							  &val, is_ldsx);
6501  				if (err)
6502  					return err;
6503  
6504  				regs[value_regno].type = SCALAR_VALUE;
6505  				__mark_reg_known(&regs[value_regno], val);
6506  			} else {
6507  				mark_reg_unknown(env, regs, value_regno);
6508  			}
6509  		}
6510  	} else if (base_type(reg->type) == PTR_TO_MEM) {
6511  		bool rdonly_mem = type_is_rdonly_mem(reg->type);
6512  
6513  		if (type_may_be_null(reg->type)) {
6514  			verbose(env, "R%d invalid mem access '%s'\n", regno,
6515  				reg_type_str(env, reg->type));
6516  			return -EACCES;
6517  		}
6518  
6519  		if (t == BPF_WRITE && rdonly_mem) {
6520  			verbose(env, "R%d cannot write into %s\n",
6521  				regno, reg_type_str(env, reg->type));
6522  			return -EACCES;
6523  		}
6524  
6525  		if (t == BPF_WRITE && value_regno >= 0 &&
6526  		    is_pointer_value(env, value_regno)) {
6527  			verbose(env, "R%d leaks addr into mem\n", value_regno);
6528  			return -EACCES;
6529  		}
6530  
6531  		err = check_mem_region_access(env, regno, off, size,
6532  					      reg->mem_size, false);
6533  		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
6534  			mark_reg_unknown(env, regs, value_regno);
6535  	} else if (reg->type == PTR_TO_CTX) {
6536  		enum bpf_reg_type reg_type = SCALAR_VALUE;
6537  		struct btf *btf = NULL;
6538  		u32 btf_id = 0;
6539  
6540  		if (t == BPF_WRITE && value_regno >= 0 &&
6541  		    is_pointer_value(env, value_regno)) {
6542  			verbose(env, "R%d leaks addr into ctx\n", value_regno);
6543  			return -EACCES;
6544  		}
6545  
6546  		err = check_ptr_off_reg(env, reg, regno);
6547  		if (err < 0)
6548  			return err;
6549  
6550  		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
6551  				       &btf_id);
6552  		if (err)
6553  			verbose_linfo(env, insn_idx, "; ");
6554  		if (!err && t == BPF_READ && value_regno >= 0) {
6555  			/* ctx access returns either a scalar, or a
6556  			 * PTR_TO_PACKET[_META,_END]. In the latter
6557  			 * case, we know the offset is zero.
6558  			 */
6559  			if (reg_type == SCALAR_VALUE) {
6560  				mark_reg_unknown(env, regs, value_regno);
6561  			} else {
6562  				mark_reg_known_zero(env, regs,
6563  						    value_regno);
6564  				if (type_may_be_null(reg_type))
6565  					regs[value_regno].id = ++env->id_gen;
6566  				/* A load of a ctx field could have a different
6567  				 * actual load size than the one encoded in the
6568  				 * insn. When the dst is a PTR, it is for sure
6569  				 * not a sub-register.
6570  				 */
6571  				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
6572  				if (base_type(reg_type) == PTR_TO_BTF_ID) {
6573  					regs[value_regno].btf = btf;
6574  					regs[value_regno].btf_id = btf_id;
6575  				}
6576  			}
6577  			regs[value_regno].type = reg_type;
6578  		}
6579  
6580  	} else if (reg->type == PTR_TO_STACK) {
6581  		/* Basic bounds checks. */
6582  		err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
6583  		if (err)
6584  			return err;
6585  
6586  		state = func(env, reg);
6587  		err = update_stack_depth(env, state, off);
6588  		if (err)
6589  			return err;
6590  
6591  		if (t == BPF_READ)
6592  			err = check_stack_read(env, regno, off, size,
6593  					       value_regno);
6594  		else
6595  			err = check_stack_write(env, regno, off, size,
6596  						value_regno, insn_idx);
6597  	} else if (reg_is_pkt_pointer(reg)) {
6598  		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
6599  			verbose(env, "cannot write into packet\n");
6600  			return -EACCES;
6601  		}
6602  		if (t == BPF_WRITE && value_regno >= 0 &&
6603  		    is_pointer_value(env, value_regno)) {
6604  			verbose(env, "R%d leaks addr into packet\n",
6605  				value_regno);
6606  			return -EACCES;
6607  		}
6608  		err = check_packet_access(env, regno, off, size, false);
6609  		if (!err && t == BPF_READ && value_regno >= 0)
6610  			mark_reg_unknown(env, regs, value_regno);
6611  	} else if (reg->type == PTR_TO_FLOW_KEYS) {
6612  		if (t == BPF_WRITE && value_regno >= 0 &&
6613  		    is_pointer_value(env, value_regno)) {
6614  			verbose(env, "R%d leaks addr into flow keys\n",
6615  				value_regno);
6616  			return -EACCES;
6617  		}
6618  
6619  		err = check_flow_keys_access(env, off, size);
6620  		if (!err && t == BPF_READ && value_regno >= 0)
6621  			mark_reg_unknown(env, regs, value_regno);
6622  	} else if (type_is_sk_pointer(reg->type)) {
6623  		if (t == BPF_WRITE) {
6624  			verbose(env, "R%d cannot write into %s\n",
6625  				regno, reg_type_str(env, reg->type));
6626  			return -EACCES;
6627  		}
6628  		err = check_sock_access(env, insn_idx, regno, off, size, t);
6629  		if (!err && value_regno >= 0)
6630  			mark_reg_unknown(env, regs, value_regno);
6631  	} else if (reg->type == PTR_TO_TP_BUFFER) {
6632  		err = check_tp_buffer_access(env, reg, regno, off, size);
6633  		if (!err && t == BPF_READ && value_regno >= 0)
6634  			mark_reg_unknown(env, regs, value_regno);
6635  	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
6636  		   !type_may_be_null(reg->type)) {
6637  		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
6638  					      value_regno);
6639  	} else if (reg->type == CONST_PTR_TO_MAP) {
6640  		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
6641  					      value_regno);
6642  	} else if (base_type(reg->type) == PTR_TO_BUF) {
6643  		bool rdonly_mem = type_is_rdonly_mem(reg->type);
6644  		u32 *max_access;
6645  
6646  		if (rdonly_mem) {
6647  			if (t == BPF_WRITE) {
6648  				verbose(env, "R%d cannot write into %s\n",
6649  					regno, reg_type_str(env, reg->type));
6650  				return -EACCES;
6651  			}
6652  			max_access = &env->prog->aux->max_rdonly_access;
6653  		} else {
6654  			max_access = &env->prog->aux->max_rdwr_access;
6655  		}
6656  
6657  		err = check_buffer_access(env, reg, regno, off, size, false,
6658  					  max_access);
6659  
6660  		if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
6661  			mark_reg_unknown(env, regs, value_regno);
6662  	} else {
6663  		verbose(env, "R%d invalid mem access '%s'\n", regno,
6664  			reg_type_str(env, reg->type));
6665  		return -EACCES;
6666  	}
6667  
6668  	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
6669  	    regs[value_regno].type == SCALAR_VALUE) {
6670  		if (!is_ldsx)
6671  			/* b/h/w load zero-extends, mark upper bits as known 0 */
6672  			coerce_reg_to_size(&regs[value_regno], size);
6673  		else
6674  			coerce_reg_to_size_sx(&regs[value_regno], size);
6675  	}
6676  	return err;
6677  }
6678  
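/* Verify a BPF_ATOMIC instruction, e.g. (built with the BPF_ATOMIC_OP()
 * insn-building macro) the fetching add
 *
 *	BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8)
 *
 * which atomically adds R1 to the 8 bytes at R10-8 and loads the old value
 * back into R1, so both a read and a write of that location must be allowed.
 */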
6679  static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
6680  {
6681  	int load_reg;
6682  	int err;
6683  
6684  	switch (insn->imm) {
6685  	case BPF_ADD:
6686  	case BPF_ADD | BPF_FETCH:
6687  	case BPF_AND:
6688  	case BPF_AND | BPF_FETCH:
6689  	case BPF_OR:
6690  	case BPF_OR | BPF_FETCH:
6691  	case BPF_XOR:
6692  	case BPF_XOR | BPF_FETCH:
6693  	case BPF_XCHG:
6694  	case BPF_CMPXCHG:
6695  		break;
6696  	default:
6697  		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
6698  		return -EINVAL;
6699  	}
6700  
6701  	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
6702  		verbose(env, "invalid atomic operand size\n");
6703  		return -EINVAL;
6704  	}
6705  
6706  	/* check src1 operand */
6707  	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6708  	if (err)
6709  		return err;
6710  
6711  	/* check src2 operand */
6712  	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6713  	if (err)
6714  		return err;
6715  
6716  	if (insn->imm == BPF_CMPXCHG) {
6717  		/* Check comparison of R0 with memory location */
6718  		const u32 aux_reg = BPF_REG_0;
6719  
6720  		err = check_reg_arg(env, aux_reg, SRC_OP);
6721  		if (err)
6722  			return err;
6723  
6724  		if (is_pointer_value(env, aux_reg)) {
6725  			verbose(env, "R%d leaks addr into mem\n", aux_reg);
6726  			return -EACCES;
6727  		}
6728  	}
6729  
6730  	if (is_pointer_value(env, insn->src_reg)) {
6731  		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6732  		return -EACCES;
6733  	}
6734  
6735  	if (is_ctx_reg(env, insn->dst_reg) ||
6736  	    is_pkt_reg(env, insn->dst_reg) ||
6737  	    is_flow_key_reg(env, insn->dst_reg) ||
6738  	    is_sk_reg(env, insn->dst_reg)) {
6739  		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
6740  			insn->dst_reg,
6741  			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
6742  		return -EACCES;
6743  	}
6744  
6745  	if (insn->imm & BPF_FETCH) {
6746  		if (insn->imm == BPF_CMPXCHG)
6747  			load_reg = BPF_REG_0;
6748  		else
6749  			load_reg = insn->src_reg;
6750  
6751  		/* check and record load of old value */
6752  		err = check_reg_arg(env, load_reg, DST_OP);
6753  		if (err)
6754  			return err;
6755  	} else {
6756  		/* This instruction accesses a memory location but doesn't
6757  		 * actually load it into a register.
6758  		 */
6759  		load_reg = -1;
6760  	}
6761  
6762  	/* Check whether we can read the memory, with a second call for the
6763  	 * fetch case to simulate the register fill.
6764  	 */
6765  	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
6766  			       BPF_SIZE(insn->code), BPF_READ, -1, true, false);
6767  	if (!err && load_reg >= 0)
6768  		err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
6769  				       BPF_SIZE(insn->code), BPF_READ, load_reg,
6770  				       true, false);
6771  	if (err)
6772  		return err;
6773  
6774  	/* Check whether we can write into the same memory. */
6775  	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
6776  			       BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
6777  	if (err)
6778  		return err;
6779  
6780  	return 0;
6781  }
6782  
6783  /* When register 'regno' is used to read the stack (either directly or through
6784   * a helper function) make sure that it's within stack boundary and, depending
6785   * on the access type, that all elements of the stack are initialized.
6786   *
6787   * 'off' includes 'regno->off', but not its dynamic part (if any).
6788   *
6789   * All registers that have been spilled on the stack in the slots within the
6790   * read offsets are marked as read.
6791   */
6792  static int check_stack_range_initialized(
6793  		struct bpf_verifier_env *env, int regno, int off,
6794  		int access_size, bool zero_size_allowed,
6795  		enum bpf_access_src type, struct bpf_call_arg_meta *meta)
6796  {
6797  	struct bpf_reg_state *reg = reg_state(env, regno);
6798  	struct bpf_func_state *state = func(env, reg);
6799  	int err, min_off, max_off, i, j, slot, spi;
6800  	char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
6801  	enum bpf_access_type bounds_check_type;
6802  	/* Some accesses can write anything into the stack, others are
6803  	 * read-only.
6804  	 */
6805  	bool clobber = false;
6806  
6807  	if (access_size == 0 && !zero_size_allowed) {
6808  		verbose(env, "invalid zero-sized read\n");
6809  		return -EACCES;
6810  	}
6811  
6812  	if (type == ACCESS_HELPER) {
6813  		/* The bounds checks for writes are more permissive than for
6814  		 * reads. However, if raw_mode is not set, we'll do extra
6815  		 * checks below.
6816  		 */
6817  		bounds_check_type = BPF_WRITE;
6818  		clobber = true;
6819  	} else {
6820  		bounds_check_type = BPF_READ;
6821  	}
6822  	err = check_stack_access_within_bounds(env, regno, off, access_size,
6823  					       type, bounds_check_type);
6824  	if (err)
6825  		return err;
6826  
6827  
6828  	if (tnum_is_const(reg->var_off)) {
6829  		min_off = max_off = reg->var_off.value + off;
6830  	} else {
6831  		/* Variable offset is prohibited for unprivileged mode for
6832  		 * simplicity since it requires corresponding support in
6833  		 * Spectre masking for stack ALU.
6834  		 * See also retrieve_ptr_limit().
6835  		 */
6836  		if (!env->bypass_spec_v1) {
6837  			char tn_buf[48];
6838  
6839  			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6840  			verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
6841  				regno, err_extra, tn_buf);
6842  			return -EACCES;
6843  		}
6844  		/* Only an initialized buffer on the stack may be accessed with
6845  		 * a variable offset. With an uninitialized buffer it's hard to
6846  		 * guarantee that the whole memory is marked as initialized on
6847  		 * helper return, since the exact bounds are unknown, which may
6848  		 * cause uninitialized stack leaking.
6849  		 */
6850  		if (meta && meta->raw_mode)
6851  			meta = NULL;
6852  
6853  		min_off = reg->smin_value + off;
6854  		max_off = reg->smax_value + off;
6855  	}
6856  
6857  	if (meta && meta->raw_mode) {
6858  		/* Ensure we won't be overwriting dynptrs when simulating byte
6859  		 * by byte access in check_helper_call using meta.access_size.
6860  		 * This would be a problem if we have a helper in the future
6861  		 * which takes:
6862  		 *
6863  		 *	helper(uninit_mem, len, dynptr)
6864  		 *
6865  		 * Now, uninit_mem may overlap with the dynptr pointer. Hence, it
6866  		 * may end up writing to the dynptr itself when touching memory from
6867  		 * arg 1. This can be relaxed on a case by case basis for known
6868  		 * safe cases, but reject due to the possibility of aliasing by
6869  		 * default.
6870  		 */
6871  		for (i = min_off; i < max_off + access_size; i++) {
6872  			int stack_off = -i - 1;
6873  
6874  			spi = __get_spi(i);
6875  			/* raw_mode may write past allocated_stack */
6876  			if (state->allocated_stack <= stack_off)
6877  				continue;
6878  			if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
6879  				verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
6880  				return -EACCES;
6881  			}
6882  		}
6883  		meta->access_size = access_size;
6884  		meta->regno = regno;
6885  		return 0;
6886  	}
6887  
6888  	for (i = min_off; i < max_off + access_size; i++) {
6889  		u8 *stype;
6890  
6891  		slot = -i - 1;
6892  		spi = slot / BPF_REG_SIZE;
6893  		if (state->allocated_stack <= slot)
6894  			goto err;
6895  		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
6896  		if (*stype == STACK_MISC)
6897  			goto mark;
6898  		if ((*stype == STACK_ZERO) ||
6899  		    (*stype == STACK_INVALID && env->allow_uninit_stack)) {
6900  			if (clobber) {
6901  				/* helper can write anything into the stack */
6902  				*stype = STACK_MISC;
6903  			}
6904  			goto mark;
6905  		}
6906  
6907  		if (is_spilled_reg(&state->stack[spi]) &&
6908  		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
6909  		     env->allow_ptr_leaks)) {
6910  			if (clobber) {
6911  				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
6912  				for (j = 0; j < BPF_REG_SIZE; j++)
6913  					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
6914  			}
6915  			goto mark;
6916  		}
6917  
6918  err:
6919  		if (tnum_is_const(reg->var_off)) {
6920  			verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
6921  				err_extra, regno, min_off, i - min_off, access_size);
6922  		} else {
6923  			char tn_buf[48];
6924  
6925  			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6926  			verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
6927  				err_extra, regno, tn_buf, i - min_off, access_size);
6928  		}
6929  		return -EACCES;
6930  mark:
6931  		/* reading any byte out of 8-byte 'spill_slot' will cause
6932  		 * the whole slot to be marked as 'read'
6933  		 */
6934  		mark_reg_read(env, &state->stack[spi].spilled_ptr,
6935  			      state->stack[spi].spilled_ptr.parent,
6936  			      REG_LIVE_READ64);
6937  		/* We do not set REG_LIVE_WRITTEN for the stack slot, as we
6938  		 * cannot be sure whether the stack slot is written to or not.
6939  		 * Hence, we must still conservatively propagate reads upwards
6940  		 * even if the helper may write to the entire memory range.
6941  		 */
6942  	}
6943  	return update_stack_depth(env, state, min_off);
6944  }
6945  
6946  static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
6947  				   int access_size, bool zero_size_allowed,
6948  				   struct bpf_call_arg_meta *meta)
6949  {
6950  	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6951  	u32 *max_access;
6952  
6953  	switch (base_type(reg->type)) {
6954  	case PTR_TO_PACKET:
6955  	case PTR_TO_PACKET_META:
6956  		return check_packet_access(env, regno, reg->off, access_size,
6957  					   zero_size_allowed);
6958  	case PTR_TO_MAP_KEY:
6959  		if (meta && meta->raw_mode) {
6960  			verbose(env, "R%d cannot write into %s\n", regno,
6961  				reg_type_str(env, reg->type));
6962  			return -EACCES;
6963  		}
6964  		return check_mem_region_access(env, regno, reg->off, access_size,
6965  					       reg->map_ptr->key_size, false);
6966  	case PTR_TO_MAP_VALUE:
6967  		if (check_map_access_type(env, regno, reg->off, access_size,
6968  					  meta && meta->raw_mode ? BPF_WRITE :
6969  					  BPF_READ))
6970  			return -EACCES;
6971  		return check_map_access(env, regno, reg->off, access_size,
6972  					zero_size_allowed, ACCESS_HELPER);
6973  	case PTR_TO_MEM:
6974  		if (type_is_rdonly_mem(reg->type)) {
6975  			if (meta && meta->raw_mode) {
6976  				verbose(env, "R%d cannot write into %s\n", regno,
6977  					reg_type_str(env, reg->type));
6978  				return -EACCES;
6979  			}
6980  		}
6981  		return check_mem_region_access(env, regno, reg->off,
6982  					       access_size, reg->mem_size,
6983  					       zero_size_allowed);
6984  	case PTR_TO_BUF:
6985  		if (type_is_rdonly_mem(reg->type)) {
6986  			if (meta && meta->raw_mode) {
6987  				verbose(env, "R%d cannot write into %s\n", regno,
6988  					reg_type_str(env, reg->type));
6989  				return -EACCES;
6990  			}
6991  
6992  			max_access = &env->prog->aux->max_rdonly_access;
6993  		} else {
6994  			max_access = &env->prog->aux->max_rdwr_access;
6995  		}
6996  		return check_buffer_access(env, reg, regno, reg->off,
6997  					   access_size, zero_size_allowed,
6998  					   max_access);
6999  	case PTR_TO_STACK:
7000  		return check_stack_range_initialized(
7001  				env,
7002  				regno, reg->off, access_size,
7003  				zero_size_allowed, ACCESS_HELPER, meta);
7004  	case PTR_TO_BTF_ID:
7005  		return check_ptr_to_btf_access(env, regs, regno, reg->off,
7006  					       access_size, BPF_READ, -1);
7007  	case PTR_TO_CTX:
7008  		/* in case the function doesn't know how to access the context
7009  		 * (because we are in a program of type SYSCALL for example), we
7010  		 * cannot statically check its size.
7011  		 * Dynamically check it now.
7012  		 */
7013  		if (!env->ops->convert_ctx_access) {
7014  			enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ;
7015  			int offset = access_size - 1;
7016  
7017  			/* Allow zero-byte read from PTR_TO_CTX */
7018  			if (access_size == 0)
7019  				return zero_size_allowed ? 0 : -EACCES;
7020  
7021  			return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
7022  						atype, -1, false, false);
7023  		}
7024  
7025  		fallthrough;
7026  	default: /* scalar_value or invalid ptr */
7027  		/* Allow zero-byte read from NULL, regardless of pointer type */
7028  		if (zero_size_allowed && access_size == 0 &&
7029  		    register_is_null(reg))
7030  			return 0;
7031  
7032  		verbose(env, "R%d type=%s ", regno,
7033  			reg_type_str(env, reg->type));
7034  		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
7035  		return -EACCES;
7036  	}
7037  }
7038  
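/* Check the size register of a mem/size helper argument pair, e.g.
 *
 *	bpf_probe_read_kernel(buf, size, unsafe_ptr);
 *
 * where the program must have proven an upper bound on 'size' (such as
 * "if (size > sizeof(buf)) return 0;") so that umax_value is usable below.
 */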
7039  static int check_mem_size_reg(struct bpf_verifier_env *env,
7040  			      struct bpf_reg_state *reg, u32 regno,
7041  			      bool zero_size_allowed,
7042  			      struct bpf_call_arg_meta *meta)
7043  {
7044  	int err;
7045  
7046  	/* This is used to refine r0 return value bounds for helpers
7047  	 * that enforce this value as an upper bound on return values.
7048  	 * See do_refine_retval_range() for helpers that can refine
7049  	 * the return value. The C type of the helper's size argument is u32,
7050  	 * so we pull the register bound from umax_value; if the value can be
7051  	 * negative, the verifier errors out. Only upper bounds can be learned
7052  	 * because the retval is an int type and negative retvals are allowed.
7053  	 */
7054  	meta->msize_max_value = reg->umax_value;
7055  
7056  	/* The register is SCALAR_VALUE; the access check
7057  	 * happens using its boundaries.
7058  	 */
7059  	if (!tnum_is_const(reg->var_off))
7060  		/* For unprivileged variable accesses, disable raw
7061  		 * mode so that the program is required to
7062  		 * initialize all the memory that the helper could
7063  		 * just partially fill up.
7064  		 */
7065  		meta = NULL;
7066  
7067  	if (reg->smin_value < 0) {
7068  		verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
7069  			regno);
7070  		return -EACCES;
7071  	}
7072  
7073  	if (reg->umin_value == 0) {
7074  		err = check_helper_mem_access(env, regno - 1, 0,
7075  					      zero_size_allowed,
7076  					      meta);
7077  		if (err)
7078  			return err;
7079  	}
7080  
7081  	if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
7082  		verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
7083  			regno);
7084  		return -EACCES;
7085  	}
7086  	err = check_helper_mem_access(env, regno - 1,
7087  				      reg->umax_value,
7088  				      zero_size_allowed, meta);
7089  	if (!err)
7090  		err = mark_chain_precision(env, regno);
7091  	return err;
7092  }
7093  
7094  int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7095  		   u32 regno, u32 mem_size)
7096  {
7097  	bool may_be_null = type_may_be_null(reg->type);
7098  	struct bpf_reg_state saved_reg;
7099  	struct bpf_call_arg_meta meta;
7100  	int err;
7101  
7102  	if (register_is_null(reg))
7103  		return 0;
7104  
7105  	memset(&meta, 0, sizeof(meta));
7106  	/* Assuming that the register contains a value, check if the memory
7107  	 * access is safe. Temporarily save and restore the register's state as
7108  	 * the conversion shouldn't be visible to a caller.
7109  	 */
7110  	if (may_be_null) {
7111  		saved_reg = *reg;
7112  		mark_ptr_not_null_reg(reg);
7113  	}
7114  
7115  	err = check_helper_mem_access(env, regno, mem_size, true, &meta);
7116  	/* Check access for BPF_WRITE */
7117  	meta.raw_mode = true;
7118  	err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
7119  
7120  	if (may_be_null)
7121  		*reg = saved_reg;
7122  
7123  	return err;
7124  }
7125  
7126  static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7127  				    u32 regno)
7128  {
7129  	struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
7130  	bool may_be_null = type_may_be_null(mem_reg->type);
7131  	struct bpf_reg_state saved_reg;
7132  	struct bpf_call_arg_meta meta;
7133  	int err;
7134  
7135  	WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
7136  
7137  	memset(&meta, 0, sizeof(meta));
7138  
7139  	if (may_be_null) {
7140  		saved_reg = *mem_reg;
7141  		mark_ptr_not_null_reg(mem_reg);
7142  	}
7143  
7144  	err = check_mem_size_reg(env, reg, regno, true, &meta);
7145  	/* Check access for BPF_WRITE */
7146  	meta.raw_mode = true;
7147  	err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
7148  
7149  	if (may_be_null)
7150  		*mem_reg = saved_reg;
7151  	return err;
7152  }
7153  
7154  /* Implementation details:
7155   * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
7156   * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
7157   * Two bpf_map_lookups (even with the same key) will have different reg->id.
7158   * Two separate bpf_obj_new will also have different reg->id.
7159   * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
7160   * clears reg->id after value_or_null->value transition, since the verifier only
7161   * cares about the range of access to valid map value pointer and doesn't care
7162   * about actual address of the map element.
7163   * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
7164   * reg->id > 0 after value_or_null->value transition. By doing so
7165   * two bpf_map_lookups will be considered two different pointers that
7166   * point to different bpf_spin_locks. Likewise for pointers to allocated objects
7167   * returned from bpf_obj_new.
7168   * The verifier allows taking only one bpf_spin_lock at a time to avoid
7169   * deadlocks.
7170   * Since only one bpf_spin_lock is allowed the checks are simpler than
7171   * reg_is_refcounted() logic. The verifier needs to remember only
7172   * one spin_lock instead of array of acquired_refs.
7173   * cur_state->active_lock remembers which map value element or allocated
7174   * object got locked and clears it after bpf_spin_unlock.
7175   */
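/* Example of the pattern being checked, sketched in BPF program C code:
 *
 *	struct val {
 *		int cnt;
 *		struct bpf_spin_lock lock;
 *	};
 *	...
 *	struct val *v = bpf_map_lookup_elem(&map, &key);
 *	if (v) {
 *		bpf_spin_lock(&v->lock);
 *		v->cnt++;
 *		bpf_spin_unlock(&v->lock);
 *	}
 */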
7176  static int process_spin_lock(struct bpf_verifier_env *env, int regno,
7177  			     bool is_lock)
7178  {
7179  	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7180  	struct bpf_verifier_state *cur = env->cur_state;
7181  	bool is_const = tnum_is_const(reg->var_off);
7182  	u64 val = reg->var_off.value;
7183  	struct bpf_map *map = NULL;
7184  	struct btf *btf = NULL;
7185  	struct btf_record *rec;
7186  
7187  	if (!is_const) {
7188  		verbose(env,
7189  			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
7190  			regno);
7191  		return -EINVAL;
7192  	}
7193  	if (reg->type == PTR_TO_MAP_VALUE) {
7194  		map = reg->map_ptr;
7195  		if (!map->btf) {
7196  			verbose(env,
7197  				"map '%s' has to have BTF in order to use bpf_spin_lock\n",
7198  				map->name);
7199  			return -EINVAL;
7200  		}
7201  	} else {
7202  		btf = reg->btf;
7203  	}
7204  
7205  	rec = reg_btf_record(reg);
7206  	if (!btf_record_has_field(rec, BPF_SPIN_LOCK)) {
7207  		verbose(env, "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local",
7208  			map ? map->name : "kptr");
7209  		return -EINVAL;
7210  	}
7211  	if (rec->spin_lock_off != val + reg->off) {
7212  		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
7213  			val + reg->off, rec->spin_lock_off);
7214  		return -EINVAL;
7215  	}
7216  	if (is_lock) {
7217  		if (cur->active_lock.ptr) {
7218  			verbose(env,
7219  				"Locking two bpf_spin_locks are not allowed\n");
7220  			return -EINVAL;
7221  		}
7222  		if (map)
7223  			cur->active_lock.ptr = map;
7224  		else
7225  			cur->active_lock.ptr = btf;
7226  		cur->active_lock.id = reg->id;
7227  	} else {
7228  		void *ptr;
7229  
7230  		if (map)
7231  			ptr = map;
7232  		else
7233  			ptr = btf;
7234  
7235  		if (!cur->active_lock.ptr) {
7236  			verbose(env, "bpf_spin_unlock without taking a lock\n");
7237  			return -EINVAL;
7238  		}
7239  		if (cur->active_lock.ptr != ptr ||
7240  		    cur->active_lock.id != reg->id) {
7241  			verbose(env, "bpf_spin_unlock of different lock\n");
7242  			return -EINVAL;
7243  		}
7244  
7245  		invalidate_non_owning_refs(env);
7246  
7247  		cur->active_lock.ptr = NULL;
7248  		cur->active_lock.id = 0;
7249  	}
7250  	return 0;
7251  }
7252  
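/* Check a PTR_TO_MAP_VALUE argument that must point at a struct bpf_timer
 * embedded in the map value, e.g. (sketch of BPF program C code):
 *
 *	struct elem {
 *		struct bpf_timer t;
 *	};
 *	...
 *	struct elem *e = bpf_map_lookup_elem(&map, &key);
 *	if (e) {
 *		bpf_timer_init(&e->t, &map, CLOCK_MONOTONIC);
 *		bpf_timer_set_callback(&e->t, timer_cb);
 *		bpf_timer_start(&e->t, 1000, 0);	// 1000 nsecs
 *	}
 */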
7253  static int process_timer_func(struct bpf_verifier_env *env, int regno,
7254  			      struct bpf_call_arg_meta *meta)
7255  {
7256  	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7257  	bool is_const = tnum_is_const(reg->var_off);
7258  	struct bpf_map *map = reg->map_ptr;
7259  	u64 val = reg->var_off.value;
7260  
7261  	if (!is_const) {
7262  		verbose(env,
7263  			"R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
7264  			regno);
7265  		return -EINVAL;
7266  	}
7267  	if (!map->btf) {
7268  		verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
7269  			map->name);
7270  		return -EINVAL;
7271  	}
7272  	if (!btf_record_has_field(map->record, BPF_TIMER)) {
7273  		verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
7274  		return -EINVAL;
7275  	}
7276  	if (map->record->timer_off != val + reg->off) {
7277  		verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
7278  			val + reg->off, map->record->timer_off);
7279  		return -EINVAL;
7280  	}
7281  	if (meta->map_ptr) {
7282  		verbose(env, "verifier bug. Two map pointers in a timer helper\n");
7283  		return -EFAULT;
7284  	}
7285  	meta->map_uid = reg->map_uid;
7286  	meta->map_ptr = map;
7287  	return 0;
7288  }
7289  
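/* Check the map value argument of bpf_kptr_xchg(), e.g. (sketch):
 *
 *	struct map_value {
 *		struct task_struct __kptr *task;
 *	};
 *	...
 *	old = bpf_kptr_xchg(&v->task, new_task);
 *
 * The register must point exactly at a referenced (__kptr tagged) field.
 */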
7290  static int process_kptr_func(struct bpf_verifier_env *env, int regno,
7291  			     struct bpf_call_arg_meta *meta)
7292  {
7293  	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7294  	struct bpf_map *map_ptr = reg->map_ptr;
7295  	struct btf_field *kptr_field;
7296  	u32 kptr_off;
7297  
7298  	if (!tnum_is_const(reg->var_off)) {
7299  		verbose(env,
7300  			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
7301  			regno);
7302  		return -EINVAL;
7303  	}
7304  	if (!map_ptr->btf) {
7305  		verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
7306  			map_ptr->name);
7307  		return -EINVAL;
7308  	}
7309  	if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
7310  		verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
7311  		return -EINVAL;
7312  	}
7313  
7314  	meta->map_ptr = map_ptr;
7315  	kptr_off = reg->off + reg->var_off.value;
7316  	kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
7317  	if (!kptr_field) {
7318  		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
7319  		return -EACCES;
7320  	}
7321  	if (kptr_field->type != BPF_KPTR_REF) {
7322  		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
7323  		return -EACCES;
7324  	}
7325  	meta->kptr_field = kptr_field;
7326  	return 0;
7327  }
7328  
7329  /* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
7330   * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
7331   *
7332   * In both cases we deal with the first 8 bytes, but need to mark the next 8
7333   * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
7334   * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
7335   *
7336   * Mutability of bpf_dynptr is at two levels, one is at the level of struct
7337   * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
7338   * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
7339   * mutate the view of the dynptr and also possibly destroy it. In the latter
7340   * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
7341   * memory that dynptr points to.
7342   *
7343   * The verifier will keep track both levels of mutation (bpf_dynptr's in
7344   * reg->type and the memory's in reg->dynptr.type), but there is no support for
7345   * readonly dynptr view yet, hence only the first case is tracked and checked.
7346   *
7347   * This is consistent with how C applies the const modifier to a struct object,
7348   * where the pointer itself inside bpf_dynptr becomes const but not what it
7349   * points to.
7350   *
7351   * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
7352   * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
7353   */
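/* A typical sequence this covers, sketched in BPF program C code:
 *
 *	struct bpf_dynptr ptr;					// 16 bytes on the stack
 *
 *	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);	// MEM_UNINIT arg
 *	bpf_dynptr_write(&ptr, 0, data, sizeof(data), 0);	// initialized dynptr
 *	bpf_ringbuf_submit_dynptr(&ptr, 0);
 */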
7354  static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
7355  			       enum bpf_arg_type arg_type, int clone_ref_obj_id)
7356  {
7357  	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7358  	int err;
7359  
7360  	/* MEM_UNINIT and MEM_RDONLY are exclusive when applied to an
7361  	 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
7362  	 */
7363  	if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
7364  		verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
7365  		return -EFAULT;
7366  	}
7367  
7368  	/*  MEM_UNINIT - Points to memory that is an appropriate candidate for
7369  	 *		 constructing a mutable bpf_dynptr object.
7370  	 *
7371  	 *		 Currently, this is only possible with PTR_TO_STACK
7372  	 *		 pointing to a region of at least 16 bytes which doesn't
7373  	 *		 contain an existing bpf_dynptr.
7374  	 *
7375  	 *  MEM_RDONLY - Points to an initialized bpf_dynptr that will not be
7376  	 *		 mutated or destroyed. However, the memory it points to
7377  	 *		 may be mutated.
7378  	 *
7379  	 *  None       - Points to an initialized dynptr that can be mutated and
7380  	 *		 destroyed, including mutation of the memory it points
7381  	 *		 to.
7382  	 */
7383  	if (arg_type & MEM_UNINIT) {
7384  		int i;
7385  
7386  		if (!is_dynptr_reg_valid_uninit(env, reg)) {
7387  			verbose(env, "Dynptr has to be an uninitialized dynptr\n");
7388  			return -EINVAL;
7389  		}
7390  
7391  		/* we write one BPF_DW-sized chunk (8 bytes) at a time */
7392  		for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
7393  			err = check_mem_access(env, insn_idx, regno,
7394  					       i, BPF_DW, BPF_WRITE, -1, false, false);
7395  			if (err)
7396  				return err;
7397  		}
7398  
7399  		err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id);
7400  	} else /* MEM_RDONLY and None case from above */ {
7401  		/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
7402  		if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
7403  			verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
7404  			return -EINVAL;
7405  		}
7406  
7407  		if (!is_dynptr_reg_valid_init(env, reg)) {
7408  			verbose(env,
7409  				"Expected an initialized dynptr as arg #%d\n",
7410  				regno);
7411  			return -EINVAL;
7412  		}
7413  
7414  		/* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
7415  		if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
7416  			verbose(env,
7417  				"Expected a dynptr of type %s as arg #%d\n",
7418  				dynptr_type_str(arg_to_dynptr_type(arg_type)), regno);
7419  			return -EINVAL;
7420  		}
7421  
7422  		err = mark_dynptr_read(env, reg);
7423  	}
7424  	return err;
7425  }
7426  
7427  static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
7428  {
7429  	struct bpf_func_state *state = func(env, reg);
7430  
7431  	return state->stack[spi].spilled_ptr.ref_obj_id;
7432  }
7433  
7434  static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7435  {
7436  	return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
7437  }
7438  
7439  static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7440  {
7441  	return meta->kfunc_flags & KF_ITER_NEW;
7442  }
7443  
7444  static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7445  {
7446  	return meta->kfunc_flags & KF_ITER_NEXT;
7447  }
7448  
7449  static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7450  {
7451  	return meta->kfunc_flags & KF_ITER_DESTROY;
7452  }
7453  
7454  static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg)
7455  {
7456  	/* btf_check_iter_kfuncs() guarantees that first argument of any iter
7457  	 * kfunc is iter state pointer
7458  	 */
7459  	return arg == 0 && is_iter_kfunc(meta);
7460  }
7461  
7462  static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
7463  			    struct bpf_kfunc_call_arg_meta *meta)
7464  {
7465  	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7466  	const struct btf_type *t;
7467  	const struct btf_param *arg;
7468  	int spi, err, i, nr_slots;
7469  	u32 btf_id;
7470  
7471  	/* btf_check_iter_kfuncs() ensures we don't need to validate anything here */
7472  	arg = &btf_params(meta->func_proto)[0];
7473  	t = btf_type_skip_modifiers(meta->btf, arg->type, NULL);	/* PTR */
7474  	t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id);	/* STRUCT */
7475  	nr_slots = t->size / BPF_REG_SIZE;
7476  
7477  	if (is_iter_new_kfunc(meta)) {
7478  		/* bpf_iter_<type>_new() expects pointer to uninit iter state */
7479  		if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
7480  			verbose(env, "expected uninitialized iter_%s as arg #%d\n",
7481  				iter_type_str(meta->btf, btf_id), regno);
7482  			return -EINVAL;
7483  		}
7484  
7485  		for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
7486  			err = check_mem_access(env, insn_idx, regno,
7487  					       i, BPF_DW, BPF_WRITE, -1, false, false);
7488  			if (err)
7489  				return err;
7490  		}
7491  
7492  		err = mark_stack_slots_iter(env, reg, insn_idx, meta->btf, btf_id, nr_slots);
7493  		if (err)
7494  			return err;
7495  	} else {
7496  		/* iter_next() or iter_destroy() expect an initialized iter state */
7497  		if (!is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots)) {
7498  			verbose(env, "expected an initialized iter_%s as arg #%d\n",
7499  				iter_type_str(meta->btf, btf_id), regno);
7500  			return -EINVAL;
7501  		}
7502  
7503  		spi = iter_get_spi(env, reg, nr_slots);
7504  		if (spi < 0)
7505  			return spi;
7506  
7507  		err = mark_iter_read(env, reg, spi, nr_slots);
7508  		if (err)
7509  			return err;
7510  
7511  		/* remember meta->iter info for process_iter_next_call() */
7512  		meta->iter.spi = spi;
7513  		meta->iter.frameno = reg->frameno;
7514  		meta->ref_obj_id = iter_ref_obj_id(env, reg, spi);
7515  
7516  		if (is_iter_destroy_kfunc(meta)) {
7517  			err = unmark_stack_slots_iter(env, reg, nr_slots);
7518  			if (err)
7519  				return err;
7520  		}
7521  	}
7522  
7523  	return 0;
7524  }
7525  
7526  /* process_iter_next_call() is called when verifier gets to iterator's next
7527   * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
7528   * to it as just "iter_next()" in comments below.
7529   *
7530   * BPF verifier relies on a crucial contract for any iter_next()
7531   * implementation: it should *eventually* return NULL, and once that happens
7532   * it should keep returning NULL. That is, once iterator exhausts elements to
7533   * iterate, it should never reset or spuriously return new elements.
7534   *
7535   * With the assumption of such contract, process_iter_next_call() simulates
7536   * a fork in the verifier state to validate loop logic correctness and safety
7537   * without having to simulate an infinite number of iterations.
7538   *
7539   * In current state, we first assume that iter_next() returned NULL and
7540   * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
7541   * conditions we should not form an infinite loop and should eventually reach
7542   * exit.
7543   *
7544   * Besides that, we also fork current state and enqueue it for later
7545   * verification. In a forked state we keep iterator state as ACTIVE
7546   * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
7547   * also bump iteration depth to prevent erroneous infinite loop detection
7548   * later on (see iter_active_depths_differ() comment for details). In this
7549   * state we assume that we'll eventually loop back to another iter_next()
7550   * call (it could be in exactly the same location or in some other instruction,
7551   * it doesn't matter, we don't make any unnecessary assumptions about this,
7552   * everything revolves around iterator state in a stack slot, not which
7553   * instruction is calling iter_next()). When that happens, we will either come
7554   * to iter_next() with an equivalent state and can conclude that the next
7555   * iteration will proceed in exactly the same way as the one we just verified,
7556   * so it's safe to assume that the loop converges. If not, we'll go on to
7557   * another iteration simulation with a different input state, until all possible
7558   * starting states are validated or we reach the maximum instruction limit.
7559   *
7560   * This way, we will either exhaustively discover all possible input states
7561   * that iterator loop can start with and eventually will converge, or we'll
7562   * effectively regress into bounded loop simulation logic and either reach the
7563   * maximum number of instructions if the loop is not provably convergent, or
7564   * terminate earlier because some statically known limit on the number of
7565   * iterations kicks in (e.g., an explicit `if n > 100 then break;` statement somewhere in the loop).
7566   *
7567   * One very subtle but very important aspect is that we *always* simulate NULL
7568   * condition first (as the current state) before we simulate non-NULL case.
7569   * This has to do with intricacies of scalar precision tracking. By simulating
7570   * "exit condition" of iter_next() returning NULL first, we make sure all the
7571   * relevant precision marks *that will be set **after** we exit iterator loop*
7572   * are propagated backwards to common parent state of NULL and non-NULL
7573   * branches. Thanks to that, state equivalence checks done later in forked
7574   * state, when reaching iter_next() for ACTIVE iterator, can assume that
7575   * precision marks are finalized and won't change. Because simulating another
7576   * ACTIVE iterator iteration won't change them (because given the same input
7577   * states we'll end up with exactly the same output states which we are currently
7578   * comparing; and verification after the loop already propagated back what
7579   * needs to be **additionally** tracked as precise). It's subtle, grok
7580   * precision tracking for more intuitive understanding.
7581   */
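/* A minimal sketch of the loop shape this reasoning is about, using the
 * numbers iterator kfuncs (program-side pseudo code, not from this file):
 *
 *	struct bpf_iter_num it;
 *	int *v;
 *
 *	bpf_iter_num_new(&it, 0, 100);
 *	while ((v = bpf_iter_num_next(&it))) {
 *		sum += *v;
 *	}
 *	bpf_iter_num_destroy(&it);
 *
 * Every bpf_iter_num_next() call above is a point where the NULL/non-NULL
 * fork described in this comment is simulated.
 */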
7582  static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
7583  				  struct bpf_kfunc_call_arg_meta *meta)
7584  {
7585  	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st;
7586  	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
7587  	struct bpf_reg_state *cur_iter, *queued_iter;
7588  	int iter_frameno = meta->iter.frameno;
7589  	int iter_spi = meta->iter.spi;
7590  
7591  	BTF_TYPE_EMIT(struct bpf_iter);
7592  
7593  	cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
7594  
7595  	if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
7596  	    cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
7597  		verbose(env, "verifier internal error: unexpected iterator state %d (%s)\n",
7598  			cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
7599  		return -EFAULT;
7600  	}
7601  
7602  	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
7603  		/* branch out active iter state */
7604  		queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
7605  		if (!queued_st)
7606  			return -ENOMEM;
7607  
7608  		queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
7609  		queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
7610  		queued_iter->iter.depth++;
7611  
7612  		queued_fr = queued_st->frame[queued_st->curframe];
7613  		mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
7614  	}
7615  
7616  	/* mark current iter state as DRAINED, i.e. assume iter_next() returned
7617  	 * NULL; the iteration depth is intentionally kept unchanged */
7618  	cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
7619  	__mark_reg_const_zero(&cur_fr->regs[BPF_REG_0]);
7620  
7621  	return 0;
7622  }
7623  
7624  static bool arg_type_is_mem_size(enum bpf_arg_type type)
7625  {
7626  	return type == ARG_CONST_SIZE ||
7627  	       type == ARG_CONST_SIZE_OR_ZERO;
7628  }
7629  
7630  static bool arg_type_is_release(enum bpf_arg_type type)
7631  {
7632  	return type & OBJ_RELEASE;
7633  }
7634  
7635  static bool arg_type_is_dynptr(enum bpf_arg_type type)
7636  {
7637  	return base_type(type) == ARG_PTR_TO_DYNPTR;
7638  }
7639  
7640  static int int_ptr_type_to_size(enum bpf_arg_type type)
7641  {
7642  	if (type == ARG_PTR_TO_INT)
7643  		return sizeof(u32);
7644  	else if (type == ARG_PTR_TO_LONG)
7645  		return sizeof(u64);
7646  
7647  	return -EINVAL;
7648  }
7649  
7650  static int resolve_map_arg_type(struct bpf_verifier_env *env,
7651  				 const struct bpf_call_arg_meta *meta,
7652  				 enum bpf_arg_type *arg_type)
7653  {
7654  	if (!meta->map_ptr) {
7655  		/* kernel subsystem misconfigured verifier */
7656  		verbose(env, "invalid map_ptr to access map->type\n");
7657  		return -EACCES;
7658  	}
7659  
7660  	switch (meta->map_ptr->map_type) {
7661  	case BPF_MAP_TYPE_SOCKMAP:
7662  	case BPF_MAP_TYPE_SOCKHASH:
7663  		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
7664  			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
7665  		} else {
7666  			verbose(env, "invalid arg_type for sockmap/sockhash\n");
7667  			return -EINVAL;
7668  		}
7669  		break;
7670  	case BPF_MAP_TYPE_BLOOM_FILTER:
7671  		if (meta->func_id == BPF_FUNC_map_peek_elem)
7672  			*arg_type = ARG_PTR_TO_MAP_VALUE;
7673  		break;
7674  	default:
7675  		break;
7676  	}
7677  	return 0;
7678  }
7679  
7680  struct bpf_reg_types {
7681  	const enum bpf_reg_type types[10];
7682  	u32 *btf_id;
7683  };
7684  
7685  static const struct bpf_reg_types sock_types = {
7686  	.types = {
7687  		PTR_TO_SOCK_COMMON,
7688  		PTR_TO_SOCKET,
7689  		PTR_TO_TCP_SOCK,
7690  		PTR_TO_XDP_SOCK,
7691  	},
7692  };
7693  
7694  #ifdef CONFIG_NET
7695  static const struct bpf_reg_types btf_id_sock_common_types = {
7696  	.types = {
7697  		PTR_TO_SOCK_COMMON,
7698  		PTR_TO_SOCKET,
7699  		PTR_TO_TCP_SOCK,
7700  		PTR_TO_XDP_SOCK,
7701  		PTR_TO_BTF_ID,
7702  		PTR_TO_BTF_ID | PTR_TRUSTED,
7703  	},
7704  	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
7705  };
7706  #endif
7707  
7708  static const struct bpf_reg_types mem_types = {
7709  	.types = {
7710  		PTR_TO_STACK,
7711  		PTR_TO_PACKET,
7712  		PTR_TO_PACKET_META,
7713  		PTR_TO_MAP_KEY,
7714  		PTR_TO_MAP_VALUE,
7715  		PTR_TO_MEM,
7716  		PTR_TO_MEM | MEM_RINGBUF,
7717  		PTR_TO_BUF,
7718  		PTR_TO_BTF_ID | PTR_TRUSTED,
7719  	},
7720  };
7721  
7722  static const struct bpf_reg_types int_ptr_types = {
7723  	.types = {
7724  		PTR_TO_STACK,
7725  		PTR_TO_PACKET,
7726  		PTR_TO_PACKET_META,
7727  		PTR_TO_MAP_KEY,
7728  		PTR_TO_MAP_VALUE,
7729  	},
7730  };
7731  
7732  static const struct bpf_reg_types spin_lock_types = {
7733  	.types = {
7734  		PTR_TO_MAP_VALUE,
7735  		PTR_TO_BTF_ID | MEM_ALLOC,
7736  	}
7737  };
7738  
7739  static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
7740  static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
7741  static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
7742  static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
7743  static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
7744  static const struct bpf_reg_types btf_ptr_types = {
7745  	.types = {
7746  		PTR_TO_BTF_ID,
7747  		PTR_TO_BTF_ID | PTR_TRUSTED,
7748  		PTR_TO_BTF_ID | MEM_RCU,
7749  	},
7750  };
7751  static const struct bpf_reg_types percpu_btf_ptr_types = {
7752  	.types = {
7753  		PTR_TO_BTF_ID | MEM_PERCPU,
7754  		PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
7755  	}
7756  };
7757  static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
7758  static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
7759  static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
7760  static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
7761  static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
7762  static const struct bpf_reg_types dynptr_types = {
7763  	.types = {
7764  		PTR_TO_STACK,
7765  		CONST_PTR_TO_DYNPTR,
7766  	}
7767  };
7768  
7769  static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
7770  	[ARG_PTR_TO_MAP_KEY]		= &mem_types,
7771  	[ARG_PTR_TO_MAP_VALUE]		= &mem_types,
7772  	[ARG_CONST_SIZE]		= &scalar_types,
7773  	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
7774  	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
7775  	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
7776  	[ARG_PTR_TO_CTX]		= &context_types,
7777  	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
7778  #ifdef CONFIG_NET
7779  	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
7780  #endif
7781  	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
7782  	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
7783  	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
7784  	[ARG_PTR_TO_MEM]		= &mem_types,
7785  	[ARG_PTR_TO_RINGBUF_MEM]	= &ringbuf_mem_types,
7786  	[ARG_PTR_TO_INT]		= &int_ptr_types,
7787  	[ARG_PTR_TO_LONG]		= &int_ptr_types,
7788  	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
7789  	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
7790  	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
7791  	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
7792  	[ARG_PTR_TO_TIMER]		= &timer_types,
7793  	[ARG_PTR_TO_KPTR]		= &kptr_types,
7794  	[ARG_PTR_TO_DYNPTR]		= &dynptr_types,
7795  };
7796  
7797  static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
7798  			  enum bpf_arg_type arg_type,
7799  			  const u32 *arg_btf_id,
7800  			  struct bpf_call_arg_meta *meta)
7801  {
7802  	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7803  	enum bpf_reg_type expected, type = reg->type;
7804  	const struct bpf_reg_types *compatible;
7805  	int i, j;
7806  
7807  	compatible = compatible_reg_types[base_type(arg_type)];
7808  	if (!compatible) {
7809  		verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
7810  		return -EFAULT;
7811  	}
7812  
7813  	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
7814  	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
7815  	 *
7816  	 * Same for MAYBE_NULL:
7817  	 *
7818  	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
7819  	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
7820  	 *
7821  	 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
7822  	 *
7823  	 * Therefore we fold these flags depending on the arg_type before comparison.
7824  	 */
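	/* E.g. (illustrative): for an arg declared ARG_PTR_TO_MEM | MEM_RDONLY,
	 * a register of type PTR_TO_MEM | MEM_RDONLY has MEM_RDONLY stripped
	 * below and then matches the plain PTR_TO_MEM entry in mem_types.
	 */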
7825  	if (arg_type & MEM_RDONLY)
7826  		type &= ~MEM_RDONLY;
7827  	if (arg_type & PTR_MAYBE_NULL)
7828  		type &= ~PTR_MAYBE_NULL;
7829  	if (base_type(arg_type) == ARG_PTR_TO_MEM)
7830  		type &= ~DYNPTR_TYPE_FLAG_MASK;
7831  
7832  	if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type))
7833  		type &= ~MEM_ALLOC;
7834  
7835  	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
7836  		expected = compatible->types[i];
7837  		if (expected == NOT_INIT)
7838  			break;
7839  
7840  		if (type == expected)
7841  			goto found;
7842  	}
7843  
7844  	verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
7845  	for (j = 0; j + 1 < i; j++)
7846  		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
7847  	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
7848  	return -EACCES;
7849  
7850  found:
7851  	if (base_type(reg->type) != PTR_TO_BTF_ID)
7852  		return 0;
7853  
7854  	if (compatible == &mem_types) {
7855  		if (!(arg_type & MEM_RDONLY)) {
7856  			verbose(env,
7857  				"%s() may write into memory pointed by R%d type=%s\n",
7858  				func_id_name(meta->func_id),
7859  				regno, reg_type_str(env, reg->type));
7860  			return -EACCES;
7861  		}
7862  		return 0;
7863  	}
7864  
7865  	switch ((int)reg->type) {
7866  	case PTR_TO_BTF_ID:
7867  	case PTR_TO_BTF_ID | PTR_TRUSTED:
7868  	case PTR_TO_BTF_ID | MEM_RCU:
7869  	case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
7870  	case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
7871  	{
7872  		/* For bpf_sk_release, it needs to match against first member
7873  		 * 'struct sock_common', hence make an exception for it. This
7874  		 * allows bpf_sk_release to work for multiple socket types.
7875  		 */
7876  		bool strict_type_match = arg_type_is_release(arg_type) &&
7877  					 meta->func_id != BPF_FUNC_sk_release;
7878  
7879  		if (type_may_be_null(reg->type) &&
7880  		    (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
7881  			verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno);
7882  			return -EACCES;
7883  		}
7884  
7885  		if (!arg_btf_id) {
7886  			if (!compatible->btf_id) {
7887  				verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
7888  				return -EFAULT;
7889  			}
7890  			arg_btf_id = compatible->btf_id;
7891  		}
7892  
7893  		if (meta->func_id == BPF_FUNC_kptr_xchg) {
7894  			if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
7895  				return -EACCES;
7896  		} else {
7897  			if (arg_btf_id == BPF_PTR_POISON) {
7898  				verbose(env, "verifier internal error: ");
7899  				verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n",
7900  					regno);
7901  				return -EACCES;
7902  			}
7903  
7904  			if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
7905  						  btf_vmlinux, *arg_btf_id,
7906  						  strict_type_match)) {
7907  				verbose(env, "R%d is of type %s but %s is expected\n",
7908  					regno, btf_type_name(reg->btf, reg->btf_id),
7909  					btf_type_name(btf_vmlinux, *arg_btf_id));
7910  				return -EACCES;
7911  			}
7912  		}
7913  		break;
7914  	}
7915  	case PTR_TO_BTF_ID | MEM_ALLOC:
7916  		if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
7917  		    meta->func_id != BPF_FUNC_kptr_xchg) {
7918  			verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
7919  			return -EFAULT;
7920  		}
7921  		if (meta->func_id == BPF_FUNC_kptr_xchg) {
7922  			if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
7923  				return -EACCES;
7924  		}
7925  		break;
7926  	case PTR_TO_BTF_ID | MEM_PERCPU:
7927  	case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
7928  		/* Handled by helper specific checks */
7929  		break;
7930  	default:
7931  		verbose(env, "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
7932  		return -EFAULT;
7933  	}
7934  	return 0;
7935  }
7936  
7937  static struct btf_field *
7938  reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
7939  {
7940  	struct btf_field *field;
7941  	struct btf_record *rec;
7942  
7943  	rec = reg_btf_record(reg);
7944  	if (!rec)
7945  		return NULL;
7946  
7947  	field = btf_record_find(rec, off, fields);
7948  	if (!field)
7949  		return NULL;
7950  
7951  	return field;
7952  }
7953  
7954  int check_func_arg_reg_off(struct bpf_verifier_env *env,
7955  			   const struct bpf_reg_state *reg, int regno,
7956  			   enum bpf_arg_type arg_type)
7957  {
7958  	u32 type = reg->type;
7959  
7960  	/* When referenced register is passed to release function, its fixed
7961  	 * offset must be 0.
7962  	 *
7963  	 * We will check that an arg_type_is_release reg has ref_obj_id when storing
7964  	 * meta->release_regno.
7965  	 */
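	/* For example (sketch, helper names only as an illustration): the
	 * pointer returned by bpf_ringbuf_reserve() must be handed back to
	 * bpf_ringbuf_submit()/bpf_ringbuf_discard() unmodified; passing
	 * "p + 4" trips the zero-offset check below.
	 */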
7966  	if (arg_type_is_release(arg_type)) {
7967  		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
7968  		 * may not directly point to the object being released, but to
7969  		 * dynptr pointing to such object, which might be at some offset
7970  		 * on the stack. In that case, we simply fall back to the
7971  		 * default handling.
7972  		 */
7973  		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
7974  			return 0;
7975  
7976  		/* Doing check_ptr_off_reg check for the offset will catch this
7977  		 * because fixed_off_ok is false, but checking here allows us
7978  		 * to give the user a better error message.
7979  		 */
7980  		if (reg->off) {
7981  			verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
7982  				regno);
7983  			return -EINVAL;
7984  		}
7985  		return __check_ptr_off_reg(env, reg, regno, false);
7986  	}
7987  
7988  	switch (type) {
7989  	/* Pointer types where both fixed and variable offset is explicitly allowed: */
7990  	case PTR_TO_STACK:
7991  	case PTR_TO_PACKET:
7992  	case PTR_TO_PACKET_META:
7993  	case PTR_TO_MAP_KEY:
7994  	case PTR_TO_MAP_VALUE:
7995  	case PTR_TO_MEM:
7996  	case PTR_TO_MEM | MEM_RDONLY:
7997  	case PTR_TO_MEM | MEM_RINGBUF:
7998  	case PTR_TO_BUF:
7999  	case PTR_TO_BUF | MEM_RDONLY:
8000  	case SCALAR_VALUE:
8001  		return 0;
8002  	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
8003  	 * fixed offset.
8004  	 */
8005  	case PTR_TO_BTF_ID:
8006  	case PTR_TO_BTF_ID | MEM_ALLOC:
8007  	case PTR_TO_BTF_ID | PTR_TRUSTED:
8008  	case PTR_TO_BTF_ID | MEM_RCU:
8009  	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
8010  	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
8011  		/* When referenced PTR_TO_BTF_ID is passed to release function,
8012  		 * its fixed offset must be 0. In the other cases, fixed offset
8013  		 * can be non-zero. This was already checked above. So pass
8014  		 * fixed_off_ok as true to allow fixed offset for all other
8015  		 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
8016  		 * still need to do checks instead of returning.
8017  		 */
8018  		return __check_ptr_off_reg(env, reg, regno, true);
8019  	default:
8020  		return __check_ptr_off_reg(env, reg, regno, false);
8021  	}
8022  }
8023  
8024  static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
8025  						const struct bpf_func_proto *fn,
8026  						struct bpf_reg_state *regs)
8027  {
8028  	struct bpf_reg_state *state = NULL;
8029  	int i;
8030  
8031  	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
8032  		if (arg_type_is_dynptr(fn->arg_type[i])) {
8033  			if (state) {
8034  				verbose(env, "verifier internal error: multiple dynptr args\n");
8035  				return NULL;
8036  			}
8037  			state = &regs[BPF_REG_1 + i];
8038  		}
8039  
8040  	if (!state)
8041  		verbose(env, "verifier internal error: no dynptr arg found\n");
8042  
8043  	return state;
8044  }
8045  
8046  static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
8047  {
8048  	struct bpf_func_state *state = func(env, reg);
8049  	int spi;
8050  
8051  	if (reg->type == CONST_PTR_TO_DYNPTR)
8052  		return reg->id;
8053  	spi = dynptr_get_spi(env, reg);
8054  	if (spi < 0)
8055  		return spi;
8056  	return state->stack[spi].spilled_ptr.id;
8057  }
8058  
8059  static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
8060  {
8061  	struct bpf_func_state *state = func(env, reg);
8062  	int spi;
8063  
8064  	if (reg->type == CONST_PTR_TO_DYNPTR)
8065  		return reg->ref_obj_id;
8066  	spi = dynptr_get_spi(env, reg);
8067  	if (spi < 0)
8068  		return spi;
8069  	return state->stack[spi].spilled_ptr.ref_obj_id;
8070  }
8071  
8072  static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
8073  					    struct bpf_reg_state *reg)
8074  {
8075  	struct bpf_func_state *state = func(env, reg);
8076  	int spi;
8077  
8078  	if (reg->type == CONST_PTR_TO_DYNPTR)
8079  		return reg->dynptr.type;
8080  
8081  	spi = __get_spi(reg->off);
8082  	if (spi < 0) {
8083  		verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
8084  		return BPF_DYNPTR_TYPE_INVALID;
8085  	}
8086  
8087  	return state->stack[spi].spilled_ptr.dynptr.type;
8088  }
8089  
8090  static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
8091  			  struct bpf_call_arg_meta *meta,
8092  			  const struct bpf_func_proto *fn,
8093  			  int insn_idx)
8094  {
8095  	u32 regno = BPF_REG_1 + arg;
8096  	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8097  	enum bpf_arg_type arg_type = fn->arg_type[arg];
8098  	enum bpf_reg_type type = reg->type;
8099  	u32 *arg_btf_id = NULL;
8100  	int err = 0;
8101  
8102  	if (arg_type == ARG_DONTCARE)
8103  		return 0;
8104  
8105  	err = check_reg_arg(env, regno, SRC_OP);
8106  	if (err)
8107  		return err;
8108  
8109  	if (arg_type == ARG_ANYTHING) {
8110  		if (is_pointer_value(env, regno)) {
8111  			verbose(env, "R%d leaks addr into helper function\n",
8112  				regno);
8113  			return -EACCES;
8114  		}
8115  		return 0;
8116  	}
8117  
8118  	if (type_is_pkt_pointer(type) &&
8119  	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
8120  		verbose(env, "helper access to the packet is not allowed\n");
8121  		return -EACCES;
8122  	}
8123  
8124  	if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
8125  		err = resolve_map_arg_type(env, meta, &arg_type);
8126  		if (err)
8127  			return err;
8128  	}
8129  
8130  	if (register_is_null(reg) && type_may_be_null(arg_type))
8131  		/* A NULL register has a SCALAR_VALUE type, so skip
8132  		 * type checking.
8133  		 */
8134  		goto skip_type_check;
8135  
8136  	/* arg_btf_id and arg_size are in a union. */
8137  	if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
8138  	    base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
8139  		arg_btf_id = fn->arg_btf_id[arg];
8140  
8141  	err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
8142  	if (err)
8143  		return err;
8144  
8145  	err = check_func_arg_reg_off(env, reg, regno, arg_type);
8146  	if (err)
8147  		return err;
8148  
8149  skip_type_check:
8150  	if (arg_type_is_release(arg_type)) {
8151  		if (arg_type_is_dynptr(arg_type)) {
8152  			struct bpf_func_state *state = func(env, reg);
8153  			int spi;
8154  
8155  			/* Only dynptr created on stack can be released, thus
8156  			 * the get_spi and stack state checks for spilled_ptr
8157  			 * should only be done before process_dynptr_func for
8158  			 * PTR_TO_STACK.
8159  			 */
8160  			if (reg->type == PTR_TO_STACK) {
8161  				spi = dynptr_get_spi(env, reg);
8162  				if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
8163  					verbose(env, "arg %d is an unacquired reference\n", regno);
8164  					return -EINVAL;
8165  				}
8166  			} else {
8167  				verbose(env, "cannot release unowned const bpf_dynptr\n");
8168  				return -EINVAL;
8169  			}
8170  		} else if (!reg->ref_obj_id && !register_is_null(reg)) {
8171  			verbose(env, "R%d must be referenced when passed to release function\n",
8172  				regno);
8173  			return -EINVAL;
8174  		}
8175  		if (meta->release_regno) {
8176  			verbose(env, "verifier internal error: more than one release argument\n");
8177  			return -EFAULT;
8178  		}
8179  		meta->release_regno = regno;
8180  	}
8181  
8182  	if (reg->ref_obj_id) {
8183  		if (meta->ref_obj_id) {
8184  			verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
8185  				regno, reg->ref_obj_id,
8186  				meta->ref_obj_id);
8187  			return -EFAULT;
8188  		}
8189  		meta->ref_obj_id = reg->ref_obj_id;
8190  	}
8191  
8192  	switch (base_type(arg_type)) {
8193  	case ARG_CONST_MAP_PTR:
8194  		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
8195  		if (meta->map_ptr) {
8196  			/* Use map_uid (which is unique id of inner map) to reject:
8197  			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
8198  			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
8199  			 * if (inner_map1 && inner_map2) {
8200  			 *     timer = bpf_map_lookup_elem(inner_map1);
8201  			 *     if (timer)
8202  			 *         // mismatch would have been allowed
8203  			 *         bpf_timer_init(timer, inner_map2);
8204  			 * }
8205  			 *
8206  			 * Comparing map_ptr is enough to distinguish normal and outer maps.
8207  			 */
8208  			if (meta->map_ptr != reg->map_ptr ||
8209  			    meta->map_uid != reg->map_uid) {
8210  				verbose(env,
8211  					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
8212  					meta->map_uid, reg->map_uid);
8213  				return -EINVAL;
8214  			}
8215  		}
8216  		meta->map_ptr = reg->map_ptr;
8217  		meta->map_uid = reg->map_uid;
8218  		break;
8219  	case ARG_PTR_TO_MAP_KEY:
8220  		/* bpf_map_xxx(..., map_ptr, ..., key) call:
8221  		 * check that [key, key + map->key_size) are within
8222  		 * stack limits and initialized
8223  		 */
8224  		if (!meta->map_ptr) {
8225  			/* in function declaration map_ptr must come before
8226  			 * map_key, so that it's verified and known before
8227  			 * we have to check map_key here. Otherwise it means
8228  			 * that kernel subsystem misconfigured verifier
8229  			 */
8230  			verbose(env, "invalid map_ptr to access map->key\n");
8231  			return -EACCES;
8232  		}
8233  		err = check_helper_mem_access(env, regno,
8234  					      meta->map_ptr->key_size, false,
8235  					      NULL);
8236  		break;
8237  	case ARG_PTR_TO_MAP_VALUE:
8238  		if (type_may_be_null(arg_type) && register_is_null(reg))
8239  			return 0;
8240  
8241  		/* bpf_map_xxx(..., map_ptr, ..., value) call:
8242  		 * check [value, value + map->value_size) validity
8243  		 */
8244  		if (!meta->map_ptr) {
8245  			/* kernel subsystem misconfigured verifier */
8246  			verbose(env, "invalid map_ptr to access map->value\n");
8247  			return -EACCES;
8248  		}
8249  		meta->raw_mode = arg_type & MEM_UNINIT;
8250  		err = check_helper_mem_access(env, regno,
8251  					      meta->map_ptr->value_size, false,
8252  					      meta);
8253  		break;
8254  	case ARG_PTR_TO_PERCPU_BTF_ID:
8255  		if (!reg->btf_id) {
8256  			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
8257  			return -EACCES;
8258  		}
8259  		meta->ret_btf = reg->btf;
8260  		meta->ret_btf_id = reg->btf_id;
8261  		break;
8262  	case ARG_PTR_TO_SPIN_LOCK:
8263  		if (in_rbtree_lock_required_cb(env)) {
8264  			verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
8265  			return -EACCES;
8266  		}
8267  		if (meta->func_id == BPF_FUNC_spin_lock) {
8268  			err = process_spin_lock(env, regno, true);
8269  			if (err)
8270  				return err;
8271  		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
8272  			err = process_spin_lock(env, regno, false);
8273  			if (err)
8274  				return err;
8275  		} else {
8276  			verbose(env, "verifier internal error\n");
8277  			return -EFAULT;
8278  		}
8279  		break;
8280  	case ARG_PTR_TO_TIMER:
8281  		err = process_timer_func(env, regno, meta);
8282  		if (err)
8283  			return err;
8284  		break;
8285  	case ARG_PTR_TO_FUNC:
8286  		meta->subprogno = reg->subprogno;
8287  		break;
8288  	case ARG_PTR_TO_MEM:
8289  		/* The access to this pointer is only checked when we hit the
8290  		 * next is_mem_size argument below.
8291  		 */
8292  		meta->raw_mode = arg_type & MEM_UNINIT;
8293  		if (arg_type & MEM_FIXED_SIZE) {
8294  			err = check_helper_mem_access(env, regno,
8295  						      fn->arg_size[arg], false,
8296  						      meta);
8297  		}
8298  		break;
8299  	case ARG_CONST_SIZE:
8300  		err = check_mem_size_reg(env, reg, regno, false, meta);
8301  		break;
8302  	case ARG_CONST_SIZE_OR_ZERO:
8303  		err = check_mem_size_reg(env, reg, regno, true, meta);
8304  		break;
8305  	case ARG_PTR_TO_DYNPTR:
8306  		err = process_dynptr_func(env, regno, insn_idx, arg_type, 0);
8307  		if (err)
8308  			return err;
8309  		break;
8310  	case ARG_CONST_ALLOC_SIZE_OR_ZERO:
8311  		if (!tnum_is_const(reg->var_off)) {
8312  			verbose(env, "R%d is not a known constant\n",
8313  				regno);
8314  			return -EACCES;
8315  		}
8316  		meta->mem_size = reg->var_off.value;
8317  		err = mark_chain_precision(env, regno);
8318  		if (err)
8319  			return err;
8320  		break;
8321  	case ARG_PTR_TO_INT:
8322  	case ARG_PTR_TO_LONG:
8323  	{
8324  		int size = int_ptr_type_to_size(arg_type);
8325  
8326  		err = check_helper_mem_access(env, regno, size, false, meta);
8327  		if (err)
8328  			return err;
8329  		err = check_ptr_alignment(env, reg, 0, size, true);
8330  		break;
8331  	}
8332  	case ARG_PTR_TO_CONST_STR:
8333  	{
8334  		struct bpf_map *map = reg->map_ptr;
8335  		int map_off;
8336  		u64 map_addr;
8337  		char *str_ptr;
8338  
8339  		if (!bpf_map_is_rdonly(map)) {
8340  			verbose(env, "R%d does not point to a readonly map\n", regno);
8341  			return -EACCES;
8342  		}
8343  
8344  		if (!tnum_is_const(reg->var_off)) {
8345  			verbose(env, "R%d is not a constant address\n", regno);
8346  			return -EACCES;
8347  		}
8348  
8349  		if (!map->ops->map_direct_value_addr) {
8350  			verbose(env, "no direct value access support for this map type\n");
8351  			return -EACCES;
8352  		}
8353  
8354  		err = check_map_access(env, regno, reg->off,
8355  				       map->value_size - reg->off, false,
8356  				       ACCESS_HELPER);
8357  		if (err)
8358  			return err;
8359  
8360  		map_off = reg->off + reg->var_off.value;
8361  		err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
8362  		if (err) {
8363  			verbose(env, "direct value access on string failed\n");
8364  			return err;
8365  		}
8366  
8367  		str_ptr = (char *)(long)(map_addr);
8368  		if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
8369  			verbose(env, "string is not zero-terminated\n");
8370  			return -EINVAL;
8371  		}
8372  		break;
8373  	}
8374  	case ARG_PTR_TO_KPTR:
8375  		err = process_kptr_func(env, regno, meta);
8376  		if (err)
8377  			return err;
8378  		break;
8379  	}
8380  
8381  	return err;
8382  }
8383  
8384  static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
8385  {
8386  	enum bpf_attach_type eatype = env->prog->expected_attach_type;
8387  	enum bpf_prog_type type = resolve_prog_type(env->prog);
8388  
8389  	if (func_id != BPF_FUNC_map_update_elem)
8390  		return false;
8391  
8392  	/* It's not possible to get access to a locked struct sock in these
8393  	 * contexts, so updating is safe.
8394  	 */
8395  	switch (type) {
8396  	case BPF_PROG_TYPE_TRACING:
8397  		if (eatype == BPF_TRACE_ITER)
8398  			return true;
8399  		break;
8400  	case BPF_PROG_TYPE_SOCKET_FILTER:
8401  	case BPF_PROG_TYPE_SCHED_CLS:
8402  	case BPF_PROG_TYPE_SCHED_ACT:
8403  	case BPF_PROG_TYPE_XDP:
8404  	case BPF_PROG_TYPE_SK_REUSEPORT:
8405  	case BPF_PROG_TYPE_FLOW_DISSECTOR:
8406  	case BPF_PROG_TYPE_SK_LOOKUP:
8407  		return true;
8408  	default:
8409  		break;
8410  	}
8411  
8412  	verbose(env, "cannot update sockmap in this context\n");
8413  	return false;
8414  }
8415  
8416  static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
8417  {
8418  	return env->prog->jit_requested &&
8419  	       bpf_jit_supports_subprog_tailcalls();
8420  }
8421  
8422  static int check_map_func_compatibility(struct bpf_verifier_env *env,
8423  					struct bpf_map *map, int func_id)
8424  {
8425  	if (!map)
8426  		return 0;
8427  
8428  	/* We need a two way check, first is from map perspective ... */
8429  	switch (map->map_type) {
8430  	case BPF_MAP_TYPE_PROG_ARRAY:
8431  		if (func_id != BPF_FUNC_tail_call)
8432  			goto error;
8433  		break;
8434  	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
8435  		if (func_id != BPF_FUNC_perf_event_read &&
8436  		    func_id != BPF_FUNC_perf_event_output &&
8437  		    func_id != BPF_FUNC_skb_output &&
8438  		    func_id != BPF_FUNC_perf_event_read_value &&
8439  		    func_id != BPF_FUNC_xdp_output)
8440  			goto error;
8441  		break;
8442  	case BPF_MAP_TYPE_RINGBUF:
8443  		if (func_id != BPF_FUNC_ringbuf_output &&
8444  		    func_id != BPF_FUNC_ringbuf_reserve &&
8445  		    func_id != BPF_FUNC_ringbuf_query &&
8446  		    func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
8447  		    func_id != BPF_FUNC_ringbuf_submit_dynptr &&
8448  		    func_id != BPF_FUNC_ringbuf_discard_dynptr)
8449  			goto error;
8450  		break;
8451  	case BPF_MAP_TYPE_USER_RINGBUF:
8452  		if (func_id != BPF_FUNC_user_ringbuf_drain)
8453  			goto error;
8454  		break;
8455  	case BPF_MAP_TYPE_STACK_TRACE:
8456  		if (func_id != BPF_FUNC_get_stackid)
8457  			goto error;
8458  		break;
8459  	case BPF_MAP_TYPE_CGROUP_ARRAY:
8460  		if (func_id != BPF_FUNC_skb_under_cgroup &&
8461  		    func_id != BPF_FUNC_current_task_under_cgroup)
8462  			goto error;
8463  		break;
8464  	case BPF_MAP_TYPE_CGROUP_STORAGE:
8465  	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
8466  		if (func_id != BPF_FUNC_get_local_storage)
8467  			goto error;
8468  		break;
8469  	case BPF_MAP_TYPE_DEVMAP:
8470  	case BPF_MAP_TYPE_DEVMAP_HASH:
8471  		if (func_id != BPF_FUNC_redirect_map &&
8472  		    func_id != BPF_FUNC_map_lookup_elem)
8473  			goto error;
8474  		break;
8475  	/* Restrict bpf side of cpumap and xskmap, open when use-cases
8476  	 * appear.
8477  	 */
8478  	case BPF_MAP_TYPE_CPUMAP:
8479  		if (func_id != BPF_FUNC_redirect_map)
8480  			goto error;
8481  		break;
8482  	case BPF_MAP_TYPE_XSKMAP:
8483  		if (func_id != BPF_FUNC_redirect_map &&
8484  		    func_id != BPF_FUNC_map_lookup_elem)
8485  			goto error;
8486  		break;
8487  	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
8488  	case BPF_MAP_TYPE_HASH_OF_MAPS:
8489  		if (func_id != BPF_FUNC_map_lookup_elem)
8490  			goto error;
8491  		break;
8492  	case BPF_MAP_TYPE_SOCKMAP:
8493  		if (func_id != BPF_FUNC_sk_redirect_map &&
8494  		    func_id != BPF_FUNC_sock_map_update &&
8495  		    func_id != BPF_FUNC_map_delete_elem &&
8496  		    func_id != BPF_FUNC_msg_redirect_map &&
8497  		    func_id != BPF_FUNC_sk_select_reuseport &&
8498  		    func_id != BPF_FUNC_map_lookup_elem &&
8499  		    !may_update_sockmap(env, func_id))
8500  			goto error;
8501  		break;
8502  	case BPF_MAP_TYPE_SOCKHASH:
8503  		if (func_id != BPF_FUNC_sk_redirect_hash &&
8504  		    func_id != BPF_FUNC_sock_hash_update &&
8505  		    func_id != BPF_FUNC_map_delete_elem &&
8506  		    func_id != BPF_FUNC_msg_redirect_hash &&
8507  		    func_id != BPF_FUNC_sk_select_reuseport &&
8508  		    func_id != BPF_FUNC_map_lookup_elem &&
8509  		    !may_update_sockmap(env, func_id))
8510  			goto error;
8511  		break;
8512  	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
8513  		if (func_id != BPF_FUNC_sk_select_reuseport)
8514  			goto error;
8515  		break;
8516  	case BPF_MAP_TYPE_QUEUE:
8517  	case BPF_MAP_TYPE_STACK:
8518  		if (func_id != BPF_FUNC_map_peek_elem &&
8519  		    func_id != BPF_FUNC_map_pop_elem &&
8520  		    func_id != BPF_FUNC_map_push_elem)
8521  			goto error;
8522  		break;
8523  	case BPF_MAP_TYPE_SK_STORAGE:
8524  		if (func_id != BPF_FUNC_sk_storage_get &&
8525  		    func_id != BPF_FUNC_sk_storage_delete &&
8526  		    func_id != BPF_FUNC_kptr_xchg)
8527  			goto error;
8528  		break;
8529  	case BPF_MAP_TYPE_INODE_STORAGE:
8530  		if (func_id != BPF_FUNC_inode_storage_get &&
8531  		    func_id != BPF_FUNC_inode_storage_delete &&
8532  		    func_id != BPF_FUNC_kptr_xchg)
8533  			goto error;
8534  		break;
8535  	case BPF_MAP_TYPE_TASK_STORAGE:
8536  		if (func_id != BPF_FUNC_task_storage_get &&
8537  		    func_id != BPF_FUNC_task_storage_delete &&
8538  		    func_id != BPF_FUNC_kptr_xchg)
8539  			goto error;
8540  		break;
8541  	case BPF_MAP_TYPE_CGRP_STORAGE:
8542  		if (func_id != BPF_FUNC_cgrp_storage_get &&
8543  		    func_id != BPF_FUNC_cgrp_storage_delete &&
8544  		    func_id != BPF_FUNC_kptr_xchg)
8545  			goto error;
8546  		break;
8547  	case BPF_MAP_TYPE_BLOOM_FILTER:
8548  		if (func_id != BPF_FUNC_map_peek_elem &&
8549  		    func_id != BPF_FUNC_map_push_elem)
8550  			goto error;
8551  		break;
8552  	default:
8553  		break;
8554  	}
8555  
8556  	/* ... and second from the function itself. */
8557  	switch (func_id) {
8558  	case BPF_FUNC_tail_call:
8559  		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
8560  			goto error;
8561  		if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
8562  			verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
8563  			return -EINVAL;
8564  		}
8565  		break;
8566  	case BPF_FUNC_perf_event_read:
8567  	case BPF_FUNC_perf_event_output:
8568  	case BPF_FUNC_perf_event_read_value:
8569  	case BPF_FUNC_skb_output:
8570  	case BPF_FUNC_xdp_output:
8571  		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
8572  			goto error;
8573  		break;
8574  	case BPF_FUNC_ringbuf_output:
8575  	case BPF_FUNC_ringbuf_reserve:
8576  	case BPF_FUNC_ringbuf_query:
8577  	case BPF_FUNC_ringbuf_reserve_dynptr:
8578  	case BPF_FUNC_ringbuf_submit_dynptr:
8579  	case BPF_FUNC_ringbuf_discard_dynptr:
8580  		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
8581  			goto error;
8582  		break;
8583  	case BPF_FUNC_user_ringbuf_drain:
8584  		if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
8585  			goto error;
8586  		break;
8587  	case BPF_FUNC_get_stackid:
8588  		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
8589  			goto error;
8590  		break;
8591  	case BPF_FUNC_current_task_under_cgroup:
8592  	case BPF_FUNC_skb_under_cgroup:
8593  		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
8594  			goto error;
8595  		break;
8596  	case BPF_FUNC_redirect_map:
8597  		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
8598  		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
8599  		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
8600  		    map->map_type != BPF_MAP_TYPE_XSKMAP)
8601  			goto error;
8602  		break;
8603  	case BPF_FUNC_sk_redirect_map:
8604  	case BPF_FUNC_msg_redirect_map:
8605  	case BPF_FUNC_sock_map_update:
8606  		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
8607  			goto error;
8608  		break;
8609  	case BPF_FUNC_sk_redirect_hash:
8610  	case BPF_FUNC_msg_redirect_hash:
8611  	case BPF_FUNC_sock_hash_update:
8612  		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
8613  			goto error;
8614  		break;
8615  	case BPF_FUNC_get_local_storage:
8616  		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
8617  		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
8618  			goto error;
8619  		break;
8620  	case BPF_FUNC_sk_select_reuseport:
8621  		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
8622  		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
8623  		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
8624  			goto error;
8625  		break;
8626  	case BPF_FUNC_map_pop_elem:
8627  		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
8628  		    map->map_type != BPF_MAP_TYPE_STACK)
8629  			goto error;
8630  		break;
8631  	case BPF_FUNC_map_peek_elem:
8632  	case BPF_FUNC_map_push_elem:
8633  		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
8634  		    map->map_type != BPF_MAP_TYPE_STACK &&
8635  		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
8636  			goto error;
8637  		break;
8638  	case BPF_FUNC_map_lookup_percpu_elem:
8639  		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
8640  		    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
8641  		    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
8642  			goto error;
8643  		break;
8644  	case BPF_FUNC_sk_storage_get:
8645  	case BPF_FUNC_sk_storage_delete:
8646  		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
8647  			goto error;
8648  		break;
8649  	case BPF_FUNC_inode_storage_get:
8650  	case BPF_FUNC_inode_storage_delete:
8651  		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
8652  			goto error;
8653  		break;
8654  	case BPF_FUNC_task_storage_get:
8655  	case BPF_FUNC_task_storage_delete:
8656  		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
8657  			goto error;
8658  		break;
8659  	case BPF_FUNC_cgrp_storage_get:
8660  	case BPF_FUNC_cgrp_storage_delete:
8661  		if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
8662  			goto error;
8663  		break;
8664  	default:
8665  		break;
8666  	}
8667  
8668  	return 0;
8669  error:
8670  	verbose(env, "cannot pass map_type %d into func %s#%d\n",
8671  		map->map_type, func_id_name(func_id), func_id);
8672  	return -EINVAL;
8673  }
8674  
8675  static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
8676  {
8677  	int count = 0;
8678  
8679  	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
8680  		count++;
8681  	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
8682  		count++;
8683  	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
8684  		count++;
8685  	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
8686  		count++;
8687  	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
8688  		count++;
8689  
8690  	/* We only support one arg being in raw mode at the moment,
8691  	 * which is sufficient for the helper functions we have
8692  	 * right now.
8693  	 */
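	/* E.g. bpf_probe_read_kernel() is the typical shape: only its dst
	 * buffer is ARG_PTR_TO_UNINIT_MEM (filled in by the helper), so the
	 * count stays at one. (Helper named purely as an illustration.)
	 */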
8694  	return count <= 1;
8695  }
8696  
8697  static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
8698  {
8699  	bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
8700  	bool has_size = fn->arg_size[arg] != 0;
8701  	bool is_next_size = false;
8702  
8703  	if (arg + 1 < ARRAY_SIZE(fn->arg_type))
8704  		is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
8705  
8706  	if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
8707  		return is_next_size;
8708  
8709  	return has_size == is_next_size || is_next_size == is_fixed;
8710  }
8711  
8712  static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
8713  {
8714  	/* bpf_xxx(..., buf, len) call will access 'len'
8715  	 * bytes from memory 'buf'. Both arg types need
8716  	 * to be paired, so make sure there's no buggy
8717  	 * helper function specification.
8718  	 */
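	/* E.g. bpf_perf_event_output(ctx, map, flags, data, size) pairs a
	 * mem-typed 'data' arg with an ARG_CONST_SIZE_OR_ZERO 'size' arg; a
	 * proto with a size arg that is not preceded by a matching mem arg
	 * (or vice versa) is rejected here. (Helper named as an illustration.)
	 */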
8719  	if (arg_type_is_mem_size(fn->arg1_type) ||
8720  	    check_args_pair_invalid(fn, 0) ||
8721  	    check_args_pair_invalid(fn, 1) ||
8722  	    check_args_pair_invalid(fn, 2) ||
8723  	    check_args_pair_invalid(fn, 3) ||
8724  	    check_args_pair_invalid(fn, 4))
8725  		return false;
8726  
8727  	return true;
8728  }
8729  
8730  static bool check_btf_id_ok(const struct bpf_func_proto *fn)
8731  {
8732  	int i;
8733  
8734  	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8735  		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
8736  			return !!fn->arg_btf_id[i];
8737  		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
8738  			return fn->arg_btf_id[i] == BPF_PTR_POISON;
8739  		if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
8740  		    /* arg_btf_id and arg_size are in a union. */
8741  		    (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
8742  		     !(fn->arg_type[i] & MEM_FIXED_SIZE)))
8743  			return false;
8744  	}
8745  
8746  	return true;
8747  }
8748  
8749  static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
8750  {
8751  	return check_raw_mode_ok(fn) &&
8752  	       check_arg_pair_ok(fn) &&
8753  	       check_btf_id_ok(fn) ? 0 : -EINVAL;
8754  }
8755  
8756  /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
8757   * are now invalid, so turn them into unknown SCALAR_VALUE.
8758   *
8759   * This also applies to dynptr slices belonging to skb and xdp dynptrs,
8760   * since these slices point to packet data.
8761   */
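/* Program-side consequence, as a sketch (helpers named for illustration):
 * after a helper that may move packet data, e.g. bpf_skb_pull_data() or
 * bpf_xdp_adjust_head(), earlier data/data_end bounds checks no longer hold:
 *
 *	if (data + 4 > data_end)
 *		return 0;
 *	bpf_skb_pull_data(skb, len);
 *	// the old 'data' is now an unknown scalar; re-load and re-check it
 */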
8762  static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
8763  {
8764  	struct bpf_func_state *state;
8765  	struct bpf_reg_state *reg;
8766  
8767  	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
8768  		if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
8769  			mark_reg_invalid(env, reg);
8770  	}));
8771  }
8772  
8773  enum {
8774  	AT_PKT_END = -1,
8775  	BEYOND_PKT_END = -2,
8776  };
8777  
8778  static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
8779  {
8780  	struct bpf_func_state *state = vstate->frame[vstate->curframe];
8781  	struct bpf_reg_state *reg = &state->regs[regn];
8782  
8783  	if (reg->type != PTR_TO_PACKET)
8784  		/* PTR_TO_PACKET_META is not supported yet */
8785  		return;
8786  
8787  	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
8788  	 * How far beyond pkt_end it goes is unknown.
8789  	 * if (!range_open) it's the case of pkt >= pkt_end
8790  	 * if (range_open) it's the case of pkt > pkt_end
8791  	 * hence this pointer is at least 1 byte bigger than pkt_end
8792  	 */
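	/* E.g. (sketch): on the branch where "data + 8 > pkt_end" is true the
	 * 'data + 8' pointer is known to sit past pkt_end, but by how much is
	 * unknown, so only the open/closed distinction is recorded below.
	 */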
8793  	if (range_open)
8794  		reg->range = BEYOND_PKT_END;
8795  	else
8796  		reg->range = AT_PKT_END;
8797  }
8798  
8799  /* The pointer with the specified id has released its reference to kernel
8800   * resources. Identify all copies of the same pointer and clear the reference.
8801   */
8802  static int release_reference(struct bpf_verifier_env *env,
8803  			     int ref_obj_id)
8804  {
8805  	struct bpf_func_state *state;
8806  	struct bpf_reg_state *reg;
8807  	int err;
8808  
8809  	err = release_reference_state(cur_func(env), ref_obj_id);
8810  	if (err)
8811  		return err;
8812  
8813  	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
8814  		if (reg->ref_obj_id == ref_obj_id)
8815  			mark_reg_invalid(env, reg);
8816  	}));
8817  
8818  	return 0;
8819  }
8820  
8821  static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
8822  {
8823  	struct bpf_func_state *unused;
8824  	struct bpf_reg_state *reg;
8825  
8826  	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
8827  		if (type_is_non_owning_ref(reg->type))
8828  			mark_reg_invalid(env, reg);
8829  	}));
8830  }
8831  
8832  static void clear_caller_saved_regs(struct bpf_verifier_env *env,
8833  				    struct bpf_reg_state *regs)
8834  {
8835  	int i;
8836  
8837  	/* after the call registers r0 - r5 were scratched */
8838  	for (i = 0; i < CALLER_SAVED_REGS; i++) {
8839  		mark_reg_not_init(env, regs, caller_saved[i]);
8840  		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
8841  	}
8842  }
8843  
8844  typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
8845  				   struct bpf_func_state *caller,
8846  				   struct bpf_func_state *callee,
8847  				   int insn_idx);
8848  
8849  static int set_callee_state(struct bpf_verifier_env *env,
8850  			    struct bpf_func_state *caller,
8851  			    struct bpf_func_state *callee, int insn_idx);
8852  
8853  static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
8854  			     int *insn_idx, int subprog,
8855  			     set_callee_state_fn set_callee_state_cb)
8856  {
8857  	struct bpf_verifier_state *state = env->cur_state;
8858  	struct bpf_func_state *caller, *callee;
8859  	int err;
8860  
8861  	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
8862  		verbose(env, "the call stack of %d frames is too deep\n",
8863  			state->curframe + 2);
8864  		return -E2BIG;
8865  	}
8866  
8867  	caller = state->frame[state->curframe];
8868  	if (state->frame[state->curframe + 1]) {
8869  		verbose(env, "verifier bug. Frame %d already allocated\n",
8870  			state->curframe + 1);
8871  		return -EFAULT;
8872  	}
8873  
8874  	err = btf_check_subprog_call(env, subprog, caller->regs);
8875  	if (err == -EFAULT)
8876  		return err;
8877  	if (subprog_is_global(env, subprog)) {
8878  		if (err) {
8879  			verbose(env, "Caller passes invalid args into func#%d\n",
8880  				subprog);
8881  			return err;
8882  		} else {
8883  			if (env->log.level & BPF_LOG_LEVEL)
8884  				verbose(env,
8885  					"Func#%d is global and valid. Skipping.\n",
8886  					subprog);
8887  			clear_caller_saved_regs(env, caller->regs);
8888  
8889  			/* All global functions return a 64-bit SCALAR_VALUE */
8890  			mark_reg_unknown(env, caller->regs, BPF_REG_0);
8891  			caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
8892  
8893  			/* continue with next insn after call */
8894  			return 0;
8895  		}
8896  	}
8897  
8898  	/* set_callee_state is used for direct subprog calls, but we are
8899  	 * interested in validating only BPF helpers that can call subprogs as
8900  	 * callbacks
8901  	 */
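	/* Callback-calling helpers are e.g. bpf_for_each_map_elem(), bpf_loop()
	 * and bpf_timer_set_callback(); bpf_rbtree_add() is a kfunc example.
	 * (Names listed purely as an illustration.)
	 */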
8902  	if (set_callee_state_cb != set_callee_state) {
8903  		if (bpf_pseudo_kfunc_call(insn) &&
8904  		    !is_callback_calling_kfunc(insn->imm)) {
8905  			verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
8906  				func_id_name(insn->imm), insn->imm);
8907  			return -EFAULT;
8908  		} else if (!bpf_pseudo_kfunc_call(insn) &&
8909  			   !is_callback_calling_function(insn->imm)) { /* helper */
8910  			verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
8911  				func_id_name(insn->imm), insn->imm);
8912  			return -EFAULT;
8913  		}
8914  	}
8915  
8916  	if (insn->code == (BPF_JMP | BPF_CALL) &&
8917  	    insn->src_reg == 0 &&
8918  	    insn->imm == BPF_FUNC_timer_set_callback) {
8919  		struct bpf_verifier_state *async_cb;
8920  
8921  		/* there is no real recursion here. timer callbacks are async */
8922  		env->subprog_info[subprog].is_async_cb = true;
8923  		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
8924  					 *insn_idx, subprog);
8925  		if (!async_cb)
8926  			return -EFAULT;
8927  		callee = async_cb->frame[0];
8928  		callee->async_entry_cnt = caller->async_entry_cnt + 1;
8929  
8930  		/* Convert bpf_timer_set_callback() args into timer callback args */
8931  		err = set_callee_state_cb(env, caller, callee, *insn_idx);
8932  		if (err)
8933  			return err;
8934  
8935  		clear_caller_saved_regs(env, caller->regs);
8936  		mark_reg_unknown(env, caller->regs, BPF_REG_0);
8937  		caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
8938  		/* continue with next insn after call */
8939  		return 0;
8940  	}
8941  
8942  	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
8943  	if (!callee)
8944  		return -ENOMEM;
8945  	state->frame[state->curframe + 1] = callee;
8946  
8947  	/* callee cannot access r0, r6 - r9 for reading and has to write
8948  	 * into its own stack before reading from it.
8949  	 * callee can read/write into caller's stack
8950  	 */
8951  	init_func_state(env, callee,
8952  			/* remember the callsite, it will be used by bpf_exit */
8953  			*insn_idx /* callsite */,
8954  			state->curframe + 1 /* frameno within this callchain */,
8955  			subprog /* subprog number within this prog */);
8956  
8957  	/* Transfer references to the callee */
8958  	err = copy_reference_state(callee, caller);
8959  	if (err)
8960  		goto err_out;
8961  
8962  	err = set_callee_state_cb(env, caller, callee, *insn_idx);
8963  	if (err)
8964  		goto err_out;
8965  
8966  	clear_caller_saved_regs(env, caller->regs);
8967  
8968  	/* only increment it after check_reg_arg() finished */
8969  	state->curframe++;
8970  
8971  	/* and go analyze first insn of the callee */
8972  	*insn_idx = env->subprog_info[subprog].start - 1;
8973  
8974  	if (env->log.level & BPF_LOG_LEVEL) {
8975  		verbose(env, "caller:\n");
8976  		print_verifier_state(env, caller, true);
8977  		verbose(env, "callee:\n");
8978  		print_verifier_state(env, callee, true);
8979  	}
8980  	return 0;
8981  
8982  err_out:
8983  	free_func_state(callee);
8984  	state->frame[state->curframe + 1] = NULL;
8985  	return err;
8986  }
8987  
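/* Illustrative only: on the BPF program side, the callback modelled below is
 * typically used like this (identifiers are hypothetical):
 *
 *	static long check_elem(struct bpf_map *map, void *key, void *value,
 *			       void *callback_ctx)
 *	{
 *		return 0;	(0 = visit next element, 1 = stop iterating)
 *	}
 *	...
 *	bpf_for_each_map_elem(&my_map, check_elem, &my_ctx, 0);
 */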
8988  int map_set_for_each_callback_args(struct bpf_verifier_env *env,
8989  				   struct bpf_func_state *caller,
8990  				   struct bpf_func_state *callee)
8991  {
8992  	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
8993  	 *      void *callback_ctx, u64 flags);
8994  	 * callback_fn(struct bpf_map *map, void *key, void *value,
8995  	 *      void *callback_ctx);
8996  	 */
8997  	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
8998  
8999  	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9000  	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9001  	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9002  
9003  	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9004  	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9005  	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9006  
9007  	/* pointer to stack or null */
9008  	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
9009  
9010  	/* unused */
9011  	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9012  	return 0;
9013  }
9014  
9015  static int set_callee_state(struct bpf_verifier_env *env,
9016  			    struct bpf_func_state *caller,
9017  			    struct bpf_func_state *callee, int insn_idx)
9018  {
9019  	int i;
9020  
9021  	/* copy r1 - r5 args that callee can access.  The copy includes parent
9022  	 * pointers, which connect us to the liveness chain.
9023  	 */
9024  	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
9025  		callee->regs[i] = caller->regs[i];
9026  	return 0;
9027  }
9028  
9029  static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9030  			   int *insn_idx)
9031  {
9032  	int subprog, target_insn;
9033  
9034  	target_insn = *insn_idx + insn->imm + 1;
9035  	subprog = find_subprog(env, target_insn);
9036  	if (subprog < 0) {
9037  		verbose(env, "verifier bug. No program starts at insn %d\n",
9038  			target_insn);
9039  		return -EFAULT;
9040  	}
9041  
9042  	return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
9043  }
9044  
9045  static int set_map_elem_callback_state(struct bpf_verifier_env *env,
9046  				       struct bpf_func_state *caller,
9047  				       struct bpf_func_state *callee,
9048  				       int insn_idx)
9049  {
9050  	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
9051  	struct bpf_map *map;
9052  	int err;
9053  
9054  	if (bpf_map_ptr_poisoned(insn_aux)) {
9055  		verbose(env, "tail_call abusing map_ptr\n");
9056  		return -EINVAL;
9057  	}
9058  
9059  	map = BPF_MAP_PTR(insn_aux->map_ptr_state);
9060  	if (!map->ops->map_set_for_each_callback_args ||
9061  	    !map->ops->map_for_each_callback) {
9062  		verbose(env, "callback function not allowed for map\n");
9063  		return -ENOTSUPP;
9064  	}
9065  
9066  	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
9067  	if (err)
9068  		return err;
9069  
9070  	callee->in_callback_fn = true;
9071  	callee->callback_ret_range = tnum_range(0, 1);
9072  	return 0;
9073  }
9074  
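/* Illustrative only: BPF-side shape of the bpf_loop() callback modelled below
 * (identifiers are hypothetical):
 *
 *	static long loop_body(u32 index, void *ctx)
 *	{
 *		return 0;	(0 = next iteration, 1 = break out of the loop)
 *	}
 *	...
 *	bpf_loop(128, loop_body, &my_ctx, 0);
 */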
9075  static int set_loop_callback_state(struct bpf_verifier_env *env,
9076  				   struct bpf_func_state *caller,
9077  				   struct bpf_func_state *callee,
9078  				   int insn_idx)
9079  {
9080  	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
9081  	 *	    u64 flags);
9082  	 * callback_fn(u32 index, void *callback_ctx);
9083  	 */
9084  	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
9085  	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9086  
9087  	/* unused */
9088  	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9089  	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9090  	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9091  
9092  	callee->in_callback_fn = true;
9093  	callee->callback_ret_range = tnum_range(0, 1);
9094  	return 0;
9095  }
9096  
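/* Illustrative only: BPF-side shape of the timer callback modelled below
 * (identifiers are hypothetical; the verifier enforces a [0, 1] return range):
 *
 *	static int timer_cb(void *map, int *key, struct my_elem *val)
 *	{
 *		return 0;
 *	}
 *	...
 *	bpf_timer_set_callback(&val->timer, timer_cb);
 */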
9097  static int set_timer_callback_state(struct bpf_verifier_env *env,
9098  				    struct bpf_func_state *caller,
9099  				    struct bpf_func_state *callee,
9100  				    int insn_idx)
9101  {
9102  	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
9103  
9104  	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
9105  	 * callback_fn(struct bpf_map *map, void *key, void *value);
9106  	 */
9107  	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9108  	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9109  	callee->regs[BPF_REG_1].map_ptr = map_ptr;
9110  
9111  	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9112  	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9113  	callee->regs[BPF_REG_2].map_ptr = map_ptr;
9114  
9115  	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9116  	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9117  	callee->regs[BPF_REG_3].map_ptr = map_ptr;
9118  
9119  	/* unused */
9120  	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9121  	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9122  	callee->in_async_callback_fn = true;
9123  	callee->callback_ret_range = tnum_range(0, 1);
9124  	return 0;
9125  }
9126  
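/* Illustrative only: BPF-side shape of the bpf_find_vma() callback modelled
 * below (identifiers are hypothetical):
 *
 *	static long vma_cb(struct task_struct *task, struct vm_area_struct *vma,
 *			   void *callback_ctx)
 *	{
 *		return 0;
 *	}
 *	...
 *	bpf_find_vma(task, addr, vma_cb, &my_ctx, 0);
 */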
9127  static int set_find_vma_callback_state(struct bpf_verifier_env *env,
9128  				       struct bpf_func_state *caller,
9129  				       struct bpf_func_state *callee,
9130  				       int insn_idx)
9131  {
9132  	/* bpf_find_vma(struct task_struct *task, u64 addr,
9133  	 *               void *callback_fn, void *callback_ctx, u64 flags)
9134  	 * (callback_fn)(struct task_struct *task,
9135  	 *               struct vm_area_struct *vma, void *callback_ctx);
9136  	 */
9137  	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9138  
9139  	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
9140  	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9141  	callee->regs[BPF_REG_2].btf = btf_vmlinux;
9142  	callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
9143  
9144  	/* pointer to stack or null */
9145  	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
9146  
9147  	/* unused */
9148  	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9149  	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9150  	callee->in_callback_fn = true;
9151  	callee->callback_ret_range = tnum_range(0, 1);
9152  	return 0;
9153  }
9154  
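/* Illustrative only: BPF-side shape of the bpf_user_ringbuf_drain() callback
 * modelled below (identifiers are hypothetical):
 *
 *	static long drain_cb(struct bpf_dynptr *dynptr, void *ctx)
 *	{
 *		return 0;	(0 = keep draining samples, 1 = stop)
 *	}
 *	...
 *	bpf_user_ringbuf_drain(&user_rb, drain_cb, &my_ctx, 0);
 */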
9155  static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
9156  					   struct bpf_func_state *caller,
9157  					   struct bpf_func_state *callee,
9158  					   int insn_idx)
9159  {
9160  	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn,
9161  	 *			  void *callback_ctx, u64 flags);
9162  	 * callback_fn(const struct bpf_dynptr *dynptr, void *callback_ctx);
9163  	 */
9164  	__mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
9165  	mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
9166  	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9167  
9168  	/* unused */
9169  	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9170  	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9171  	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9172  
9173  	callee->in_callback_fn = true;
9174  	callee->callback_ret_range = tnum_range(0, 1);
9175  	return 0;
9176  }
9177  
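/* Illustrative only: BPF-side shape of the 'less' comparator modelled below
 * (identifiers are hypothetical; programs typically reach
 * bpf_rbtree_add_impl() through a convenience wrapper such as
 * bpf_rbtree_add()):
 *
 *	static bool node_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
 *	{
 *		return ...;	(must be 0 or 1, enforced via callback_ret_range)
 *	}
 */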
9178  static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
9179  					 struct bpf_func_state *caller,
9180  					 struct bpf_func_state *callee,
9181  					 int insn_idx)
9182  {
9183  	/* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
9184  	 *                     bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
9185  	 *
9186  	 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
9187  	 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
9188  	 * by this point, so look at 'root'
9189  	 */
9190  	struct btf_field *field;
9191  
9192  	field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
9193  				      BPF_RB_ROOT);
9194  	if (!field || !field->graph_root.value_btf_id)
9195  		return -EFAULT;
9196  
9197  	mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
9198  	ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
9199  	mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
9200  	ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
9201  
9202  	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9203  	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9204  	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9205  	callee->in_callback_fn = true;
9206  	callee->callback_ret_range = tnum_range(0, 1);
9207  	return 0;
9208  }
9209  
9210  static bool is_rbtree_lock_required_kfunc(u32 btf_id);
9211  
9212  /* Are we currently verifying the callback for an rbtree helper that must
9213   * be called with the lock held? If so, no need to complain about an
9214   * unreleased lock.
9215   */
9216  static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
9217  {
9218  	struct bpf_verifier_state *state = env->cur_state;
9219  	struct bpf_insn *insn = env->prog->insnsi;
9220  	struct bpf_func_state *callee;
9221  	int kfunc_btf_id;
9222  
9223  	if (!state->curframe)
9224  		return false;
9225  
9226  	callee = state->frame[state->curframe];
9227  
9228  	if (!callee->in_callback_fn)
9229  		return false;
9230  
9231  	kfunc_btf_id = insn[callee->callsite].imm;
9232  	return is_rbtree_lock_required_kfunc(kfunc_btf_id);
9233  }
9234  
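/* Called when BPF_EXIT is reached inside a callee frame: validates the return
 * value (callbacks must return a scalar within callback_ret_range, regular
 * subprogs pass r0 through to the caller), transfers reference state back
 * where needed, frees the callee frame and resumes verification at the
 * instruction following the call site.
 */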
9235  static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
9236  {
9237  	struct bpf_verifier_state *state = env->cur_state;
9238  	struct bpf_func_state *caller, *callee;
9239  	struct bpf_reg_state *r0;
9240  	int err;
9241  
9242  	callee = state->frame[state->curframe];
9243  	r0 = &callee->regs[BPF_REG_0];
9244  	if (r0->type == PTR_TO_STACK) {
9245  		/* technically it's ok to return caller's stack pointer
9246  		 * (or caller's caller's pointer) back to the caller,
9247  		 * since these pointers are valid. Only current stack
9248  		 * pointer will be invalid as soon as function exits,
9249  		 * but let's be conservative
9250  		 */
9251  		verbose(env, "cannot return stack pointer to the caller\n");
9252  		return -EINVAL;
9253  	}
9254  
9255  	caller = state->frame[state->curframe - 1];
9256  	if (callee->in_callback_fn) {
9257  		/* enforce R0 return value range [0, 1]. */
9258  		struct tnum range = callee->callback_ret_range;
9259  
9260  		if (r0->type != SCALAR_VALUE) {
9261  			verbose(env, "R0 not a scalar value\n");
9262  			return -EACCES;
9263  		}
9264  		if (!tnum_in(range, r0->var_off)) {
9265  			verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
9266  			return -EINVAL;
9267  		}
9268  	} else {
9269  		/* return to the caller whatever r0 had in the callee */
9270  		caller->regs[BPF_REG_0] = *r0;
9271  	}
9272  
9273  	/* callback_fn frame should have released its own additions to parent's
9274  	 * reference state at this point, or check_reference_leak would
9275  	 * complain, hence it must be the same as the caller. There is no need
9276  	 * to copy it back.
9277  	 */
9278  	if (!callee->in_callback_fn) {
9279  		/* Transfer references to the caller */
9280  		err = copy_reference_state(caller, callee);
9281  		if (err)
9282  			return err;
9283  	}
9284  
9285  	*insn_idx = callee->callsite + 1;
9286  	if (env->log.level & BPF_LOG_LEVEL) {
9287  		verbose(env, "returning from callee:\n");
9288  		print_verifier_state(env, callee, true);
9289  		verbose(env, "to caller at %d:\n", *insn_idx);
9290  		print_verifier_state(env, caller, true);
9291  	}
9292  	/* clear everything in the callee */
9293  	free_func_state(callee);
9294  	state->frame[state->curframe--] = NULL;
9295  	return 0;
9296  }
9297  
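/* Tighten the bounds of r0 for helpers whose return value is known to be
 * narrower than a full 64-bit scalar, e.g. bpf_get_stack() returns at most
 * the buffer size that was passed in (or a negative errno) and
 * bpf_get_smp_processor_id() returns a value in [0, nr_cpu_ids - 1].
 */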
9298  static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
9299  				   int func_id,
9300  				   struct bpf_call_arg_meta *meta)
9301  {
9302  	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
9303  
9304  	if (ret_type != RET_INTEGER)
9305  		return;
9306  
9307  	switch (func_id) {
9308  	case BPF_FUNC_get_stack:
9309  	case BPF_FUNC_get_task_stack:
9310  	case BPF_FUNC_probe_read_str:
9311  	case BPF_FUNC_probe_read_kernel_str:
9312  	case BPF_FUNC_probe_read_user_str:
9313  		ret_reg->smax_value = meta->msize_max_value;
9314  		ret_reg->s32_max_value = meta->msize_max_value;
9315  		ret_reg->smin_value = -MAX_ERRNO;
9316  		ret_reg->s32_min_value = -MAX_ERRNO;
9317  		reg_bounds_sync(ret_reg);
9318  		break;
9319  	case BPF_FUNC_get_smp_processor_id:
9320  		ret_reg->umax_value = nr_cpu_ids - 1;
9321  		ret_reg->u32_max_value = nr_cpu_ids - 1;
9322  		ret_reg->smax_value = nr_cpu_ids - 1;
9323  		ret_reg->s32_max_value = nr_cpu_ids - 1;
9324  		ret_reg->umin_value = 0;
9325  		ret_reg->u32_min_value = 0;
9326  		ret_reg->smin_value = 0;
9327  		ret_reg->s32_min_value = 0;
9328  		reg_bounds_sync(ret_reg);
9329  		break;
9330  	}
9331  }
9332  
9333  static int
9334  record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9335  		int func_id, int insn_idx)
9336  {
9337  	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9338  	struct bpf_map *map = meta->map_ptr;
9339  
9340  	if (func_id != BPF_FUNC_tail_call &&
9341  	    func_id != BPF_FUNC_map_lookup_elem &&
9342  	    func_id != BPF_FUNC_map_update_elem &&
9343  	    func_id != BPF_FUNC_map_delete_elem &&
9344  	    func_id != BPF_FUNC_map_push_elem &&
9345  	    func_id != BPF_FUNC_map_pop_elem &&
9346  	    func_id != BPF_FUNC_map_peek_elem &&
9347  	    func_id != BPF_FUNC_for_each_map_elem &&
9348  	    func_id != BPF_FUNC_redirect_map &&
9349  	    func_id != BPF_FUNC_map_lookup_percpu_elem)
9350  		return 0;
9351  
9352  	if (map == NULL) {
9353  		verbose(env, "kernel subsystem misconfigured verifier\n");
9354  		return -EINVAL;
9355  	}
9356  
9357  	/* If the map is read-only from the program side, additional
9358  	 * restrictions apply to prevent the program from altering the
9359  	 * state of the map.
9360  	 */
9361  	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
9362  	    (func_id == BPF_FUNC_map_delete_elem ||
9363  	     func_id == BPF_FUNC_map_update_elem ||
9364  	     func_id == BPF_FUNC_map_push_elem ||
9365  	     func_id == BPF_FUNC_map_pop_elem)) {
9366  		verbose(env, "write into map forbidden\n");
9367  		return -EACCES;
9368  	}
9369  
9370  	if (!BPF_MAP_PTR(aux->map_ptr_state))
9371  		bpf_map_ptr_store(aux, meta->map_ptr,
9372  				  !meta->map_ptr->bypass_spec_v1);
9373  	else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
9374  		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
9375  				  !meta->map_ptr->bypass_spec_v1);
9376  	return 0;
9377  }
9378  
9379  static int
9380  record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9381  		int func_id, int insn_idx)
9382  {
9383  	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9384  	struct bpf_reg_state *regs = cur_regs(env), *reg;
9385  	struct bpf_map *map = meta->map_ptr;
9386  	u64 val, max;
9387  	int err;
9388  
9389  	if (func_id != BPF_FUNC_tail_call)
9390  		return 0;
9391  	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
9392  		verbose(env, "kernel subsystem misconfigured verifier\n");
9393  		return -EINVAL;
9394  	}
9395  
9396  	reg = &regs[BPF_REG_3];
9397  	val = reg->var_off.value;
9398  	max = map->max_entries;
9399  
9400  	if (!(register_is_const(reg) && val < max)) {
9401  		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9402  		return 0;
9403  	}
9404  
9405  	err = mark_chain_precision(env, BPF_REG_3);
9406  	if (err)
9407  		return err;
9408  	if (bpf_map_key_unseen(aux))
9409  		bpf_map_key_store(aux, val);
9410  	else if (!bpf_map_key_poisoned(aux) &&
9411  		  bpf_map_key_immediate(aux) != val)
9412  		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9413  	return 0;
9414  }
9415  
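/* Every acquired reference must have been released by the time the program
 * (or a callback frame) exits, and before a tail call; callback frames only
 * need to release the references they acquired themselves.
 */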
9416  static int check_reference_leak(struct bpf_verifier_env *env)
9417  {
9418  	struct bpf_func_state *state = cur_func(env);
9419  	bool refs_lingering = false;
9420  	int i;
9421  
9422  	if (state->frameno && !state->in_callback_fn)
9423  		return 0;
9424  
9425  	for (i = 0; i < state->acquired_refs; i++) {
9426  		if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
9427  			continue;
9428  		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
9429  			state->refs[i].id, state->refs[i].insn_idx);
9430  		refs_lingering = true;
9431  	}
9432  	return refs_lingering ? -EINVAL : 0;
9433  }
9434  
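/* Extra validation for bpf_snprintf(): the format string must be a constant
 * string in a read-only map so it can be checked at verification time, and
 * the data length must be a multiple of 8 (an array of u64). Illustrative
 * BPF-side usage (identifiers are hypothetical):
 *
 *	u64 args[] = { (u64)pid, (u64)tgid };
 *	bpf_snprintf(buf, sizeof(buf), "pid %d tgid %d", args, sizeof(args));
 */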
9435  static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
9436  				   struct bpf_reg_state *regs)
9437  {
9438  	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
9439  	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
9440  	struct bpf_map *fmt_map = fmt_reg->map_ptr;
9441  	struct bpf_bprintf_data data = {};
9442  	int err, fmt_map_off, num_args;
9443  	u64 fmt_addr;
9444  	char *fmt;
9445  
9446  	/* data must be an array of u64 */
9447  	if (data_len_reg->var_off.value % 8)
9448  		return -EINVAL;
9449  	num_args = data_len_reg->var_off.value / 8;
9450  
9451  	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
9452  	 * and map_direct_value_addr is set.
9453  	 */
9454  	fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
9455  	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
9456  						  fmt_map_off);
9457  	if (err) {
9458  		verbose(env, "verifier bug\n");
9459  		return -EFAULT;
9460  	}
9461  	fmt = (char *)(long)fmt_addr + fmt_map_off;
9462  
9463  	/* We are also guaranteed that fmt+fmt_map_off is NUL-terminated, so we
9464  	 * can focus on validating the format specifiers.
9465  	 */
9466  	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
9467  	if (err < 0)
9468  		verbose(env, "Invalid format string\n");
9469  
9470  	return err;
9471  }
9472  
9473  static int check_get_func_ip(struct bpf_verifier_env *env)
9474  {
9475  	enum bpf_prog_type type = resolve_prog_type(env->prog);
9476  	int func_id = BPF_FUNC_get_func_ip;
9477  
9478  	if (type == BPF_PROG_TYPE_TRACING) {
9479  		if (!bpf_prog_has_trampoline(env->prog)) {
9480  			verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
9481  				func_id_name(func_id), func_id);
9482  			return -ENOTSUPP;
9483  		}
9484  		return 0;
9485  	} else if (type == BPF_PROG_TYPE_KPROBE) {
9486  		return 0;
9487  	}
9488  
9489  	verbose(env, "func %s#%d not supported for program type %d\n",
9490  		func_id_name(func_id), func_id, type);
9491  	return -ENOTSUPP;
9492  }
9493  
9494  static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
9495  {
9496  	return &env->insn_aux_data[env->insn_idx];
9497  }
9498  
9499  static bool loop_flag_is_zero(struct bpf_verifier_env *env)
9500  {
9501  	struct bpf_reg_state *regs = cur_regs(env);
9502  	struct bpf_reg_state *reg = &regs[BPF_REG_4];
9503  	bool reg_is_null = register_is_null(reg);
9504  
9505  	if (reg_is_null)
9506  		mark_chain_precision(env, BPF_REG_4);
9507  
9508  	return reg_is_null;
9509  }
9510  
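/* Track whether this bpf_loop() call site remains a candidate for inlining:
 * it stays eligible only if the flags argument is provably zero and every
 * verified path uses the same callback subprog.
 */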
9511  static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
9512  {
9513  	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
9514  
9515  	if (!state->initialized) {
9516  		state->initialized = 1;
9517  		state->fit_for_inline = loop_flag_is_zero(env);
9518  		state->callback_subprogno = subprogno;
9519  		return;
9520  	}
9521  
9522  	if (!state->fit_for_inline)
9523  		return;
9524  
9525  	state->fit_for_inline = (loop_flag_is_zero(env) &&
9526  				 state->callback_subprogno == subprogno);
9527  }
9528  
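/* Verify a call to a BPF helper: look up the function prototype, enforce
 * GPL/sleepable/RCU constraints, check every argument, perform
 * helper-specific handling (callback-calling helpers, dynptrs, map
 * bookkeeping) and set up r0 according to the helper's declared return type.
 */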
9529  static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9530  			     int *insn_idx_p)
9531  {
9532  	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
9533  	const struct bpf_func_proto *fn = NULL;
9534  	enum bpf_return_type ret_type;
9535  	enum bpf_type_flag ret_flag;
9536  	struct bpf_reg_state *regs;
9537  	struct bpf_call_arg_meta meta;
9538  	int insn_idx = *insn_idx_p;
9539  	bool changes_data;
9540  	int i, err, func_id;
9541  
9542  	/* find function prototype */
9543  	func_id = insn->imm;
9544  	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
9545  		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
9546  			func_id);
9547  		return -EINVAL;
9548  	}
9549  
9550  	if (env->ops->get_func_proto)
9551  		fn = env->ops->get_func_proto(func_id, env->prog);
9552  	if (!fn) {
9553  		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
9554  			func_id);
9555  		return -EINVAL;
9556  	}
9557  
9558  	/* eBPF programs must be GPL compatible to use GPL-ed functions */
9559  	if (!env->prog->gpl_compatible && fn->gpl_only) {
9560  		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
9561  		return -EINVAL;
9562  	}
9563  
9564  	if (fn->allowed && !fn->allowed(env->prog)) {
9565  		verbose(env, "helper call is not allowed in probe\n");
9566  		return -EINVAL;
9567  	}
9568  
9569  	if (!env->prog->aux->sleepable && fn->might_sleep) {
9570  		verbose(env, "helper call might sleep in a non-sleepable prog\n");
9571  		return -EINVAL;
9572  	}
9573  
9574  	/* With LD_ABS/IND some JITs save/restore skb from r1. */
9575  	changes_data = bpf_helper_changes_pkt_data(fn->func);
9576  	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
9577  		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
9578  			func_id_name(func_id), func_id);
9579  		return -EINVAL;
9580  	}
9581  
9582  	memset(&meta, 0, sizeof(meta));
9583  	meta.pkt_access = fn->pkt_access;
9584  
9585  	err = check_func_proto(fn, func_id);
9586  	if (err) {
9587  		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
9588  			func_id_name(func_id), func_id);
9589  		return err;
9590  	}
9591  
9592  	if (env->cur_state->active_rcu_lock) {
9593  		if (fn->might_sleep) {
9594  			verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
9595  				func_id_name(func_id), func_id);
9596  			return -EINVAL;
9597  		}
9598  
9599  		if (env->prog->aux->sleepable && is_storage_get_function(func_id))
9600  			env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
9601  	}
9602  
9603  	meta.func_id = func_id;
9604  	/* check args */
9605  	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
9606  		err = check_func_arg(env, i, &meta, fn, insn_idx);
9607  		if (err)
9608  			return err;
9609  	}
9610  
9611  	err = record_func_map(env, &meta, func_id, insn_idx);
9612  	if (err)
9613  		return err;
9614  
9615  	err = record_func_key(env, &meta, func_id, insn_idx);
9616  	if (err)
9617  		return err;
9618  
9619  	/* Mark slots with STACK_MISC in case of raw mode; the stack offset
9620  	 * is inferred from register state.
9621  	 */
9622  	for (i = 0; i < meta.access_size; i++) {
9623  		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
9624  				       BPF_WRITE, -1, false, false);
9625  		if (err)
9626  			return err;
9627  	}
9628  
9629  	regs = cur_regs(env);
9630  
9631  	if (meta.release_regno) {
9632  		err = -EINVAL;
9633  		/* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
9634  		 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
9635  		 * is safe to do directly.
9636  		 */
9637  		if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
9638  			if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
9639  				verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
9640  				return -EFAULT;
9641  			}
9642  			err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
9643  		} else if (meta.ref_obj_id) {
9644  			err = release_reference(env, meta.ref_obj_id);
9645  		} else if (register_is_null(&regs[meta.release_regno])) {
9646  			/* meta.ref_obj_id can only be 0 if the register that is meant to be
9647  			 * released is NULL, and that register must be above R0.
9648  			 */
9649  			err = 0;
9650  		}
9651  		if (err) {
9652  			verbose(env, "func %s#%d reference has not been acquired before\n",
9653  				func_id_name(func_id), func_id);
9654  			return err;
9655  		}
9656  	}
9657  
9658  	switch (func_id) {
9659  	case BPF_FUNC_tail_call:
9660  		err = check_reference_leak(env);
9661  		if (err) {
9662  			verbose(env, "tail_call would lead to reference leak\n");
9663  			return err;
9664  		}
9665  		break;
9666  	case BPF_FUNC_get_local_storage:
9667  		/* check that flags argument in get_local_storage(map, flags) is 0;
9668  		 * this is required because get_local_storage() can't return an error.
9669  		 */
9670  		if (!register_is_null(&regs[BPF_REG_2])) {
9671  			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
9672  			return -EINVAL;
9673  		}
9674  		break;
9675  	case BPF_FUNC_for_each_map_elem:
9676  		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
9677  					set_map_elem_callback_state);
9678  		break;
9679  	case BPF_FUNC_timer_set_callback:
9680  		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
9681  					set_timer_callback_state);
9682  		break;
9683  	case BPF_FUNC_find_vma:
9684  		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
9685  					set_find_vma_callback_state);
9686  		break;
9687  	case BPF_FUNC_snprintf:
9688  		err = check_bpf_snprintf_call(env, regs);
9689  		break;
9690  	case BPF_FUNC_loop:
9691  		update_loop_inline_state(env, meta.subprogno);
9692  		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
9693  					set_loop_callback_state);
9694  		break;
9695  	case BPF_FUNC_dynptr_from_mem:
9696  		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
9697  			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
9698  				reg_type_str(env, regs[BPF_REG_1].type));
9699  			return -EACCES;
9700  		}
9701  		break;
9702  	case BPF_FUNC_set_retval:
9703  		if (prog_type == BPF_PROG_TYPE_LSM &&
9704  		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
9705  			if (!env->prog->aux->attach_func_proto->type) {
9706  				/* Make sure programs that attach to void
9707  				 * hooks don't try to modify return value.
9708  				 */
9709  				verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
9710  				return -EINVAL;
9711  			}
9712  		}
9713  		break;
9714  	case BPF_FUNC_dynptr_data:
9715  	{
9716  		struct bpf_reg_state *reg;
9717  		int id, ref_obj_id;
9718  
9719  		reg = get_dynptr_arg_reg(env, fn, regs);
9720  		if (!reg)
9721  			return -EFAULT;
9722  
9724  		if (meta.dynptr_id) {
9725  			verbose(env, "verifier internal error: meta.dynptr_id already set\n");
9726  			return -EFAULT;
9727  		}
9728  		if (meta.ref_obj_id) {
9729  			verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
9730  			return -EFAULT;
9731  		}
9732  
9733  		id = dynptr_id(env, reg);
9734  		if (id < 0) {
9735  			verbose(env, "verifier internal error: failed to obtain dynptr id\n");
9736  			return id;
9737  		}
9738  
9739  		ref_obj_id = dynptr_ref_obj_id(env, reg);
9740  		if (ref_obj_id < 0) {
9741  			verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
9742  			return ref_obj_id;
9743  		}
9744  
9745  		meta.dynptr_id = id;
9746  		meta.ref_obj_id = ref_obj_id;
9747  
9748  		break;
9749  	}
9750  	case BPF_FUNC_dynptr_write:
9751  	{
9752  		enum bpf_dynptr_type dynptr_type;
9753  		struct bpf_reg_state *reg;
9754  
9755  		reg = get_dynptr_arg_reg(env, fn, regs);
9756  		if (!reg)
9757  			return -EFAULT;
9758  
9759  		dynptr_type = dynptr_get_type(env, reg);
9760  		if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
9761  			return -EFAULT;
9762  
9763  		if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
9764  			/* this will trigger clear_all_pkt_pointers(), which will
9765  			 * invalidate all dynptr slices associated with the skb
9766  			 */
9767  			changes_data = true;
9768  
9769  		break;
9770  	}
9771  	case BPF_FUNC_user_ringbuf_drain:
9772  		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
9773  					set_user_ringbuf_callback_state);
9774  		break;
9775  	}
9776  
9777  	if (err)
9778  		return err;
9779  
9780  	/* reset caller saved regs */
9781  	for (i = 0; i < CALLER_SAVED_REGS; i++) {
9782  		mark_reg_not_init(env, regs, caller_saved[i]);
9783  		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
9784  	}
9785  
9786  	/* helper call returns 64-bit value. */
9787  	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
9788  
9789  	/* update return register (already marked as written above) */
9790  	ret_type = fn->ret_type;
9791  	ret_flag = type_flag(ret_type);
9792  
9793  	switch (base_type(ret_type)) {
9794  	case RET_INTEGER:
9795  		/* sets type to SCALAR_VALUE */
9796  		mark_reg_unknown(env, regs, BPF_REG_0);
9797  		break;
9798  	case RET_VOID:
9799  		regs[BPF_REG_0].type = NOT_INIT;
9800  		break;
9801  	case RET_PTR_TO_MAP_VALUE:
9802  		/* There is no offset yet applied, variable or fixed */
9803  		mark_reg_known_zero(env, regs, BPF_REG_0);
9804  		/* remember map_ptr, so that check_map_access()
9805  		 * can check 'value_size' boundary of memory access
9806  		 * to map element returned from bpf_map_lookup_elem()
9807  		 */
9808  		if (meta.map_ptr == NULL) {
9809  			verbose(env,
9810  				"kernel subsystem misconfigured verifier\n");
9811  			return -EINVAL;
9812  		}
9813  		regs[BPF_REG_0].map_ptr = meta.map_ptr;
9814  		regs[BPF_REG_0].map_uid = meta.map_uid;
9815  		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
9816  		if (!type_may_be_null(ret_type) &&
9817  		    btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
9818  			regs[BPF_REG_0].id = ++env->id_gen;
9819  		}
9820  		break;
9821  	case RET_PTR_TO_SOCKET:
9822  		mark_reg_known_zero(env, regs, BPF_REG_0);
9823  		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
9824  		break;
9825  	case RET_PTR_TO_SOCK_COMMON:
9826  		mark_reg_known_zero(env, regs, BPF_REG_0);
9827  		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
9828  		break;
9829  	case RET_PTR_TO_TCP_SOCK:
9830  		mark_reg_known_zero(env, regs, BPF_REG_0);
9831  		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
9832  		break;
9833  	case RET_PTR_TO_MEM:
9834  		mark_reg_known_zero(env, regs, BPF_REG_0);
9835  		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
9836  		regs[BPF_REG_0].mem_size = meta.mem_size;
9837  		break;
9838  	case RET_PTR_TO_MEM_OR_BTF_ID:
9839  	{
9840  		const struct btf_type *t;
9841  
9842  		mark_reg_known_zero(env, regs, BPF_REG_0);
9843  		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
9844  		if (!btf_type_is_struct(t)) {
9845  			u32 tsize;
9846  			const struct btf_type *ret;
9847  			const char *tname;
9848  
9849  			/* resolve the type size of ksym. */
9850  			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
9851  			if (IS_ERR(ret)) {
9852  				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
9853  				verbose(env, "unable to resolve the size of type '%s': %ld\n",
9854  					tname, PTR_ERR(ret));
9855  				return -EINVAL;
9856  			}
9857  			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
9858  			regs[BPF_REG_0].mem_size = tsize;
9859  		} else {
9860  			/* MEM_RDONLY may be carried from ret_flag, but it
9861  			 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
9862  			 * it will confuse the check of PTR_TO_BTF_ID in
9863  			 * check_mem_access().
9864  			 */
9865  			ret_flag &= ~MEM_RDONLY;
9866  
9867  			regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
9868  			regs[BPF_REG_0].btf = meta.ret_btf;
9869  			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
9870  		}
9871  		break;
9872  	}
9873  	case RET_PTR_TO_BTF_ID:
9874  	{
9875  		struct btf *ret_btf;
9876  		int ret_btf_id;
9877  
9878  		mark_reg_known_zero(env, regs, BPF_REG_0);
9879  		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
9880  		if (func_id == BPF_FUNC_kptr_xchg) {
9881  			ret_btf = meta.kptr_field->kptr.btf;
9882  			ret_btf_id = meta.kptr_field->kptr.btf_id;
9883  			if (!btf_is_kernel(ret_btf))
9884  				regs[BPF_REG_0].type |= MEM_ALLOC;
9885  		} else {
9886  			if (fn->ret_btf_id == BPF_PTR_POISON) {
9887  				verbose(env, "verifier internal error:");
9888  				verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
9889  					func_id_name(func_id));
9890  				return -EINVAL;
9891  			}
9892  			ret_btf = btf_vmlinux;
9893  			ret_btf_id = *fn->ret_btf_id;
9894  		}
9895  		if (ret_btf_id == 0) {
9896  			verbose(env, "invalid return type %u of func %s#%d\n",
9897  				base_type(ret_type), func_id_name(func_id),
9898  				func_id);
9899  			return -EINVAL;
9900  		}
9901  		regs[BPF_REG_0].btf = ret_btf;
9902  		regs[BPF_REG_0].btf_id = ret_btf_id;
9903  		break;
9904  	}
9905  	default:
9906  		verbose(env, "unknown return type %u of func %s#%d\n",
9907  			base_type(ret_type), func_id_name(func_id), func_id);
9908  		return -EINVAL;
9909  	}
9910  
9911  	if (type_may_be_null(regs[BPF_REG_0].type))
9912  		regs[BPF_REG_0].id = ++env->id_gen;
9913  
9914  	if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
9915  		verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
9916  			func_id_name(func_id), func_id);
9917  		return -EFAULT;
9918  	}
9919  
9920  	if (is_dynptr_ref_function(func_id))
9921  		regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
9922  
9923  	if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
9924  		/* For release_reference() */
9925  		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
9926  	} else if (is_acquire_function(func_id, meta.map_ptr)) {
9927  		int id = acquire_reference_state(env, insn_idx);
9928  
9929  		if (id < 0)
9930  			return id;
9931  		/* For mark_ptr_or_null_reg() */
9932  		regs[BPF_REG_0].id = id;
9933  		/* For release_reference() */
9934  		regs[BPF_REG_0].ref_obj_id = id;
9935  	}
9936  
9937  	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
9938  
9939  	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
9940  	if (err)
9941  		return err;
9942  
9943  	if ((func_id == BPF_FUNC_get_stack ||
9944  	     func_id == BPF_FUNC_get_task_stack) &&
9945  	    !env->prog->has_callchain_buf) {
9946  		const char *err_str;
9947  
9948  #ifdef CONFIG_PERF_EVENTS
9949  		err = get_callchain_buffers(sysctl_perf_event_max_stack);
9950  		err_str = "cannot get callchain buffer for func %s#%d\n";
9951  #else
9952  		err = -ENOTSUPP;
9953  		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
9954  #endif
9955  		if (err) {
9956  			verbose(env, err_str, func_id_name(func_id), func_id);
9957  			return err;
9958  		}
9959  
9960  		env->prog->has_callchain_buf = true;
9961  	}
9962  
9963  	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
9964  		env->prog->call_get_stack = true;
9965  
9966  	if (func_id == BPF_FUNC_get_func_ip) {
9967  		if (check_get_func_ip(env))
9968  			return -ENOTSUPP;
9969  		env->prog->call_get_func_ip = true;
9970  	}
9971  
9972  	if (changes_data)
9973  		clear_all_pkt_pointers(env);
9974  	return 0;
9975  }
9976  
9977  /* mark_btf_func_reg_size() is used when the reg size is determined by
9978   * the size of the BTF func_proto's return value or argument.
9979   */
9980  static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
9981  				   size_t reg_size)
9982  {
9983  	struct bpf_reg_state *reg = &cur_regs(env)[regno];
9984  
9985  	if (regno == BPF_REG_0) {
9986  		/* Function return value */
9987  		reg->live |= REG_LIVE_WRITTEN;
9988  		reg->subreg_def = reg_size == sizeof(u64) ?
9989  			DEF_NOT_SUBREG : env->insn_idx + 1;
9990  	} else {
9991  		/* Function argument */
9992  		if (reg_size == sizeof(u64)) {
9993  			mark_insn_zext(env, reg);
9994  			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
9995  		} else {
9996  			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
9997  		}
9998  	}
9999  }
10000  
10001  static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
10002  {
10003  	return meta->kfunc_flags & KF_ACQUIRE;
10004  }
10005  
10006  static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
10007  {
10008  	return meta->kfunc_flags & KF_RELEASE;
10009  }
10010  
10011  static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
10012  {
10013  	return (meta->kfunc_flags & KF_TRUSTED_ARGS) || is_kfunc_release(meta);
10014  }
10015  
10016  static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
10017  {
10018  	return meta->kfunc_flags & KF_SLEEPABLE;
10019  }
10020  
10021  static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
10022  {
10023  	return meta->kfunc_flags & KF_DESTRUCTIVE;
10024  }
10025  
10026  static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
10027  {
10028  	return meta->kfunc_flags & KF_RCU;
10029  }
10030  
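/* kfunc argument annotations are encoded as parameter-name suffixes in BTF,
 * e.g. a size parameter declared as "int mem__sz" matches the "__sz" suffix.
 */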
10031  static bool __kfunc_param_match_suffix(const struct btf *btf,
10032  				       const struct btf_param *arg,
10033  				       const char *suffix)
10034  {
10035  	int suffix_len = strlen(suffix), len;
10036  	const char *param_name;
10037  
10038  	/* In the future, this can be ported to use BTF tagging */
10039  	param_name = btf_name_by_offset(btf, arg->name_off);
10040  	if (str_is_empty(param_name))
10041  		return false;
10042  	len = strlen(param_name);
10043  	if (len < suffix_len)
10044  		return false;
10045  	param_name += len - suffix_len;
10046  	return !strncmp(param_name, suffix, suffix_len);
10047  }
10048  
10049  static bool is_kfunc_arg_mem_size(const struct btf *btf,
10050  				  const struct btf_param *arg,
10051  				  const struct bpf_reg_state *reg)
10052  {
10053  	const struct btf_type *t;
10054  
10055  	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10056  	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10057  		return false;
10058  
10059  	return __kfunc_param_match_suffix(btf, arg, "__sz");
10060  }
10061  
10062  static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
10063  					const struct btf_param *arg,
10064  					const struct bpf_reg_state *reg)
10065  {
10066  	const struct btf_type *t;
10067  
10068  	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10069  	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10070  		return false;
10071  
10072  	return __kfunc_param_match_suffix(btf, arg, "__szk");
10073  }
10074  
10075  static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg)
10076  {
10077  	return __kfunc_param_match_suffix(btf, arg, "__opt");
10078  }
10079  
10080  static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
10081  {
10082  	return __kfunc_param_match_suffix(btf, arg, "__k");
10083  }
10084  
10085  static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
10086  {
10087  	return __kfunc_param_match_suffix(btf, arg, "__ign");
10088  }
10089  
10090  static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
10091  {
10092  	return __kfunc_param_match_suffix(btf, arg, "__alloc");
10093  }
10094  
10095  static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
10096  {
10097  	return __kfunc_param_match_suffix(btf, arg, "__uninit");
10098  }
10099  
10100  static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
10101  {
10102  	return __kfunc_param_match_suffix(btf, arg, "__refcounted_kptr");
10103  }
10104  
10105  static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
10106  					  const struct btf_param *arg,
10107  					  const char *name)
10108  {
10109  	int len, target_len = strlen(name);
10110  	const char *param_name;
10111  
10112  	param_name = btf_name_by_offset(btf, arg->name_off);
10113  	if (str_is_empty(param_name))
10114  		return false;
10115  	len = strlen(param_name);
10116  	if (len != target_len)
10117  		return false;
10118  	if (strcmp(param_name, name))
10119  		return false;
10120  
10121  	return true;
10122  }
10123  
10124  enum {
10125  	KF_ARG_DYNPTR_ID,
10126  	KF_ARG_LIST_HEAD_ID,
10127  	KF_ARG_LIST_NODE_ID,
10128  	KF_ARG_RB_ROOT_ID,
10129  	KF_ARG_RB_NODE_ID,
10130  };
10131  
10132  BTF_ID_LIST(kf_arg_btf_ids)
10133  BTF_ID(struct, bpf_dynptr_kern)
10134  BTF_ID(struct, bpf_list_head)
10135  BTF_ID(struct, bpf_list_node)
10136  BTF_ID(struct, bpf_rb_root)
10137  BTF_ID(struct, bpf_rb_node)
10138  
10139  static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
10140  				    const struct btf_param *arg, int type)
10141  {
10142  	const struct btf_type *t;
10143  	u32 res_id;
10144  
10145  	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10146  	if (!t)
10147  		return false;
10148  	if (!btf_type_is_ptr(t))
10149  		return false;
10150  	t = btf_type_skip_modifiers(btf, t->type, &res_id);
10151  	if (!t)
10152  		return false;
10153  	return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
10154  }
10155  
10156  static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
10157  {
10158  	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
10159  }
10160  
10161  static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
10162  {
10163  	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
10164  }
10165  
10166  static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
10167  {
10168  	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
10169  }
10170  
10171  static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
10172  {
10173  	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
10174  }
10175  
10176  static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
10177  {
10178  	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
10179  }
10180  
10181  static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
10182  				  const struct btf_param *arg)
10183  {
10184  	const struct btf_type *t;
10185  
10186  	t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
10187  	if (!t)
10188  		return false;
10189  
10190  	return true;
10191  }
10192  
10193  /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
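/* For example, struct { int a; struct { u64 b[4]; } inner; } qualifies, while
 * any struct containing a pointer member does not.
 */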
10194  static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
10195  					const struct btf *btf,
10196  					const struct btf_type *t, int rec)
10197  {
10198  	const struct btf_type *member_type;
10199  	const struct btf_member *member;
10200  	u32 i;
10201  
10202  	if (!btf_type_is_struct(t))
10203  		return false;
10204  
10205  	for_each_member(i, t, member) {
10206  		const struct btf_array *array;
10207  
10208  		member_type = btf_type_skip_modifiers(btf, member->type, NULL);
10209  		if (btf_type_is_struct(member_type)) {
10210  			if (rec >= 3) {
10211  				verbose(env, "max struct nesting depth exceeded\n");
10212  				return false;
10213  			}
10214  			if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
10215  				return false;
10216  			continue;
10217  		}
10218  		if (btf_type_is_array(member_type)) {
10219  			array = btf_array(member_type);
10220  			if (!array->nelems)
10221  				return false;
10222  			member_type = btf_type_skip_modifiers(btf, array->type, NULL);
10223  			if (!btf_type_is_scalar(member_type))
10224  				return false;
10225  			continue;
10226  		}
10227  		if (!btf_type_is_scalar(member_type))
10228  			return false;
10229  	}
10230  	return true;
10231  }
10232  
10233  enum kfunc_ptr_arg_type {
10234  	KF_ARG_PTR_TO_CTX,
10235  	KF_ARG_PTR_TO_ALLOC_BTF_ID,    /* Allocated object */
10236  	KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
10237  	KF_ARG_PTR_TO_DYNPTR,
10238  	KF_ARG_PTR_TO_ITER,
10239  	KF_ARG_PTR_TO_LIST_HEAD,
10240  	KF_ARG_PTR_TO_LIST_NODE,
10241  	KF_ARG_PTR_TO_BTF_ID,	       /* Also covers reg2btf_ids conversions */
10242  	KF_ARG_PTR_TO_MEM,
10243  	KF_ARG_PTR_TO_MEM_SIZE,	       /* Size derived from next argument, skip it */
10244  	KF_ARG_PTR_TO_CALLBACK,
10245  	KF_ARG_PTR_TO_RB_ROOT,
10246  	KF_ARG_PTR_TO_RB_NODE,
10247  };
10248  
10249  enum special_kfunc_type {
10250  	KF_bpf_obj_new_impl,
10251  	KF_bpf_obj_drop_impl,
10252  	KF_bpf_refcount_acquire_impl,
10253  	KF_bpf_list_push_front_impl,
10254  	KF_bpf_list_push_back_impl,
10255  	KF_bpf_list_pop_front,
10256  	KF_bpf_list_pop_back,
10257  	KF_bpf_cast_to_kern_ctx,
10258  	KF_bpf_rdonly_cast,
10259  	KF_bpf_rcu_read_lock,
10260  	KF_bpf_rcu_read_unlock,
10261  	KF_bpf_rbtree_remove,
10262  	KF_bpf_rbtree_add_impl,
10263  	KF_bpf_rbtree_first,
10264  	KF_bpf_dynptr_from_skb,
10265  	KF_bpf_dynptr_from_xdp,
10266  	KF_bpf_dynptr_slice,
10267  	KF_bpf_dynptr_slice_rdwr,
10268  	KF_bpf_dynptr_clone,
10269  };
10270  
10271  BTF_SET_START(special_kfunc_set)
10272  BTF_ID(func, bpf_obj_new_impl)
10273  BTF_ID(func, bpf_obj_drop_impl)
10274  BTF_ID(func, bpf_refcount_acquire_impl)
10275  BTF_ID(func, bpf_list_push_front_impl)
10276  BTF_ID(func, bpf_list_push_back_impl)
10277  BTF_ID(func, bpf_list_pop_front)
10278  BTF_ID(func, bpf_list_pop_back)
10279  BTF_ID(func, bpf_cast_to_kern_ctx)
10280  BTF_ID(func, bpf_rdonly_cast)
10281  BTF_ID(func, bpf_rbtree_remove)
10282  BTF_ID(func, bpf_rbtree_add_impl)
10283  BTF_ID(func, bpf_rbtree_first)
10284  BTF_ID(func, bpf_dynptr_from_skb)
10285  BTF_ID(func, bpf_dynptr_from_xdp)
10286  BTF_ID(func, bpf_dynptr_slice)
10287  BTF_ID(func, bpf_dynptr_slice_rdwr)
10288  BTF_ID(func, bpf_dynptr_clone)
10289  BTF_SET_END(special_kfunc_set)
10290  
10291  BTF_ID_LIST(special_kfunc_list)
10292  BTF_ID(func, bpf_obj_new_impl)
10293  BTF_ID(func, bpf_obj_drop_impl)
10294  BTF_ID(func, bpf_refcount_acquire_impl)
10295  BTF_ID(func, bpf_list_push_front_impl)
10296  BTF_ID(func, bpf_list_push_back_impl)
10297  BTF_ID(func, bpf_list_pop_front)
10298  BTF_ID(func, bpf_list_pop_back)
10299  BTF_ID(func, bpf_cast_to_kern_ctx)
10300  BTF_ID(func, bpf_rdonly_cast)
10301  BTF_ID(func, bpf_rcu_read_lock)
10302  BTF_ID(func, bpf_rcu_read_unlock)
10303  BTF_ID(func, bpf_rbtree_remove)
10304  BTF_ID(func, bpf_rbtree_add_impl)
10305  BTF_ID(func, bpf_rbtree_first)
10306  BTF_ID(func, bpf_dynptr_from_skb)
10307  BTF_ID(func, bpf_dynptr_from_xdp)
10308  BTF_ID(func, bpf_dynptr_slice)
10309  BTF_ID(func, bpf_dynptr_slice_rdwr)
10310  BTF_ID(func, bpf_dynptr_clone)
10311  
10312  static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
10313  {
10314  	if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
10315  	    meta->arg_owning_ref) {
10316  		return false;
10317  	}
10318  
10319  	return meta->kfunc_flags & KF_RET_NULL;
10320  }
10321  
10322  static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
10323  {
10324  	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
10325  }
10326  
10327  static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
10328  {
10329  	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
10330  }
10331  
10332  static enum kfunc_ptr_arg_type
10333  get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
10334  		       struct bpf_kfunc_call_arg_meta *meta,
10335  		       const struct btf_type *t, const struct btf_type *ref_t,
10336  		       const char *ref_tname, const struct btf_param *args,
10337  		       int argno, int nargs)
10338  {
10339  	u32 regno = argno + 1;
10340  	struct bpf_reg_state *regs = cur_regs(env);
10341  	struct bpf_reg_state *reg = &regs[regno];
10342  	bool arg_mem_size = false;
10343  
10344  	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
10345  		return KF_ARG_PTR_TO_CTX;
10346  
10347  	/* In this function, we verify the kfunc's BTF as per the argument type,
10348  	 * leaving the rest of the verification with respect to the register
10349  	 * type to our caller. When a set of conditions hold in the BTF type of
10350  	 * arguments, we resolve it to a known kfunc_ptr_arg_type.
10351  	 */
10352  	if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
10353  		return KF_ARG_PTR_TO_CTX;
10354  
10355  	if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
10356  		return KF_ARG_PTR_TO_ALLOC_BTF_ID;
10357  
10358  	if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno]))
10359  		return KF_ARG_PTR_TO_REFCOUNTED_KPTR;
10360  
10361  	if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
10362  		return KF_ARG_PTR_TO_DYNPTR;
10363  
10364  	if (is_kfunc_arg_iter(meta, argno))
10365  		return KF_ARG_PTR_TO_ITER;
10366  
10367  	if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
10368  		return KF_ARG_PTR_TO_LIST_HEAD;
10369  
10370  	if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
10371  		return KF_ARG_PTR_TO_LIST_NODE;
10372  
10373  	if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
10374  		return KF_ARG_PTR_TO_RB_ROOT;
10375  
10376  	if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
10377  		return KF_ARG_PTR_TO_RB_NODE;
10378  
10379  	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
10380  		if (!btf_type_is_struct(ref_t)) {
10381  			verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
10382  				meta->func_name, argno, btf_type_str(ref_t), ref_tname);
10383  			return -EINVAL;
10384  		}
10385  		return KF_ARG_PTR_TO_BTF_ID;
10386  	}
10387  
10388  	if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
10389  		return KF_ARG_PTR_TO_CALLBACK;
10390  
10391  
10392  	if (argno + 1 < nargs &&
10393  	    (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
10394  	     is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
10395  		arg_mem_size = true;
10396  
10397  	/* This is the catch-all argument type for register types supported by
10398  	 * check_helper_mem_access. However, we only allow it when the argument
10399  	 * type is a pointer to a scalar, or a struct composed (recursively) of
10400  	 * scalars. When arg_mem_size is true, the pointer can also be void *.
10401  	 */
10402  	if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
10403  	    (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
10404  		verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
10405  			argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
10406  		return -EINVAL;
10407  	}
10408  	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
10409  }
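
/* A minimal illustrative sketch of how the KF_ARG_PTR_TO_MEM_SIZE case above
 * is typically triggered: a kfunc whose pointer argument is followed by a
 * scalar argument named with the "__sz" suffix (or "__szk" for constant
 * sizes) is resolved to the mem + size pair, e.g.
 *
 *	__bpf_kfunc int bpf_example_copy(void *dst, u32 dst__sz);
 *
 * bpf_example_copy() is a hypothetical name used purely for illustration; the
 * "__sz"/"__szk" suffix convention is what is_kfunc_arg_mem_size() and
 * is_kfunc_arg_const_mem_size() key off.
 */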
10410  
10411  static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
10412  					struct bpf_reg_state *reg,
10413  					const struct btf_type *ref_t,
10414  					const char *ref_tname, u32 ref_id,
10415  					struct bpf_kfunc_call_arg_meta *meta,
10416  					int argno)
10417  {
10418  	const struct btf_type *reg_ref_t;
10419  	bool strict_type_match = false;
10420  	const struct btf *reg_btf;
10421  	const char *reg_ref_tname;
10422  	u32 reg_ref_id;
10423  
10424  	if (base_type(reg->type) == PTR_TO_BTF_ID) {
10425  		reg_btf = reg->btf;
10426  		reg_ref_id = reg->btf_id;
10427  	} else {
10428  		reg_btf = btf_vmlinux;
10429  		reg_ref_id = *reg2btf_ids[base_type(reg->type)];
10430  	}
10431  
10432  	/* Enforce strict type matching for calls to kfuncs that are acquiring
10433  	 * or releasing a reference, or are no-cast aliases. We do _not_
10434  	 * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
10435  	 * as we want to enable BPF programs to pass types that are bitwise
10436  	 * equivalent without forcing them to explicitly cast with something
10437  	 * like bpf_cast_to_kern_ctx().
10438  	 *
10439  	 * For example, say we had a type like the following:
10440  	 *
10441  	 * struct bpf_cpumask {
10442  	 *	cpumask_t cpumask;
10443  	 *	refcount_t usage;
10444  	 * };
10445  	 *
10446  	 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
10447  	 * to a struct cpumask, so it would be safe to pass a struct
10448  	 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
10449  	 *
10450  	 * The philosophy here is similar to how we allow scalars of different
10451  	 * types to be passed to kfuncs as long as the size is the same. The
10452  	 * only difference here is that we're simply allowing
10453  	 * btf_struct_ids_match() to walk the struct at the 0th offset, and
10454  	 * resolve types.
10455  	 */
10456  	if (is_kfunc_acquire(meta) ||
10457  	    (is_kfunc_release(meta) && reg->ref_obj_id) ||
10458  	    btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
10459  		strict_type_match = true;
10460  
10461  	WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off);
10462  
10463  	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
10464  	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
10465  	if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
10466  		verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
10467  			meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
10468  			btf_type_str(reg_ref_t), reg_ref_tname);
10469  		return -EINVAL;
10470  	}
10471  	return 0;
10472  }
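
/* A rough BPF-program-side sketch of the bitwise-equivalence tolerance
 * described in process_kf_arg_ptr_to_btf_id() above, assuming the bpf_cpumask
 * kfuncs from kernel/bpf/cpumask.c are available to the program type:
 *
 *	struct bpf_cpumask *mask = bpf_cpumask_create();
 *
 *	if (!mask)
 *		return 0;
 *	bpf_cpumask_set_cpu(0, mask);
 *	// bpf_cpumask_test_cpu() takes 'const struct cpumask *'; the plain C
 *	// cast does not change the register's BTF type, yet the argument is
 *	// accepted because btf_struct_ids_match() walks the struct at offset 0.
 *	if (bpf_cpumask_test_cpu(0, (const struct cpumask *)mask))
 *		...;
 *	bpf_cpumask_release(mask);
 */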
10473  
10474  static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
10475  {
10476  	struct bpf_verifier_state *state = env->cur_state;
10477  	struct btf_record *rec = reg_btf_record(reg);
10478  
10479  	if (!state->active_lock.ptr) {
10480  		verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
10481  		return -EFAULT;
10482  	}
10483  
10484  	if (type_flag(reg->type) & NON_OWN_REF) {
10485  		verbose(env, "verifier internal error: NON_OWN_REF already set\n");
10486  		return -EFAULT;
10487  	}
10488  
10489  	reg->type |= NON_OWN_REF;
10490  	if (rec->refcount_off >= 0)
10491  		reg->type |= MEM_RCU;
10492  
10493  	return 0;
10494  }
10495  
10496  static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
10497  {
10498  	struct bpf_func_state *state, *unused;
10499  	struct bpf_reg_state *reg;
10500  	int i;
10501  
10502  	state = cur_func(env);
10503  
10504  	if (!ref_obj_id) {
10505  		verbose(env, "verifier internal error: ref_obj_id is zero for "
10506  			     "owning -> non-owning conversion\n");
10507  		return -EFAULT;
10508  	}
10509  
10510  	for (i = 0; i < state->acquired_refs; i++) {
10511  		if (state->refs[i].id != ref_obj_id)
10512  			continue;
10513  
10514  		/* Clear ref_obj_id here so release_reference doesn't clobber
10515  		 * the whole reg
10516  		 */
10517  		bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
10518  			if (reg->ref_obj_id == ref_obj_id) {
10519  				reg->ref_obj_id = 0;
10520  				ref_set_non_owning(env, reg);
10521  			}
10522  		}));
10523  		return 0;
10524  	}
10525  
10526  	verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
10527  	return -EFAULT;
10528  }
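
/* A sketch of the owning -> non-owning conversion done above, as seen from a
 * BPF program (bpf_obj_new()/bpf_rbtree_add() are the existing graph kfunc
 * wrappers; 'groot', 'glock' and the 'less' callback are assumed to be set up
 * as in the rbtree selftests):
 *
 *	struct node_data *n = bpf_obj_new(typeof(*n));
 *
 *	if (!n)
 *		return 0;
 *	bpf_spin_lock(&glock);
 *	bpf_rbtree_add(&groot, &n->node, less);
 *	// 'n' is now a non-owning reference: still readable while the lock is
 *	// held, but it no longer needs to be (and cannot be) bpf_obj_drop()'d.
 *	bpf_spin_unlock(&glock);
 */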
10529  
10530  /* Implementation details:
10531   *
10532   * Each register points to some region of memory, which we define as an
10533   * allocation. Each allocation may embed a bpf_spin_lock which protects any
10534   * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
10535   * allocation. The lock and the data it protects are colocated in the same
10536   * memory region.
10537   *
10538   * Hence, every time a register holds a pointer value pointing to such an
10539   * allocation, the verifier preserves a unique reg->id for it.
10540   *
10541   * The verifier remembers the lock 'ptr' and the lock 'id' whenever
10542   * bpf_spin_lock is called.
10543   *
10544   * To enable this, lock state in the verifier captures two values:
10545   *	active_lock.ptr = Register's type specific pointer
10546   *	active_lock.id  = A unique ID for each register pointer value
10547   *
10548   * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
10549   * supported register types.
10550   *
10551   * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
10552   * allocated objects is the reg->btf pointer.
10553   *
10554   * The active_lock.id is non-unique for maps supporting direct_value_addr, as
10555   * we can establish the provenance of the map value statically for each
10556   * distinct lookup into such maps. They always contain a single map value, so
10557   * assigning a unique ID to each pseudo load would pessimize the algorithm
10558   * and reject valid programs.
10559   *
10560   * Global variables use array maps with max_entries = 1, hence their
10561   * active_lock.ptr becomes the map_ptr and id = 0, since they all share one map value.
10562   *
10563   * In case of inner map lookups, the inner map pointer has the same map_ptr as the
10564   * outer map pointer (in verifier context), but each lookup into an inner map
10565   * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
10566   * maps from the same outer map share the same map_ptr as active_lock.ptr, they
10567   * will get different reg->id assigned to each lookup, hence different
10568   * active_lock.id.
10569   *
10570   * In case of allocated objects, active_lock.ptr is the reg->btf, and the
10571   * reg->id is a unique ID preserved after the NULL pointer check on the pointer
10572   * returned from bpf_obj_new. Each allocation receives a new reg->id.
10573   */
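
/* A minimal sketch of the lock/object colocation described above, from the
 * BPF program side (the __contains() annotation and the bpf_list_*() wrappers
 * are the ones used by the selftests' bpf_experimental.h):
 *
 *	struct elem {
 *		struct bpf_list_node node;
 *		int val;
 *	};
 *
 *	struct map_value {
 *		struct bpf_spin_lock lock;
 *		struct bpf_list_head head __contains(elem, node);
 *	};
 *
 *	// with 'v' pointing to such a map value and 'e' an allocated elem:
 *	bpf_spin_lock(&v->lock);
 *	bpf_list_push_front(&v->head, &e->node);
 *	bpf_spin_unlock(&v->lock);
 *
 * check_reg_allocation_locked() below verifies that the held lock and the
 * graph root live in the same allocation (same active_lock.ptr and id).
 */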
10574  static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
10575  {
10576  	void *ptr;
10577  	u32 id;
10578  
10579  	switch ((int)reg->type) {
10580  	case PTR_TO_MAP_VALUE:
10581  		ptr = reg->map_ptr;
10582  		break;
10583  	case PTR_TO_BTF_ID | MEM_ALLOC:
10584  		ptr = reg->btf;
10585  		break;
10586  	default:
10587  		verbose(env, "verifier internal error: unknown reg type for lock check\n");
10588  		return -EFAULT;
10589  	}
10590  	id = reg->id;
10591  
10592  	if (!env->cur_state->active_lock.ptr)
10593  		return -EINVAL;
10594  	if (env->cur_state->active_lock.ptr != ptr ||
10595  	    env->cur_state->active_lock.id != id) {
10596  		verbose(env, "held lock and object are not in the same allocation\n");
10597  		return -EINVAL;
10598  	}
10599  	return 0;
10600  }
10601  
10602  static bool is_bpf_list_api_kfunc(u32 btf_id)
10603  {
10604  	return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
10605  	       btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
10606  	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
10607  	       btf_id == special_kfunc_list[KF_bpf_list_pop_back];
10608  }
10609  
10610  static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
10611  {
10612  	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
10613  	       btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
10614  	       btf_id == special_kfunc_list[KF_bpf_rbtree_first];
10615  }
10616  
10617  static bool is_bpf_graph_api_kfunc(u32 btf_id)
10618  {
10619  	return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) ||
10620  	       btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
10621  }
10622  
10623  static bool is_callback_calling_kfunc(u32 btf_id)
10624  {
10625  	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
10626  }
10627  
10628  static bool is_rbtree_lock_required_kfunc(u32 btf_id)
10629  {
10630  	return is_bpf_rbtree_api_kfunc(btf_id);
10631  }
10632  
10633  static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
10634  					  enum btf_field_type head_field_type,
10635  					  u32 kfunc_btf_id)
10636  {
10637  	bool ret;
10638  
10639  	switch (head_field_type) {
10640  	case BPF_LIST_HEAD:
10641  		ret = is_bpf_list_api_kfunc(kfunc_btf_id);
10642  		break;
10643  	case BPF_RB_ROOT:
10644  		ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
10645  		break;
10646  	default:
10647  		verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
10648  			btf_field_type_name(head_field_type));
10649  		return false;
10650  	}
10651  
10652  	if (!ret)
10653  		verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
10654  			btf_field_type_name(head_field_type));
10655  	return ret;
10656  }
10657  
10658  static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
10659  					  enum btf_field_type node_field_type,
10660  					  u32 kfunc_btf_id)
10661  {
10662  	bool ret;
10663  
10664  	switch (node_field_type) {
10665  	case BPF_LIST_NODE:
10666  		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
10667  		       kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
10668  		break;
10669  	case BPF_RB_NODE:
10670  		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
10671  		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
10672  		break;
10673  	default:
10674  		verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
10675  			btf_field_type_name(node_field_type));
10676  		return false;
10677  	}
10678  
10679  	if (!ret)
10680  		verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
10681  			btf_field_type_name(node_field_type));
10682  	return ret;
10683  }
10684  
10685  static int
10686  __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
10687  				   struct bpf_reg_state *reg, u32 regno,
10688  				   struct bpf_kfunc_call_arg_meta *meta,
10689  				   enum btf_field_type head_field_type,
10690  				   struct btf_field **head_field)
10691  {
10692  	const char *head_type_name;
10693  	struct btf_field *field;
10694  	struct btf_record *rec;
10695  	u32 head_off;
10696  
10697  	if (meta->btf != btf_vmlinux) {
10698  		verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
10699  		return -EFAULT;
10700  	}
10701  
10702  	if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
10703  		return -EFAULT;
10704  
10705  	head_type_name = btf_field_type_name(head_field_type);
10706  	if (!tnum_is_const(reg->var_off)) {
10707  		verbose(env,
10708  			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
10709  			regno, head_type_name);
10710  		return -EINVAL;
10711  	}
10712  
10713  	rec = reg_btf_record(reg);
10714  	head_off = reg->off + reg->var_off.value;
10715  	field = btf_record_find(rec, head_off, head_field_type);
10716  	if (!field) {
10717  		verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
10718  		return -EINVAL;
10719  	}
10720  
10721  	/* All functions require the bpf_list_head/bpf_rb_root to be protected by a bpf_spin_lock */
10722  	if (check_reg_allocation_locked(env, reg)) {
10723  		verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
10724  			rec->spin_lock_off, head_type_name);
10725  		return -EINVAL;
10726  	}
10727  
10728  	if (*head_field) {
10729  		verbose(env, "verifier internal error: repeating %s arg\n", head_type_name);
10730  		return -EFAULT;
10731  	}
10732  	*head_field = field;
10733  	return 0;
10734  }
10735  
10736  static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
10737  					   struct bpf_reg_state *reg, u32 regno,
10738  					   struct bpf_kfunc_call_arg_meta *meta)
10739  {
10740  	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD,
10741  							  &meta->arg_list_head.field);
10742  }
10743  
10744  static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
10745  					     struct bpf_reg_state *reg, u32 regno,
10746  					     struct bpf_kfunc_call_arg_meta *meta)
10747  {
10748  	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT,
10749  							  &meta->arg_rbtree_root.field);
10750  }
10751  
10752  static int
10753  __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
10754  				   struct bpf_reg_state *reg, u32 regno,
10755  				   struct bpf_kfunc_call_arg_meta *meta,
10756  				   enum btf_field_type head_field_type,
10757  				   enum btf_field_type node_field_type,
10758  				   struct btf_field **node_field)
10759  {
10760  	const char *node_type_name;
10761  	const struct btf_type *et, *t;
10762  	struct btf_field *field;
10763  	u32 node_off;
10764  
10765  	if (meta->btf != btf_vmlinux) {
10766  		verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
10767  		return -EFAULT;
10768  	}
10769  
10770  	if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
10771  		return -EFAULT;
10772  
10773  	node_type_name = btf_field_type_name(node_field_type);
10774  	if (!tnum_is_const(reg->var_off)) {
10775  		verbose(env,
10776  			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
10777  			regno, node_type_name);
10778  		return -EINVAL;
10779  	}
10780  
10781  	node_off = reg->off + reg->var_off.value;
10782  	field = reg_find_field_offset(reg, node_off, node_field_type);
10783  	if (!field || field->offset != node_off) {
10784  		verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
10785  		return -EINVAL;
10786  	}
10787  
10788  	field = *node_field;
10789  
10790  	et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
10791  	t = btf_type_by_id(reg->btf, reg->btf_id);
10792  	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
10793  				  field->graph_root.value_btf_id, true)) {
10794  		verbose(env, "operation on %s expects arg#1 %s at offset=%d "
10795  			"in struct %s, but arg is at offset=%d in struct %s\n",
10796  			btf_field_type_name(head_field_type),
10797  			btf_field_type_name(node_field_type),
10798  			field->graph_root.node_offset,
10799  			btf_name_by_offset(field->graph_root.btf, et->name_off),
10800  			node_off, btf_name_by_offset(reg->btf, t->name_off));
10801  		return -EINVAL;
10802  	}
10803  	meta->arg_btf = reg->btf;
10804  	meta->arg_btf_id = reg->btf_id;
10805  
10806  	if (node_off != field->graph_root.node_offset) {
10807  		verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
10808  			node_off, btf_field_type_name(node_field_type),
10809  			field->graph_root.node_offset,
10810  			btf_name_by_offset(field->graph_root.btf, et->name_off));
10811  		return -EINVAL;
10812  	}
10813  
10814  	return 0;
10815  }
10816  
10817  static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
10818  					   struct bpf_reg_state *reg, u32 regno,
10819  					   struct bpf_kfunc_call_arg_meta *meta)
10820  {
10821  	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
10822  						  BPF_LIST_HEAD, BPF_LIST_NODE,
10823  						  &meta->arg_list_head.field);
10824  }
10825  
10826  static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
10827  					     struct bpf_reg_state *reg, u32 regno,
10828  					     struct bpf_kfunc_call_arg_meta *meta)
10829  {
10830  	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
10831  						  BPF_RB_ROOT, BPF_RB_NODE,
10832  						  &meta->arg_rbtree_root.field);
10833  }
10834  
10835  static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
10836  			    int insn_idx)
10837  {
10838  	const char *func_name = meta->func_name, *ref_tname;
10839  	const struct btf *btf = meta->btf;
10840  	const struct btf_param *args;
10841  	struct btf_record *rec;
10842  	u32 i, nargs;
10843  	int ret;
10844  
10845  	args = (const struct btf_param *)(meta->func_proto + 1);
10846  	nargs = btf_type_vlen(meta->func_proto);
10847  	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
10848  		verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
10849  			MAX_BPF_FUNC_REG_ARGS);
10850  		return -EINVAL;
10851  	}
10852  
10853  	/* Check that BTF function arguments match actual types that the
10854  	 * verifier sees.
10855  	 */
10856  	for (i = 0; i < nargs; i++) {
10857  		struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
10858  		const struct btf_type *t, *ref_t, *resolve_ret;
10859  		enum bpf_arg_type arg_type = ARG_DONTCARE;
10860  		u32 regno = i + 1, ref_id, type_size;
10861  		bool is_ret_buf_sz = false;
10862  		int kf_arg_type;
10863  
10864  		t = btf_type_skip_modifiers(btf, args[i].type, NULL);
10865  
10866  		if (is_kfunc_arg_ignore(btf, &args[i]))
10867  			continue;
10868  
10869  		if (btf_type_is_scalar(t)) {
10870  			if (reg->type != SCALAR_VALUE) {
10871  				verbose(env, "R%d is not a scalar\n", regno);
10872  				return -EINVAL;
10873  			}
10874  
10875  			if (is_kfunc_arg_constant(meta->btf, &args[i])) {
10876  				if (meta->arg_constant.found) {
10877  					verbose(env, "verifier internal error: only one constant argument permitted\n");
10878  					return -EFAULT;
10879  				}
10880  				if (!tnum_is_const(reg->var_off)) {
10881  					verbose(env, "R%d must be a known constant\n", regno);
10882  					return -EINVAL;
10883  				}
10884  				ret = mark_chain_precision(env, regno);
10885  				if (ret < 0)
10886  					return ret;
10887  				meta->arg_constant.found = true;
10888  				meta->arg_constant.value = reg->var_off.value;
10889  			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
10890  				meta->r0_rdonly = true;
10891  				is_ret_buf_sz = true;
10892  			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
10893  				is_ret_buf_sz = true;
10894  			}
10895  
10896  			if (is_ret_buf_sz) {
10897  				if (meta->r0_size) {
10898  					verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
10899  					return -EINVAL;
10900  				}
10901  
10902  				if (!tnum_is_const(reg->var_off)) {
10903  					verbose(env, "R%d is not a const\n", regno);
10904  					return -EINVAL;
10905  				}
10906  
10907  				meta->r0_size = reg->var_off.value;
10908  				ret = mark_chain_precision(env, regno);
10909  				if (ret)
10910  					return ret;
10911  			}
10912  			continue;
10913  		}
10914  
10915  		if (!btf_type_is_ptr(t)) {
10916  			verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
10917  			return -EINVAL;
10918  		}
10919  
10920  		if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) &&
10921  		    (register_is_null(reg) || type_may_be_null(reg->type))) {
10922  			verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
10923  			return -EACCES;
10924  		}
10925  
10926  		if (reg->ref_obj_id) {
10927  			if (is_kfunc_release(meta) && meta->ref_obj_id) {
10928  				verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
10929  					regno, reg->ref_obj_id,
10930  					meta->ref_obj_id);
10931  				return -EFAULT;
10932  			}
10933  			meta->ref_obj_id = reg->ref_obj_id;
10934  			if (is_kfunc_release(meta))
10935  				meta->release_regno = regno;
10936  		}
10937  
10938  		ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
10939  		ref_tname = btf_name_by_offset(btf, ref_t->name_off);
10940  
10941  		kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
10942  		if (kf_arg_type < 0)
10943  			return kf_arg_type;
10944  
10945  		switch (kf_arg_type) {
10946  		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
10947  		case KF_ARG_PTR_TO_BTF_ID:
10948  			if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
10949  				break;
10950  
10951  			if (!is_trusted_reg(reg)) {
10952  				if (!is_kfunc_rcu(meta)) {
10953  					verbose(env, "R%d must be referenced or trusted\n", regno);
10954  					return -EINVAL;
10955  				}
10956  				if (!is_rcu_reg(reg)) {
10957  					verbose(env, "R%d must be a rcu pointer\n", regno);
10958  					return -EINVAL;
10959  				}
10960  			}
10961  
10962  			fallthrough;
10963  		case KF_ARG_PTR_TO_CTX:
10964  			/* Trusted arguments have the same offset checks as release arguments */
10965  			arg_type |= OBJ_RELEASE;
10966  			break;
10967  		case KF_ARG_PTR_TO_DYNPTR:
10968  		case KF_ARG_PTR_TO_ITER:
10969  		case KF_ARG_PTR_TO_LIST_HEAD:
10970  		case KF_ARG_PTR_TO_LIST_NODE:
10971  		case KF_ARG_PTR_TO_RB_ROOT:
10972  		case KF_ARG_PTR_TO_RB_NODE:
10973  		case KF_ARG_PTR_TO_MEM:
10974  		case KF_ARG_PTR_TO_MEM_SIZE:
10975  		case KF_ARG_PTR_TO_CALLBACK:
10976  		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
10977  			/* Trusted by default */
10978  			break;
10979  		default:
10980  			WARN_ON_ONCE(1);
10981  			return -EFAULT;
10982  		}
10983  
10984  		if (is_kfunc_release(meta) && reg->ref_obj_id)
10985  			arg_type |= OBJ_RELEASE;
10986  		ret = check_func_arg_reg_off(env, reg, regno, arg_type);
10987  		if (ret < 0)
10988  			return ret;
10989  
10990  		switch (kf_arg_type) {
10991  		case KF_ARG_PTR_TO_CTX:
10992  			if (reg->type != PTR_TO_CTX) {
10993  				verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
10994  				return -EINVAL;
10995  			}
10996  
10997  			if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
10998  				ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
10999  				if (ret < 0)
11000  					return -EINVAL;
11001  				meta->ret_btf_id  = ret;
11002  			}
11003  			break;
11004  		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
11005  			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11006  				verbose(env, "arg#%d expected pointer to allocated object\n", i);
11007  				return -EINVAL;
11008  			}
11009  			if (!reg->ref_obj_id) {
11010  				verbose(env, "allocated object must be referenced\n");
11011  				return -EINVAL;
11012  			}
11013  			if (meta->btf == btf_vmlinux &&
11014  			    meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
11015  				meta->arg_btf = reg->btf;
11016  				meta->arg_btf_id = reg->btf_id;
11017  			}
11018  			break;
11019  		case KF_ARG_PTR_TO_DYNPTR:
11020  		{
11021  			enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
11022  			int clone_ref_obj_id = 0;
11023  
11024  			if (reg->type != PTR_TO_STACK &&
11025  			    reg->type != CONST_PTR_TO_DYNPTR) {
11026  				verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i);
11027  				return -EINVAL;
11028  			}
11029  
11030  			if (reg->type == CONST_PTR_TO_DYNPTR)
11031  				dynptr_arg_type |= MEM_RDONLY;
11032  
11033  			if (is_kfunc_arg_uninit(btf, &args[i]))
11034  				dynptr_arg_type |= MEM_UNINIT;
11035  
11036  			if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
11037  				dynptr_arg_type |= DYNPTR_TYPE_SKB;
11038  			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
11039  				dynptr_arg_type |= DYNPTR_TYPE_XDP;
11040  			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
11041  				   (dynptr_arg_type & MEM_UNINIT)) {
11042  				enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
11043  
11044  				if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
11045  					verbose(env, "verifier internal error: no dynptr type for parent of clone\n");
11046  					return -EFAULT;
11047  				}
11048  
11049  				dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
11050  				clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id;
11051  				if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) {
11052  					verbose(env, "verifier internal error: missing ref obj id for parent of clone\n");
11053  					return -EFAULT;
11054  				}
11055  			}
11056  
11057  			ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id);
11058  			if (ret < 0)
11059  				return ret;
11060  
11061  			if (!(dynptr_arg_type & MEM_UNINIT)) {
11062  				int id = dynptr_id(env, reg);
11063  
11064  				if (id < 0) {
11065  					verbose(env, "verifier internal error: failed to obtain dynptr id\n");
11066  					return id;
11067  				}
11068  				meta->initialized_dynptr.id = id;
11069  				meta->initialized_dynptr.type = dynptr_get_type(env, reg);
11070  				meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg);
11071  			}
11072  
11073  			break;
11074  		}
11075  		case KF_ARG_PTR_TO_ITER:
11076  			ret = process_iter_arg(env, regno, insn_idx, meta);
11077  			if (ret < 0)
11078  				return ret;
11079  			break;
11080  		case KF_ARG_PTR_TO_LIST_HEAD:
11081  			if (reg->type != PTR_TO_MAP_VALUE &&
11082  			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11083  				verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
11084  				return -EINVAL;
11085  			}
11086  			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
11087  				verbose(env, "allocated object must be referenced\n");
11088  				return -EINVAL;
11089  			}
11090  			ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
11091  			if (ret < 0)
11092  				return ret;
11093  			break;
11094  		case KF_ARG_PTR_TO_RB_ROOT:
11095  			if (reg->type != PTR_TO_MAP_VALUE &&
11096  			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11097  				verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
11098  				return -EINVAL;
11099  			}
11100  			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
11101  				verbose(env, "allocated object must be referenced\n");
11102  				return -EINVAL;
11103  			}
11104  			ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
11105  			if (ret < 0)
11106  				return ret;
11107  			break;
11108  		case KF_ARG_PTR_TO_LIST_NODE:
11109  			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11110  				verbose(env, "arg#%d expected pointer to allocated object\n", i);
11111  				return -EINVAL;
11112  			}
11113  			if (!reg->ref_obj_id) {
11114  				verbose(env, "allocated object must be referenced\n");
11115  				return -EINVAL;
11116  			}
11117  			ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
11118  			if (ret < 0)
11119  				return ret;
11120  			break;
11121  		case KF_ARG_PTR_TO_RB_NODE:
11122  			if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
11123  				if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
11124  					verbose(env, "rbtree_remove node input must be non-owning ref\n");
11125  					return -EINVAL;
11126  				}
11127  				if (in_rbtree_lock_required_cb(env)) {
11128  					verbose(env, "rbtree_remove not allowed in rbtree cb\n");
11129  					return -EINVAL;
11130  				}
11131  			} else {
11132  				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11133  					verbose(env, "arg#%d expected pointer to allocated object\n", i);
11134  					return -EINVAL;
11135  				}
11136  				if (!reg->ref_obj_id) {
11137  					verbose(env, "allocated object must be referenced\n");
11138  					return -EINVAL;
11139  				}
11140  			}
11141  
11142  			ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
11143  			if (ret < 0)
11144  				return ret;
11145  			break;
11146  		case KF_ARG_PTR_TO_BTF_ID:
11147  			/* Only base_type is checked, further checks are done here */
11148  			if ((base_type(reg->type) != PTR_TO_BTF_ID ||
11149  			     (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
11150  			    !reg2btf_ids[base_type(reg->type)]) {
11151  				verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
11152  				verbose(env, "expected %s or socket\n",
11153  					reg_type_str(env, base_type(reg->type) |
11154  							  (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
11155  				return -EINVAL;
11156  			}
11157  			ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
11158  			if (ret < 0)
11159  				return ret;
11160  			break;
11161  		case KF_ARG_PTR_TO_MEM:
11162  			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
11163  			if (IS_ERR(resolve_ret)) {
11164  				verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
11165  					i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
11166  				return -EINVAL;
11167  			}
11168  			ret = check_mem_reg(env, reg, regno, type_size);
11169  			if (ret < 0)
11170  				return ret;
11171  			break;
11172  		case KF_ARG_PTR_TO_MEM_SIZE:
11173  		{
11174  			struct bpf_reg_state *buff_reg = &regs[regno];
11175  			const struct btf_param *buff_arg = &args[i];
11176  			struct bpf_reg_state *size_reg = &regs[regno + 1];
11177  			const struct btf_param *size_arg = &args[i + 1];
11178  
11179  			if (!register_is_null(buff_reg) || !is_kfunc_arg_optional(meta->btf, buff_arg)) {
11180  				ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
11181  				if (ret < 0) {
11182  					verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
11183  					return ret;
11184  				}
11185  			}
11186  
11187  			if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
11188  				if (meta->arg_constant.found) {
11189  					verbose(env, "verifier internal error: only one constant argument permitted\n");
11190  					return -EFAULT;
11191  				}
11192  				if (!tnum_is_const(size_reg->var_off)) {
11193  					verbose(env, "R%d must be a known constant\n", regno + 1);
11194  					return -EINVAL;
11195  				}
11196  				meta->arg_constant.found = true;
11197  				meta->arg_constant.value = size_reg->var_off.value;
11198  			}
11199  
11200  			/* Skip next '__sz' or '__szk' argument */
11201  			i++;
11202  			break;
11203  		}
11204  		case KF_ARG_PTR_TO_CALLBACK:
11205  			meta->subprogno = reg->subprogno;
11206  			break;
11207  		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
11208  			if (!type_is_ptr_alloc_obj(reg->type)) {
11209  				verbose(env, "arg#%d is neither owning or non-owning ref\n", i);
11210  				return -EINVAL;
11211  			}
11212  			if (!type_is_non_owning_ref(reg->type))
11213  				meta->arg_owning_ref = true;
11214  
11215  			rec = reg_btf_record(reg);
11216  			if (!rec) {
11217  				verbose(env, "verifier internal error: Couldn't find btf_record\n");
11218  				return -EFAULT;
11219  			}
11220  
11221  			if (rec->refcount_off < 0) {
11222  				verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i);
11223  				return -EINVAL;
11224  			}
11225  
11226  			meta->arg_btf = reg->btf;
11227  			meta->arg_btf_id = reg->btf_id;
11228  			break;
11229  		}
11230  	}
11231  
11232  	if (is_kfunc_release(meta) && !meta->release_regno) {
11233  		verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
11234  			func_name);
11235  		return -EINVAL;
11236  	}
11237  
11238  	return 0;
11239  }
11240  
11241  static int fetch_kfunc_meta(struct bpf_verifier_env *env,
11242  			    struct bpf_insn *insn,
11243  			    struct bpf_kfunc_call_arg_meta *meta,
11244  			    const char **kfunc_name)
11245  {
11246  	const struct btf_type *func, *func_proto;
11247  	u32 func_id, *kfunc_flags;
11248  	const char *func_name;
11249  	struct btf *desc_btf;
11250  
11251  	if (kfunc_name)
11252  		*kfunc_name = NULL;
11253  
11254  	if (!insn->imm)
11255  		return -EINVAL;
11256  
11257  	desc_btf = find_kfunc_desc_btf(env, insn->off);
11258  	if (IS_ERR(desc_btf))
11259  		return PTR_ERR(desc_btf);
11260  
11261  	func_id = insn->imm;
11262  	func = btf_type_by_id(desc_btf, func_id);
11263  	func_name = btf_name_by_offset(desc_btf, func->name_off);
11264  	if (kfunc_name)
11265  		*kfunc_name = func_name;
11266  	func_proto = btf_type_by_id(desc_btf, func->type);
11267  
11268  	kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog);
11269  	if (!kfunc_flags) {
11270  		return -EACCES;
11271  	}
11272  
11273  	memset(meta, 0, sizeof(*meta));
11274  	meta->btf = desc_btf;
11275  	meta->func_id = func_id;
11276  	meta->kfunc_flags = *kfunc_flags;
11277  	meta->func_proto = func_proto;
11278  	meta->func_name = func_name;
11279  
11280  	return 0;
11281  }
11282  
11283  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
11284  			    int *insn_idx_p)
11285  {
11286  	const struct btf_type *t, *ptr_type;
11287  	u32 i, nargs, ptr_type_id, release_ref_obj_id;
11288  	struct bpf_reg_state *regs = cur_regs(env);
11289  	const char *func_name, *ptr_type_name;
11290  	bool sleepable, rcu_lock, rcu_unlock;
11291  	struct bpf_kfunc_call_arg_meta meta;
11292  	struct bpf_insn_aux_data *insn_aux;
11293  	int err, insn_idx = *insn_idx_p;
11294  	const struct btf_param *args;
11295  	const struct btf_type *ret_t;
11296  	struct btf *desc_btf;
11297  
11298  	/* skip for now, but return error when we find this in fixup_kfunc_call */
11299  	if (!insn->imm)
11300  		return 0;
11301  
11302  	err = fetch_kfunc_meta(env, insn, &meta, &func_name);
11303  	if (err == -EACCES && func_name)
11304  		verbose(env, "calling kernel function %s is not allowed\n", func_name);
11305  	if (err)
11306  		return err;
11307  	desc_btf = meta.btf;
11308  	insn_aux = &env->insn_aux_data[insn_idx];
11309  
11310  	insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
11311  
11312  	if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
11313  		verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
11314  		return -EACCES;
11315  	}
11316  
11317  	sleepable = is_kfunc_sleepable(&meta);
11318  	if (sleepable && !env->prog->aux->sleepable) {
11319  		verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
11320  		return -EACCES;
11321  	}
11322  
11323  	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
11324  	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
11325  
11326  	if (env->cur_state->active_rcu_lock) {
11327  		struct bpf_func_state *state;
11328  		struct bpf_reg_state *reg;
11329  
11330  		if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
11331  			verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
11332  			return -EACCES;
11333  		}
11334  
11335  		if (rcu_lock) {
11336  			verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
11337  			return -EINVAL;
11338  		} else if (rcu_unlock) {
11339  			bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
11340  				if (reg->type & MEM_RCU) {
11341  					reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
11342  					reg->type |= PTR_UNTRUSTED;
11343  				}
11344  			}));
11345  			env->cur_state->active_rcu_lock = false;
11346  		} else if (sleepable) {
11347  			verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
11348  			return -EACCES;
11349  		}
11350  	} else if (rcu_lock) {
11351  		env->cur_state->active_rcu_lock = true;
11352  	} else if (rcu_unlock) {
11353  		verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
11354  		return -EINVAL;
11355  	}
11356  
11357  	/* Check the arguments */
11358  	err = check_kfunc_args(env, &meta, insn_idx);
11359  	if (err < 0)
11360  		return err;
11361  	/* In case of a release kfunc, we get the register number of the refcounted
11362  	 * PTR_TO_BTF_ID in bpf_kfunc_call_arg_meta, so do the release now.
11363  	 */
11364  	if (meta.release_regno) {
11365  		err = release_reference(env, regs[meta.release_regno].ref_obj_id);
11366  		if (err) {
11367  			verbose(env, "kfunc %s#%d reference has not been acquired before\n",
11368  				func_name, meta.func_id);
11369  			return err;
11370  		}
11371  	}
11372  
11373  	if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
11374  	    meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
11375  	    meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
11376  		release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
11377  		insn_aux->insert_off = regs[BPF_REG_2].off;
11378  		insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
11379  		err = ref_convert_owning_non_owning(env, release_ref_obj_id);
11380  		if (err) {
11381  			verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
11382  				func_name, meta.func_id);
11383  			return err;
11384  		}
11385  
11386  		err = release_reference(env, release_ref_obj_id);
11387  		if (err) {
11388  			verbose(env, "kfunc %s#%d reference has not been acquired before\n",
11389  				func_name, meta.func_id);
11390  			return err;
11391  		}
11392  	}
11393  
11394  	if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
11395  		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
11396  					set_rbtree_add_callback_state);
11397  		if (err) {
11398  			verbose(env, "kfunc %s#%d failed callback verification\n",
11399  				func_name, meta.func_id);
11400  			return err;
11401  		}
11402  	}
11403  
11404  	for (i = 0; i < CALLER_SAVED_REGS; i++)
11405  		mark_reg_not_init(env, regs, caller_saved[i]);
11406  
11407  	/* Check return type */
11408  	t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
11409  
11410  	if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
11411  		/* Only exception is bpf_obj_new_impl */
11412  		if (meta.btf != btf_vmlinux ||
11413  		    (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
11414  		     meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
11415  			verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
11416  			return -EINVAL;
11417  		}
11418  	}
11419  
11420  	if (btf_type_is_scalar(t)) {
11421  		mark_reg_unknown(env, regs, BPF_REG_0);
11422  		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
11423  	} else if (btf_type_is_ptr(t)) {
11424  		ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
11425  
11426  		if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
11427  			if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
11428  				struct btf *ret_btf;
11429  				u32 ret_btf_id;
11430  
11431  				if (unlikely(!bpf_global_ma_set))
11432  					return -ENOMEM;
11433  
11434  				if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
11435  					verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
11436  					return -EINVAL;
11437  				}
11438  
11439  				ret_btf = env->prog->aux->btf;
11440  				ret_btf_id = meta.arg_constant.value;
11441  
11442  				/* This may be NULL due to user not supplying a BTF */
11443  				if (!ret_btf) {
11444  					verbose(env, "bpf_obj_new requires prog BTF\n");
11445  					return -EINVAL;
11446  				}
11447  
11448  				ret_t = btf_type_by_id(ret_btf, ret_btf_id);
11449  				if (!ret_t || !__btf_type_is_struct(ret_t)) {
11450  					verbose(env, "bpf_obj_new type ID argument must be of a struct\n");
11451  					return -EINVAL;
11452  				}
11453  
11454  				mark_reg_known_zero(env, regs, BPF_REG_0);
11455  				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
11456  				regs[BPF_REG_0].btf = ret_btf;
11457  				regs[BPF_REG_0].btf_id = ret_btf_id;
11458  
11459  				insn_aux->obj_new_size = ret_t->size;
11460  				insn_aux->kptr_struct_meta =
11461  					btf_find_struct_meta(ret_btf, ret_btf_id);
11462  			} else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
11463  				mark_reg_known_zero(env, regs, BPF_REG_0);
11464  				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
11465  				regs[BPF_REG_0].btf = meta.arg_btf;
11466  				regs[BPF_REG_0].btf_id = meta.arg_btf_id;
11467  
11468  				insn_aux->kptr_struct_meta =
11469  					btf_find_struct_meta(meta.arg_btf,
11470  							     meta.arg_btf_id);
11471  			} else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
11472  				   meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
11473  				struct btf_field *field = meta.arg_list_head.field;
11474  
11475  				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
11476  			} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11477  				   meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
11478  				struct btf_field *field = meta.arg_rbtree_root.field;
11479  
11480  				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
11481  			} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
11482  				mark_reg_known_zero(env, regs, BPF_REG_0);
11483  				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
11484  				regs[BPF_REG_0].btf = desc_btf;
11485  				regs[BPF_REG_0].btf_id = meta.ret_btf_id;
11486  			} else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
11487  				ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
11488  				if (!ret_t || !btf_type_is_struct(ret_t)) {
11489  					verbose(env,
11490  						"kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
11491  					return -EINVAL;
11492  				}
11493  
11494  				mark_reg_known_zero(env, regs, BPF_REG_0);
11495  				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
11496  				regs[BPF_REG_0].btf = desc_btf;
11497  				regs[BPF_REG_0].btf_id = meta.arg_constant.value;
11498  			} else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
11499  				   meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
11500  				enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
11501  
11502  				mark_reg_known_zero(env, regs, BPF_REG_0);
11503  
11504  				if (!meta.arg_constant.found) {
11505  					verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
11506  					return -EFAULT;
11507  				}
11508  
11509  				regs[BPF_REG_0].mem_size = meta.arg_constant.value;
11510  
11511  				/* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
11512  				regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
11513  
11514  				if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
11515  					regs[BPF_REG_0].type |= MEM_RDONLY;
11516  				} else {
11517  					/* this will set env->seen_direct_write to true */
11518  					if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
11519  						verbose(env, "the prog does not allow writes to packet data\n");
11520  						return -EINVAL;
11521  					}
11522  				}
11523  
11524  				if (!meta.initialized_dynptr.id) {
11525  					verbose(env, "verifier internal error: no dynptr id\n");
11526  					return -EFAULT;
11527  				}
11528  				regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
11529  
11530  				/* we don't need to set BPF_REG_0's ref obj id
11531  				 * because packet slices are not refcounted (see
11532  				 * dynptr_type_refcounted)
11533  				 */
11534  			} else {
11535  				verbose(env, "kernel function %s unhandled dynamic return type\n",
11536  					meta.func_name);
11537  				return -EFAULT;
11538  			}
11539  		} else if (!__btf_type_is_struct(ptr_type)) {
11540  			if (!meta.r0_size) {
11541  				__u32 sz;
11542  
11543  				if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
11544  					meta.r0_size = sz;
11545  					meta.r0_rdonly = true;
11546  				}
11547  			}
11548  			if (!meta.r0_size) {
11549  				ptr_type_name = btf_name_by_offset(desc_btf,
11550  								   ptr_type->name_off);
11551  				verbose(env,
11552  					"kernel function %s returns pointer type %s %s is not supported\n",
11553  					func_name,
11554  					btf_type_str(ptr_type),
11555  					ptr_type_name);
11556  				return -EINVAL;
11557  			}
11558  
11559  			mark_reg_known_zero(env, regs, BPF_REG_0);
11560  			regs[BPF_REG_0].type = PTR_TO_MEM;
11561  			regs[BPF_REG_0].mem_size = meta.r0_size;
11562  
11563  			if (meta.r0_rdonly)
11564  				regs[BPF_REG_0].type |= MEM_RDONLY;
11565  
11566  			/* Ensures we don't access the memory after a release_reference() */
11567  			if (meta.ref_obj_id)
11568  				regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
11569  		} else {
11570  			mark_reg_known_zero(env, regs, BPF_REG_0);
11571  			regs[BPF_REG_0].btf = desc_btf;
11572  			regs[BPF_REG_0].type = PTR_TO_BTF_ID;
11573  			regs[BPF_REG_0].btf_id = ptr_type_id;
11574  		}
11575  
11576  		if (is_kfunc_ret_null(&meta)) {
11577  			regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
11578  			/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
11579  			regs[BPF_REG_0].id = ++env->id_gen;
11580  		}
11581  		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
11582  		if (is_kfunc_acquire(&meta)) {
11583  			int id = acquire_reference_state(env, insn_idx);
11584  
11585  			if (id < 0)
11586  				return id;
11587  			if (is_kfunc_ret_null(&meta))
11588  				regs[BPF_REG_0].id = id;
11589  			regs[BPF_REG_0].ref_obj_id = id;
11590  		} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
11591  			ref_set_non_owning(env, &regs[BPF_REG_0]);
11592  		}
11593  
11594  		if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
11595  			regs[BPF_REG_0].id = ++env->id_gen;
11596  	} else if (btf_type_is_void(t)) {
11597  		if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
11598  			if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
11599  				insn_aux->kptr_struct_meta =
11600  					btf_find_struct_meta(meta.arg_btf,
11601  							     meta.arg_btf_id);
11602  			}
11603  		}
11604  	}
11605  
11606  	nargs = btf_type_vlen(meta.func_proto);
11607  	args = (const struct btf_param *)(meta.func_proto + 1);
11608  	for (i = 0; i < nargs; i++) {
11609  		u32 regno = i + 1;
11610  
11611  		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
11612  		if (btf_type_is_ptr(t))
11613  			mark_btf_func_reg_size(env, regno, sizeof(void *));
11614  		else
11615  			/* scalar. ensured by btf_check_kfunc_arg_match() */
11616  			mark_btf_func_reg_size(env, regno, t->size);
11617  	}
11618  
11619  	if (is_iter_next_kfunc(&meta)) {
11620  		err = process_iter_next_call(env, insn_idx, &meta);
11621  		if (err)
11622  			return err;
11623  	}
11624  
11625  	return 0;
11626  }
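
/* A brief BPF-program-side sketch of the acquire + KF_RET_NULL handling
 * above, assuming the task kfuncs bpf_task_acquire()/bpf_task_release() are
 * usable by the program type:
 *
 *	struct task_struct *t = bpf_task_acquire(task);
 *
 *	if (!t)			// R0 carries PTR_MAYBE_NULL, so a NULL check is required
 *		return 0;
 *	...
 *	bpf_task_release(t);	// drops the ref_obj_id acquired above
 */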
11627  
11628  static bool signed_add_overflows(s64 a, s64 b)
11629  {
11630  	/* Do the add in u64, where overflow is well-defined */
11631  	s64 res = (s64)((u64)a + (u64)b);
11632  
11633  	if (b < 0)
11634  		return res > a;
11635  	return res < a;
11636  }
11637  
11638  static bool signed_add32_overflows(s32 a, s32 b)
11639  {
11640  	/* Do the add in u32, where overflow is well-defined */
11641  	s32 res = (s32)((u32)a + (u32)b);
11642  
11643  	if (b < 0)
11644  		return res > a;
11645  	return res < a;
11646  }
11647  
11648  static bool signed_sub_overflows(s64 a, s64 b)
11649  {
11650  	/* Do the sub in u64, where overflow is well-defined */
11651  	s64 res = (s64)((u64)a - (u64)b);
11652  
11653  	if (b < 0)
11654  		return res < a;
11655  	return res > a;
11656  }
11657  
11658  static bool signed_sub32_overflows(s32 a, s32 b)
11659  {
11660  	/* Do the sub in u32, where overflow is well-defined */
11661  	s32 res = (s32)((u32)a - (u32)b);
11662  
11663  	if (b < 0)
11664  		return res < a;
11665  	return res > a;
11666  }
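
/* For example, signed_add_overflows(S64_MAX, 1) is true: the u64 addition
 * wraps to S64_MIN and, with b > 0, 'res < a' (S64_MIN < S64_MAX) reports the
 * overflow. The 32-bit variants follow the same pattern on s32.
 */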
11667  
11668  static bool check_reg_sane_offset(struct bpf_verifier_env *env,
11669  				  const struct bpf_reg_state *reg,
11670  				  enum bpf_reg_type type)
11671  {
11672  	bool known = tnum_is_const(reg->var_off);
11673  	s64 val = reg->var_off.value;
11674  	s64 smin = reg->smin_value;
11675  
11676  	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
11677  		verbose(env, "math between %s pointer and %lld is not allowed\n",
11678  			reg_type_str(env, type), val);
11679  		return false;
11680  	}
11681  
11682  	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
11683  		verbose(env, "%s pointer offset %d is not allowed\n",
11684  			reg_type_str(env, type), reg->off);
11685  		return false;
11686  	}
11687  
11688  	if (smin == S64_MIN) {
11689  		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
11690  			reg_type_str(env, type));
11691  		return false;
11692  	}
11693  
11694  	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
11695  		verbose(env, "value %lld makes %s pointer be out of bounds\n",
11696  			smin, reg_type_str(env, type));
11697  		return false;
11698  	}
11699  
11700  	return true;
11701  }
11702  
11703  enum {
11704  	REASON_BOUNDS	= -1,
11705  	REASON_TYPE	= -2,
11706  	REASON_PATHS	= -3,
11707  	REASON_LIMIT	= -4,
11708  	REASON_STACK	= -5,
11709  };
11710  
11711  static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
11712  			      u32 *alu_limit, bool mask_to_left)
11713  {
11714  	u32 max = 0, ptr_limit = 0;
11715  
11716  	switch (ptr_reg->type) {
11717  	case PTR_TO_STACK:
11718  		/* Offset 0 is out-of-bounds, but acceptable start for the
11719  		 * left direction, see BPF_REG_FP. Also, unknown scalar
11720  		 * offset where we would need to deal with min/max bounds is
11721  		 * currently prohibited for unprivileged.
11722  		 */
11723  		max = MAX_BPF_STACK + mask_to_left;
11724  		ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
11725  		break;
11726  	case PTR_TO_MAP_VALUE:
11727  		max = ptr_reg->map_ptr->value_size;
11728  		ptr_limit = (mask_to_left ?
11729  			     ptr_reg->smin_value :
11730  			     ptr_reg->umax_value) + ptr_reg->off;
11731  		break;
11732  	default:
11733  		return REASON_TYPE;
11734  	}
11735  
11736  	if (ptr_limit >= max)
11737  		return REASON_LIMIT;
11738  	*alu_limit = ptr_limit;
11739  	return 0;
11740  }
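
/* Worked example: for a PTR_TO_STACK register describing fp-16 (reg->off ==
 * -16, constant var_off == 0) and mask_to_left == false, ptr_limit becomes
 * -(0 + -16) = 16 while max is MAX_BPF_STACK, so *alu_limit is set to 16:
 * the speculative ALU masking may then move the pointer by at most the 16
 * bytes that separate fp-16 from the frame pointer boundary.
 */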
11741  
11742  static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
11743  				    const struct bpf_insn *insn)
11744  {
11745  	return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
11746  }
11747  
11748  static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
11749  				       u32 alu_state, u32 alu_limit)
11750  {
11751  	/* If we arrived here from different branches with different
11752  	 * state or limits to sanitize, then this won't work.
11753  	 */
11754  	if (aux->alu_state &&
11755  	    (aux->alu_state != alu_state ||
11756  	     aux->alu_limit != alu_limit))
11757  		return REASON_PATHS;
11758  
11759  	/* Corresponding fixup done in do_misc_fixups(). */
11760  	aux->alu_state = alu_state;
11761  	aux->alu_limit = alu_limit;
11762  	return 0;
11763  }
11764  
11765  static int sanitize_val_alu(struct bpf_verifier_env *env,
11766  			    struct bpf_insn *insn)
11767  {
11768  	struct bpf_insn_aux_data *aux = cur_aux(env);
11769  
11770  	if (can_skip_alu_sanitation(env, insn))
11771  		return 0;
11772  
11773  	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
11774  }
11775  
11776  static bool sanitize_needed(u8 opcode)
11777  {
11778  	return opcode == BPF_ADD || opcode == BPF_SUB;
11779  }
11780  
11781  struct bpf_sanitize_info {
11782  	struct bpf_insn_aux_data aux;
11783  	bool mask_to_left;
11784  };
11785  
11786  static struct bpf_verifier_state *
11787  sanitize_speculative_path(struct bpf_verifier_env *env,
11788  			  const struct bpf_insn *insn,
11789  			  u32 next_idx, u32 curr_idx)
11790  {
11791  	struct bpf_verifier_state *branch;
11792  	struct bpf_reg_state *regs;
11793  
11794  	branch = push_stack(env, next_idx, curr_idx, true);
11795  	if (branch && insn) {
11796  		regs = branch->frame[branch->curframe]->regs;
11797  		if (BPF_SRC(insn->code) == BPF_K) {
11798  			mark_reg_unknown(env, regs, insn->dst_reg);
11799  		} else if (BPF_SRC(insn->code) == BPF_X) {
11800  			mark_reg_unknown(env, regs, insn->dst_reg);
11801  			mark_reg_unknown(env, regs, insn->src_reg);
11802  		}
11803  	}
11804  	return branch;
11805  }
11806  
11807  static int sanitize_ptr_alu(struct bpf_verifier_env *env,
11808  			    struct bpf_insn *insn,
11809  			    const struct bpf_reg_state *ptr_reg,
11810  			    const struct bpf_reg_state *off_reg,
11811  			    struct bpf_reg_state *dst_reg,
11812  			    struct bpf_sanitize_info *info,
11813  			    const bool commit_window)
11814  {
11815  	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
11816  	struct bpf_verifier_state *vstate = env->cur_state;
11817  	bool off_is_imm = tnum_is_const(off_reg->var_off);
11818  	bool off_is_neg = off_reg->smin_value < 0;
11819  	bool ptr_is_dst_reg = ptr_reg == dst_reg;
11820  	u8 opcode = BPF_OP(insn->code);
11821  	u32 alu_state, alu_limit;
11822  	struct bpf_reg_state tmp;
11823  	bool ret;
11824  	int err;
11825  
11826  	if (can_skip_alu_sanitation(env, insn))
11827  		return 0;
11828  
11829  	/* We already marked aux for masking from non-speculative
11830  	 * paths, thus we got here in the first place. We only care
11831  	 * to explore bad access from here.
11832  	 */
11833  	if (vstate->speculative)
11834  		goto do_sim;
11835  
11836  	if (!commit_window) {
11837  		if (!tnum_is_const(off_reg->var_off) &&
11838  		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
11839  			return REASON_BOUNDS;
11840  
11841  		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
11842  				     (opcode == BPF_SUB && !off_is_neg);
11843  	}
11844  
11845  	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
11846  	if (err < 0)
11847  		return err;
11848  
11849  	if (commit_window) {
11850  		/* In commit phase we narrow the masking window based on
11851  		 * the observed pointer move after the simulated operation.
11852  		 */
11853  		alu_state = info->aux.alu_state;
11854  		alu_limit = abs(info->aux.alu_limit - alu_limit);
11855  	} else {
11856  		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
11857  		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
11858  		alu_state |= ptr_is_dst_reg ?
11859  			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
11860  
11861  		/* Limit pruning on unknown scalars to enable deep search for
11862  		 * potential masking differences from other program paths.
11863  		 */
11864  		if (!off_is_imm)
11865  			env->explore_alu_limits = true;
11866  	}
11867  
11868  	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
11869  	if (err < 0)
11870  		return err;
11871  do_sim:
11872  	/* If we're in commit phase, we're done here given we already
11873  	 * pushed the truncated dst_reg into the speculative verification
11874  	 * stack.
11875  	 *
11876  	 * Also, when register is a known constant, we rewrite register-based
11877  	 * operation to immediate-based, and thus do not need masking (and as
11878  	 * a consequence, do not need to simulate the zero-truncation either).
11879  	 */
11880  	if (commit_window || off_is_imm)
11881  		return 0;
11882  
11883  	/* Simulate and find potential out-of-bounds access under
11884  	 * speculative execution from truncation as a result of
11885  	 * masking when off was not within expected range. If off
11886  	 * sits in dst, then we temporarily need to move ptr there
11887  	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
11888  	 * for cases where we use K-based arithmetic in one direction
11889  	 * and truncated reg-based in the other in order to explore
11890  	 * bad access.
11891  	 */
11892  	if (!ptr_is_dst_reg) {
11893  		tmp = *dst_reg;
11894  		copy_register_state(dst_reg, ptr_reg);
11895  	}
11896  	ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
11897  					env->insn_idx);
11898  	if (!ptr_is_dst_reg && ret)
11899  		*dst_reg = tmp;
11900  	return !ret ? REASON_STACK : 0;
11901  }
11902  
11903  static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
11904  {
11905  	struct bpf_verifier_state *vstate = env->cur_state;
11906  
11907  	/* If we simulate paths under speculation, we don't update the
11908  	 * insn as 'seen' such that when we verify unreachable paths in
11909  	 * the non-speculative domain, sanitize_dead_code() can still
11910  	 * rewrite/sanitize them.
11911  	 */
11912  	if (!vstate->speculative)
11913  		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
11914  }
11915  
11916  static int sanitize_err(struct bpf_verifier_env *env,
11917  			const struct bpf_insn *insn, int reason,
11918  			const struct bpf_reg_state *off_reg,
11919  			const struct bpf_reg_state *dst_reg)
11920  {
11921  	static const char *err = "pointer arithmetic with it prohibited for !root";
11922  	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
11923  	u32 dst = insn->dst_reg, src = insn->src_reg;
11924  
11925  	switch (reason) {
11926  	case REASON_BOUNDS:
11927  		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
11928  			off_reg == dst_reg ? dst : src, err);
11929  		break;
11930  	case REASON_TYPE:
11931  		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
11932  			off_reg == dst_reg ? src : dst, err);
11933  		break;
11934  	case REASON_PATHS:
11935  		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
11936  			dst, op, err);
11937  		break;
11938  	case REASON_LIMIT:
11939  		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
11940  			dst, op, err);
11941  		break;
11942  	case REASON_STACK:
11943  		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
11944  			dst, err);
11945  		break;
11946  	default:
11947  		verbose(env, "verifier internal error: unknown reason (%d)\n",
11948  			reason);
11949  		break;
11950  	}
11951  
11952  	return -EACCES;
11953  }
11954  
11955  /* check that stack access falls within stack limits and that 'reg' doesn't
11956   * have a variable offset.
11957   *
11958   * Variable offset is prohibited for unprivileged mode for simplicity since it
11959   * requires corresponding support in Spectre masking for stack ALU.  See also
11960   * retrieve_ptr_limit().
11961   *
11963   * 'off' includes 'reg->off'.
11964   */
11965  static int check_stack_access_for_ptr_arithmetic(
11966  				struct bpf_verifier_env *env,
11967  				int regno,
11968  				const struct bpf_reg_state *reg,
11969  				int off)
11970  {
11971  	if (!tnum_is_const(reg->var_off)) {
11972  		char tn_buf[48];
11973  
11974  		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
11975  		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
11976  			regno, tn_buf, off);
11977  		return -EACCES;
11978  	}
11979  
11980  	if (off >= 0 || off < -MAX_BPF_STACK) {
11981  		verbose(env, "R%d stack pointer arithmetic goes out of range, "
11982  			"prohibited for !root; off=%d\n", regno, off);
11983  		return -EACCES;
11984  	}
11985  
11986  	return 0;
11987  }
11988  
11989  static int sanitize_check_bounds(struct bpf_verifier_env *env,
11990  				 const struct bpf_insn *insn,
11991  				 const struct bpf_reg_state *dst_reg)
11992  {
11993  	u32 dst = insn->dst_reg;
11994  
11995  	/* For unprivileged we require that resulting offset must be in bounds
11996  	 * in order to be able to sanitize access later on.
11997  	 */
11998  	if (env->bypass_spec_v1)
11999  		return 0;
12000  
12001  	switch (dst_reg->type) {
12002  	case PTR_TO_STACK:
12003  		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
12004  					dst_reg->off + dst_reg->var_off.value))
12005  			return -EACCES;
12006  		break;
12007  	case PTR_TO_MAP_VALUE:
12008  		if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
12009  			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
12010  				"prohibited for !root\n", dst);
12011  			return -EACCES;
12012  		}
12013  		break;
12014  	default:
12015  		break;
12016  	}
12017  
12018  	return 0;
12019  }
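
/* As a rough illustration of the unprivileged bounds requirement above
 * (values are hypothetical, MAX_BPF_STACK is 512):
 *
 *    r1 = r10
 *    r1 += -8     // resulting off -8 lies in [-512, -1], accepted
 *    r1 += -520   // resulting off would drop below -MAX_BPF_STACK, rejected
 *
 * and a non-negative resulting stack offset (e.g. r1 = r10; r1 += 8) is
 * rejected as well for !root.  For PTR_TO_MAP_VALUE the resulting fixed
 * offset must similarly pass check_map_access() against the map's
 * value_size.
 */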
12020  
12021  /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
12022   * Caller should also handle BPF_MOV case separately.
12023   * If we return -EACCES, caller may want to try again treating pointer as a
12024   * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
12025   */
12026  static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
12027  				   struct bpf_insn *insn,
12028  				   const struct bpf_reg_state *ptr_reg,
12029  				   const struct bpf_reg_state *off_reg)
12030  {
12031  	struct bpf_verifier_state *vstate = env->cur_state;
12032  	struct bpf_func_state *state = vstate->frame[vstate->curframe];
12033  	struct bpf_reg_state *regs = state->regs, *dst_reg;
12034  	bool known = tnum_is_const(off_reg->var_off);
12035  	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
12036  	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
12037  	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
12038  	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
12039  	struct bpf_sanitize_info info = {};
12040  	u8 opcode = BPF_OP(insn->code);
12041  	u32 dst = insn->dst_reg;
12042  	int ret;
12043  
12044  	dst_reg = &regs[dst];
12045  
12046  	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
12047  	    smin_val > smax_val || umin_val > umax_val) {
12048  		/* Taint dst register if offset had invalid bounds derived from
12049  		 * e.g. dead branches.
12050  		 */
12051  		__mark_reg_unknown(env, dst_reg);
12052  		return 0;
12053  	}
12054  
12055  	if (BPF_CLASS(insn->code) != BPF_ALU64) {
12056  		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
12057  		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
12058  			__mark_reg_unknown(env, dst_reg);
12059  			return 0;
12060  		}
12061  
12062  		verbose(env,
12063  			"R%d 32-bit pointer arithmetic prohibited\n",
12064  			dst);
12065  		return -EACCES;
12066  	}
12067  
12068  	if (ptr_reg->type & PTR_MAYBE_NULL) {
12069  		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
12070  			dst, reg_type_str(env, ptr_reg->type));
12071  		return -EACCES;
12072  	}
12073  
12074  	switch (base_type(ptr_reg->type)) {
12075  	case CONST_PTR_TO_MAP:
12076  		/* smin_val represents the known value */
12077  		if (known && smin_val == 0 && opcode == BPF_ADD)
12078  			break;
12079  		fallthrough;
12080  	case PTR_TO_PACKET_END:
12081  	case PTR_TO_SOCKET:
12082  	case PTR_TO_SOCK_COMMON:
12083  	case PTR_TO_TCP_SOCK:
12084  	case PTR_TO_XDP_SOCK:
12085  		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
12086  			dst, reg_type_str(env, ptr_reg->type));
12087  		return -EACCES;
12088  	default:
12089  		break;
12090  	}
12091  
12092  	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
12093  	 * The id may be overwritten later if we create a new variable offset.
12094  	 */
12095  	dst_reg->type = ptr_reg->type;
12096  	dst_reg->id = ptr_reg->id;
12097  
12098  	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
12099  	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
12100  		return -EINVAL;
12101  
12102  	/* pointer types do not carry 32-bit bounds at the moment. */
12103  	__mark_reg32_unbounded(dst_reg);
12104  
12105  	if (sanitize_needed(opcode)) {
12106  		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
12107  				       &info, false);
12108  		if (ret < 0)
12109  			return sanitize_err(env, insn, ret, off_reg, dst_reg);
12110  	}
12111  
12112  	switch (opcode) {
12113  	case BPF_ADD:
12114  		/* We can take a fixed offset as long as it doesn't overflow
12115  		 * the s32 'off' field
12116  		 */
12117  		if (known && (ptr_reg->off + smin_val ==
12118  			      (s64)(s32)(ptr_reg->off + smin_val))) {
12119  			/* pointer += K.  Accumulate it into fixed offset */
12120  			dst_reg->smin_value = smin_ptr;
12121  			dst_reg->smax_value = smax_ptr;
12122  			dst_reg->umin_value = umin_ptr;
12123  			dst_reg->umax_value = umax_ptr;
12124  			dst_reg->var_off = ptr_reg->var_off;
12125  			dst_reg->off = ptr_reg->off + smin_val;
12126  			dst_reg->raw = ptr_reg->raw;
12127  			break;
12128  		}
12129  		/* A new variable offset is created.  Note that off_reg->off
12130  		 * == 0, since it's a scalar.
12131  		 * dst_reg gets the pointer type and, since some integer
12132  		 * value was added to the pointer, a new 'id' if it's a
12133  		 * PTR_TO_PACKET.
12134  		 * This creates a new 'base' pointer: off_reg (the variable
12135  		 * part) is folded into the variable offset, and the fixed
12136  		 * offset is copied from ptr_reg.
12137  		 */
12138  		if (signed_add_overflows(smin_ptr, smin_val) ||
12139  		    signed_add_overflows(smax_ptr, smax_val)) {
12140  			dst_reg->smin_value = S64_MIN;
12141  			dst_reg->smax_value = S64_MAX;
12142  		} else {
12143  			dst_reg->smin_value = smin_ptr + smin_val;
12144  			dst_reg->smax_value = smax_ptr + smax_val;
12145  		}
12146  		if (umin_ptr + umin_val < umin_ptr ||
12147  		    umax_ptr + umax_val < umax_ptr) {
12148  			dst_reg->umin_value = 0;
12149  			dst_reg->umax_value = U64_MAX;
12150  		} else {
12151  			dst_reg->umin_value = umin_ptr + umin_val;
12152  			dst_reg->umax_value = umax_ptr + umax_val;
12153  		}
12154  		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
12155  		dst_reg->off = ptr_reg->off;
12156  		dst_reg->raw = ptr_reg->raw;
12157  		if (reg_is_pkt_pointer(ptr_reg)) {
12158  			dst_reg->id = ++env->id_gen;
12159  			/* something was added to pkt_ptr, set range to zero */
12160  			memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
12161  		}
12162  		break;
12163  	case BPF_SUB:
12164  		if (dst_reg == off_reg) {
12165  			/* scalar -= pointer.  Creates an unknown scalar */
12166  			verbose(env, "R%d tried to subtract pointer from scalar\n",
12167  				dst);
12168  			return -EACCES;
12169  		}
12170  		/* We don't allow subtraction from FP, because (according to
12171  		 * test_verifier.c test "invalid fp arithmetic", JITs might not
12172  		 * be able to deal with it.
12173  		 */
12174  		if (ptr_reg->type == PTR_TO_STACK) {
12175  			verbose(env, "R%d subtraction from stack pointer prohibited\n",
12176  				dst);
12177  			return -EACCES;
12178  		}
12179  		if (known && (ptr_reg->off - smin_val ==
12180  			      (s64)(s32)(ptr_reg->off - smin_val))) {
12181  			/* pointer -= K.  Subtract it from fixed offset */
12182  			dst_reg->smin_value = smin_ptr;
12183  			dst_reg->smax_value = smax_ptr;
12184  			dst_reg->umin_value = umin_ptr;
12185  			dst_reg->umax_value = umax_ptr;
12186  			dst_reg->var_off = ptr_reg->var_off;
12187  			dst_reg->id = ptr_reg->id;
12188  			dst_reg->off = ptr_reg->off - smin_val;
12189  			dst_reg->raw = ptr_reg->raw;
12190  			break;
12191  		}
12192  		/* A new variable offset is created.  If the subtrahend is known
12193  		 * nonnegative, then any reg->range we had before is still good.
12194  		 */
12195  		if (signed_sub_overflows(smin_ptr, smax_val) ||
12196  		    signed_sub_overflows(smax_ptr, smin_val)) {
12197  			/* Overflow possible, we know nothing */
12198  			dst_reg->smin_value = S64_MIN;
12199  			dst_reg->smax_value = S64_MAX;
12200  		} else {
12201  			dst_reg->smin_value = smin_ptr - smax_val;
12202  			dst_reg->smax_value = smax_ptr - smin_val;
12203  		}
12204  		if (umin_ptr < umax_val) {
12205  			/* Overflow possible, we know nothing */
12206  			dst_reg->umin_value = 0;
12207  			dst_reg->umax_value = U64_MAX;
12208  		} else {
12209  			/* Cannot overflow (as long as bounds are consistent) */
12210  			dst_reg->umin_value = umin_ptr - umax_val;
12211  			dst_reg->umax_value = umax_ptr - umin_val;
12212  		}
12213  		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
12214  		dst_reg->off = ptr_reg->off;
12215  		dst_reg->raw = ptr_reg->raw;
12216  		if (reg_is_pkt_pointer(ptr_reg)) {
12217  			dst_reg->id = ++env->id_gen;
12218  			/* a possibly negative subtrahend may move pkt_ptr forward, reset range */
12219  			if (smin_val < 0)
12220  				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
12221  		}
12222  		break;
12223  	case BPF_AND:
12224  	case BPF_OR:
12225  	case BPF_XOR:
12226  		/* bitwise ops on pointers are troublesome, prohibit. */
12227  		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
12228  			dst, bpf_alu_string[opcode >> 4]);
12229  		return -EACCES;
12230  	default:
12231  		/* other operators (e.g. MUL,LSH) produce non-pointer results */
12232  		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
12233  			dst, bpf_alu_string[opcode >> 4]);
12234  		return -EACCES;
12235  	}
12236  
12237  	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
12238  		return -EINVAL;
12239  	reg_bounds_sync(dst_reg);
12240  	if (sanitize_check_bounds(env, insn, dst_reg) < 0)
12241  		return -EACCES;
12242  	if (sanitize_needed(opcode)) {
12243  		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
12244  				       &info, true);
12245  		if (ret < 0)
12246  			return sanitize_err(env, insn, ret, off_reg, dst_reg);
12247  	}
12248  
12249  	return 0;
12250  }
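
/* A small worked example of the BPF_ADD case above (register numbers and
 * bounds are made up, assuming a fresh map_lookup result whose variable
 * bounds start at zero): with r0 = PTR_TO_MAP_VALUE, off 0, and
 * r1 = SCALAR_VALUE known to be in [0, 32],
 *
 *    r0 += r1
 *
 * leaves r0 a PTR_TO_MAP_VALUE with fixed off still 0, smin/umin 0,
 * smax/umax 32, and var_off widened by tnum_add(); whether a later load
 * through r0 is in bounds is then decided by check_map_access().
 */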
12251  
12252  static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
12253  				 struct bpf_reg_state *src_reg)
12254  {
12255  	s32 smin_val = src_reg->s32_min_value;
12256  	s32 smax_val = src_reg->s32_max_value;
12257  	u32 umin_val = src_reg->u32_min_value;
12258  	u32 umax_val = src_reg->u32_max_value;
12259  
12260  	if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
12261  	    signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
12262  		dst_reg->s32_min_value = S32_MIN;
12263  		dst_reg->s32_max_value = S32_MAX;
12264  	} else {
12265  		dst_reg->s32_min_value += smin_val;
12266  		dst_reg->s32_max_value += smax_val;
12267  	}
12268  	if (dst_reg->u32_min_value + umin_val < umin_val ||
12269  	    dst_reg->u32_max_value + umax_val < umax_val) {
12270  		dst_reg->u32_min_value = 0;
12271  		dst_reg->u32_max_value = U32_MAX;
12272  	} else {
12273  		dst_reg->u32_min_value += umin_val;
12274  		dst_reg->u32_max_value += umax_val;
12275  	}
12276  }
12277  
12278  static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
12279  			       struct bpf_reg_state *src_reg)
12280  {
12281  	s64 smin_val = src_reg->smin_value;
12282  	s64 smax_val = src_reg->smax_value;
12283  	u64 umin_val = src_reg->umin_value;
12284  	u64 umax_val = src_reg->umax_value;
12285  
12286  	if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
12287  	    signed_add_overflows(dst_reg->smax_value, smax_val)) {
12288  		dst_reg->smin_value = S64_MIN;
12289  		dst_reg->smax_value = S64_MAX;
12290  	} else {
12291  		dst_reg->smin_value += smin_val;
12292  		dst_reg->smax_value += smax_val;
12293  	}
12294  	if (dst_reg->umin_value + umin_val < umin_val ||
12295  	    dst_reg->umax_value + umax_val < umax_val) {
12296  		dst_reg->umin_value = 0;
12297  		dst_reg->umax_value = U64_MAX;
12298  	} else {
12299  		dst_reg->umin_value += umin_val;
12300  		dst_reg->umax_value += umax_val;
12301  	}
12302  }
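
/* Worked example with made-up bounds: if dst is in [100, 200] and src in
 * [1, 3] (signed and unsigned alike), the result is [101, 203].  If
 * instead dst->umax_value were U64_MAX - 1 and src->umax_value 5, the
 * unsigned addition could wrap, so the unsigned bounds are widened to
 * [0, U64_MAX].
 */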
12303  
12304  static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
12305  				 struct bpf_reg_state *src_reg)
12306  {
12307  	s32 smin_val = src_reg->s32_min_value;
12308  	s32 smax_val = src_reg->s32_max_value;
12309  	u32 umin_val = src_reg->u32_min_value;
12310  	u32 umax_val = src_reg->u32_max_value;
12311  
12312  	if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
12313  	    signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
12314  		/* Overflow possible, we know nothing */
12315  		dst_reg->s32_min_value = S32_MIN;
12316  		dst_reg->s32_max_value = S32_MAX;
12317  	} else {
12318  		dst_reg->s32_min_value -= smax_val;
12319  		dst_reg->s32_max_value -= smin_val;
12320  	}
12321  	if (dst_reg->u32_min_value < umax_val) {
12322  		/* Overflow possible, we know nothing */
12323  		dst_reg->u32_min_value = 0;
12324  		dst_reg->u32_max_value = U32_MAX;
12325  	} else {
12326  		/* Cannot overflow (as long as bounds are consistent) */
12327  		dst_reg->u32_min_value -= umax_val;
12328  		dst_reg->u32_max_value -= umin_val;
12329  	}
12330  }
12331  
12332  static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
12333  			       struct bpf_reg_state *src_reg)
12334  {
12335  	s64 smin_val = src_reg->smin_value;
12336  	s64 smax_val = src_reg->smax_value;
12337  	u64 umin_val = src_reg->umin_value;
12338  	u64 umax_val = src_reg->umax_value;
12339  
12340  	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
12341  	    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
12342  		/* Overflow possible, we know nothing */
12343  		dst_reg->smin_value = S64_MIN;
12344  		dst_reg->smax_value = S64_MAX;
12345  	} else {
12346  		dst_reg->smin_value -= smax_val;
12347  		dst_reg->smax_value -= smin_val;
12348  	}
12349  	if (dst_reg->umin_value < umax_val) {
12350  		/* Overflow possible, we know nothing */
12351  		dst_reg->umin_value = 0;
12352  		dst_reg->umax_value = U64_MAX;
12353  	} else {
12354  		/* Cannot overflow (as long as bounds are consistent) */
12355  		dst_reg->umin_value -= umax_val;
12356  		dst_reg->umax_value -= umin_val;
12357  	}
12358  }
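
/* Worked example with made-up bounds: dst in [100, 200] minus src in
 * [1, 3] gives [100 - 3, 200 - 1] = [97, 199].  If dst->umin_value were
 * 5 and src->umax_value 7, the subtraction could wrap below zero, so the
 * unsigned bounds become [0, U64_MAX].
 */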
12359  
12360  static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
12361  				 struct bpf_reg_state *src_reg)
12362  {
12363  	s32 smin_val = src_reg->s32_min_value;
12364  	u32 umin_val = src_reg->u32_min_value;
12365  	u32 umax_val = src_reg->u32_max_value;
12366  
12367  	if (smin_val < 0 || dst_reg->s32_min_value < 0) {
12368  		/* Ain't nobody got time to multiply that sign */
12369  		__mark_reg32_unbounded(dst_reg);
12370  		return;
12371  	}
12372  	/* Both values are positive, so we can work with unsigned and
12373  	 * copy the result to signed (unless it exceeds S32_MAX).
12374  	 */
12375  	if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
12376  		/* Potential overflow, we know nothing */
12377  		__mark_reg32_unbounded(dst_reg);
12378  		return;
12379  	}
12380  	dst_reg->u32_min_value *= umin_val;
12381  	dst_reg->u32_max_value *= umax_val;
12382  	if (dst_reg->u32_max_value > S32_MAX) {
12383  		/* Overflow possible, we know nothing */
12384  		dst_reg->s32_min_value = S32_MIN;
12385  		dst_reg->s32_max_value = S32_MAX;
12386  	} else {
12387  		dst_reg->s32_min_value = dst_reg->u32_min_value;
12388  		dst_reg->s32_max_value = dst_reg->u32_max_value;
12389  	}
12390  }
12391  
12392  static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
12393  			       struct bpf_reg_state *src_reg)
12394  {
12395  	s64 smin_val = src_reg->smin_value;
12396  	u64 umin_val = src_reg->umin_value;
12397  	u64 umax_val = src_reg->umax_value;
12398  
12399  	if (smin_val < 0 || dst_reg->smin_value < 0) {
12400  		/* Ain't nobody got time to multiply that sign */
12401  		__mark_reg64_unbounded(dst_reg);
12402  		return;
12403  	}
12404  	/* Both values are positive, so we can work with unsigned and
12405  	 * copy the result to signed (unless it exceeds S64_MAX).
12406  	 */
12407  	if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
12408  		/* Potential overflow, we know nothing */
12409  		__mark_reg64_unbounded(dst_reg);
12410  		return;
12411  	}
12412  	dst_reg->umin_value *= umin_val;
12413  	dst_reg->umax_value *= umax_val;
12414  	if (dst_reg->umax_value > S64_MAX) {
12415  		/* Overflow possible, we know nothing */
12416  		dst_reg->smin_value = S64_MIN;
12417  		dst_reg->smax_value = S64_MAX;
12418  	} else {
12419  		dst_reg->smin_value = dst_reg->umin_value;
12420  		dst_reg->smax_value = dst_reg->umax_value;
12421  	}
12422  }
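
/* Worked example with made-up bounds: dst in [2, 1000] multiplied by a
 * known constant 3 (both non-negative and below U32_MAX) gives
 * [6, 3000], which also fits the signed bounds.  If either umax_value
 * exceeded U32_MAX the product could overflow 64 bits, so the helper
 * gives up and marks the register unbounded instead.
 */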
12423  
12424  static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
12425  				 struct bpf_reg_state *src_reg)
12426  {
12427  	bool src_known = tnum_subreg_is_const(src_reg->var_off);
12428  	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
12429  	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
12430  	s32 smin_val = src_reg->s32_min_value;
12431  	u32 umax_val = src_reg->u32_max_value;
12432  
12433  	if (src_known && dst_known) {
12434  		__mark_reg32_known(dst_reg, var32_off.value);
12435  		return;
12436  	}
12437  
12438  	/* We get our minimum from the var_off, since that's inherently
12439  	 * bitwise.  Our maximum is the minimum of the operands' maxima.
12440  	 */
12441  	dst_reg->u32_min_value = var32_off.value;
12442  	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
12443  	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
12444  		/* Lose signed bounds when ANDing negative numbers,
12445  		 * ain't nobody got time for that.
12446  		 */
12447  		dst_reg->s32_min_value = S32_MIN;
12448  		dst_reg->s32_max_value = S32_MAX;
12449  	} else {
12450  		/* ANDing two positives gives a positive, so safe to
12451  		 * cast result into s32.
12452  		 */
12453  		dst_reg->s32_min_value = dst_reg->u32_min_value;
12454  		dst_reg->s32_max_value = dst_reg->u32_max_value;
12455  	}
12456  }
12457  
12458  static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
12459  			       struct bpf_reg_state *src_reg)
12460  {
12461  	bool src_known = tnum_is_const(src_reg->var_off);
12462  	bool dst_known = tnum_is_const(dst_reg->var_off);
12463  	s64 smin_val = src_reg->smin_value;
12464  	u64 umax_val = src_reg->umax_value;
12465  
12466  	if (src_known && dst_known) {
12467  		__mark_reg_known(dst_reg, dst_reg->var_off.value);
12468  		return;
12469  	}
12470  
12471  	/* We get our minimum from the var_off, since that's inherently
12472  	 * bitwise.  Our maximum is the minimum of the operands' maxima.
12473  	 */
12474  	dst_reg->umin_value = dst_reg->var_off.value;
12475  	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
12476  	if (dst_reg->smin_value < 0 || smin_val < 0) {
12477  		/* Lose signed bounds when ANDing negative numbers,
12478  		 * ain't nobody got time for that.
12479  		 */
12480  		dst_reg->smin_value = S64_MIN;
12481  		dst_reg->smax_value = S64_MAX;
12482  	} else {
12483  		/* ANDing two positives gives a positive, so safe to
12484  		 * cast result into s64.
12485  		 */
12486  		dst_reg->smin_value = dst_reg->umin_value;
12487  		dst_reg->smax_value = dst_reg->umax_value;
12488  	}
12489  	/* We may learn something more from the var_off */
12490  	__update_reg_bounds(dst_reg);
12491  }
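
/* Worked example: for a fully unknown dst, "dst &= src" with
 * src->umax_value == 0xff can only clear bits, so dst->umax_value
 * becomes min(U64_MAX, 0xff) = 0xff, while the minimum comes from
 * whatever bits var_off already proves are set (0 here).
 */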
12492  
12493  static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
12494  				struct bpf_reg_state *src_reg)
12495  {
12496  	bool src_known = tnum_subreg_is_const(src_reg->var_off);
12497  	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
12498  	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
12499  	s32 smin_val = src_reg->s32_min_value;
12500  	u32 umin_val = src_reg->u32_min_value;
12501  
12502  	if (src_known && dst_known) {
12503  		__mark_reg32_known(dst_reg, var32_off.value);
12504  		return;
12505  	}
12506  
12507  	/* We get our maximum from the var_off, and our minimum is the
12508  	 * maximum of the operands' minima
12509  	 */
12510  	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
12511  	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
12512  	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
12513  		/* Lose signed bounds when ORing negative numbers,
12514  		 * ain't nobody got time for that.
12515  		 */
12516  		dst_reg->s32_min_value = S32_MIN;
12517  		dst_reg->s32_max_value = S32_MAX;
12518  	} else {
12519  		/* ORing two positives gives a positive, so safe to
12520  		 * cast result into s32.
12521  		 */
12522  		dst_reg->s32_min_value = dst_reg->u32_min_value;
12523  		dst_reg->s32_max_value = dst_reg->u32_max_value;
12524  	}
12525  }
12526  
12527  static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
12528  			      struct bpf_reg_state *src_reg)
12529  {
12530  	bool src_known = tnum_is_const(src_reg->var_off);
12531  	bool dst_known = tnum_is_const(dst_reg->var_off);
12532  	s64 smin_val = src_reg->smin_value;
12533  	u64 umin_val = src_reg->umin_value;
12534  
12535  	if (src_known && dst_known) {
12536  		__mark_reg_known(dst_reg, dst_reg->var_off.value);
12537  		return;
12538  	}
12539  
12540  	/* We get our maximum from the var_off, and our minimum is the
12541  	 * maximum of the operands' minima
12542  	 */
12543  	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
12544  	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
12545  	if (dst_reg->smin_value < 0 || smin_val < 0) {
12546  		/* Lose signed bounds when ORing negative numbers,
12547  		 * ain't nobody got time for that.
12548  		 */
12549  		dst_reg->smin_value = S64_MIN;
12550  		dst_reg->smax_value = S64_MAX;
12551  	} else {
12552  		/* ORing two positives gives a positive, so safe to
12553  		 * cast result into s64.
12554  		 */
12555  		dst_reg->smin_value = dst_reg->umin_value;
12556  		dst_reg->smax_value = dst_reg->umax_value;
12557  	}
12558  	/* We may learn something more from the var_off */
12559  	__update_reg_bounds(dst_reg);
12560  }
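
/* Worked example with made-up bounds: ORing can only set bits, so the
 * result is at least as large as either operand.  For "dst |= 0x100"
 * with dst in [0x10, 0x1f], umin_value becomes at least
 * max(0x10, 0x100) = 0x100, while umax_value is taken from var_off
 * (every bit that might possibly be set).
 */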
12561  
12562  static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
12563  				 struct bpf_reg_state *src_reg)
12564  {
12565  	bool src_known = tnum_subreg_is_const(src_reg->var_off);
12566  	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
12567  	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
12568  	s32 smin_val = src_reg->s32_min_value;
12569  
12570  	if (src_known && dst_known) {
12571  		__mark_reg32_known(dst_reg, var32_off.value);
12572  		return;
12573  	}
12574  
12575  	/* We get both minimum and maximum from the var32_off. */
12576  	dst_reg->u32_min_value = var32_off.value;
12577  	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
12578  
12579  	if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
12580  		/* XORing two positive sign numbers gives a positive,
12581  		 * so safe to cast u32 result into s32.
12582  		 */
12583  		dst_reg->s32_min_value = dst_reg->u32_min_value;
12584  		dst_reg->s32_max_value = dst_reg->u32_max_value;
12585  	} else {
12586  		dst_reg->s32_min_value = S32_MIN;
12587  		dst_reg->s32_max_value = S32_MAX;
12588  	}
12589  }
12590  
12591  static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
12592  			       struct bpf_reg_state *src_reg)
12593  {
12594  	bool src_known = tnum_is_const(src_reg->var_off);
12595  	bool dst_known = tnum_is_const(dst_reg->var_off);
12596  	s64 smin_val = src_reg->smin_value;
12597  
12598  	if (src_known && dst_known) {
12599  		/* dst_reg->var_off.value has been updated earlier */
12600  		__mark_reg_known(dst_reg, dst_reg->var_off.value);
12601  		return;
12602  	}
12603  
12604  	/* We get both minimum and maximum from the var_off. */
12605  	dst_reg->umin_value = dst_reg->var_off.value;
12606  	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
12607  
12608  	if (dst_reg->smin_value >= 0 && smin_val >= 0) {
12609  		/* XORing two positive sign numbers gives a positive,
12610  		 * so safe to cast u64 result into s64.
12611  		 */
12612  		dst_reg->smin_value = dst_reg->umin_value;
12613  		dst_reg->smax_value = dst_reg->umax_value;
12614  	} else {
12615  		dst_reg->smin_value = S64_MIN;
12616  		dst_reg->smax_value = S64_MAX;
12617  	}
12618  
12619  	__update_reg_bounds(dst_reg);
12620  }
12621  
12622  static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
12623  				   u64 umin_val, u64 umax_val)
12624  {
12625  	/* We lose all sign bit information (except what we can pick
12626  	 * up from var_off)
12627  	 */
12628  	dst_reg->s32_min_value = S32_MIN;
12629  	dst_reg->s32_max_value = S32_MAX;
12630  	/* If we might shift our top bit out, then we know nothing */
12631  	if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
12632  		dst_reg->u32_min_value = 0;
12633  		dst_reg->u32_max_value = U32_MAX;
12634  	} else {
12635  		dst_reg->u32_min_value <<= umin_val;
12636  		dst_reg->u32_max_value <<= umax_val;
12637  	}
12638  }
12639  
12640  static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
12641  				 struct bpf_reg_state *src_reg)
12642  {
12643  	u32 umax_val = src_reg->u32_max_value;
12644  	u32 umin_val = src_reg->u32_min_value;
12645  	/* u32 alu operation will zext upper bits */
12646  	struct tnum subreg = tnum_subreg(dst_reg->var_off);
12647  
12648  	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
12649  	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
12650  	/* Not required but being careful mark reg64 bounds as unknown so
12651  	 * that we are forced to pick them up from tnum and zext later and
12652  	 * if some path skips this step we are still safe.
12653  	 */
12654  	__mark_reg64_unbounded(dst_reg);
12655  	__update_reg32_bounds(dst_reg);
12656  }
12657  
12658  static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
12659  				   u64 umin_val, u64 umax_val)
12660  {
12661  	/* Special case <<32 because it is a common compiler pattern to sign
12662  	 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
12663  	 * positive we know this shift will also be positive so we can track
12664  	 * bounds correctly. Otherwise we lose all sign bit information except
12665  	 * what we can pick up from var_off. Perhaps we can generalize this
12666  	 * later to shifts of any length.
12667  	 */
12668  	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
12669  		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
12670  	else
12671  		dst_reg->smax_value = S64_MAX;
12672  
12673  	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
12674  		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
12675  	else
12676  		dst_reg->smin_value = S64_MIN;
12677  
12678  	/* If we might shift our top bit out, then we know nothing */
12679  	if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
12680  		dst_reg->umin_value = 0;
12681  		dst_reg->umax_value = U64_MAX;
12682  	} else {
12683  		dst_reg->umin_value <<= umin_val;
12684  		dst_reg->umax_value <<= umax_val;
12685  	}
12686  }
12687  
12688  static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
12689  			       struct bpf_reg_state *src_reg)
12690  {
12691  	u64 umax_val = src_reg->umax_value;
12692  	u64 umin_val = src_reg->umin_value;
12693  
12694  	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
12695  	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
12696  	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
12697  
12698  	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
12699  	/* We may learn something more from the var_off */
12700  	__update_reg_bounds(dst_reg);
12701  }
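
/* Example of the <<32 special case handled in __scalar64_min_max_lsh()
 * (bounds are made up): compilers commonly sign-extend a 32-bit
 * subregister via "r1 <<= 32; r1 s>>= 32".  If the 32-bit bounds were,
 * say, [0, 100] before the shift, the 64-bit signed bounds after
 * "<<= 32" become [0, 100 << 32], and the following arithmetic right
 * shift brings them back to [0, 100] instead of losing them entirely.
 */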
12702  
12703  static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
12704  				 struct bpf_reg_state *src_reg)
12705  {
12706  	struct tnum subreg = tnum_subreg(dst_reg->var_off);
12707  	u32 umax_val = src_reg->u32_max_value;
12708  	u32 umin_val = src_reg->u32_min_value;
12709  
12710  	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
12711  	 * be negative, then either:
12712  	 * 1) src_reg might be zero, so the sign bit of the result is
12713  	 *    unknown, so we lose our signed bounds
12714  	 * 2) it's known negative, thus the unsigned bounds capture the
12715  	 *    signed bounds
12716  	 * 3) the signed bounds cross zero, so they tell us nothing
12717  	 *    about the result
12718  	 * If the value in dst_reg is known nonnegative, then again the
12719  	 * unsigned bounds capture the signed bounds.
12720  	 * Thus, in all cases it suffices to blow away our signed bounds
12721  	 * and rely on inferring new ones from the unsigned bounds and
12722  	 * var_off of the result.
12723  	 */
12724  	dst_reg->s32_min_value = S32_MIN;
12725  	dst_reg->s32_max_value = S32_MAX;
12726  
12727  	dst_reg->var_off = tnum_rshift(subreg, umin_val);
12728  	dst_reg->u32_min_value >>= umax_val;
12729  	dst_reg->u32_max_value >>= umin_val;
12730  
12731  	__mark_reg64_unbounded(dst_reg);
12732  	__update_reg32_bounds(dst_reg);
12733  }
12734  
12735  static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
12736  			       struct bpf_reg_state *src_reg)
12737  {
12738  	u64 umax_val = src_reg->umax_value;
12739  	u64 umin_val = src_reg->umin_value;
12740  
12741  	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
12742  	 * be negative, then either:
12743  	 * 1) src_reg might be zero, so the sign bit of the result is
12744  	 *    unknown, so we lose our signed bounds
12745  	 * 2) it's known negative, thus the unsigned bounds capture the
12746  	 *    signed bounds
12747  	 * 3) the signed bounds cross zero, so they tell us nothing
12748  	 *    about the result
12749  	 * If the value in dst_reg is known nonnegative, then again the
12750  	 * unsigned bounds capture the signed bounds.
12751  	 * Thus, in all cases it suffices to blow away our signed bounds
12752  	 * and rely on inferring new ones from the unsigned bounds and
12753  	 * var_off of the result.
12754  	 */
12755  	dst_reg->smin_value = S64_MIN;
12756  	dst_reg->smax_value = S64_MAX;
12757  	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
12758  	dst_reg->umin_value >>= umax_val;
12759  	dst_reg->umax_value >>= umin_val;
12760  
12761  	/* It's not easy to operate on alu32 bounds here because it depends
12762  	 * on bits being shifted in. Take easy way out and mark unbounded
12763  	 * so we can recalculate later from tnum.
12764  	 */
12765  	__mark_reg32_unbounded(dst_reg);
12766  	__update_reg_bounds(dst_reg);
12767  }
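
/* Worked example with made-up bounds: dst in [0x100, 0xff00] shifted
 * right by a known constant 4 yields umin_value = 0x100 >> 4 = 0x10 and
 * umax_value = 0xff00 >> 4 = 0xff0; the signed bounds are then
 * re-derived from these and from var_off by __update_reg_bounds().
 */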
12768  
12769  static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
12770  				  struct bpf_reg_state *src_reg)
12771  {
12772  	u64 umin_val = src_reg->u32_min_value;
12773  
12774  	/* Upon reaching here, src_known is true and
12775  	 * umax_val is equal to umin_val.
12776  	 */
12777  	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
12778  	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
12779  
12780  	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
12781  
12782  	/* blow away the dst_reg umin_value/umax_value and rely on
12783  	 * dst_reg var_off to refine the result.
12784  	 */
12785  	dst_reg->u32_min_value = 0;
12786  	dst_reg->u32_max_value = U32_MAX;
12787  
12788  	__mark_reg64_unbounded(dst_reg);
12789  	__update_reg32_bounds(dst_reg);
12790  }
12791  
12792  static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
12793  				struct bpf_reg_state *src_reg)
12794  {
12795  	u64 umin_val = src_reg->umin_value;
12796  
12797  	/* Upon reaching here, src_known is true and umax_val is equal
12798  	 * to umin_val.
12799  	 */
12800  	dst_reg->smin_value >>= umin_val;
12801  	dst_reg->smax_value >>= umin_val;
12802  
12803  	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
12804  
12805  	/* blow away the dst_reg umin_value/umax_value and rely on
12806  	 * dst_reg var_off to refine the result.
12807  	 */
12808  	dst_reg->umin_value = 0;
12809  	dst_reg->umax_value = U64_MAX;
12810  
12811  	/* It's not easy to operate on alu32 bounds here because it depends
12812  	 * on bits being shifted in from upper 32-bits. Take easy way out
12813  	 * and mark unbounded so we can recalculate later from tnum.
12814  	 */
12815  	__mark_reg32_unbounded(dst_reg);
12816  	__update_reg_bounds(dst_reg);
12817  }
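
/* Worked example: an arithmetic right shift by a known constant keeps
 * the sign, so dst in [-64, 64] shifted by 4 becomes [-4, 4]; the
 * unsigned bounds are reset and later recomputed from var_off.
 */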
12818  
12819  /* WARNING: This function does calculations on 64-bit values, but the actual
12820   * execution may occur on 32-bit values. Therefore, things like bitshifts
12821   * need extra checks in the 32-bit case.
12822   */
12823  static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
12824  				      struct bpf_insn *insn,
12825  				      struct bpf_reg_state *dst_reg,
12826  				      struct bpf_reg_state src_reg)
12827  {
12828  	struct bpf_reg_state *regs = cur_regs(env);
12829  	u8 opcode = BPF_OP(insn->code);
12830  	bool src_known;
12831  	s64 smin_val, smax_val;
12832  	u64 umin_val, umax_val;
12833  	s32 s32_min_val, s32_max_val;
12834  	u32 u32_min_val, u32_max_val;
12835  	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
12836  	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
12837  	int ret;
12838  
12839  	smin_val = src_reg.smin_value;
12840  	smax_val = src_reg.smax_value;
12841  	umin_val = src_reg.umin_value;
12842  	umax_val = src_reg.umax_value;
12843  
12844  	s32_min_val = src_reg.s32_min_value;
12845  	s32_max_val = src_reg.s32_max_value;
12846  	u32_min_val = src_reg.u32_min_value;
12847  	u32_max_val = src_reg.u32_max_value;
12848  
12849  	if (alu32) {
12850  		src_known = tnum_subreg_is_const(src_reg.var_off);
12851  		if ((src_known &&
12852  		     (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
12853  		    s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
12854  			/* Taint dst register if offset had invalid bounds
12855  			 * derived from e.g. dead branches.
12856  			 */
12857  			__mark_reg_unknown(env, dst_reg);
12858  			return 0;
12859  		}
12860  	} else {
12861  		src_known = tnum_is_const(src_reg.var_off);
12862  		if ((src_known &&
12863  		     (smin_val != smax_val || umin_val != umax_val)) ||
12864  		    smin_val > smax_val || umin_val > umax_val) {
12865  			/* Taint dst register if offset had invalid bounds
12866  			 * derived from e.g. dead branches.
12867  			 */
12868  			__mark_reg_unknown(env, dst_reg);
12869  			return 0;
12870  		}
12871  	}
12872  
12873  	if (!src_known &&
12874  	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
12875  		__mark_reg_unknown(env, dst_reg);
12876  		return 0;
12877  	}
12878  
12879  	if (sanitize_needed(opcode)) {
12880  		ret = sanitize_val_alu(env, insn);
12881  		if (ret < 0)
12882  			return sanitize_err(env, insn, ret, NULL, NULL);
12883  	}
12884  
12885  	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
12886  	 * There are two classes of instructions: for the first class we track
12887  	 * both alu32 and alu64 sign/unsigned bounds independently; this
12888  	 * provides the greatest amount of precision when alu operations are
12889  	 * mixed with jmp32 operations. These operations are BPF_ADD, BPF_SUB,
12890  	 * BPF_MUL, BPF_AND, BPF_OR and BPF_XOR, because these ops have fairly
12891  	 * easy to understand and calculate behavior in both 32-bit and 64-bit
12892  	 * alu ops. See alu32 verifier tests for examples. The second class of
12893  	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however, are not so easy
12894  	 * to track with regards to sign/unsigned bounds because the bits may
12895  	 * cross subreg boundaries in the alu64 case. When this happens we mark
12896  	 * the reg unbounded in the subreg bound space and use the resulting
12897  	 * tnum to calculate an approximation of the sign/unsigned bounds.
12898  	 */
12899  	switch (opcode) {
12900  	case BPF_ADD:
12901  		scalar32_min_max_add(dst_reg, &src_reg);
12902  		scalar_min_max_add(dst_reg, &src_reg);
12903  		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
12904  		break;
12905  	case BPF_SUB:
12906  		scalar32_min_max_sub(dst_reg, &src_reg);
12907  		scalar_min_max_sub(dst_reg, &src_reg);
12908  		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
12909  		break;
12910  	case BPF_MUL:
12911  		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
12912  		scalar32_min_max_mul(dst_reg, &src_reg);
12913  		scalar_min_max_mul(dst_reg, &src_reg);
12914  		break;
12915  	case BPF_AND:
12916  		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
12917  		scalar32_min_max_and(dst_reg, &src_reg);
12918  		scalar_min_max_and(dst_reg, &src_reg);
12919  		break;
12920  	case BPF_OR:
12921  		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
12922  		scalar32_min_max_or(dst_reg, &src_reg);
12923  		scalar_min_max_or(dst_reg, &src_reg);
12924  		break;
12925  	case BPF_XOR:
12926  		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
12927  		scalar32_min_max_xor(dst_reg, &src_reg);
12928  		scalar_min_max_xor(dst_reg, &src_reg);
12929  		break;
12930  	case BPF_LSH:
12931  		if (umax_val >= insn_bitness) {
12932  			/* Shifts greater than 31 or 63 are undefined.
12933  			 * This includes shifts by a negative number.
12934  			 */
12935  			mark_reg_unknown(env, regs, insn->dst_reg);
12936  			break;
12937  		}
12938  		if (alu32)
12939  			scalar32_min_max_lsh(dst_reg, &src_reg);
12940  		else
12941  			scalar_min_max_lsh(dst_reg, &src_reg);
12942  		break;
12943  	case BPF_RSH:
12944  		if (umax_val >= insn_bitness) {
12945  			/* Shifts greater than 31 or 63 are undefined.
12946  			 * This includes shifts by a negative number.
12947  			 */
12948  			mark_reg_unknown(env, regs, insn->dst_reg);
12949  			break;
12950  		}
12951  		if (alu32)
12952  			scalar32_min_max_rsh(dst_reg, &src_reg);
12953  		else
12954  			scalar_min_max_rsh(dst_reg, &src_reg);
12955  		break;
12956  	case BPF_ARSH:
12957  		if (umax_val >= insn_bitness) {
12958  			/* Shifts greater than 31 or 63 are undefined.
12959  			 * This includes shifts by a negative number.
12960  			 */
12961  			mark_reg_unknown(env, regs, insn->dst_reg);
12962  			break;
12963  		}
12964  		if (alu32)
12965  			scalar32_min_max_arsh(dst_reg, &src_reg);
12966  		else
12967  			scalar_min_max_arsh(dst_reg, &src_reg);
12968  		break;
12969  	default:
12970  		mark_reg_unknown(env, regs, insn->dst_reg);
12971  		break;
12972  	}
12973  
12974  	/* ALU32 ops are zero extended into 64bit register */
12975  	if (alu32)
12976  		zext_32_to_64(dst_reg);
12977  	reg_bounds_sync(dst_reg);
12978  	return 0;
12979  }
12980  
12981  /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
12982   * and var_off.
12983   */
12984  static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
12985  				   struct bpf_insn *insn)
12986  {
12987  	struct bpf_verifier_state *vstate = env->cur_state;
12988  	struct bpf_func_state *state = vstate->frame[vstate->curframe];
12989  	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
12990  	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
12991  	u8 opcode = BPF_OP(insn->code);
12992  	int err;
12993  
12994  	dst_reg = &regs[insn->dst_reg];
12995  	src_reg = NULL;
12996  	if (dst_reg->type != SCALAR_VALUE)
12997  		ptr_reg = dst_reg;
12998  	else
12999  		/* Make sure ID is cleared otherwise dst_reg min/max could be
13000  		 * incorrectly propagated into other registers by find_equal_scalars()
13001  		 */
13002  		dst_reg->id = 0;
13003  	if (BPF_SRC(insn->code) == BPF_X) {
13004  		src_reg = &regs[insn->src_reg];
13005  		if (src_reg->type != SCALAR_VALUE) {
13006  			if (dst_reg->type != SCALAR_VALUE) {
13007  				/* Combining two pointers by any ALU op yields
13008  				 * an arbitrary scalar. Disallow all math except
13009  				 * pointer subtraction
13010  				 */
13011  				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
13012  					mark_reg_unknown(env, regs, insn->dst_reg);
13013  					return 0;
13014  				}
13015  				verbose(env, "R%d pointer %s pointer prohibited\n",
13016  					insn->dst_reg,
13017  					bpf_alu_string[opcode >> 4]);
13018  				return -EACCES;
13019  			} else {
13020  				/* scalar += pointer
13021  				 * This is legal, but we have to reverse our
13022  				 * src/dest handling in computing the range
13023  				 */
13024  				err = mark_chain_precision(env, insn->dst_reg);
13025  				if (err)
13026  					return err;
13027  				return adjust_ptr_min_max_vals(env, insn,
13028  							       src_reg, dst_reg);
13029  			}
13030  		} else if (ptr_reg) {
13031  			/* pointer += scalar */
13032  			err = mark_chain_precision(env, insn->src_reg);
13033  			if (err)
13034  				return err;
13035  			return adjust_ptr_min_max_vals(env, insn,
13036  						       dst_reg, src_reg);
13037  		} else if (dst_reg->precise) {
13038  			/* if dst_reg is precise, src_reg should be precise as well */
13039  			err = mark_chain_precision(env, insn->src_reg);
13040  			if (err)
13041  				return err;
13042  		}
13043  	} else {
13044  		/* Pretend the src is a reg with a known value, since we only
13045  		 * need to be able to read from this state.
13046  		 */
13047  		off_reg.type = SCALAR_VALUE;
13048  		__mark_reg_known(&off_reg, insn->imm);
13049  		src_reg = &off_reg;
13050  		if (ptr_reg) /* pointer += K */
13051  			return adjust_ptr_min_max_vals(env, insn,
13052  						       ptr_reg, src_reg);
13053  	}
13054  
13055  	/* Got here implies adding two SCALAR_VALUEs */
13056  	if (WARN_ON_ONCE(ptr_reg)) {
13057  		print_verifier_state(env, state, true);
13058  		verbose(env, "verifier internal error: unexpected ptr_reg\n");
13059  		return -EINVAL;
13060  	}
13061  	if (WARN_ON(!src_reg)) {
13062  		print_verifier_state(env, state, true);
13063  		verbose(env, "verifier internal error: no src_reg\n");
13064  		return -EINVAL;
13065  	}
13066  	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
13067  }
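
/* Example of the reversed src/dst handling above (registers and values
 * are hypothetical): for "scalar += pointer", e.g.
 *
 *    r3 = 4          // SCALAR_VALUE, known constant
 *    r3 += r2        // r2 is a packet pointer
 *
 * the operands are swapped and the insn is treated as pointer-plus-
 * scalar, so r3 ends up with r2's pointer type and fixed offset + 4.
 */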
13068  
13069  /* check validity of 32-bit and 64-bit arithmetic operations */
13070  static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
13071  {
13072  	struct bpf_reg_state *regs = cur_regs(env);
13073  	u8 opcode = BPF_OP(insn->code);
13074  	int err;
13075  
13076  	if (opcode == BPF_END || opcode == BPF_NEG) {
13077  		if (opcode == BPF_NEG) {
13078  			if (BPF_SRC(insn->code) != BPF_K ||
13079  			    insn->src_reg != BPF_REG_0 ||
13080  			    insn->off != 0 || insn->imm != 0) {
13081  				verbose(env, "BPF_NEG uses reserved fields\n");
13082  				return -EINVAL;
13083  			}
13084  		} else {
13085  			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
13086  			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
13087  			    (BPF_CLASS(insn->code) == BPF_ALU64 &&
13088  			     BPF_SRC(insn->code) != BPF_TO_LE)) {
13089  				verbose(env, "BPF_END uses reserved fields\n");
13090  				return -EINVAL;
13091  			}
13092  		}
13093  
13094  		/* check src operand */
13095  		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
13096  		if (err)
13097  			return err;
13098  
13099  		if (is_pointer_value(env, insn->dst_reg)) {
13100  			verbose(env, "R%d pointer arithmetic prohibited\n",
13101  				insn->dst_reg);
13102  			return -EACCES;
13103  		}
13104  
13105  		/* check dest operand */
13106  		err = check_reg_arg(env, insn->dst_reg, DST_OP);
13107  		if (err)
13108  			return err;
13109  
13110  	} else if (opcode == BPF_MOV) {
13111  
13112  		if (BPF_SRC(insn->code) == BPF_X) {
13113  			if (insn->imm != 0) {
13114  				verbose(env, "BPF_MOV uses reserved fields\n");
13115  				return -EINVAL;
13116  			}
13117  
13118  			if (BPF_CLASS(insn->code) == BPF_ALU) {
13119  				if (insn->off != 0 && insn->off != 8 && insn->off != 16) {
13120  					verbose(env, "BPF_MOV uses reserved fields\n");
13121  					return -EINVAL;
13122  				}
13123  			} else {
13124  				if (insn->off != 0 && insn->off != 8 && insn->off != 16 &&
13125  				    insn->off != 32) {
13126  					verbose(env, "BPF_MOV uses reserved fields\n");
13127  					return -EINVAL;
13128  				}
13129  			}
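			/* A non-zero off here selects the sign-extending move
			 * variants, e.g. off == 8 encodes "r1 = (s8)r2"; plain
			 * register moves use off == 0, and only BPF_ALU64
			 * additionally accepts off == 32.
			 */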
13130  
13131  			/* check src operand */
13132  			err = check_reg_arg(env, insn->src_reg, SRC_OP);
13133  			if (err)
13134  				return err;
13135  		} else {
13136  			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
13137  				verbose(env, "BPF_MOV uses reserved fields\n");
13138  				return -EINVAL;
13139  			}
13140  		}
13141  
13142  		/* check dest operand, mark as required later */
13143  		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
13144  		if (err)
13145  			return err;
13146  
13147  		if (BPF_SRC(insn->code) == BPF_X) {
13148  			struct bpf_reg_state *src_reg = regs + insn->src_reg;
13149  			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
13150  			bool need_id = src_reg->type == SCALAR_VALUE && !src_reg->id &&
13151  				       !tnum_is_const(src_reg->var_off);
13152  
13153  			if (BPF_CLASS(insn->code) == BPF_ALU64) {
13154  				if (insn->off == 0) {
13155  					/* case: R1 = R2
13156  					 * copy register state to dest reg
13157  					 */
13158  					if (need_id)
13159  						/* Assign src and dst registers the same ID
13160  						 * that will be used by find_equal_scalars()
13161  						 * to propagate min/max range.
13162  						 */
13163  						src_reg->id = ++env->id_gen;
13164  					copy_register_state(dst_reg, src_reg);
13165  					dst_reg->live |= REG_LIVE_WRITTEN;
13166  					dst_reg->subreg_def = DEF_NOT_SUBREG;
13167  				} else {
13168  					/* case: R1 = (s8, s16, s32)R2 */
13169  					if (is_pointer_value(env, insn->src_reg)) {
13170  						verbose(env,
13171  							"R%d sign-extension part of pointer\n",
13172  							insn->src_reg);
13173  						return -EACCES;
13174  					} else if (src_reg->type == SCALAR_VALUE) {
13175  						bool no_sext;
13176  
13177  						no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
13178  						if (no_sext && need_id)
13179  							src_reg->id = ++env->id_gen;
13180  						copy_register_state(dst_reg, src_reg);
13181  						if (!no_sext)
13182  							dst_reg->id = 0;
13183  						coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
13184  						dst_reg->live |= REG_LIVE_WRITTEN;
13185  						dst_reg->subreg_def = DEF_NOT_SUBREG;
13186  					} else {
13187  						mark_reg_unknown(env, regs, insn->dst_reg);
13188  					}
13189  				}
13190  			} else {
13191  				/* R1 = (u32) R2 */
13192  				if (is_pointer_value(env, insn->src_reg)) {
13193  					verbose(env,
13194  						"R%d partial copy of pointer\n",
13195  						insn->src_reg);
13196  					return -EACCES;
13197  				} else if (src_reg->type == SCALAR_VALUE) {
13198  					if (insn->off == 0) {
13199  						bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
13200  
13201  						if (is_src_reg_u32 && need_id)
13202  							src_reg->id = ++env->id_gen;
13203  						copy_register_state(dst_reg, src_reg);
13204  						/* Make sure ID is cleared if src_reg is not in u32
13205  						 * range otherwise dst_reg min/max could be incorrectly
13206  						 * propagated into src_reg by find_equal_scalars()
13207  						 */
13208  						if (!is_src_reg_u32)
13209  							dst_reg->id = 0;
13210  						dst_reg->live |= REG_LIVE_WRITTEN;
13211  						dst_reg->subreg_def = env->insn_idx + 1;
13212  					} else {
13213  						/* case: W1 = (s8, s16)W2 */
13214  						bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
13215  
13216  						if (no_sext && need_id)
13217  							src_reg->id = ++env->id_gen;
13218  						copy_register_state(dst_reg, src_reg);
13219  						if (!no_sext)
13220  							dst_reg->id = 0;
13221  						dst_reg->live |= REG_LIVE_WRITTEN;
13222  						dst_reg->subreg_def = env->insn_idx + 1;
13223  						coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
13224  					}
13225  				} else {
13226  					mark_reg_unknown(env, regs,
13227  							 insn->dst_reg);
13228  				}
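				/* A 32-bit mov always zero-extends the result into
				 * the upper 32 bits of the destination register.
				 */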
13229  				zext_32_to_64(dst_reg);
13230  				reg_bounds_sync(dst_reg);
13231  			}
13232  		} else {
13233  			/* case: R = imm
13234  			 * remember the value we stored into this reg
13235  			 */
13236  			/* clear any state __mark_reg_known doesn't set */
13237  			mark_reg_unknown(env, regs, insn->dst_reg);
13238  			regs[insn->dst_reg].type = SCALAR_VALUE;
13239  			if (BPF_CLASS(insn->code) == BPF_ALU64) {
13240  				__mark_reg_known(regs + insn->dst_reg,
13241  						 insn->imm);
13242  			} else {
13243  				__mark_reg_known(regs + insn->dst_reg,
13244  						 (u32)insn->imm);
13245  			}
13246  		}
13247  
13248  	} else if (opcode > BPF_END) {
13249  		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
13250  		return -EINVAL;
13251  
13252  	} else {	/* all other ALU ops: and, sub, xor, add, ... */
13253  
13254  		if (BPF_SRC(insn->code) == BPF_X) {
13255  			if (insn->imm != 0 || insn->off > 1 ||
13256  			    (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
13257  				verbose(env, "BPF_ALU uses reserved fields\n");
13258  				return -EINVAL;
13259  			}
13260  			/* check src1 operand */
13261  			err = check_reg_arg(env, insn->src_reg, SRC_OP);
13262  			if (err)
13263  				return err;
13264  		} else {
13265  			if (insn->src_reg != BPF_REG_0 || insn->off > 1 ||
13266  			    (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
13267  				verbose(env, "BPF_ALU uses reserved fields\n");
13268  				return -EINVAL;
13269  			}
13270  		}
13271  
13272  		/* check src2 operand */
13273  		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
13274  		if (err)
13275  			return err;
13276  
13277  		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
13278  		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
13279  			verbose(env, "div by zero\n");
13280  			return -EINVAL;
13281  		}
13282  
13283  		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
13284  		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
13285  			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
13286  
13287  			if (insn->imm < 0 || insn->imm >= size) {
13288  				verbose(env, "invalid shift %d\n", insn->imm);
13289  				return -EINVAL;
13290  			}
13291  		}
13292  
13293  		/* check dest operand */
13294  		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
13295  		if (err)
13296  			return err;
13297  
13298  		return adjust_reg_min_max_vals(env, insn);
13299  	}
13300  
13301  	return 0;
13302  }
13303  
13304  static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
13305  				   struct bpf_reg_state *dst_reg,
13306  				   enum bpf_reg_type type,
13307  				   bool range_right_open)
13308  {
13309  	struct bpf_func_state *state;
13310  	struct bpf_reg_state *reg;
13311  	int new_range;
13312  
13313  	if (dst_reg->off < 0 ||
13314  	    (dst_reg->off == 0 && range_right_open))
13315  		/* This doesn't give us any range */
13316  		return;
13317  
13318  	if (dst_reg->umax_value > MAX_PACKET_OFF ||
13319  	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
13320  		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
13321  		 * than pkt_end, but that's because it's also less than pkt.
13322  		 */
13323  		return;
13324  
13325  	new_range = dst_reg->off;
13326  	if (range_right_open)
13327  		new_range++;
13328  
13329  	/* Examples for register markings:
13330  	 *
13331  	 * pkt_data in dst register:
13332  	 *
13333  	 *   r2 = r3;
13334  	 *   r2 += 8;
13335  	 *   if (r2 > pkt_end) goto <handle exception>
13336  	 *   <access okay>
13337  	 *
13338  	 *   r2 = r3;
13339  	 *   r2 += 8;
13340  	 *   if (r2 < pkt_end) goto <access okay>
13341  	 *   <handle exception>
13342  	 *
13343  	 *   Where:
13344  	 *     r2 == dst_reg, pkt_end == src_reg
13345  	 *     r2=pkt(id=n,off=8,r=0)
13346  	 *     r3=pkt(id=n,off=0,r=0)
13347  	 *
13348  	 * pkt_data in src register:
13349  	 *
13350  	 *   r2 = r3;
13351  	 *   r2 += 8;
13352  	 *   if (pkt_end >= r2) goto <access okay>
13353  	 *   <handle exception>
13354  	 *
13355  	 *   r2 = r3;
13356  	 *   r2 += 8;
13357  	 *   if (pkt_end <= r2) goto <handle exception>
13358  	 *   <access okay>
13359  	 *
13360  	 *   Where:
13361  	 *     pkt_end == dst_reg, r2 == src_reg
13362  	 *     r2=pkt(id=n,off=8,r=0)
13363  	 *     r3=pkt(id=n,off=0,r=0)
13364  	 *
13365  	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
13366  	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
13367  	 * and [r3, r3 + 8-1) respectively is safe to access depending on
13368  	 * the check.
13369  	 */
13370  
13371  	/* If our ids match, then we must have the same max_value.  And we
13372  	 * don't care about the other reg's fixed offset, since if it's too big
13373  	 * the range won't allow anything.
13374  	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
13375  	 */
13376  	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
13377  		if (reg->type == type && reg->id == dst_reg->id)
13378  			/* keep the maximum range already checked */
13379  			reg->range = max(reg->range, new_range);
13380  	}));
13381  }
13382  
13383  static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
13384  {
13385  	struct tnum subreg = tnum_subreg(reg->var_off);
13386  	s32 sval = (s32)val;
13387  
13388  	switch (opcode) {
13389  	case BPF_JEQ:
13390  		if (tnum_is_const(subreg))
13391  			return !!tnum_equals_const(subreg, val);
13392  		else if (val < reg->u32_min_value || val > reg->u32_max_value)
13393  			return 0;
13394  		break;
13395  	case BPF_JNE:
13396  		if (tnum_is_const(subreg))
13397  			return !tnum_equals_const(subreg, val);
13398  		else if (val < reg->u32_min_value || val > reg->u32_max_value)
13399  			return 1;
13400  		break;
13401  	case BPF_JSET:
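		/* In a tnum, (~mask & value) are the bits known to be 1 and
		 * (mask | value) the bits that may be 1.
		 */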
13402  		if ((~subreg.mask & subreg.value) & val)
13403  			return 1;
13404  		if (!((subreg.mask | subreg.value) & val))
13405  			return 0;
13406  		break;
13407  	case BPF_JGT:
13408  		if (reg->u32_min_value > val)
13409  			return 1;
13410  		else if (reg->u32_max_value <= val)
13411  			return 0;
13412  		break;
13413  	case BPF_JSGT:
13414  		if (reg->s32_min_value > sval)
13415  			return 1;
13416  		else if (reg->s32_max_value <= sval)
13417  			return 0;
13418  		break;
13419  	case BPF_JLT:
13420  		if (reg->u32_max_value < val)
13421  			return 1;
13422  		else if (reg->u32_min_value >= val)
13423  			return 0;
13424  		break;
13425  	case BPF_JSLT:
13426  		if (reg->s32_max_value < sval)
13427  			return 1;
13428  		else if (reg->s32_min_value >= sval)
13429  			return 0;
13430  		break;
13431  	case BPF_JGE:
13432  		if (reg->u32_min_value >= val)
13433  			return 1;
13434  		else if (reg->u32_max_value < val)
13435  			return 0;
13436  		break;
13437  	case BPF_JSGE:
13438  		if (reg->s32_min_value >= sval)
13439  			return 1;
13440  		else if (reg->s32_max_value < sval)
13441  			return 0;
13442  		break;
13443  	case BPF_JLE:
13444  		if (reg->u32_max_value <= val)
13445  			return 1;
13446  		else if (reg->u32_min_value > val)
13447  			return 0;
13448  		break;
13449  	case BPF_JSLE:
13450  		if (reg->s32_max_value <= sval)
13451  			return 1;
13452  		else if (reg->s32_min_value > sval)
13453  			return 0;
13454  		break;
13455  	}
13456  
13457  	return -1;
13458  }
13459  
13461  static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
13462  {
13463  	s64 sval = (s64)val;
13464  
13465  	switch (opcode) {
13466  	case BPF_JEQ:
13467  		if (tnum_is_const(reg->var_off))
13468  			return !!tnum_equals_const(reg->var_off, val);
13469  		else if (val < reg->umin_value || val > reg->umax_value)
13470  			return 0;
13471  		break;
13472  	case BPF_JNE:
13473  		if (tnum_is_const(reg->var_off))
13474  			return !tnum_equals_const(reg->var_off, val);
13475  		else if (val < reg->umin_value || val > reg->umax_value)
13476  			return 1;
13477  		break;
13478  	case BPF_JSET:
13479  		if ((~reg->var_off.mask & reg->var_off.value) & val)
13480  			return 1;
13481  		if (!((reg->var_off.mask | reg->var_off.value) & val))
13482  			return 0;
13483  		break;
13484  	case BPF_JGT:
13485  		if (reg->umin_value > val)
13486  			return 1;
13487  		else if (reg->umax_value <= val)
13488  			return 0;
13489  		break;
13490  	case BPF_JSGT:
13491  		if (reg->smin_value > sval)
13492  			return 1;
13493  		else if (reg->smax_value <= sval)
13494  			return 0;
13495  		break;
13496  	case BPF_JLT:
13497  		if (reg->umax_value < val)
13498  			return 1;
13499  		else if (reg->umin_value >= val)
13500  			return 0;
13501  		break;
13502  	case BPF_JSLT:
13503  		if (reg->smax_value < sval)
13504  			return 1;
13505  		else if (reg->smin_value >= sval)
13506  			return 0;
13507  		break;
13508  	case BPF_JGE:
13509  		if (reg->umin_value >= val)
13510  			return 1;
13511  		else if (reg->umax_value < val)
13512  			return 0;
13513  		break;
13514  	case BPF_JSGE:
13515  		if (reg->smin_value >= sval)
13516  			return 1;
13517  		else if (reg->smax_value < sval)
13518  			return 0;
13519  		break;
13520  	case BPF_JLE:
13521  		if (reg->umax_value <= val)
13522  			return 1;
13523  		else if (reg->umin_value > val)
13524  			return 0;
13525  		break;
13526  	case BPF_JSLE:
13527  		if (reg->smax_value <= sval)
13528  			return 1;
13529  		else if (reg->smin_value > sval)
13530  			return 0;
13531  		break;
13532  	}
13533  
13534  	return -1;
13535  }
13536  
13537  /* compute branch direction of the expression "if (reg opcode val) goto target;"
13538   * and return:
13539   *  1 - branch will be taken and "goto target" will be executed
13540   *  0 - branch will not be taken and fall-through to next insn
13541   * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
13542   *      known value range is [0,10].
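 *      By contrast, "if (reg < 11)" with the same range would return 1.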
13543   */
13544  static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
13545  			   bool is_jmp32)
13546  {
13547  	if (__is_pointer_value(false, reg)) {
13548  		if (!reg_not_null(reg))
13549  			return -1;
13550  
13551  		/* If the pointer is valid, tests against zero will fail, so we
13552  		 * can use this to decide the branch direction.
13553  		 */
13554  		if (val != 0)
13555  			return -1;
13556  
13557  		switch (opcode) {
13558  		case BPF_JEQ:
13559  			return 0;
13560  		case BPF_JNE:
13561  			return 1;
13562  		default:
13563  			return -1;
13564  		}
13565  	}
13566  
13567  	if (is_jmp32)
13568  		return is_branch32_taken(reg, val, opcode);
13569  	return is_branch64_taken(reg, val, opcode);
13570  }
13571  
13572  static int flip_opcode(u32 opcode)
13573  {
13574  	/* How can we transform "a <op> b" into "b <op> a"? */
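	/* E.g. "a < b" holds exactly when "b > a" does, so BPF_JLT maps to BPF_JGT. */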
13575  	static const u8 opcode_flip[16] = {
13576  		/* these stay the same */
13577  		[BPF_JEQ  >> 4] = BPF_JEQ,
13578  		[BPF_JNE  >> 4] = BPF_JNE,
13579  		[BPF_JSET >> 4] = BPF_JSET,
13580  		/* these swap "lesser" and "greater" (L and G in the opcodes) */
13581  		[BPF_JGE  >> 4] = BPF_JLE,
13582  		[BPF_JGT  >> 4] = BPF_JLT,
13583  		[BPF_JLE  >> 4] = BPF_JGE,
13584  		[BPF_JLT  >> 4] = BPF_JGT,
13585  		[BPF_JSGE >> 4] = BPF_JSLE,
13586  		[BPF_JSGT >> 4] = BPF_JSLT,
13587  		[BPF_JSLE >> 4] = BPF_JSGE,
13588  		[BPF_JSLT >> 4] = BPF_JSGT
13589  	};
13590  	return opcode_flip[opcode >> 4];
13591  }
13592  
13593  static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
13594  				   struct bpf_reg_state *src_reg,
13595  				   u8 opcode)
13596  {
13597  	struct bpf_reg_state *pkt;
13598  
13599  	if (src_reg->type == PTR_TO_PACKET_END) {
13600  		pkt = dst_reg;
13601  	} else if (dst_reg->type == PTR_TO_PACKET_END) {
13602  		pkt = src_reg;
13603  		opcode = flip_opcode(opcode);
13604  	} else {
13605  		return -1;
13606  	}
13607  
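	/* Only the special AT_PKT_END / BEYOND_PKT_END markers set by
	 * mark_pkt_end() (stored as negative range values) let us decide the
	 * branch; an ordinary non-negative range tells us nothing here.
	 */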
13608  	if (pkt->range >= 0)
13609  		return -1;
13610  
13611  	switch (opcode) {
13612  	case BPF_JLE:
13613  		/* pkt <= pkt_end */
13614  		fallthrough;
13615  	case BPF_JGT:
13616  		/* pkt > pkt_end */
13617  		if (pkt->range == BEYOND_PKT_END)
13618  			/* pkt has at least one extra byte beyond pkt_end */
13619  			return opcode == BPF_JGT;
13620  		break;
13621  	case BPF_JLT:
13622  		/* pkt < pkt_end */
13623  		fallthrough;
13624  	case BPF_JGE:
13625  		/* pkt >= pkt_end */
13626  		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
13627  			return opcode == BPF_JGE;
13628  		break;
13629  	}
13630  	return -1;
13631  }
13632  
13633  /* Adjusts the register min/max values in the case that the dst_reg is the
13634   * variable register that we are working on, and src_reg is a constant or we're
13635   * simply doing a BPF_K check.
13636   * In JEQ/JNE cases we also adjust the var_off values.
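 * For example, "if (r1 > 5)" with r1 known to be in [0,10] narrows r1 to
 * [6,10] in the true branch and to [0,5] in the false branch.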
13637   */
13638  static void reg_set_min_max(struct bpf_reg_state *true_reg,
13639  			    struct bpf_reg_state *false_reg,
13640  			    u64 val, u32 val32,
13641  			    u8 opcode, bool is_jmp32)
13642  {
13643  	struct tnum false_32off = tnum_subreg(false_reg->var_off);
13644  	struct tnum false_64off = false_reg->var_off;
13645  	struct tnum true_32off = tnum_subreg(true_reg->var_off);
13646  	struct tnum true_64off = true_reg->var_off;
13647  	s64 sval = (s64)val;
13648  	s32 sval32 = (s32)val32;
13649  
13650  	/* If the dst_reg is a pointer, we can't learn anything about its
13651  	 * variable offset from the compare (unless src_reg were a pointer into
13652  	 * the same object, but we don't bother with that).
13653  	 * Since false_reg and true_reg have the same type by construction, we
13654  	 * only need to check one of them for pointerness.
13655  	 */
13656  	if (__is_pointer_value(false, false_reg))
13657  		return;
13658  
13659  	switch (opcode) {
13660  	/* JEQ/JNE comparison doesn't change the register equivalence.
13661  	 *
13662  	 * r1 = r2;
13663  	 * if (r1 == 42) goto label;
13664  	 * ...
13665  	 * label: // here both r1 and r2 are known to be 42.
13666  	 *
13667  	 * Hence, when marking a register as known, preserve its ID.
13668  	 */
13669  	case BPF_JEQ:
13670  		if (is_jmp32) {
13671  			__mark_reg32_known(true_reg, val32);
13672  			true_32off = tnum_subreg(true_reg->var_off);
13673  		} else {
13674  			___mark_reg_known(true_reg, val);
13675  			true_64off = true_reg->var_off;
13676  		}
13677  		break;
13678  	case BPF_JNE:
13679  		if (is_jmp32) {
13680  			__mark_reg32_known(false_reg, val32);
13681  			false_32off = tnum_subreg(false_reg->var_off);
13682  		} else {
13683  			___mark_reg_known(false_reg, val);
13684  			false_64off = false_reg->var_off;
13685  		}
13686  		break;
13687  	case BPF_JSET:
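		/* The taken branch only proves that at least one bit of the
		 * constant is set; it pins down a specific bit only when the
		 * constant is a power of two.
		 */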
13688  		if (is_jmp32) {
13689  			false_32off = tnum_and(false_32off, tnum_const(~val32));
13690  			if (is_power_of_2(val32))
13691  				true_32off = tnum_or(true_32off,
13692  						     tnum_const(val32));
13693  		} else {
13694  			false_64off = tnum_and(false_64off, tnum_const(~val));
13695  			if (is_power_of_2(val))
13696  				true_64off = tnum_or(true_64off,
13697  						     tnum_const(val));
13698  		}
13699  		break;
13700  	case BPF_JGE:
13701  	case BPF_JGT:
13702  	{
13703  		if (is_jmp32) {
13704  			u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
13705  			u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
13706  
13707  			false_reg->u32_max_value = min(false_reg->u32_max_value,
13708  						       false_umax);
13709  			true_reg->u32_min_value = max(true_reg->u32_min_value,
13710  						      true_umin);
13711  		} else {
13712  			u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
13713  			u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
13714  
13715  			false_reg->umax_value = min(false_reg->umax_value, false_umax);
13716  			true_reg->umin_value = max(true_reg->umin_value, true_umin);
13717  		}
13718  		break;
13719  	}
13720  	case BPF_JSGE:
13721  	case BPF_JSGT:
13722  	{
13723  		if (is_jmp32) {
13724  			s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
13725  			s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
13726  
13727  			false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
13728  			true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
13729  		} else {
13730  			s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
13731  			s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
13732  
13733  			false_reg->smax_value = min(false_reg->smax_value, false_smax);
13734  			true_reg->smin_value = max(true_reg->smin_value, true_smin);
13735  		}
13736  		break;
13737  	}
13738  	case BPF_JLE:
13739  	case BPF_JLT:
13740  	{
13741  		if (is_jmp32) {
13742  			u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
13743  			u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
13744  
13745  			false_reg->u32_min_value = max(false_reg->u32_min_value,
13746  						       false_umin);
13747  			true_reg->u32_max_value = min(true_reg->u32_max_value,
13748  						      true_umax);
13749  		} else {
13750  			u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
13751  			u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
13752  
13753  			false_reg->umin_value = max(false_reg->umin_value, false_umin);
13754  			true_reg->umax_value = min(true_reg->umax_value, true_umax);
13755  		}
13756  		break;
13757  	}
13758  	case BPF_JSLE:
13759  	case BPF_JSLT:
13760  	{
13761  		if (is_jmp32) {
13762  			s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
13763  			s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
13764  
13765  			false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
13766  			true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
13767  		} else {
13768  			s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
13769  			s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
13770  
13771  			false_reg->smin_value = max(false_reg->smin_value, false_smin);
13772  			true_reg->smax_value = min(true_reg->smax_value, true_smax);
13773  		}
13774  		break;
13775  	}
13776  	default:
13777  		return;
13778  	}
13779  
13780  	if (is_jmp32) {
13781  		false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
13782  					     tnum_subreg(false_32off));
13783  		true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
13784  					    tnum_subreg(true_32off));
13785  		__reg_combine_32_into_64(false_reg);
13786  		__reg_combine_32_into_64(true_reg);
13787  	} else {
13788  		false_reg->var_off = false_64off;
13789  		true_reg->var_off = true_64off;
13790  		__reg_combine_64_into_32(false_reg);
13791  		__reg_combine_64_into_32(true_reg);
13792  	}
13793  }
13794  
13795  /* Same as above, but for the case that dst_reg holds a constant and src_reg is
13796   * the variable reg.
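 * E.g. "if (5 > r2)" is handled by flipping the opcode and treating it as
 * "if (r2 < 5)".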
13797   */
13798  static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
13799  				struct bpf_reg_state *false_reg,
13800  				u64 val, u32 val32,
13801  				u8 opcode, bool is_jmp32)
13802  {
13803  	opcode = flip_opcode(opcode);
13804  	/* This uses zero as "not present in table"; luckily the zero opcode,
13805  	 * BPF_JA, can't get here.
13806  	 */
13807  	if (opcode)
13808  		reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
13809  }
13810  
13811  /* Regs are known to be equal, so intersect their min/max/var_off */
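/* e.g. intersecting ranges [0,10] and [5,20] leaves both regs in [5,10] */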
13812  static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
13813  				  struct bpf_reg_state *dst_reg)
13814  {
13815  	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
13816  							dst_reg->umin_value);
13817  	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
13818  							dst_reg->umax_value);
13819  	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
13820  							dst_reg->smin_value);
13821  	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
13822  							dst_reg->smax_value);
13823  	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
13824  							     dst_reg->var_off);
13825  	reg_bounds_sync(src_reg);
13826  	reg_bounds_sync(dst_reg);
13827  }
13828  
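/* For JEQ the two regs are known equal in the true branch, for JNE in the
 * false branch; combine their bounds only in the branch where the equality
 * is known to hold.
 */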
13829  static void reg_combine_min_max(struct bpf_reg_state *true_src,
13830  				struct bpf_reg_state *true_dst,
13831  				struct bpf_reg_state *false_src,
13832  				struct bpf_reg_state *false_dst,
13833  				u8 opcode)
13834  {
13835  	switch (opcode) {
13836  	case BPF_JEQ:
13837  		__reg_combine_min_max(true_src, true_dst);
13838  		break;
13839  	case BPF_JNE:
13840  		__reg_combine_min_max(false_src, false_dst);
13841  		break;
13842  	}
13843  }
13844  
13845  static void mark_ptr_or_null_reg(struct bpf_func_state *state,
13846  				 struct bpf_reg_state *reg, u32 id,
13847  				 bool is_null)
13848  {
13849  	if (type_may_be_null(reg->type) && reg->id == id &&
13850  	    (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
13851  		/* Old offset (both fixed and variable parts) should have been
13852  		 * known-zero, because we don't allow pointer arithmetic on
13853  		 * pointers that might be NULL. If we see this happening, don't
13854  		 * convert the register.
13855  		 *
13856  		 * But some helpers that return local kptrs advance the offset
13857  		 * of the returned pointer. In those cases, it is fine to
13858  		 * expect a non-zero reg->off.
13859  		 */
13860  		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
13861  			return;
13862  		if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
13863  		    WARN_ON_ONCE(reg->off))
13864  			return;
13865  
13866  		if (is_null) {
13867  			reg->type = SCALAR_VALUE;
13868  			/* We don't need id and ref_obj_id from this point
13869  			 * onwards anymore, so reset them to give state
13870  			 * pruning a chance to take effect.
13871  			 */
13872  			reg->id = 0;
13873  			reg->ref_obj_id = 0;
13874  
13875  			return;
13876  		}
13877  
13878  		mark_ptr_not_null_reg(reg);
13879  
13880  		if (!reg_may_point_to_spin_lock(reg)) {
13881  			/* For not-NULL ptr, reg->ref_obj_id will be reset
13882  			 * in release_reference().
13883  			 *
13884  			 * reg->id is still used by spin_lock ptr. Other
13885  			 * than spin_lock ptr type, reg->id can be reset.
13886  			 */
13887  			reg->id = 0;
13888  		}
13889  	}
13890  }
13891  
13892  /* The logic is similar to find_good_pkt_pointers(), both could eventually
13893   * be folded together at some point.
13894   */
13895  static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
13896  				  bool is_null)
13897  {
13898  	struct bpf_func_state *state = vstate->frame[vstate->curframe];
13899  	struct bpf_reg_state *regs = state->regs, *reg;
13900  	u32 ref_obj_id = regs[regno].ref_obj_id;
13901  	u32 id = regs[regno].id;
13902  
13903  	if (ref_obj_id && ref_obj_id == id && is_null)
13904  		/* regs[regno] is in the " == NULL" branch.
13905  		 * No one could have freed the reference state before
13906  		 * doing the NULL check.
13907  		 */
13908  		WARN_ON_ONCE(release_reference_state(state, id));
13909  
13910  	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
13911  		mark_ptr_or_null_reg(state, reg, id, is_null);
13912  	}));
13913  }
13914  
13915  static bool try_match_pkt_pointers(const struct bpf_insn *insn,
13916  				   struct bpf_reg_state *dst_reg,
13917  				   struct bpf_reg_state *src_reg,
13918  				   struct bpf_verifier_state *this_branch,
13919  				   struct bpf_verifier_state *other_branch)
13920  {
13921  	if (BPF_SRC(insn->code) != BPF_X)
13922  		return false;
13923  
13924  	/* Pointers are always 64-bit. */
13925  	if (BPF_CLASS(insn->code) == BPF_JMP32)
13926  		return false;
13927  
13928  	switch (BPF_OP(insn->code)) {
13929  	case BPF_JGT:
13930  		if ((dst_reg->type == PTR_TO_PACKET &&
13931  		     src_reg->type == PTR_TO_PACKET_END) ||
13932  		    (dst_reg->type == PTR_TO_PACKET_META &&
13933  		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
13934  			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
13935  			find_good_pkt_pointers(this_branch, dst_reg,
13936  					       dst_reg->type, false);
13937  			mark_pkt_end(other_branch, insn->dst_reg, true);
13938  		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
13939  			    src_reg->type == PTR_TO_PACKET) ||
13940  			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
13941  			    src_reg->type == PTR_TO_PACKET_META)) {
13942  			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
13943  			find_good_pkt_pointers(other_branch, src_reg,
13944  					       src_reg->type, true);
13945  			mark_pkt_end(this_branch, insn->src_reg, false);
13946  		} else {
13947  			return false;
13948  		}
13949  		break;
13950  	case BPF_JLT:
13951  		if ((dst_reg->type == PTR_TO_PACKET &&
13952  		     src_reg->type == PTR_TO_PACKET_END) ||
13953  		    (dst_reg->type == PTR_TO_PACKET_META &&
13954  		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
13955  			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
13956  			find_good_pkt_pointers(other_branch, dst_reg,
13957  					       dst_reg->type, true);
13958  			mark_pkt_end(this_branch, insn->dst_reg, false);
13959  		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
13960  			    src_reg->type == PTR_TO_PACKET) ||
13961  			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
13962  			    src_reg->type == PTR_TO_PACKET_META)) {
13963  			/* pkt_end < pkt_data', pkt_data > pkt_meta' */
13964  			find_good_pkt_pointers(this_branch, src_reg,
13965  					       src_reg->type, false);
13966  			mark_pkt_end(other_branch, insn->src_reg, true);
13967  		} else {
13968  			return false;
13969  		}
13970  		break;
13971  	case BPF_JGE:
13972  		if ((dst_reg->type == PTR_TO_PACKET &&
13973  		     src_reg->type == PTR_TO_PACKET_END) ||
13974  		    (dst_reg->type == PTR_TO_PACKET_META &&
13975  		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
13976  			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
13977  			find_good_pkt_pointers(this_branch, dst_reg,
13978  					       dst_reg->type, true);
13979  			mark_pkt_end(other_branch, insn->dst_reg, false);
13980  		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
13981  			    src_reg->type == PTR_TO_PACKET) ||
13982  			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
13983  			    src_reg->type == PTR_TO_PACKET_META)) {
13984  			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
13985  			find_good_pkt_pointers(other_branch, src_reg,
13986  					       src_reg->type, false);
13987  			mark_pkt_end(this_branch, insn->src_reg, true);
13988  		} else {
13989  			return false;
13990  		}
13991  		break;
13992  	case BPF_JLE:
13993  		if ((dst_reg->type == PTR_TO_PACKET &&
13994  		     src_reg->type == PTR_TO_PACKET_END) ||
13995  		    (dst_reg->type == PTR_TO_PACKET_META &&
13996  		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
13997  			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
13998  			find_good_pkt_pointers(other_branch, dst_reg,
13999  					       dst_reg->type, false);
14000  			mark_pkt_end(this_branch, insn->dst_reg, true);
14001  		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
14002  			    src_reg->type == PTR_TO_PACKET) ||
14003  			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
14004  			    src_reg->type == PTR_TO_PACKET_META)) {
14005  			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
14006  			find_good_pkt_pointers(this_branch, src_reg,
14007  					       src_reg->type, true);
14008  			mark_pkt_end(other_branch, insn->src_reg, false);
14009  		} else {
14010  			return false;
14011  		}
14012  		break;
14013  	default:
14014  		return false;
14015  	}
14016  
14017  	return true;
14018  }
14019  
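/* Propagate the newly learned bounds of known_reg to every scalar register
 * in the verifier state that carries the same ID, e.g. after "r2 = r1" both
 * registers share an ID and are narrowed together.
 */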
14020  static void find_equal_scalars(struct bpf_verifier_state *vstate,
14021  			       struct bpf_reg_state *known_reg)
14022  {
14023  	struct bpf_func_state *state;
14024  	struct bpf_reg_state *reg;
14025  
14026  	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
14027  		if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
14028  			copy_register_state(reg, known_reg);
14029  	}));
14030  }
14031  
14032  static int check_cond_jmp_op(struct bpf_verifier_env *env,
14033  			     struct bpf_insn *insn, int *insn_idx)
14034  {
14035  	struct bpf_verifier_state *this_branch = env->cur_state;
14036  	struct bpf_verifier_state *other_branch;
14037  	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
14038  	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
14039  	struct bpf_reg_state *eq_branch_regs;
14040  	u8 opcode = BPF_OP(insn->code);
14041  	bool is_jmp32;
14042  	int pred = -1;
14043  	int err;
14044  
14045  	/* Only conditional jumps are expected to reach here. */
14046  	if (opcode == BPF_JA || opcode > BPF_JSLE) {
14047  		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
14048  		return -EINVAL;
14049  	}
14050  
14051  	/* check src2 operand */
14052  	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
14053  	if (err)
14054  		return err;
14055  
14056  	dst_reg = &regs[insn->dst_reg];
14057  	if (BPF_SRC(insn->code) == BPF_X) {
14058  		if (insn->imm != 0) {
14059  			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
14060  			return -EINVAL;
14061  		}
14062  
14063  		/* check src1 operand */
14064  		err = check_reg_arg(env, insn->src_reg, SRC_OP);
14065  		if (err)
14066  			return err;
14067  
14068  		src_reg = &regs[insn->src_reg];
14069  		if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) &&
14070  		    is_pointer_value(env, insn->src_reg)) {
14071  			verbose(env, "R%d pointer comparison prohibited\n",
14072  				insn->src_reg);
14073  			return -EACCES;
14074  		}
14075  	} else {
14076  		if (insn->src_reg != BPF_REG_0) {
14077  			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
14078  			return -EINVAL;
14079  		}
14080  	}
14081  
14082  	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
14083  
14084  	if (BPF_SRC(insn->code) == BPF_K) {
14085  		pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
14086  	} else if (src_reg->type == SCALAR_VALUE &&
14087  		   is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
14088  		pred = is_branch_taken(dst_reg,
14089  				       tnum_subreg(src_reg->var_off).value,
14090  				       opcode,
14091  				       is_jmp32);
14092  	} else if (src_reg->type == SCALAR_VALUE &&
14093  		   !is_jmp32 && tnum_is_const(src_reg->var_off)) {
14094  		pred = is_branch_taken(dst_reg,
14095  				       src_reg->var_off.value,
14096  				       opcode,
14097  				       is_jmp32);
14098  	} else if (dst_reg->type == SCALAR_VALUE &&
14099  		   is_jmp32 && tnum_is_const(tnum_subreg(dst_reg->var_off))) {
14100  		pred = is_branch_taken(src_reg,
14101  				       tnum_subreg(dst_reg->var_off).value,
14102  				       flip_opcode(opcode),
14103  				       is_jmp32);
14104  	} else if (dst_reg->type == SCALAR_VALUE &&
14105  		   !is_jmp32 && tnum_is_const(dst_reg->var_off)) {
14106  		pred = is_branch_taken(src_reg,
14107  				       dst_reg->var_off.value,
14108  				       flip_opcode(opcode),
14109  				       is_jmp32);
14110  	} else if (reg_is_pkt_pointer_any(dst_reg) &&
14111  		   reg_is_pkt_pointer_any(src_reg) &&
14112  		   !is_jmp32) {
14113  		pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
14114  	}
14115  
14116  	if (pred >= 0) {
14117  		/* If we get here with a dst_reg pointer type it is because
14118  		 * above is_branch_taken() special cased the 0 comparison.
14119  		 */
14120  		if (!__is_pointer_value(false, dst_reg))
14121  			err = mark_chain_precision(env, insn->dst_reg);
14122  		if (BPF_SRC(insn->code) == BPF_X && !err &&
14123  		    !__is_pointer_value(false, src_reg))
14124  			err = mark_chain_precision(env, insn->src_reg);
14125  		if (err)
14126  			return err;
14127  	}
14128  
14129  	if (pred == 1) {
14130  		/* Only follow the goto, ignore fall-through. If needed, push
14131  		 * the fall-through branch for simulation under speculative
14132  		 * execution.
14133  		 */
14134  		if (!env->bypass_spec_v1 &&
14135  		    !sanitize_speculative_path(env, insn, *insn_idx + 1,
14136  					       *insn_idx))
14137  			return -EFAULT;
14138  		*insn_idx += insn->off;
14139  		return 0;
14140  	} else if (pred == 0) {
14141  		/* Only follow the fall-through branch, since that's where the
14142  		 * program will go. If needed, push the goto branch for
14143  		 * simulation under speculative execution.
14144  		 */
14145  		if (!env->bypass_spec_v1 &&
14146  		    !sanitize_speculative_path(env, insn,
14147  					       *insn_idx + insn->off + 1,
14148  					       *insn_idx))
14149  			return -EFAULT;
14150  		return 0;
14151  	}
14152  
14153  	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
14154  				  false);
14155  	if (!other_branch)
14156  		return -EFAULT;
14157  	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
14158  
14159  	/* detect if we are comparing against a constant value so we can adjust
14160  	 * our min/max values for our dst register.
14161  	 * this is only legit if both are scalars (or pointers to the same
14162  	 * object, I suppose, see the PTR_MAYBE_NULL related if block below),
14163  	 * because otherwise the different base pointers mean the offsets aren't
14164  	 * comparable.
14165  	 */
14166  	if (BPF_SRC(insn->code) == BPF_X) {
14167  		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
14168  
14169  		if (dst_reg->type == SCALAR_VALUE &&
14170  		    src_reg->type == SCALAR_VALUE) {
14171  			if (tnum_is_const(src_reg->var_off) ||
14172  			    (is_jmp32 &&
14173  			     tnum_is_const(tnum_subreg(src_reg->var_off))))
14174  				reg_set_min_max(&other_branch_regs[insn->dst_reg],
14175  						dst_reg,
14176  						src_reg->var_off.value,
14177  						tnum_subreg(src_reg->var_off).value,
14178  						opcode, is_jmp32);
14179  			else if (tnum_is_const(dst_reg->var_off) ||
14180  				 (is_jmp32 &&
14181  				  tnum_is_const(tnum_subreg(dst_reg->var_off))))
14182  				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
14183  						    src_reg,
14184  						    dst_reg->var_off.value,
14185  						    tnum_subreg(dst_reg->var_off).value,
14186  						    opcode, is_jmp32);
14187  			else if (!is_jmp32 &&
14188  				 (opcode == BPF_JEQ || opcode == BPF_JNE))
14189  				/* Comparing for equality, we can combine knowledge */
14190  				reg_combine_min_max(&other_branch_regs[insn->src_reg],
14191  						    &other_branch_regs[insn->dst_reg],
14192  						    src_reg, dst_reg, opcode);
14193  			if (src_reg->id &&
14194  			    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
14195  				find_equal_scalars(this_branch, src_reg);
14196  				find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
14197  			}
14198  
14199  		}
14200  	} else if (dst_reg->type == SCALAR_VALUE) {
14201  		reg_set_min_max(&other_branch_regs[insn->dst_reg],
14202  					dst_reg, insn->imm, (u32)insn->imm,
14203  					opcode, is_jmp32);
14204  	}
14205  
14206  	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
14207  	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
14208  		find_equal_scalars(this_branch, dst_reg);
14209  		find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
14210  	}
14211  
14212  	/* if one pointer register is compared to another pointer
14213  	 * register check if PTR_MAYBE_NULL could be lifted.
14214  	 * E.g. register A - maybe null
14215  	 *      register B - not null
14216  	 * for JNE A, B, ... - A is not null in the false branch;
14217  	 * for JEQ A, B, ... - A is not null in the true branch.
14218  	 *
14219  	 * A PTR_TO_BTF_ID points to a kernel struct that does not
14220  	 * need to be null checked by the BPF program, i.e. it could
14221  	 * be null even without the PTR_MAYBE_NULL marking, so only
14222  	 * propagate nullness when neither reg is of that type.
14223  	 */
14224  	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
14225  	    __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
14226  	    type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
14227  	    base_type(src_reg->type) != PTR_TO_BTF_ID &&
14228  	    base_type(dst_reg->type) != PTR_TO_BTF_ID) {
14229  		eq_branch_regs = NULL;
14230  		switch (opcode) {
14231  		case BPF_JEQ:
14232  			eq_branch_regs = other_branch_regs;
14233  			break;
14234  		case BPF_JNE:
14235  			eq_branch_regs = regs;
14236  			break;
14237  		default:
14238  			/* do nothing */
14239  			break;
14240  		}
14241  		if (eq_branch_regs) {
14242  			if (type_may_be_null(src_reg->type))
14243  				mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
14244  			else
14245  				mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
14246  		}
14247  	}
14248  
14249  	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
14250  	 * NOTE: these optimizations below are related with pointer comparison
14251  	 *       which will never be JMP32.
14252  	 */
14253  	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
14254  	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
14255  	    type_may_be_null(dst_reg->type)) {
14256  		/* Mark all identical registers in each branch as either
14257  		 * safe or unknown depending on the R == 0 or R != 0 conditional.
14258  		 */
14259  		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
14260  				      opcode == BPF_JNE);
14261  		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
14262  				      opcode == BPF_JEQ);
14263  	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
14264  					   this_branch, other_branch) &&
14265  		   is_pointer_value(env, insn->dst_reg)) {
14266  		verbose(env, "R%d pointer comparison prohibited\n",
14267  			insn->dst_reg);
14268  		return -EACCES;
14269  	}
14270  	if (env->log.level & BPF_LOG_LEVEL)
14271  		print_insn_state(env, this_branch->frame[this_branch->curframe]);
14272  	return 0;
14273  }
14274  
14275  /* verify BPF_LD_IMM64 instruction */
14276  static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
14277  {
14278  	struct bpf_insn_aux_data *aux = cur_aux(env);
14279  	struct bpf_reg_state *regs = cur_regs(env);
14280  	struct bpf_reg_state *dst_reg;
14281  	struct bpf_map *map;
14282  	int err;
14283  
14284  	if (BPF_SIZE(insn->code) != BPF_DW) {
14285  		verbose(env, "invalid BPF_LD_IMM insn\n");
14286  		return -EINVAL;
14287  	}
14288  	if (insn->off != 0) {
14289  		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
14290  		return -EINVAL;
14291  	}
14292  
14293  	err = check_reg_arg(env, insn->dst_reg, DST_OP);
14294  	if (err)
14295  		return err;
14296  
14297  	dst_reg = &regs[insn->dst_reg];
14298  	if (insn->src_reg == 0) {
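		/* A plain BPF_LD_IMM64 spans two insns: the low 32 bits are in
		 * this insn's imm and the high 32 bits in the next insn's imm.
		 */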
14299  		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
14300  
14301  		dst_reg->type = SCALAR_VALUE;
14302  		__mark_reg_known(&regs[insn->dst_reg], imm);
14303  		return 0;
14304  	}
14305  
14306  	/* All special src_reg cases are listed below. From this point onwards
14307  	 * we either succeed and assign a corresponding dst_reg->type after
14308  	 * zeroing the offset, or fail and reject the program.
14309  	 */
14310  	mark_reg_known_zero(env, regs, insn->dst_reg);
14311  
14312  	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
14313  		dst_reg->type = aux->btf_var.reg_type;
14314  		switch (base_type(dst_reg->type)) {
14315  		case PTR_TO_MEM:
14316  			dst_reg->mem_size = aux->btf_var.mem_size;
14317  			break;
14318  		case PTR_TO_BTF_ID:
14319  			dst_reg->btf = aux->btf_var.btf;
14320  			dst_reg->btf_id = aux->btf_var.btf_id;
14321  			break;
14322  		default:
14323  			verbose(env, "bpf verifier is misconfigured\n");
14324  			return -EFAULT;
14325  		}
14326  		return 0;
14327  	}
14328  
14329  	if (insn->src_reg == BPF_PSEUDO_FUNC) {
14330  		struct bpf_prog_aux *aux = env->prog->aux;
14331  		u32 subprogno = find_subprog(env,
14332  					     env->insn_idx + insn->imm + 1);
14333  
14334  		if (!aux->func_info) {
14335  			verbose(env, "missing btf func_info\n");
14336  			return -EINVAL;
14337  		}
14338  		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
14339  			verbose(env, "callback function not static\n");
14340  			return -EINVAL;
14341  		}
14342  
14343  		dst_reg->type = PTR_TO_FUNC;
14344  		dst_reg->subprogno = subprogno;
14345  		return 0;
14346  	}
14347  
14348  	map = env->used_maps[aux->map_index];
14349  	dst_reg->map_ptr = map;
14350  
14351  	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
14352  	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
14353  		dst_reg->type = PTR_TO_MAP_VALUE;
14354  		dst_reg->off = aux->map_off;
14355  		WARN_ON_ONCE(map->max_entries != 1);
14356  		/* We want reg->id to be same (0) as map_value is not distinct */
14357  	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
14358  		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
14359  		dst_reg->type = CONST_PTR_TO_MAP;
14360  	} else {
14361  		verbose(env, "bpf verifier is misconfigured\n");
14362  		return -EINVAL;
14363  	}
14364  
14365  	return 0;
14366  }
14367  
14368  static bool may_access_skb(enum bpf_prog_type type)
14369  {
14370  	switch (type) {
14371  	case BPF_PROG_TYPE_SOCKET_FILTER:
14372  	case BPF_PROG_TYPE_SCHED_CLS:
14373  	case BPF_PROG_TYPE_SCHED_ACT:
14374  		return true;
14375  	default:
14376  		return false;
14377  	}
14378  }
14379  
14380  /* verify safety of LD_ABS|LD_IND instructions:
14381   * - they can only appear in the programs where ctx == skb
14382   * - since they are wrappers of function calls, they scratch R1-R5 registers,
14383   *   preserve R6-R9, and store return value into R0
14384   *
14385   * Implicit input:
14386   *   ctx == skb == R6 == CTX
14387   *
14388   * Explicit input:
14389   *   SRC == any register
14390   *   IMM == 32-bit immediate
14391   *
14392   * Output:
14393   *   R0 - 8/16/32-bit skb data converted to cpu endianness
14394   */
14395  static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
14396  {
14397  	struct bpf_reg_state *regs = cur_regs(env);
14398  	static const int ctx_reg = BPF_REG_6;
14399  	u8 mode = BPF_MODE(insn->code);
14400  	int i, err;
14401  
14402  	if (!may_access_skb(resolve_prog_type(env->prog))) {
14403  		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
14404  		return -EINVAL;
14405  	}
14406  
14407  	if (!env->ops->gen_ld_abs) {
14408  		verbose(env, "bpf verifier is misconfigured\n");
14409  		return -EINVAL;
14410  	}
14411  
14412  	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
14413  	    BPF_SIZE(insn->code) == BPF_DW ||
14414  	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
14415  		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
14416  		return -EINVAL;
14417  	}
14418  
14419  	/* check whether implicit source operand (register R6) is readable */
14420  	err = check_reg_arg(env, ctx_reg, SRC_OP);
14421  	if (err)
14422  		return err;
14423  
14424  	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
14425  	 * gen_ld_abs() may terminate the program at runtime, leading to
14426  	 * reference leak.
14427  	 * a reference leak.
14428  	err = check_reference_leak(env);
14429  	if (err) {
14430  		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
14431  		return err;
14432  	}
14433  
14434  	if (env->cur_state->active_lock.ptr) {
14435  		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
14436  		return -EINVAL;
14437  	}
14438  
14439  	if (env->cur_state->active_rcu_lock) {
14440  		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
14441  		return -EINVAL;
14442  	}
14443  
14444  	if (regs[ctx_reg].type != PTR_TO_CTX) {
14445  		verbose(env,
14446  			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
14447  		return -EINVAL;
14448  	}
14449  
14450  	if (mode == BPF_IND) {
14451  		/* check explicit source operand */
14452  		err = check_reg_arg(env, insn->src_reg, SRC_OP);
14453  		if (err)
14454  			return err;
14455  	}
14456  
14457  	err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
14458  	if (err < 0)
14459  		return err;
14460  
14461  	/* reset caller saved regs to unreadable */
14462  	for (i = 0; i < CALLER_SAVED_REGS; i++) {
14463  		mark_reg_not_init(env, regs, caller_saved[i]);
14464  		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
14465  	}
14466  
14467  	/* mark destination R0 register as readable, since it contains
14468  	 * the value fetched from the packet.
14469  	 * Already marked as written above.
14470  	 */
14471  	mark_reg_unknown(env, regs, BPF_REG_0);
14472  	/* ld_abs loads up to 32-bit skb data. */
14473  	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
14474  	return 0;
14475  }
14476  
14477  static int check_return_code(struct bpf_verifier_env *env)
14478  {
14479  	struct tnum enforce_attach_type_range = tnum_unknown;
14480  	const struct bpf_prog *prog = env->prog;
14481  	struct bpf_reg_state *reg;
14482  	struct tnum range = tnum_range(0, 1), const_0 = tnum_const(0);
14483  	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
14484  	int err;
14485  	struct bpf_func_state *frame = env->cur_state->frame[0];
14486  	const bool is_subprog = frame->subprogno;
14487  
14488  	/* LSM and struct_ops func-ptr's return type could be "void" */
14489  	if (!is_subprog) {
14490  		switch (prog_type) {
14491  		case BPF_PROG_TYPE_LSM:
14492  			if (prog->expected_attach_type == BPF_LSM_CGROUP)
14493  				/* See below, can be 0 or 0-1 depending on hook. */
14494  				break;
14495  			fallthrough;
14496  		case BPF_PROG_TYPE_STRUCT_OPS:
14497  			if (!prog->aux->attach_func_proto->type)
14498  				return 0;
14499  			break;
14500  		default:
14501  			break;
14502  		}
14503  	}
14504  
14505  	/* The eBPF calling convention is such that R0 is used
14506  	 * to return the value from the eBPF program.
14507  	 * Make sure that it is readable at this point, i.e. at
14508  	 * bpf_exit, which means the program wrote something
14509  	 * into it earlier.
14510  	 */
14511  	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
14512  	if (err)
14513  		return err;
14514  
14515  	if (is_pointer_value(env, BPF_REG_0)) {
14516  		verbose(env, "R0 leaks addr as return value\n");
14517  		return -EACCES;
14518  	}
14519  
14520  	reg = cur_regs(env) + BPF_REG_0;
14521  
14522  	if (frame->in_async_callback_fn) {
14523  		/* enforce return zero from async callbacks like timer */
14524  		if (reg->type != SCALAR_VALUE) {
14525  			verbose(env, "In async callback the register R0 is not a known value (%s)\n",
14526  				reg_type_str(env, reg->type));
14527  			return -EINVAL;
14528  		}
14529  
14530  		if (!tnum_in(const_0, reg->var_off)) {
14531  			verbose_invalid_scalar(env, reg, &const_0, "async callback", "R0");
14532  			return -EINVAL;
14533  		}
14534  		return 0;
14535  	}
14536  
14537  	if (is_subprog) {
14538  		if (reg->type != SCALAR_VALUE) {
14539  			verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
14540  				reg_type_str(env, reg->type));
14541  			return -EINVAL;
14542  		}
14543  		return 0;
14544  	}
14545  
14546  	switch (prog_type) {
14547  	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
14548  		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
14549  		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
14550  		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
14551  		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
14552  		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
14553  		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
14554  			range = tnum_range(1, 1);
14555  		if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
14556  		    env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
14557  			range = tnum_range(0, 3);
14558  		break;
14559  	case BPF_PROG_TYPE_CGROUP_SKB:
14560  		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
14561  			range = tnum_range(0, 3);
14562  			enforce_attach_type_range = tnum_range(2, 3);
14563  		}
14564  		break;
14565  	case BPF_PROG_TYPE_CGROUP_SOCK:
14566  	case BPF_PROG_TYPE_SOCK_OPS:
14567  	case BPF_PROG_TYPE_CGROUP_DEVICE:
14568  	case BPF_PROG_TYPE_CGROUP_SYSCTL:
14569  	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
14570  		break;
14571  	case BPF_PROG_TYPE_RAW_TRACEPOINT:
14572  		if (!env->prog->aux->attach_btf_id)
14573  			return 0;
14574  		range = tnum_const(0);
14575  		break;
14576  	case BPF_PROG_TYPE_TRACING:
14577  		switch (env->prog->expected_attach_type) {
14578  		case BPF_TRACE_FENTRY:
14579  		case BPF_TRACE_FEXIT:
14580  			range = tnum_const(0);
14581  			break;
14582  		case BPF_TRACE_RAW_TP:
14583  		case BPF_MODIFY_RETURN:
14584  			return 0;
14585  		case BPF_TRACE_ITER:
14586  			break;
14587  		default:
14588  			return -ENOTSUPP;
14589  		}
14590  		break;
14591  	case BPF_PROG_TYPE_SK_LOOKUP:
14592  		range = tnum_range(SK_DROP, SK_PASS);
14593  		break;
14594  
14595  	case BPF_PROG_TYPE_LSM:
14596  		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
14597  			/* Regular BPF_PROG_TYPE_LSM programs can return
14598  			 * any value.
14599  			 */
14600  			return 0;
14601  		}
14602  		if (!env->prog->aux->attach_func_proto->type) {
14603  			/* Make sure programs that attach to void
14604  			 * hooks don't try to modify return value.
14605  			 */
14606  			range = tnum_range(1, 1);
14607  		}
14608  		break;
14609  
14610  	case BPF_PROG_TYPE_NETFILTER:
14611  		range = tnum_range(NF_DROP, NF_ACCEPT);
14612  		break;
14613  	case BPF_PROG_TYPE_EXT:
14614  		/* freplace program can return anything as its return value
14615  		 * depends on the to-be-replaced kernel func or bpf program.
14616  		 */
14617  	default:
14618  		return 0;
14619  	}
14620  
14621  	if (reg->type != SCALAR_VALUE) {
14622  		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
14623  			reg_type_str(env, reg->type));
14624  		return -EINVAL;
14625  	}
14626  
14627  	if (!tnum_in(range, reg->var_off)) {
14628  		verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
14629  		if (prog->expected_attach_type == BPF_LSM_CGROUP &&
14630  		    prog_type == BPF_PROG_TYPE_LSM &&
14631  		    !prog->aux->attach_func_proto->type)
14632  			verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
14633  		return -EINVAL;
14634  	}
14635  
14636  	if (!tnum_is_unknown(enforce_attach_type_range) &&
14637  	    tnum_in(enforce_attach_type_range, reg->var_off))
14638  		env->prog->enforce_expected_attach_type = 1;
14639  	return 0;
14640  }
14641  
14642  /* non-recursive DFS pseudo code
14643   * 1  procedure DFS-iterative(G,v):
14644   * 2      label v as discovered
14645   * 3      let S be a stack
14646   * 4      S.push(v)
14647   * 5      while S is not empty
14648   * 6            t <- S.peek()
14649   * 7            if t is what we're looking for:
14650   * 8                return t
14651   * 9            for all edges e in G.adjacentEdges(t) do
14652   * 10               if edge e is already labelled
14653   * 11                   continue with the next edge
14654   * 12               w <- G.adjacentVertex(t,e)
14655   * 13               if vertex w is not discovered and not explored
14656   * 14                   label e as tree-edge
14657   * 15                   label w as discovered
14658   * 16                   S.push(w)
14659   * 17                   continue at 5
14660   * 18               else if vertex w is discovered
14661   * 19                   label e as back-edge
14662   * 20               else
14663   * 21                   // vertex w is explored
14664   * 22                   label e as forward- or cross-edge
14665   * 23           label t as explored
14666   * 24           S.pop()
14667   *
14668   * convention:
14669   * 0x10 - discovered
14670   * 0x11 - discovered and fall-through edge labelled
14671   * 0x12 - discovered and fall-through and branch edges labelled
14672   * 0x20 - explored
14673   */
14674  
14675  enum {
14676  	DISCOVERED = 0x10,
14677  	EXPLORED = 0x20,
14678  	FALLTHROUGH = 1,
14679  	BRANCH = 2,
14680  };
14681  
14682  static u32 state_htab_size(struct bpf_verifier_env *env)
14683  {
14684  	return env->prog->len;
14685  }
14686  
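/* Hash on the instruction index XOR'ed with the callsite, so that the same
 * insn reached from different call sites lands in different buckets.
 */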
14687  static struct bpf_verifier_state_list **explored_state(
14688  					struct bpf_verifier_env *env,
14689  					int idx)
14690  {
14691  	struct bpf_verifier_state *cur = env->cur_state;
14692  	struct bpf_func_state *state = cur->frame[cur->curframe];
14693  
14694  	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
14695  }
14696  
14697  static void mark_prune_point(struct bpf_verifier_env *env, int idx)
14698  {
14699  	env->insn_aux_data[idx].prune_point = true;
14700  }
14701  
14702  static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
14703  {
14704  	return env->insn_aux_data[insn_idx].prune_point;
14705  }
14706  
14707  static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
14708  {
14709  	env->insn_aux_data[idx].force_checkpoint = true;
14710  }
14711  
14712  static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
14713  {
14714  	return env->insn_aux_data[insn_idx].force_checkpoint;
14715  }
14716  
14718  enum {
14719  	DONE_EXPLORING = 0,
14720  	KEEP_EXPLORING = 1,
14721  };
14722  
14723  /* t, w, e - match pseudo-code above:
14724   * t - index of current instruction
14725   * w - next instruction
14726   * e - edge
14727   */
14728  static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
14729  		     bool loop_ok)
14730  {
14731  	int *insn_stack = env->cfg.insn_stack;
14732  	int *insn_state = env->cfg.insn_state;
14733  
14734  	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
14735  		return DONE_EXPLORING;
14736  
14737  	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
14738  		return DONE_EXPLORING;
14739  
14740  	if (w < 0 || w >= env->prog->len) {
14741  		verbose_linfo(env, t, "%d: ", t);
14742  		verbose(env, "jump out of range from insn %d to %d\n", t, w);
14743  		return -EINVAL;
14744  	}
14745  
14746  	if (e == BRANCH) {
14747  		/* mark branch target for state pruning */
14748  		mark_prune_point(env, w);
14749  		mark_jmp_point(env, w);
14750  	}
14751  
14752  	if (insn_state[w] == 0) {
14753  		/* tree-edge */
14754  		insn_state[t] = DISCOVERED | e;
14755  		insn_state[w] = DISCOVERED;
14756  		if (env->cfg.cur_stack >= env->prog->len)
14757  			return -E2BIG;
14758  		insn_stack[env->cfg.cur_stack++] = w;
14759  		return KEEP_EXPLORING;
14760  	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
14761  		if (loop_ok && env->bpf_capable)
14762  			return DONE_EXPLORING;
14763  		verbose_linfo(env, t, "%d: ", t);
14764  		verbose_linfo(env, w, "%d: ", w);
14765  		verbose(env, "back-edge from insn %d to %d\n", t, w);
14766  		return -EINVAL;
14767  	} else if (insn_state[w] == EXPLORED) {
14768  		/* forward- or cross-edge */
14769  		insn_state[t] = DISCOVERED | e;
14770  	} else {
14771  		verbose(env, "insn state internal bug\n");
14772  		return -EFAULT;
14773  	}
14774  	return DONE_EXPLORING;
14775  }
14776  
14777  static int visit_func_call_insn(int t, struct bpf_insn *insns,
14778  				struct bpf_verifier_env *env,
14779  				bool visit_callee)
14780  {
14781  	int ret;
14782  
14783  	ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
14784  	if (ret)
14785  		return ret;
14786  
14787  	mark_prune_point(env, t + 1);
14788  	/* when we exit from subprog, we need to record non-linear history */
14789  	mark_jmp_point(env, t + 1);
14790  
14791  	if (visit_callee) {
14792  		mark_prune_point(env, t);
14793  		ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
14794  				/* It's ok to allow recursion from CFG point of
14795  				 * view. __check_func_call() will do the actual
14796  				 * check.
14797  				 */
14798  				bpf_pseudo_func(insns + t));
14799  	}
14800  	return ret;
14801  }
14802  
14803  /* Visits the instruction at index t and returns one of the following:
14804   *  < 0 - an error occurred
14805   *  DONE_EXPLORING - the instruction was fully explored
14806   *  KEEP_EXPLORING - there is still work to be done before it is fully explored
14807   */
14808  static int visit_insn(int t, struct bpf_verifier_env *env)
14809  {
14810  	struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
14811  	int ret, off;
14812  
14813  	if (bpf_pseudo_func(insn))
14814  		return visit_func_call_insn(t, insns, env, true);
14815  
14816  	/* All non-branch instructions have a single fall-through edge. */
14817  	if (BPF_CLASS(insn->code) != BPF_JMP &&
14818  	    BPF_CLASS(insn->code) != BPF_JMP32)
14819  		return push_insn(t, t + 1, FALLTHROUGH, env, false);
14820  
14821  	switch (BPF_OP(insn->code)) {
14822  	case BPF_EXIT:
14823  		return DONE_EXPLORING;
14824  
14825  	case BPF_CALL:
14826  		if (insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback)
14827  			/* Mark this call insn as a prune point to trigger
14828  			 * is_state_visited() check before call itself is
14829  			 * processed by __check_func_call(). Otherwise new
14830  			 * async state will be pushed for further exploration.
14831  			 */
14832  			mark_prune_point(env, t);
14833  		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
14834  			struct bpf_kfunc_call_arg_meta meta;
14835  
14836  			ret = fetch_kfunc_meta(env, insn, &meta, NULL);
14837  			if (ret == 0 && is_iter_next_kfunc(&meta)) {
14838  				mark_prune_point(env, t);
14839  				/* Checking and saving state checkpoints at iter_next() call
14840  				 * is crucial for fast convergence of open-coded iterator loop
14841  				 * logic, so we need to force it. If we don't do that,
14842  				 * is_state_visited() might skip saving a checkpoint, causing
14843  				 * an unnecessarily long sequence of non-checkpointed
14844  				 * instructions and jumps, leading to exhaustion of the
14845  				 * jump history buffer, and potentially other undesired outcomes.
14846  				 * It is expected that with correct open-coded iterators
14847  				 * convergence will happen quickly, so we don't run a risk of
14848  				 * exhausting memory.
14849  				 */
14850  				mark_force_checkpoint(env, t);
14851  			}
14852  		}
14853  		return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
14854  
14855  	case BPF_JA:
14856  		if (BPF_SRC(insn->code) != BPF_K)
14857  			return -EINVAL;
14858  
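		/* BPF_JMP | BPF_JA keeps the jump offset in the 16-bit off
		 * field, while BPF_JMP32 | BPF_JA (the "gotol" form) carries
		 * a 32-bit offset in imm.
		 */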
14859  		if (BPF_CLASS(insn->code) == BPF_JMP)
14860  			off = insn->off;
14861  		else
14862  			off = insn->imm;
14863  
14864  		/* unconditional jump with single edge */
14865  		ret = push_insn(t, t + off + 1, FALLTHROUGH, env,
14866  				true);
14867  		if (ret)
14868  			return ret;
14869  
14870  		mark_prune_point(env, t + off + 1);
14871  		mark_jmp_point(env, t + off + 1);
14872  
14873  		return ret;
14874  
14875  	default:
14876  		/* conditional jump with two edges */
14877  		mark_prune_point(env, t);
14878  
14879  		ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
14880  		if (ret)
14881  			return ret;
14882  
14883  		return push_insn(t, t + insn->off + 1, BRANCH, env, true);
14884  	}
14885  }
14886  
14887  /* non-recursive depth-first-search to detect loops in BPF program
14888   * loop == back-edge in directed graph
14889   */
14890  static int check_cfg(struct bpf_verifier_env *env)
14891  {
14892  	int insn_cnt = env->prog->len;
14893  	int *insn_stack, *insn_state;
14894  	int ret = 0;
14895  	int i;
14896  
14897  	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
14898  	if (!insn_state)
14899  		return -ENOMEM;
14900  
14901  	insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
14902  	if (!insn_stack) {
14903  		kvfree(insn_state);
14904  		return -ENOMEM;
14905  	}
14906  
14907  	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
14908  	insn_stack[0] = 0; /* 0 is the first instruction */
14909  	env->cfg.cur_stack = 1;
14910  
14911  	while (env->cfg.cur_stack > 0) {
14912  		int t = insn_stack[env->cfg.cur_stack - 1];
14913  
14914  		ret = visit_insn(t, env);
14915  		switch (ret) {
14916  		case DONE_EXPLORING:
14917  			insn_state[t] = EXPLORED;
14918  			env->cfg.cur_stack--;
14919  			break;
14920  		case KEEP_EXPLORING:
14921  			break;
14922  		default:
14923  			if (ret > 0) {
14924  				verbose(env, "visit_insn internal bug\n");
14925  				ret = -EFAULT;
14926  			}
14927  			goto err_free;
14928  		}
14929  	}
14930  
14931  	if (env->cfg.cur_stack < 0) {
14932  		verbose(env, "pop stack internal bug\n");
14933  		ret = -EFAULT;
14934  		goto err_free;
14935  	}
14936  
14937  	for (i = 0; i < insn_cnt; i++) {
14938  		if (insn_state[i] != EXPLORED) {
14939  			verbose(env, "unreachable insn %d\n", i);
14940  			ret = -EINVAL;
14941  			goto err_free;
14942  		}
14943  	}
14944  	ret = 0; /* cfg looks good */
14945  
14946  err_free:
14947  	kvfree(insn_state);
14948  	kvfree(insn_stack);
14949  	env->cfg.insn_state = env->cfg.insn_stack = NULL;
14950  	return ret;
14951  }
14952  
14953  static int check_abnormal_return(struct bpf_verifier_env *env)
14954  {
14955  	int i;
14956  
14957  	for (i = 1; i < env->subprog_cnt; i++) {
14958  		if (env->subprog_info[i].has_ld_abs) {
14959  			verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
14960  			return -EINVAL;
14961  		}
14962  		if (env->subprog_info[i].has_tail_call) {
14963  			verbose(env, "tail_call is not allowed in subprogs without BTF\n");
14964  			return -EINVAL;
14965  		}
14966  	}
14967  	return 0;
14968  }
14969  
14970  /* The minimum supported BTF func info size */
14971  #define MIN_BPF_FUNCINFO_SIZE	8
14972  #define MAX_FUNCINFO_REC_SIZE	252
14973  
14974  static int check_btf_func(struct bpf_verifier_env *env,
14975  			  const union bpf_attr *attr,
14976  			  bpfptr_t uattr)
14977  {
14978  	const struct btf_type *type, *func_proto, *ret_type;
14979  	u32 i, nfuncs, urec_size, min_size;
14980  	u32 krec_size = sizeof(struct bpf_func_info);
14981  	struct bpf_func_info *krecord;
14982  	struct bpf_func_info_aux *info_aux = NULL;
14983  	struct bpf_prog *prog;
14984  	const struct btf *btf;
14985  	bpfptr_t urecord;
14986  	u32 prev_offset = 0;
14987  	bool scalar_return;
14988  	int ret = -ENOMEM;
14989  
14990  	nfuncs = attr->func_info_cnt;
14991  	if (!nfuncs) {
14992  		if (check_abnormal_return(env))
14993  			return -EINVAL;
14994  		return 0;
14995  	}
14996  
14997  	if (nfuncs != env->subprog_cnt) {
14998  		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
14999  		return -EINVAL;
15000  	}
15001  
15002  	urec_size = attr->func_info_rec_size;
15003  	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
15004  	    urec_size > MAX_FUNCINFO_REC_SIZE ||
15005  	    urec_size % sizeof(u32)) {
15006  		verbose(env, "invalid func info rec size %u\n", urec_size);
15007  		return -EINVAL;
15008  	}
15009  
15010  	prog = env->prog;
15011  	btf = prog->aux->btf;
15012  
15013  	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
15014  	min_size = min_t(u32, krec_size, urec_size);
15015  
15016  	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
15017  	if (!krecord)
15018  		return -ENOMEM;
15019  	info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
15020  	if (!info_aux)
15021  		goto err_free;
15022  
15023  	for (i = 0; i < nfuncs; i++) {
15024  		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
15025  		if (ret) {
15026  			if (ret == -E2BIG) {
15027  				verbose(env, "nonzero trailing record in func info");
15028  				/* set the size kernel expects so loader can zero
15029  				 * out the rest of the record.
15030  				 */
15031  				if (copy_to_bpfptr_offset(uattr,
15032  							  offsetof(union bpf_attr, func_info_rec_size),
15033  							  &min_size, sizeof(min_size)))
15034  					ret = -EFAULT;
15035  			}
15036  			goto err_free;
15037  		}
15038  
15039  		if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
15040  			ret = -EFAULT;
15041  			goto err_free;
15042  		}
15043  
15044  		/* check insn_off */
15045  		ret = -EINVAL;
15046  		if (i == 0) {
15047  			if (krecord[i].insn_off) {
15048  				verbose(env,
15049  					"nonzero insn_off %u for the first func info record",
15050  					krecord[i].insn_off);
15051  				goto err_free;
15052  			}
15053  		} else if (krecord[i].insn_off <= prev_offset) {
15054  			verbose(env,
15055  				"same or smaller insn offset (%u) than previous func info record (%u)",
15056  				krecord[i].insn_off, prev_offset);
15057  			goto err_free;
15058  		}
15059  
15060  		if (env->subprog_info[i].start != krecord[i].insn_off) {
15061  			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
15062  			goto err_free;
15063  		}
15064  
15065  		/* check type_id */
15066  		type = btf_type_by_id(btf, krecord[i].type_id);
15067  		if (!type || !btf_type_is_func(type)) {
15068  			verbose(env, "invalid type id %d in func info",
15069  				krecord[i].type_id);
15070  			goto err_free;
15071  		}
15072  		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
15073  
15074  		func_proto = btf_type_by_id(btf, type->type);
15075  		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
15076  			/* btf_func_check() already verified it during BTF load */
15077  			goto err_free;
15078  		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
15079  		scalar_return =
15080  			btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
15081  		if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
15082  			verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
15083  			goto err_free;
15084  		}
15085  		if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
15086  			verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
15087  			goto err_free;
15088  		}
15089  
15090  		prev_offset = krecord[i].insn_off;
15091  		bpfptr_add(&urecord, urec_size);
15092  	}
15093  
15094  	prog->aux->func_info = krecord;
15095  	prog->aux->func_info_cnt = nfuncs;
15096  	prog->aux->func_info_aux = info_aux;
15097  	return 0;
15098  
15099  err_free:
15100  	kvfree(krecord);
15101  	kfree(info_aux);
15102  	return ret;
15103  }
15104  
15105  static void adjust_btf_func(struct bpf_verifier_env *env)
15106  {
15107  	struct bpf_prog_aux *aux = env->prog->aux;
15108  	int i;
15109  
15110  	if (!aux->func_info)
15111  		return;
15112  
15113  	for (i = 0; i < env->subprog_cnt; i++)
15114  		aux->func_info[i].insn_off = env->subprog_info[i].start;
15115  }
15116  
15117  #define MIN_BPF_LINEINFO_SIZE	offsetofend(struct bpf_line_info, line_col)
15118  #define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
15119  
15120  static int check_btf_line(struct bpf_verifier_env *env,
15121  			  const union bpf_attr *attr,
15122  			  bpfptr_t uattr)
15123  {
15124  	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
15125  	struct bpf_subprog_info *sub;
15126  	struct bpf_line_info *linfo;
15127  	struct bpf_prog *prog;
15128  	const struct btf *btf;
15129  	bpfptr_t ulinfo;
15130  	int err;
15131  
15132  	nr_linfo = attr->line_info_cnt;
15133  	if (!nr_linfo)
15134  		return 0;
15135  	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
15136  		return -EINVAL;
15137  
15138  	rec_size = attr->line_info_rec_size;
15139  	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
15140  	    rec_size > MAX_LINEINFO_REC_SIZE ||
15141  	    rec_size & (sizeof(u32) - 1))
15142  		return -EINVAL;
15143  
15144  	/* Need to zero it in case userspace passes in a smaller
15145  	 * bpf_line_info record.
15146  	 */
15147  	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
15148  			 GFP_KERNEL | __GFP_NOWARN);
15149  	if (!linfo)
15150  		return -ENOMEM;
15151  
15152  	prog = env->prog;
15153  	btf = prog->aux->btf;
15154  
15155  	s = 0;
15156  	sub = env->subprog_info;
15157  	ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
15158  	expected_size = sizeof(struct bpf_line_info);
15159  	ncopy = min_t(u32, expected_size, rec_size);
15160  	for (i = 0; i < nr_linfo; i++) {
15161  		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
15162  		if (err) {
15163  			if (err == -E2BIG) {
15164  				verbose(env, "nonzero trailing record in line_info");
15165  				if (copy_to_bpfptr_offset(uattr,
15166  							  offsetof(union bpf_attr, line_info_rec_size),
15167  							  &expected_size, sizeof(expected_size)))
15168  					err = -EFAULT;
15169  			}
15170  			goto err_free;
15171  		}
15172  
15173  		if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
15174  			err = -EFAULT;
15175  			goto err_free;
15176  		}
15177  
15178  		/*
15179  		 * Check insn_off to ensure
15180  		 * 1) strictly increasing AND
15181  		 * 2) bounded by prog->len
15182  		 *
15183  		 * The linfo[0].insn_off == 0 check logically falls into
15184  		 * the later "missing bpf_line_info for func..." case
15185  		 * because the first linfo[0].insn_off must be the
15186  		 * first sub also and the first sub must have
15187  		 * subprog_info[0].start == 0.
15188  		 */
15189  		if ((i && linfo[i].insn_off <= prev_offset) ||
15190  		    linfo[i].insn_off >= prog->len) {
15191  			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
15192  				i, linfo[i].insn_off, prev_offset,
15193  				prog->len);
15194  			err = -EINVAL;
15195  			goto err_free;
15196  		}
15197  
15198  		if (!prog->insnsi[linfo[i].insn_off].code) {
15199  			verbose(env,
15200  				"Invalid insn code at line_info[%u].insn_off\n",
15201  				i);
15202  			err = -EINVAL;
15203  			goto err_free;
15204  		}
15205  
15206  		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
15207  		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
15208  			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
15209  			err = -EINVAL;
15210  			goto err_free;
15211  		}
15212  
15213  		if (s != env->subprog_cnt) {
15214  			if (linfo[i].insn_off == sub[s].start) {
15215  				sub[s].linfo_idx = i;
15216  				s++;
15217  			} else if (sub[s].start < linfo[i].insn_off) {
15218  				verbose(env, "missing bpf_line_info for func#%u\n", s);
15219  				err = -EINVAL;
15220  				goto err_free;
15221  			}
15222  		}
15223  
15224  		prev_offset = linfo[i].insn_off;
15225  		bpfptr_add(&ulinfo, rec_size);
15226  	}
15227  
15228  	if (s != env->subprog_cnt) {
15229  		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
15230  			env->subprog_cnt - s, s);
15231  		err = -EINVAL;
15232  		goto err_free;
15233  	}
15234  
15235  	prog->aux->linfo = linfo;
15236  	prog->aux->nr_linfo = nr_linfo;
15237  
15238  	return 0;
15239  
15240  err_free:
15241  	kvfree(linfo);
15242  	return err;
15243  }
15244  
15245  #define MIN_CORE_RELO_SIZE	sizeof(struct bpf_core_relo)
15246  #define MAX_CORE_RELO_SIZE	MAX_FUNCINFO_REC_SIZE
15247  
15248  static int check_core_relo(struct bpf_verifier_env *env,
15249  			   const union bpf_attr *attr,
15250  			   bpfptr_t uattr)
15251  {
15252  	u32 i, nr_core_relo, ncopy, expected_size, rec_size;
15253  	struct bpf_core_relo core_relo = {};
15254  	struct bpf_prog *prog = env->prog;
15255  	const struct btf *btf = prog->aux->btf;
15256  	struct bpf_core_ctx ctx = {
15257  		.log = &env->log,
15258  		.btf = btf,
15259  	};
15260  	bpfptr_t u_core_relo;
15261  	int err;
15262  
15263  	nr_core_relo = attr->core_relo_cnt;
15264  	if (!nr_core_relo)
15265  		return 0;
15266  	if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
15267  		return -EINVAL;
15268  
15269  	rec_size = attr->core_relo_rec_size;
15270  	if (rec_size < MIN_CORE_RELO_SIZE ||
15271  	    rec_size > MAX_CORE_RELO_SIZE ||
15272  	    rec_size % sizeof(u32))
15273  		return -EINVAL;
15274  
15275  	u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
15276  	expected_size = sizeof(struct bpf_core_relo);
15277  	ncopy = min_t(u32, expected_size, rec_size);
15278  
15279  	/* Unlike func_info and line_info, copy and apply each CO-RE
15280  	 * relocation record one at a time.
15281  	 */
15282  	for (i = 0; i < nr_core_relo; i++) {
15283  		/* future proofing when sizeof(bpf_core_relo) changes */
15284  		err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
15285  		if (err) {
15286  			if (err == -E2BIG) {
15287  				verbose(env, "nonzero trailing record in core_relo");
15288  				if (copy_to_bpfptr_offset(uattr,
15289  							  offsetof(union bpf_attr, core_relo_rec_size),
15290  							  &expected_size, sizeof(expected_size)))
15291  					err = -EFAULT;
15292  			}
15293  			break;
15294  		}
15295  
15296  		if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
15297  			err = -EFAULT;
15298  			break;
15299  		}
15300  
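		/* insn_off in a CO-RE relocation is expressed in bytes, hence
		 * the alignment check and the division by 8 (the size of one
		 * BPF instruction) below.
		 */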
15301  		if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
15302  			verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
15303  				i, core_relo.insn_off, prog->len);
15304  			err = -EINVAL;
15305  			break;
15306  		}
15307  
15308  		err = bpf_core_apply(&ctx, &core_relo, i,
15309  				     &prog->insnsi[core_relo.insn_off / 8]);
15310  		if (err)
15311  			break;
15312  		bpfptr_add(&u_core_relo, rec_size);
15313  	}
15314  	return err;
15315  }
15316  
15317  static int check_btf_info(struct bpf_verifier_env *env,
15318  			  const union bpf_attr *attr,
15319  			  bpfptr_t uattr)
15320  {
15321  	struct btf *btf;
15322  	int err;
15323  
15324  	if (!attr->func_info_cnt && !attr->line_info_cnt) {
15325  		if (check_abnormal_return(env))
15326  			return -EINVAL;
15327  		return 0;
15328  	}
15329  
15330  	btf = btf_get_by_fd(attr->prog_btf_fd);
15331  	if (IS_ERR(btf))
15332  		return PTR_ERR(btf);
15333  	if (btf_is_kernel(btf)) {
15334  		btf_put(btf);
15335  		return -EACCES;
15336  	}
15337  	env->prog->aux->btf = btf;
15338  
15339  	err = check_btf_func(env, attr, uattr);
15340  	if (err)
15341  		return err;
15342  
15343  	err = check_btf_line(env, attr, uattr);
15344  	if (err)
15345  		return err;
15346  
15347  	err = check_core_relo(env, attr, uattr);
15348  	if (err)
15349  		return err;
15350  
15351  	return 0;
15352  }
15353  
15354  /* check %cur's range satisfies %old's */
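/* For example, if the old (already verified) register was known to be in
 * [0, 100], a current register known to be in [10, 20] satisfies it, while
 * one that may reach 200 does not, since the old walk never proved safety
 * for values that large.
 */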
15355  static bool range_within(struct bpf_reg_state *old,
15356  			 struct bpf_reg_state *cur)
15357  {
15358  	return old->umin_value <= cur->umin_value &&
15359  	       old->umax_value >= cur->umax_value &&
15360  	       old->smin_value <= cur->smin_value &&
15361  	       old->smax_value >= cur->smax_value &&
15362  	       old->u32_min_value <= cur->u32_min_value &&
15363  	       old->u32_max_value >= cur->u32_max_value &&
15364  	       old->s32_min_value <= cur->s32_min_value &&
15365  	       old->s32_max_value >= cur->s32_max_value;
15366  }
15367  
15368  /* If in the old state two registers had the same id, then they need to have
15369   * the same id in the new state as well.  But that id could be different from
15370   * the old state, so we need to track the mapping from old to new ids.
15371   * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
15372   * regs with old id 5 must also have new id 9 for the new state to be safe.
15373   * Conversely, a given new id 9 can be paired with only one old id: the
15374   * map[i].cur check below rejects a second, different old id mapping to it.
15375   * So we look through our idmap to see if this old id has been seen before.  If
15376   * so, we require the new id to match; otherwise, we add the id pair to the map.
15377   */
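/* For example, if the old state had r6.id == 5 and r7.id == 5 while the
 * current state has r6.id == 9 and r7.id == 9, the first check_ids(5, 9, ...)
 * call records the 5 -> 9 pairing and the second call succeeds because it
 * matches. Had r7.id been 7 in the current state, the second call would fail,
 * since old id 5 is already paired with new id 9.
 */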
15378  static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
15379  {
15380  	struct bpf_id_pair *map = idmap->map;
15381  	unsigned int i;
15382  
15383  	/* either both IDs should be set or both should be zero */
15384  	if (!!old_id != !!cur_id)
15385  		return false;
15386  
15387  	if (old_id == 0) /* cur_id == 0 as well */
15388  		return true;
15389  
15390  	for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
15391  		if (!map[i].old) {
15392  			/* Reached an empty slot; haven't seen this id before */
15393  			map[i].old = old_id;
15394  			map[i].cur = cur_id;
15395  			return true;
15396  		}
15397  		if (map[i].old == old_id)
15398  			return map[i].cur == cur_id;
15399  		if (map[i].cur == cur_id)
15400  			return false;
15401  	}
15402  	/* We ran out of idmap slots, which should be impossible */
15403  	WARN_ON_ONCE(1);
15404  	return false;
15405  }
15406  
15407  /* Similar to check_ids(), but allocate a unique temporary ID
15408   * for 'old_id' or 'cur_id' of zero.
15409   * This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid.
15410   */
15411  static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
15412  {
15413  	old_id = old_id ? old_id : ++idmap->tmp_id_gen;
15414  	cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
15415  
15416  	return check_ids(old_id, cur_id, idmap);
15417  }
15418  
15419  static void clean_func_state(struct bpf_verifier_env *env,
15420  			     struct bpf_func_state *st)
15421  {
15422  	enum bpf_reg_liveness live;
15423  	int i, j;
15424  
15425  	for (i = 0; i < BPF_REG_FP; i++) {
15426  		live = st->regs[i].live;
15427  		/* liveness must not touch this register anymore */
15428  		st->regs[i].live |= REG_LIVE_DONE;
15429  		if (!(live & REG_LIVE_READ))
15430  			/* since the register is unused, clear its state
15431  			 * to make further comparison simpler
15432  			 */
15433  			__mark_reg_not_init(env, &st->regs[i]);
15434  	}
15435  
15436  	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
15437  		live = st->stack[i].spilled_ptr.live;
15438  		/* liveness must not touch this stack slot anymore */
15439  		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
15440  		if (!(live & REG_LIVE_READ)) {
15441  			__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
15442  			for (j = 0; j < BPF_REG_SIZE; j++)
15443  				st->stack[i].slot_type[j] = STACK_INVALID;
15444  		}
15445  	}
15446  }
15447  
15448  static void clean_verifier_state(struct bpf_verifier_env *env,
15449  				 struct bpf_verifier_state *st)
15450  {
15451  	int i;
15452  
15453  	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
15454  		/* all regs in this state in all frames were already marked */
15455  		return;
15456  
15457  	for (i = 0; i <= st->curframe; i++)
15458  		clean_func_state(env, st->frame[i]);
15459  }
15460  
15461  /* the parentage chains form a tree.
15462   * the verifier states are added to state lists at given insn and
15463   * pushed into state stack for future exploration.
15464   * when the verifier reaches the bpf_exit insn, some of the verifier states
15465   * stored in the state lists have their final liveness state already,
15466   * but a lot of states will get revised from liveness point of view when
15467   * the verifier explores other branches.
15468   * Example:
15469   * 1: r0 = 1
15470   * 2: if r1 == 100 goto pc+1
15471   * 3: r0 = 2
15472   * 4: exit
15473   * when the verifier reaches exit insn the register r0 in the state list of
15474   * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
15475   * of insn 2 and goes exploring further. At the insn 4 it will walk the
15476   * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
15477   *
15478   * Since the verifier pushes the branch states as it sees them while exploring
15479   * the program the condition of walking the branch instruction for the second
15480   * time means that all states below this branch were already explored and
15481   * their final liveness marks are already propagated.
15482   * Hence when the verifier completes the search of state list in is_state_visited()
15483   * we can call this clean_live_states() function to mark all liveness states
15484   * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
15485   * will not be used.
15486   * This function also clears the registers and stack slots that are not
15487   * marked REG_LIVE_READ, to simplify state merging.
15488   *
15489   * An important note here: walking the same branch instruction in the callee
15490   * doesn't mean that the states are DONE. The verifier has to compare
15491   * the callsites as well.
15492   */
15493  static void clean_live_states(struct bpf_verifier_env *env, int insn,
15494  			      struct bpf_verifier_state *cur)
15495  {
15496  	struct bpf_verifier_state_list *sl;
15497  	int i;
15498  
15499  	sl = *explored_state(env, insn);
15500  	while (sl) {
15501  		if (sl->state.branches)
15502  			goto next;
15503  		if (sl->state.insn_idx != insn ||
15504  		    sl->state.curframe != cur->curframe)
15505  			goto next;
15506  		for (i = 0; i <= cur->curframe; i++)
15507  			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
15508  				goto next;
15509  		clean_verifier_state(env, &sl->state);
15510  next:
15511  		sl = sl->next;
15512  	}
15513  }
15514  
15515  static bool regs_exact(const struct bpf_reg_state *rold,
15516  		       const struct bpf_reg_state *rcur,
15517  		       struct bpf_idmap *idmap)
15518  {
15519  	return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
15520  	       check_ids(rold->id, rcur->id, idmap) &&
15521  	       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
15522  }
15523  
15524  /* Returns true if (rold safe implies rcur safe) */
15525  static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
15526  		    struct bpf_reg_state *rcur, struct bpf_idmap *idmap)
15527  {
15528  	if (!(rold->live & REG_LIVE_READ))
15529  		/* explored state didn't use this */
15530  		return true;
15531  	if (rold->type == NOT_INIT)
15532  		/* explored state can't have used this */
15533  		return true;
15534  	if (rcur->type == NOT_INIT)
15535  		return false;
15536  
15537  	/* Enforce that register types have to match exactly, including their
15538  	 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
15539  	 * rule.
15540  	 *
15541  	 * One can make a point that using a pointer register as unbounded
15542  	 * SCALAR would be technically acceptable, but this could lead to
15543  	 * pointer leaks because scalars are allowed to leak while pointers
15544  	 * are not. We could make this safe in special cases if root is
15545  	 * calling us, but it's probably not worth the hassle.
15546  	 *
15547  	 * Also, register types that are *not* MAYBE_NULL could technically be
15548  	 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
15549  	 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
15550  	 * to the same map).
15551  	 * However, if the old MAYBE_NULL register then got NULL checked,
15552  	 * doing so could have affected others with the same id, and we can't
15553  	 * check for that because we lost the id when we converted to
15554  	 * a non-MAYBE_NULL variant.
15555  	 * So, as a general rule we don't allow mixing MAYBE_NULL and
15556  	 * non-MAYBE_NULL registers as well.
15557  	 */
15558  	if (rold->type != rcur->type)
15559  		return false;
15560  
15561  	switch (base_type(rold->type)) {
15562  	case SCALAR_VALUE:
15563  		if (env->explore_alu_limits) {
15564  			/* explore_alu_limits disables tnum_in() and range_within()
15565  			 * logic and requires everything to be strict
15566  			 */
15567  			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
15568  			       check_scalar_ids(rold->id, rcur->id, idmap);
15569  		}
15570  		if (!rold->precise)
15571  			return true;
15572  		/* Why check_ids() for scalar registers?
15573  		 *
15574  		 * Consider the following BPF code:
15575  		 *   1: r6 = ... unbound scalar, ID=a ...
15576  		 *   2: r7 = ... unbound scalar, ID=b ...
15577  		 *   3: if (r6 > r7) goto +1
15578  		 *   4: r6 = r7
15579  		 *   5: if (r6 > X) goto ...
15580  		 *   6: ... memory operation using r7 ...
15581  		 *
15582  		 * First verification path is [1-6]:
15583  		 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
15584  		 * - at (5) r6 would be marked <= X, find_equal_scalars() would also mark
15585  		 *   r7 <= X, because r6 and r7 share same id.
15586  		 * Next verification path is [1-4, 6].
15587  		 *
15588  		 * Instruction (6) would be reached in two states:
15589  		 *   I.  r6{.id=b}, r7{.id=b} via path 1-6;
15590  		 *   II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
15591  		 *
15592  		 * Use check_ids() to distinguish these states.
15593  		 * ---
15594  		 * Also verify that new value satisfies old value range knowledge.
15595  		 */
15596  		return range_within(rold, rcur) &&
15597  		       tnum_in(rold->var_off, rcur->var_off) &&
15598  		       check_scalar_ids(rold->id, rcur->id, idmap);
15599  	case PTR_TO_MAP_KEY:
15600  	case PTR_TO_MAP_VALUE:
15601  	case PTR_TO_MEM:
15602  	case PTR_TO_BUF:
15603  	case PTR_TO_TP_BUFFER:
15604  		/* If the new min/max/var_off satisfy the old ones and
15605  		 * everything else matches, we are OK.
15606  		 */
15607  		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
15608  		       range_within(rold, rcur) &&
15609  		       tnum_in(rold->var_off, rcur->var_off) &&
15610  		       check_ids(rold->id, rcur->id, idmap) &&
15611  		       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
15612  	case PTR_TO_PACKET_META:
15613  	case PTR_TO_PACKET:
15614  		/* We must have at least as much range as the old ptr
15615  		 * did, so that any accesses which were safe before are
15616  		 * still safe.  This is true even if old range < old off,
15617  		 * since someone could have accessed through (ptr - k), or
15618  		 * even done ptr -= k in a register, to get a safe access.
15619  		 */
15620  		if (rold->range > rcur->range)
15621  			return false;
15622  		/* If the offsets don't match, we can't trust our alignment;
15623  		 * nor can we be sure that we won't fall out of range.
15624  		 */
15625  		if (rold->off != rcur->off)
15626  			return false;
15627  		/* id relations must be preserved */
15628  		if (!check_ids(rold->id, rcur->id, idmap))
15629  			return false;
15630  		/* new val must satisfy old val knowledge */
15631  		return range_within(rold, rcur) &&
15632  		       tnum_in(rold->var_off, rcur->var_off);
15633  	case PTR_TO_STACK:
15634  		/* two stack pointers are equal only if they're pointing to
15635  		 * the same stack frame, since fp-8 in foo != fp-8 in bar
15636  		 */
15637  		return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
15638  	default:
15639  		return regs_exact(rold, rcur, idmap);
15640  	}
15641  }
15642  
15643  static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
15644  		      struct bpf_func_state *cur, struct bpf_idmap *idmap)
15645  {
15646  	int i, spi;
15647  
15648  	/* walk slots of the explored stack and ignore any additional
15649  	 * slots in the current stack, since explored(safe) state
15650  	 * didn't use them
15651  	 */
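	/* Note: i indexes the stack byte by byte, while spi below is the
	 * corresponding 8-byte (BPF_REG_SIZE) slot index.
	 */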
15652  	for (i = 0; i < old->allocated_stack; i++) {
15653  		struct bpf_reg_state *old_reg, *cur_reg;
15654  
15655  		spi = i / BPF_REG_SIZE;
15656  
15657  		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
15658  			i += BPF_REG_SIZE - 1;
15659  			/* explored state didn't use this */
15660  			continue;
15661  		}
15662  
15663  		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
15664  			continue;
15665  
15666  		if (env->allow_uninit_stack &&
15667  		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
15668  			continue;
15669  
15670  		/* explored stack has more populated slots than current stack
15671  		 * and these slots were used
15672  		 */
15673  		if (i >= cur->allocated_stack)
15674  			return false;
15675  
15676  		/* if old state was safe with misc data in the stack
15677  		 * it will be safe with zero-initialized stack.
15678  		 * The opposite is not true
15679  		 */
15680  		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
15681  		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
15682  			continue;
15683  		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
15684  		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
15685  			/* Ex: old explored (safe) state has STACK_SPILL in
15686  			 * this stack slot, but current has STACK_MISC ->
15687  			 * these verifier states are not equivalent,
15688  			 * return false to continue verification of this path
15689  			 */
15690  			return false;
15691  		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
15692  			continue;
15693  		/* Both old and cur have the same slot_type */
15694  		switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
15695  		case STACK_SPILL:
15696  			/* when explored and current stack slot are both storing
15697  			 * spilled registers, check that stored pointers types
15698  			 * are the same as well.
15699  			 * Ex: explored safe path could have stored
15700  			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
15701  			 * but current path has stored:
15702  			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
15703  			 * such verifier states are not equivalent.
15704  			 * return false to continue verification of this path
15705  			 */
15706  			if (!regsafe(env, &old->stack[spi].spilled_ptr,
15707  				     &cur->stack[spi].spilled_ptr, idmap))
15708  				return false;
15709  			break;
15710  		case STACK_DYNPTR:
15711  			old_reg = &old->stack[spi].spilled_ptr;
15712  			cur_reg = &cur->stack[spi].spilled_ptr;
15713  			if (old_reg->dynptr.type != cur_reg->dynptr.type ||
15714  			    old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
15715  			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
15716  				return false;
15717  			break;
15718  		case STACK_ITER:
15719  			old_reg = &old->stack[spi].spilled_ptr;
15720  			cur_reg = &cur->stack[spi].spilled_ptr;
15721  			/* iter.depth is not compared between states as it
15722  			 * doesn't matter for correctness and would otherwise
15723  			 * prevent convergence; we maintain it only to keep the
15724  			 * infinite loop check from triggering, see
15725  			 * iter_active_depths_differ()
15726  			 */
15727  			if (old_reg->iter.btf != cur_reg->iter.btf ||
15728  			    old_reg->iter.btf_id != cur_reg->iter.btf_id ||
15729  			    old_reg->iter.state != cur_reg->iter.state ||
15730  			    /* ignore {old_reg,cur_reg}->iter.depth, see above */
15731  			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
15732  				return false;
15733  			break;
15734  		case STACK_MISC:
15735  		case STACK_ZERO:
15736  		case STACK_INVALID:
15737  			continue;
15738  		/* Ensure that new unhandled slot types return false by default */
15739  		default:
15740  			return false;
15741  		}
15742  	}
15743  	return true;
15744  }
15745  
15746  static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
15747  		    struct bpf_idmap *idmap)
15748  {
15749  	int i;
15750  
15751  	if (old->acquired_refs != cur->acquired_refs)
15752  		return false;
15753  
15754  	for (i = 0; i < old->acquired_refs; i++) {
15755  		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
15756  			return false;
15757  	}
15758  
15759  	return true;
15760  }
15761  
15762  /* compare two verifier states
15763   *
15764   * all states stored in state_list are known to be valid, since
15765   * verifier reached 'bpf_exit' instruction through them
15766   *
15767   * this function is called when the verifier explores different branches of
15768   * execution popped from the state stack. If it sees an old state that has
15769   * more strict register state and more strict stack state, then this execution
15770   * branch doesn't need to be explored further, since verifier already
15771   * concluded that more strict state leads to valid finish.
15772   *
15773   * Therefore two states are equivalent if register state is more conservative
15774   * and explored stack state is more conservative than the current one.
15775   * Example:
15776   *       explored                   current
15777   * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
15778   * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
15779   *
15780   * In other words if current stack state (one being explored) has more
15781   * valid slots than old one that already passed validation, it means
15782   * the verifier can stop exploring and conclude that current state is valid too
15783   *
15784   * Similarly with registers. If explored state has register type as invalid
15785   * whereas register type in current state is meaningful, it means that
15786   * the current state will reach 'bpf_exit' instruction safely
15787   */
15788  static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
15789  			      struct bpf_func_state *cur)
15790  {
15791  	int i;
15792  
15793  	for (i = 0; i < MAX_BPF_REG; i++)
15794  		if (!regsafe(env, &old->regs[i], &cur->regs[i],
15795  			     &env->idmap_scratch))
15796  			return false;
15797  
15798  	if (!stacksafe(env, old, cur, &env->idmap_scratch))
15799  		return false;
15800  
15801  	if (!refsafe(old, cur, &env->idmap_scratch))
15802  		return false;
15803  
15804  	return true;
15805  }
15806  
15807  static bool states_equal(struct bpf_verifier_env *env,
15808  			 struct bpf_verifier_state *old,
15809  			 struct bpf_verifier_state *cur)
15810  {
15811  	int i;
15812  
15813  	if (old->curframe != cur->curframe)
15814  		return false;
15815  
15816  	env->idmap_scratch.tmp_id_gen = env->id_gen;
15817  	memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
15818  
15819  	/* Verification state from speculative execution simulation
15820  	 * must never prune a non-speculative execution one.
15821  	 */
15822  	if (old->speculative && !cur->speculative)
15823  		return false;
15824  
15825  	if (old->active_lock.ptr != cur->active_lock.ptr)
15826  		return false;
15827  
15828  	/* Old and cur active_lock's have to be either both present
15829  	 * or both absent.
15830  	 */
15831  	if (!!old->active_lock.id != !!cur->active_lock.id)
15832  		return false;
15833  
15834  	if (old->active_lock.id &&
15835  	    !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch))
15836  		return false;
15837  
15838  	if (old->active_rcu_lock != cur->active_rcu_lock)
15839  		return false;
15840  
15841  	/* for states to be equal callsites have to be the same
15842  	 * and all frame states need to be equivalent
15843  	 */
15844  	for (i = 0; i <= old->curframe; i++) {
15845  		if (old->frame[i]->callsite != cur->frame[i]->callsite)
15846  			return false;
15847  		if (!func_states_equal(env, old->frame[i], cur->frame[i]))
15848  			return false;
15849  	}
15850  	return true;
15851  }
15852  
15853  /* Return 0 if no propagation happened. Return negative error code if error
15854   * happened. Otherwise, return the propagated bit.
15855   */
15856  static int propagate_liveness_reg(struct bpf_verifier_env *env,
15857  				  struct bpf_reg_state *reg,
15858  				  struct bpf_reg_state *parent_reg)
15859  {
15860  	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
15861  	u8 flag = reg->live & REG_LIVE_READ;
15862  	int err;
15863  
15864  	/* By the time we get here, the read flags of PARENT_REG or REG could be
15865  	 * any of REG_LIVE_READ64, REG_LIVE_READ32 or REG_LIVE_NONE. There is no
15866  	 * need for propagation if PARENT_REG already has the strongest REG_LIVE_READ64.
15867  	 */
15868  	if (parent_flag == REG_LIVE_READ64 ||
15869  	    /* Or if there is no read flag from REG. */
15870  	    !flag ||
15871  	    /* Or if the read flag from REG is the same as PARENT_REG. */
15872  	    parent_flag == flag)
15873  		return 0;
15874  
15875  	err = mark_reg_read(env, reg, parent_reg, flag);
15876  	if (err)
15877  		return err;
15878  
15879  	return flag;
15880  }
15881  
15882  /* A write screens off any subsequent reads; but write marks come from the
15883   * straight-line code between a state and its parent.  When we arrive at an
15884   * equivalent state (jump target or such) we didn't arrive by the straight-line
15885   * code, so read marks in the state must propagate to the parent regardless
15886   * of the state's write marks. That's what 'parent == state->parent' comparison
15887   * in mark_reg_read() is for.
15888   */
15889  static int propagate_liveness(struct bpf_verifier_env *env,
15890  			      const struct bpf_verifier_state *vstate,
15891  			      struct bpf_verifier_state *vparent)
15892  {
15893  	struct bpf_reg_state *state_reg, *parent_reg;
15894  	struct bpf_func_state *state, *parent;
15895  	int i, frame, err = 0;
15896  
15897  	if (vparent->curframe != vstate->curframe) {
15898  		WARN(1, "propagate_live: parent frame %d current frame %d\n",
15899  		     vparent->curframe, vstate->curframe);
15900  		return -EFAULT;
15901  	}
15902  	/* Propagate read liveness of registers... */
15903  	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
15904  	for (frame = 0; frame <= vstate->curframe; frame++) {
15905  		parent = vparent->frame[frame];
15906  		state = vstate->frame[frame];
15907  		parent_reg = parent->regs;
15908  		state_reg = state->regs;
15909  		/* We don't need to worry about FP liveness, it's read-only */
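		/* In all frames except the innermost one only the callee-saved
		 * registers R6-R9 can still be live across the pending call,
		 * hence the loop starts at BPF_REG_6 for outer frames.
		 */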
15910  		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
15911  			err = propagate_liveness_reg(env, &state_reg[i],
15912  						     &parent_reg[i]);
15913  			if (err < 0)
15914  				return err;
15915  			if (err == REG_LIVE_READ64)
15916  				mark_insn_zext(env, &parent_reg[i]);
15917  		}
15918  
15919  		/* Propagate stack slots. */
15920  		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
15921  			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
15922  			parent_reg = &parent->stack[i].spilled_ptr;
15923  			state_reg = &state->stack[i].spilled_ptr;
15924  			err = propagate_liveness_reg(env, state_reg,
15925  						     parent_reg);
15926  			if (err < 0)
15927  				return err;
15928  		}
15929  	}
15930  	return 0;
15931  }
15932  
15933  /* find precise scalars in the previous equivalent state and
15934   * propagate them into the current state
15935   */
15936  static int propagate_precision(struct bpf_verifier_env *env,
15937  			       const struct bpf_verifier_state *old)
15938  {
15939  	struct bpf_reg_state *state_reg;
15940  	struct bpf_func_state *state;
15941  	int i, err = 0, fr;
15942  	bool first;
15943  
15944  	for (fr = old->curframe; fr >= 0; fr--) {
15945  		state = old->frame[fr];
15946  		state_reg = state->regs;
15947  		first = true;
15948  		for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
15949  			if (state_reg->type != SCALAR_VALUE ||
15950  			    !state_reg->precise ||
15951  			    !(state_reg->live & REG_LIVE_READ))
15952  				continue;
15953  			if (env->log.level & BPF_LOG_LEVEL2) {
15954  				if (first)
15955  					verbose(env, "frame %d: propagating r%d", fr, i);
15956  				else
15957  					verbose(env, ",r%d", i);
15958  			}
15959  			bt_set_frame_reg(&env->bt, fr, i);
15960  			first = false;
15961  		}
15962  
15963  		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
15964  			if (!is_spilled_reg(&state->stack[i]))
15965  				continue;
15966  			state_reg = &state->stack[i].spilled_ptr;
15967  			if (state_reg->type != SCALAR_VALUE ||
15968  			    !state_reg->precise ||
15969  			    !(state_reg->live & REG_LIVE_READ))
15970  				continue;
15971  			if (env->log.level & BPF_LOG_LEVEL2) {
15972  				if (first)
15973  					verbose(env, "frame %d: propagating fp%d",
15974  						fr, (-i - 1) * BPF_REG_SIZE);
15975  				else
15976  					verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
15977  			}
15978  			bt_set_frame_slot(&env->bt, fr, i);
15979  			first = false;
15980  		}
15981  		if (!first)
15982  			verbose(env, "\n");
15983  	}
15984  
15985  	err = mark_chain_precision_batch(env);
15986  	if (err < 0)
15987  		return err;
15988  
15989  	return 0;
15990  }
15991  
15992  static bool states_maybe_looping(struct bpf_verifier_state *old,
15993  				 struct bpf_verifier_state *cur)
15994  {
15995  	struct bpf_func_state *fold, *fcur;
15996  	int i, fr = cur->curframe;
15997  
15998  	if (old->curframe != fr)
15999  		return false;
16000  
16001  	fold = old->frame[fr];
16002  	fcur = cur->frame[fr];
16003  	for (i = 0; i < MAX_BPF_REG; i++)
16004  		if (memcmp(&fold->regs[i], &fcur->regs[i],
16005  			   offsetof(struct bpf_reg_state, parent)))
16006  			return false;
16007  	return true;
16008  }
16009  
16010  static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
16011  {
16012  	return env->insn_aux_data[insn_idx].is_iter_next;
16013  }
16014  
16015  /* is_state_visited() handles iter_next() (see process_iter_next_call() for
16016   * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
16017   * states to match, which otherwise would look like an infinite loop. So while
16018   * iter_next() calls are taken care of, we still need to be careful and
16019   * prevent an erroneous and too eager declaration of "infinite loop" when
16020   * iterators are involved.
16021   *
16022   * Here's a situation in pseudo-BPF assembly form:
16023   *
16024   *   0: again:                          ; set up iter_next() call args
16025   *   1:   r1 = &it                      ; <CHECKPOINT HERE>
16026   *   2:   call bpf_iter_num_next        ; this is iter_next() call
16027   *   3:   if r0 == 0 goto done
16028   *   4:   ... something useful here ...
16029   *   5:   goto again                    ; another iteration
16030   *   6: done:
16031   *   7:   r1 = &it
16032   *   8:   call bpf_iter_num_destroy     ; clean up iter state
16033   *   9:   exit
16034   *
16035   * This is a typical loop. Let's assume that we have a prune point at 1:,
16036   * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
16037   * again`, assuming other heuristics don't get in a way).
16038   *
16039   * When we first time come to 1:, let's say we have some state X. We proceed
16040   * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
16041   * Now we come back to validate that forked ACTIVE state. We proceed through
16042   * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
16043   * are converging. But the problem is that we don't know that yet, as this
16044   * convergence has to happen at iter_next() call site only. So if nothing is
16045   * done, at 1: verifier will use bounded loop logic and declare infinite
16046   * looping (and would be *technically* correct, if not for iterator's
16047   * "eventual sticky NULL" contract, see process_iter_next_call()). But we
16048   * don't want that. So what we do in process_iter_next_call() when we go on
16049   * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
16050   * a different iteration. So when we suspect an infinite loop, we additionally
16051   * check if any of the *ACTIVE* iterator states depths differ. If yes, we
16052   * pretend we are not looping and wait for next iter_next() call.
16053   *
16054   * This only applies to ACTIVE state. In DRAINED state we don't expect to
16055   * loop, because that would actually mean infinite loop, as DRAINED state is
16056   * "sticky", and so we'll keep returning into the same instruction with the
16057   * same state (at least in one of possible code paths).
16058   *
16059   * This approach allows us to keep the infinite loop heuristic even in the
16060   * face of an active iterator. E.g., the C snippet below is, and will be,
16061   * detected as infinitely looping:
16062   *
16063   *   struct bpf_iter_num it;
16064   *   int *p, x;
16065   *
16066   *   bpf_iter_num_new(&it, 0, 10);
16067   *   while ((p = bpf_iter_num_next(&it))) {
16068   *       x = *p;
16069   *       while (x--) {} // <<-- infinite loop here
16070   *   }
16071   *
16072   */
16073  static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
16074  {
16075  	struct bpf_reg_state *slot, *cur_slot;
16076  	struct bpf_func_state *state;
16077  	int i, fr;
16078  
16079  	for (fr = old->curframe; fr >= 0; fr--) {
16080  		state = old->frame[fr];
16081  		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
16082  			if (state->stack[i].slot_type[0] != STACK_ITER)
16083  				continue;
16084  
16085  			slot = &state->stack[i].spilled_ptr;
16086  			if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
16087  				continue;
16088  
16089  			cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
16090  			if (cur_slot->iter.depth != slot->iter.depth)
16091  				return true;
16092  		}
16093  	}
16094  	return false;
16095  }
16096  
16097  static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
16098  {
16099  	struct bpf_verifier_state_list *new_sl;
16100  	struct bpf_verifier_state_list *sl, **pprev;
16101  	struct bpf_verifier_state *cur = env->cur_state, *new;
16102  	int i, j, err, states_cnt = 0;
16103  	bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
16104  	bool add_new_state = force_new_state;
16105  
16106  	/* bpf progs typically have a pruning point every 4 instructions
16107  	 * http://vger.kernel.org/bpfconf2019.html#session-1
16108  	 * Do not add a new state for future pruning if the verifier hasn't seen
16109  	 * at least 2 jumps and at least 8 instructions.
16110  	 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
16111  	 * In tests it amounts to up to a 50% reduction in total verifier
16112  	 * memory consumption and a 20% verifier time speedup.
16113  	 */
16114  	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
16115  	    env->insn_processed - env->prev_insn_processed >= 8)
16116  		add_new_state = true;
16117  
16118  	pprev = explored_state(env, insn_idx);
16119  	sl = *pprev;
16120  
16121  	clean_live_states(env, insn_idx, cur);
16122  
16123  	while (sl) {
16124  		states_cnt++;
16125  		if (sl->state.insn_idx != insn_idx)
16126  			goto next;
16127  
16128  		if (sl->state.branches) {
16129  			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
16130  
16131  			if (frame->in_async_callback_fn &&
16132  			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
16133  				/* Different async_entry_cnt means that the verifier is
16134  				 * processing another entry into async callback.
16135  				 * Seeing the same state is not an indication of infinite
16136  				 * loop or infinite recursion.
16137  				 * But finding the same state doesn't mean that it's safe
16138  				 * to stop processing the current state. The previous state
16139  				 * hasn't yet reached bpf_exit, since state.branches > 0.
16140  				 * Checking in_async_callback_fn alone is not enough either,
16141  				 * since the verifier still needs to catch infinite loops
16142  				 * inside async callbacks.
16143  				 */
16144  				goto skip_inf_loop_check;
16145  			}
16146  			/* BPF open-coded iterators loop detection is special.
16147  			 * states_maybe_looping() logic is too simplistic in detecting
16148  			 * states that *might* be equivalent, because it doesn't know
16149  			 * about ID remapping, so don't even perform it.
16150  			 * See process_iter_next_call() and iter_active_depths_differ()
16151  			 * for overview of the logic. When current and one of parent
16152  			 * states are detected as equivalent, it's a good thing: we prove
16153  			 * convergence and can stop simulating further iterations.
16154  			 * It's safe to assume that iterator loop will finish, taking into
16155  			 * account iter_next() contract of eventually returning
16156  			 * sticky NULL result.
16157  			 */
16158  			if (is_iter_next_insn(env, insn_idx)) {
16159  				if (states_equal(env, &sl->state, cur)) {
16160  					struct bpf_func_state *cur_frame;
16161  					struct bpf_reg_state *iter_state, *iter_reg;
16162  					int spi;
16163  
16164  					cur_frame = cur->frame[cur->curframe];
16165  					/* btf_check_iter_kfuncs() enforces that
16166  					 * iter state pointer is always the first arg
16167  					 */
16168  					iter_reg = &cur_frame->regs[BPF_REG_1];
16169  					/* current state is valid due to states_equal(),
16170  					 * so we can assume valid iter and reg state,
16171  					 * no need for extra (re-)validations
16172  					 */
16173  					spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
16174  					iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
16175  					if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE)
16176  						goto hit;
16177  				}
16178  				goto skip_inf_loop_check;
16179  			}
16180  			/* attempt to detect infinite loop to avoid unnecessary doomed work */
16181  			if (states_maybe_looping(&sl->state, cur) &&
16182  			    states_equal(env, &sl->state, cur) &&
16183  			    !iter_active_depths_differ(&sl->state, cur)) {
16184  				verbose_linfo(env, insn_idx, "; ");
16185  				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
16186  				return -EINVAL;
16187  			}
16188  			/* if the verifier is processing a loop, avoid adding new state
16189  			 * too often, since different loop iterations have distinct
16190  			 * states and may not help future pruning.
16191  			 * This threshold shouldn't be too low to make sure that
16192  			 * a loop with large bound will be rejected quickly.
16193  			 * The most abusive loop will be:
16194  			 * r1 += 1
16195  			 * if r1 < 1000000 goto pc-2
16196  			 * 1M insn_processed limit / 100 == 10k peak states.
16197  			 * This threshold shouldn't be too high either, since states
16198  			 * at the end of the loop are likely to be useful in pruning.
16199  			 */
16200  skip_inf_loop_check:
16201  			if (!force_new_state &&
16202  			    env->jmps_processed - env->prev_jmps_processed < 20 &&
16203  			    env->insn_processed - env->prev_insn_processed < 100)
16204  				add_new_state = false;
16205  			goto miss;
16206  		}
16207  		if (states_equal(env, &sl->state, cur)) {
16208  hit:
16209  			sl->hit_cnt++;
16210  			/* reached equivalent register/stack state,
16211  			 * prune the search.
16212  			 * Registers read by the continuation are read by us.
16213  			 * If we have any write marks in env->cur_state, they
16214  			 * will prevent corresponding reads in the continuation
16215  			 * from reaching our parent (an explored_state).  Our
16216  			 * own state will get the read marks recorded, but
16217  			 * they'll be immediately forgotten as we're pruning
16218  			 * this state and will pop a new one.
16219  			 */
16220  			err = propagate_liveness(env, &sl->state, cur);
16221  
16222  			/* if previous state reached the exit with precision and
16223  			 * current state is equivalent to it (except precision marks)
16224  			 * the precision needs to be propagated back in
16225  			 * the current state.
16226  			 */
16227  			err = err ? : push_jmp_history(env, cur);
16228  			err = err ? : propagate_precision(env, &sl->state);
16229  			if (err)
16230  				return err;
16231  			return 1;
16232  		}
16233  miss:
16234  		/* when a new state is not going to be added, do not increase the miss
16235  		 * count. Otherwise several loop iterations will remove the state
16236  		 * recorded earlier. The goal of these heuristics is to keep
16237  		 * states from some iterations of the loop (some at the beginning
16238  		 * and some at the end) to help pruning.
16239  		 */
16240  		if (add_new_state)
16241  			sl->miss_cnt++;
16242  		/* heuristic to determine whether this state is beneficial
16243  		 * to keep checking from state equivalence point of view.
16244  		 * Higher numbers increase max_states_per_insn and verification time,
16245  		 * but do not meaningfully decrease insn_processed.
16246  		 */
16247  		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
16248  			/* the state is unlikely to be useful. Remove it to
16249  			 * speed up verification
16250  			 */
16251  			*pprev = sl->next;
16252  			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
16253  				u32 br = sl->state.branches;
16254  
16255  				WARN_ONCE(br,
16256  					  "BUG live_done but branches_to_explore %d\n",
16257  					  br);
16258  				free_verifier_state(&sl->state, false);
16259  				kfree(sl);
16260  				env->peak_states--;
16261  			} else {
16262  				/* cannot free this state, since parentage chain may
16263  			 * walk it later. Add it to the free_list instead, to
16264  			 * be freed at the end of verification.
16265  				 */
16266  				sl->next = env->free_list;
16267  				env->free_list = sl;
16268  			}
16269  			sl = *pprev;
16270  			continue;
16271  		}
16272  next:
16273  		pprev = &sl->next;
16274  		sl = *pprev;
16275  	}
16276  
16277  	if (env->max_states_per_insn < states_cnt)
16278  		env->max_states_per_insn = states_cnt;
16279  
16280  	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
16281  		return 0;
16282  
16283  	if (!add_new_state)
16284  		return 0;
16285  
16286  	/* There were no equivalent states, remember the current one.
16287  	 * Technically the current state is not proven to be safe yet,
16288  	 * but it will either reach the outermost bpf_exit (which means it's safe)
16289  	 * or it will be rejected. When there are no loops the verifier won't be
16290  	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
16291  	 * again on the way to bpf_exit.
16292  	 * When looping the sl->state.branches will be > 0 and this state
16293  	 * will not be considered for equivalence until branches == 0.
16294  	 */
16295  	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
16296  	if (!new_sl)
16297  		return -ENOMEM;
16298  	env->total_states++;
16299  	env->peak_states++;
16300  	env->prev_jmps_processed = env->jmps_processed;
16301  	env->prev_insn_processed = env->insn_processed;
16302  
16303  	/* forget precise markings we inherited, see __mark_chain_precision */
16304  	if (env->bpf_capable)
16305  		mark_all_scalars_imprecise(env, cur);
16306  
16307  	/* add new state to the head of linked list */
16308  	new = &new_sl->state;
16309  	err = copy_verifier_state(new, cur);
16310  	if (err) {
16311  		free_verifier_state(new, false);
16312  		kfree(new_sl);
16313  		return err;
16314  	}
16315  	new->insn_idx = insn_idx;
16316  	WARN_ONCE(new->branches != 1,
16317  		  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
16318  
16319  	cur->parent = new;
16320  	cur->first_insn_idx = insn_idx;
16321  	clear_jmp_history(cur);
16322  	new_sl->next = *explored_state(env, insn_idx);
16323  	*explored_state(env, insn_idx) = new_sl;
16324  	/* connect new state to parentage chain. Current frame needs all
16325  	 * registers connected. Only r6 - r9 of the callers are alive (pushed
16326  	 * to the stack implicitly by JITs) so in callers' frames connect just
16327  	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
16328  	 * the state of the call instruction (with WRITTEN set), and r0 comes
16329  	 * from callee with its full parentage chain, anyway.
16330  	 */
16331  	/* clear write marks in current state: the writes we did are not writes
16332  	 * our child did, so they don't screen off its reads from us.
16333  	 * (There are no read marks in current state, because reads always mark
16334  	 * their parent and current state never has children yet.  Only
16335  	 * explored_states can get read marks.)
16336  	 */
16337  	for (j = 0; j <= cur->curframe; j++) {
16338  		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
16339  			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
16340  		for (i = 0; i < BPF_REG_FP; i++)
16341  			cur->frame[j]->regs[i].live = REG_LIVE_NONE;
16342  	}
16343  
16344  	/* all stack frames are accessible from callee, clear them all */
16345  	for (j = 0; j <= cur->curframe; j++) {
16346  		struct bpf_func_state *frame = cur->frame[j];
16347  		struct bpf_func_state *newframe = new->frame[j];
16348  
16349  		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
16350  			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
16351  			frame->stack[i].spilled_ptr.parent =
16352  						&newframe->stack[i].spilled_ptr;
16353  		}
16354  	}
16355  	return 0;
16356  }
16357  
16358  /* Return true if it's OK to have the same insn return a different type. */
16359  static bool reg_type_mismatch_ok(enum bpf_reg_type type)
16360  {
16361  	switch (base_type(type)) {
16362  	case PTR_TO_CTX:
16363  	case PTR_TO_SOCKET:
16364  	case PTR_TO_SOCK_COMMON:
16365  	case PTR_TO_TCP_SOCK:
16366  	case PTR_TO_XDP_SOCK:
16367  	case PTR_TO_BTF_ID:
16368  		return false;
16369  	default:
16370  		return true;
16371  	}
16372  }
16373  
16374  /* If an instruction was previously used with particular pointer types, then we
16375   * need to be careful to avoid cases such as the one below, where it may be ok
16376   * for one branch to access the pointer, but not ok for the other branch:
16377   *
16378   * R1 = sock_ptr
16379   * goto X;
16380   * ...
16381   * R1 = some_other_valid_ptr;
16382   * goto X;
16383   * ...
16384   * R2 = *(u32 *)(R1 + 0);
16385   */
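/* A hedged illustration of the pattern above in raw insn form (the register
 * and offset choices are hypothetical, and R6 is assumed to hold a socket
 * pointer; this is not taken from a real program):
 *
 *   0: BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),        // if r0 == 0 goto 3
 *   1: BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),          // R1 = sock_ptr
 *   2: BPF_JMP_IMM(BPF_JA, 0, 0, 2),                 // goto 5
 *   3: BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),         // R1 = frame pointer
 *   4: BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),        // R1 = fp - 8
 *   5: BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),  // R2 = *(u32 *)(R1 + 0)
 *
 * Both paths reach the load at insn 5, but a socket pointer load is rewritten
 * by convert_ctx_accesses() while a stack load is not, so reusing the same
 * insn with such mixed pointer types has to be rejected.
 */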
16386  static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
16387  {
16388  	return src != prev && (!reg_type_mismatch_ok(src) ||
16389  			       !reg_type_mismatch_ok(prev));
16390  }
16391  
16392  static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
16393  			     bool allow_trust_missmatch)
16394  {
16395  	enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
16396  
16397  	if (*prev_type == NOT_INIT) {
16398  		/* Saw a valid insn
16399  		 * dst_reg = *(u32 *)(src_reg + off)
16400  		 * save type to validate intersecting paths
16401  		 */
16402  		*prev_type = type;
16403  	} else if (reg_type_mismatch(type, *prev_type)) {
16404  		/* An abusive program is trying to use the same insn
16405  		 * dst_reg = *(u32*) (src_reg + off)
16406  		 * with different pointer types:
16407  		 * src_reg == ctx in one branch and
16408  		 * src_reg == stack|map in some other branch.
16409  		 * Reject it.
16410  		 */
16411  		if (allow_trust_missmatch &&
16412  		    base_type(type) == PTR_TO_BTF_ID &&
16413  		    base_type(*prev_type) == PTR_TO_BTF_ID) {
16414  			/*
16415  			 * Have to support a use case when one path through
16416  			 * the program yields a TRUSTED pointer while another
16417  			 * yields an UNTRUSTED one. Fall back to UNTRUSTED to generate
16418  			 * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
16419  			 */
16420  			*prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
16421  		} else {
16422  			verbose(env, "same insn cannot be used with different pointers\n");
16423  			return -EINVAL;
16424  		}
16425  	}
16426  
16427  	return 0;
16428  }
16429  
16430  static int do_check(struct bpf_verifier_env *env)
16431  {
16432  	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
16433  	struct bpf_verifier_state *state = env->cur_state;
16434  	struct bpf_insn *insns = env->prog->insnsi;
16435  	struct bpf_reg_state *regs;
16436  	int insn_cnt = env->prog->len;
16437  	bool do_print_state = false;
16438  	int prev_insn_idx = -1;
16439  
16440  	for (;;) {
16441  		struct bpf_insn *insn;
16442  		u8 class;
16443  		int err;
16444  
16445  		env->prev_insn_idx = prev_insn_idx;
16446  		if (env->insn_idx >= insn_cnt) {
16447  			verbose(env, "invalid insn idx %d insn_cnt %d\n",
16448  				env->insn_idx, insn_cnt);
16449  			return -EFAULT;
16450  		}
16451  
16452  		insn = &insns[env->insn_idx];
16453  		class = BPF_CLASS(insn->code);
16454  
16455  		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
16456  			verbose(env,
16457  				"BPF program is too large. Processed %d insn\n",
16458  				env->insn_processed);
16459  			return -E2BIG;
16460  		}
16461  
16462  		state->last_insn_idx = env->prev_insn_idx;
16463  
16464  		if (is_prune_point(env, env->insn_idx)) {
16465  			err = is_state_visited(env, env->insn_idx);
16466  			if (err < 0)
16467  				return err;
16468  			if (err == 1) {
16469  				/* found equivalent state, can prune the search */
16470  				if (env->log.level & BPF_LOG_LEVEL) {
16471  					if (do_print_state)
16472  						verbose(env, "\nfrom %d to %d%s: safe\n",
16473  							env->prev_insn_idx, env->insn_idx,
16474  							env->cur_state->speculative ?
16475  							" (speculative execution)" : "");
16476  					else
16477  						verbose(env, "%d: safe\n", env->insn_idx);
16478  				}
16479  				goto process_bpf_exit;
16480  			}
16481  		}
16482  
16483  		if (is_jmp_point(env, env->insn_idx)) {
16484  			err = push_jmp_history(env, state);
16485  			if (err)
16486  				return err;
16487  		}
16488  
16489  		if (signal_pending(current))
16490  			return -EAGAIN;
16491  
16492  		if (need_resched())
16493  			cond_resched();
16494  
16495  		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
16496  			verbose(env, "\nfrom %d to %d%s:",
16497  				env->prev_insn_idx, env->insn_idx,
16498  				env->cur_state->speculative ?
16499  				" (speculative execution)" : "");
16500  			print_verifier_state(env, state->frame[state->curframe], true);
16501  			do_print_state = false;
16502  		}
16503  
16504  		if (env->log.level & BPF_LOG_LEVEL) {
16505  			const struct bpf_insn_cbs cbs = {
16506  				.cb_call	= disasm_kfunc_name,
16507  				.cb_print	= verbose,
16508  				.private_data	= env,
16509  			};
16510  
16511  			if (verifier_state_scratched(env))
16512  				print_insn_state(env, state->frame[state->curframe]);
16513  
16514  			verbose_linfo(env, env->insn_idx, "; ");
16515  			env->prev_log_pos = env->log.end_pos;
16516  			verbose(env, "%d: ", env->insn_idx);
16517  			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
16518  			env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
16519  			env->prev_log_pos = env->log.end_pos;
16520  		}
16521  
16522  		if (bpf_prog_is_offloaded(env->prog->aux)) {
16523  			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
16524  							   env->prev_insn_idx);
16525  			if (err)
16526  				return err;
16527  		}
16528  
16529  		regs = cur_regs(env);
16530  		sanitize_mark_insn_seen(env);
16531  		prev_insn_idx = env->insn_idx;
16532  
16533  		if (class == BPF_ALU || class == BPF_ALU64) {
16534  			err = check_alu_op(env, insn);
16535  			if (err)
16536  				return err;
16537  
16538  		} else if (class == BPF_LDX) {
16539  			enum bpf_reg_type src_reg_type;
16540  
16541  			/* check for reserved fields is already done */
16542  
16543  			/* check src operand */
16544  			err = check_reg_arg(env, insn->src_reg, SRC_OP);
16545  			if (err)
16546  				return err;
16547  
16548  			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
16549  			if (err)
16550  				return err;
16551  
16552  			src_reg_type = regs[insn->src_reg].type;
16553  
16554  			/* check that memory (src_reg + off) is readable,
16555  			 * the state of dst_reg will be updated by this func
16556  			 */
16557  			err = check_mem_access(env, env->insn_idx, insn->src_reg,
16558  					       insn->off, BPF_SIZE(insn->code),
16559  					       BPF_READ, insn->dst_reg, false,
16560  					       BPF_MODE(insn->code) == BPF_MEMSX);
16561  			if (err)
16562  				return err;
16563  
16564  			err = save_aux_ptr_type(env, src_reg_type, true);
16565  			if (err)
16566  				return err;
16567  		} else if (class == BPF_STX) {
16568  			enum bpf_reg_type dst_reg_type;
16569  
16570  			if (BPF_MODE(insn->code) == BPF_ATOMIC) {
16571  				err = check_atomic(env, env->insn_idx, insn);
16572  				if (err)
16573  					return err;
16574  				env->insn_idx++;
16575  				continue;
16576  			}
16577  
16578  			if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
16579  				verbose(env, "BPF_STX uses reserved fields\n");
16580  				return -EINVAL;
16581  			}
16582  
16583  			/* check src1 operand */
16584  			err = check_reg_arg(env, insn->src_reg, SRC_OP);
16585  			if (err)
16586  				return err;
16587  			/* check src2 operand */
16588  			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
16589  			if (err)
16590  				return err;
16591  
16592  			dst_reg_type = regs[insn->dst_reg].type;
16593  
16594  			/* check that memory (dst_reg + off) is writeable */
16595  			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
16596  					       insn->off, BPF_SIZE(insn->code),
16597  					       BPF_WRITE, insn->src_reg, false, false);
16598  			if (err)
16599  				return err;
16600  
16601  			err = save_aux_ptr_type(env, dst_reg_type, false);
16602  			if (err)
16603  				return err;
16604  		} else if (class == BPF_ST) {
16605  			enum bpf_reg_type dst_reg_type;
16606  
16607  			if (BPF_MODE(insn->code) != BPF_MEM ||
16608  			    insn->src_reg != BPF_REG_0) {
16609  				verbose(env, "BPF_ST uses reserved fields\n");
16610  				return -EINVAL;
16611  			}
16612  			/* check src operand */
16613  			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
16614  			if (err)
16615  				return err;
16616  
16617  			dst_reg_type = regs[insn->dst_reg].type;
16618  
16619  			/* check that memory (dst_reg + off) is writeable */
16620  			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
16621  					       insn->off, BPF_SIZE(insn->code),
16622  					       BPF_WRITE, -1, false, false);
16623  			if (err)
16624  				return err;
16625  
16626  			err = save_aux_ptr_type(env, dst_reg_type, false);
16627  			if (err)
16628  				return err;
16629  		} else if (class == BPF_JMP || class == BPF_JMP32) {
16630  			u8 opcode = BPF_OP(insn->code);
16631  
16632  			env->jmps_processed++;
16633  			if (opcode == BPF_CALL) {
16634  				if (BPF_SRC(insn->code) != BPF_K ||
16635  				    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
16636  				     && insn->off != 0) ||
16637  				    (insn->src_reg != BPF_REG_0 &&
16638  				     insn->src_reg != BPF_PSEUDO_CALL &&
16639  				     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
16640  				    insn->dst_reg != BPF_REG_0 ||
16641  				    class == BPF_JMP32) {
16642  					verbose(env, "BPF_CALL uses reserved fields\n");
16643  					return -EINVAL;
16644  				}
16645  
16646  				if (env->cur_state->active_lock.ptr) {
16647  					if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
16648  					    (insn->src_reg == BPF_PSEUDO_CALL) ||
16649  					    (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
16650  					     (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) {
16651  						verbose(env, "function calls are not allowed while holding a lock\n");
16652  						return -EINVAL;
16653  					}
16654  				}
16655  				if (insn->src_reg == BPF_PSEUDO_CALL)
16656  					err = check_func_call(env, insn, &env->insn_idx);
16657  				else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
16658  					err = check_kfunc_call(env, insn, &env->insn_idx);
16659  				else
16660  					err = check_helper_call(env, insn, &env->insn_idx);
16661  				if (err)
16662  					return err;
16663  
16664  				mark_reg_scratched(env, BPF_REG_0);
16665  			} else if (opcode == BPF_JA) {
16666  				if (BPF_SRC(insn->code) != BPF_K ||
16667  				    insn->src_reg != BPF_REG_0 ||
16668  				    insn->dst_reg != BPF_REG_0 ||
16669  				    (class == BPF_JMP && insn->imm != 0) ||
16670  				    (class == BPF_JMP32 && insn->off != 0)) {
16671  					verbose(env, "BPF_JA uses reserved fields\n");
16672  					return -EINVAL;
16673  				}
16674  
16675  				if (class == BPF_JMP)
16676  					env->insn_idx += insn->off + 1;
16677  				else
16678  					env->insn_idx += insn->imm + 1;
16679  				continue;
16680  
16681  			} else if (opcode == BPF_EXIT) {
16682  				if (BPF_SRC(insn->code) != BPF_K ||
16683  				    insn->imm != 0 ||
16684  				    insn->src_reg != BPF_REG_0 ||
16685  				    insn->dst_reg != BPF_REG_0 ||
16686  				    class == BPF_JMP32) {
16687  					verbose(env, "BPF_EXIT uses reserved fields\n");
16688  					return -EINVAL;
16689  				}
16690  
16691  				if (env->cur_state->active_lock.ptr &&
16692  				    !in_rbtree_lock_required_cb(env)) {
16693  					verbose(env, "bpf_spin_unlock is missing\n");
16694  					return -EINVAL;
16695  				}
16696  
16697  				if (env->cur_state->active_rcu_lock &&
16698  				    !in_rbtree_lock_required_cb(env)) {
16699  					verbose(env, "bpf_rcu_read_unlock is missing\n");
16700  					return -EINVAL;
16701  				}
16702  
16703  				/* We must do check_reference_leak here before
16704  				 * prepare_func_exit to handle the case when
16705  				 * state->curframe > 0: it may be a callback
16706  				 * function, whose reference_state must
16707  				 * match the caller's reference state when it exits.
16708  				 */
16709  				err = check_reference_leak(env);
16710  				if (err)
16711  					return err;
16712  
16713  				if (state->curframe) {
16714  					/* exit from nested function */
16715  					err = prepare_func_exit(env, &env->insn_idx);
16716  					if (err)
16717  						return err;
16718  					do_print_state = true;
16719  					continue;
16720  				}
16721  
16722  				err = check_return_code(env);
16723  				if (err)
16724  					return err;
16725  process_bpf_exit:
16726  				mark_verifier_state_scratched(env);
16727  				update_branch_counts(env, env->cur_state);
16728  				err = pop_stack(env, &prev_insn_idx,
16729  						&env->insn_idx, pop_log);
16730  				if (err < 0) {
16731  					if (err != -ENOENT)
16732  						return err;
16733  					break;
16734  				} else {
16735  					do_print_state = true;
16736  					continue;
16737  				}
16738  			} else {
16739  				err = check_cond_jmp_op(env, insn, &env->insn_idx);
16740  				if (err)
16741  					return err;
16742  			}
16743  		} else if (class == BPF_LD) {
16744  			u8 mode = BPF_MODE(insn->code);
16745  
16746  			if (mode == BPF_ABS || mode == BPF_IND) {
16747  				err = check_ld_abs(env, insn);
16748  				if (err)
16749  					return err;
16750  
16751  			} else if (mode == BPF_IMM) {
16752  				err = check_ld_imm(env, insn);
16753  				if (err)
16754  					return err;
16755  
16756  				env->insn_idx++;
16757  				sanitize_mark_insn_seen(env);
16758  			} else {
16759  				verbose(env, "invalid BPF_LD mode\n");
16760  				return -EINVAL;
16761  			}
16762  		} else {
16763  			verbose(env, "unknown insn class %d\n", class);
16764  			return -EINVAL;
16765  		}
16766  
16767  		env->insn_idx++;
16768  	}
16769  
16770  	return 0;
16771  }
16772  
16773  static int find_btf_percpu_datasec(struct btf *btf)
16774  {
16775  	const struct btf_type *t;
16776  	const char *tname;
16777  	int i, n;
16778  
16779  	/*
16780  	 * Both vmlinux and each module have their own ".data..percpu"
16781  	 * DATASEC in BTF. So for a module, we need to skip the vmlinux BTF
16782  	 * types and look only at the module's own BTF types.
16783  	 */
16784  	n = btf_nr_types(btf);
16785  	if (btf_is_module(btf))
16786  		i = btf_nr_types(btf_vmlinux);
16787  	else
16788  		i = 1;
16789  
16790  	for (; i < n; i++) {
16791  		t = btf_type_by_id(btf, i);
16792  		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
16793  			continue;
16794  
16795  		tname = btf_name_by_offset(btf, t->name_off);
16796  		if (!strcmp(tname, ".data..percpu"))
16797  			return i;
16798  	}
16799  
16800  	return -ENOENT;
16801  }
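
/* A worked example with made-up numbers: if vmlinux BTF carries N types
 * occupying ids [1, N), a module's split BTF continues numbering at id N, so
 * starting the scan above at btf_nr_types(btf_vmlinux) skips every vmlinux
 * type and only the module's own DATASECs are examined.
 */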
16802  
16803  /* replace pseudo btf_id with kernel symbol address */
16804  static int check_pseudo_btf_id(struct bpf_verifier_env *env,
16805  			       struct bpf_insn *insn,
16806  			       struct bpf_insn_aux_data *aux)
16807  {
16808  	const struct btf_var_secinfo *vsi;
16809  	const struct btf_type *datasec;
16810  	struct btf_mod_pair *btf_mod;
16811  	const struct btf_type *t;
16812  	const char *sym_name;
16813  	bool percpu = false;
16814  	u32 type, id = insn->imm;
16815  	struct btf *btf;
16816  	s32 datasec_id;
16817  	u64 addr;
16818  	int i, btf_fd, err;
16819  
16820  	btf_fd = insn[1].imm;
16821  	if (btf_fd) {
16822  		btf = btf_get_by_fd(btf_fd);
16823  		if (IS_ERR(btf)) {
16824  			verbose(env, "invalid module BTF object FD specified.\n");
16825  			return -EINVAL;
16826  		}
16827  	} else {
16828  		if (!btf_vmlinux) {
16829  			verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
16830  			return -EINVAL;
16831  		}
16832  		btf = btf_vmlinux;
16833  		btf_get(btf);
16834  	}
16835  
16836  	t = btf_type_by_id(btf, id);
16837  	if (!t) {
16838  		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
16839  		err = -ENOENT;
16840  		goto err_put;
16841  	}
16842  
16843  	if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
16844  		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
16845  		err = -EINVAL;
16846  		goto err_put;
16847  	}
16848  
16849  	sym_name = btf_name_by_offset(btf, t->name_off);
16850  	addr = kallsyms_lookup_name(sym_name);
16851  	if (!addr) {
16852  		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
16853  			sym_name);
16854  		err = -ENOENT;
16855  		goto err_put;
16856  	}
16857  	insn[0].imm = (u32)addr;
16858  	insn[1].imm = addr >> 32;
16859  
16860  	if (btf_type_is_func(t)) {
16861  		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
16862  		aux->btf_var.mem_size = 0;
16863  		goto check_btf;
16864  	}
16865  
16866  	datasec_id = find_btf_percpu_datasec(btf);
16867  	if (datasec_id > 0) {
16868  		datasec = btf_type_by_id(btf, datasec_id);
16869  		for_each_vsi(i, datasec, vsi) {
16870  			if (vsi->type == id) {
16871  				percpu = true;
16872  				break;
16873  			}
16874  		}
16875  	}
16876  
16877  	type = t->type;
16878  	t = btf_type_skip_modifiers(btf, type, NULL);
16879  	if (percpu) {
16880  		aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
16881  		aux->btf_var.btf = btf;
16882  		aux->btf_var.btf_id = type;
16883  	} else if (!btf_type_is_struct(t)) {
16884  		const struct btf_type *ret;
16885  		const char *tname;
16886  		u32 tsize;
16887  
16888  		/* resolve the type size of ksym. */
16889  		ret = btf_resolve_size(btf, t, &tsize);
16890  		if (IS_ERR(ret)) {
16891  			tname = btf_name_by_offset(btf, t->name_off);
16892  			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
16893  				tname, PTR_ERR(ret));
16894  			err = -EINVAL;
16895  			goto err_put;
16896  		}
16897  		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
16898  		aux->btf_var.mem_size = tsize;
16899  	} else {
16900  		aux->btf_var.reg_type = PTR_TO_BTF_ID;
16901  		aux->btf_var.btf = btf;
16902  		aux->btf_var.btf_id = type;
16903  	}
16904  check_btf:
16905  	/* check whether we recorded this BTF (and maybe module) already */
16906  	for (i = 0; i < env->used_btf_cnt; i++) {
16907  		if (env->used_btfs[i].btf == btf) {
16908  			btf_put(btf);
16909  			return 0;
16910  		}
16911  	}
16912  
16913  	if (env->used_btf_cnt >= MAX_USED_BTFS) {
16914  		err = -E2BIG;
16915  		goto err_put;
16916  	}
16917  
16918  	btf_mod = &env->used_btfs[env->used_btf_cnt];
16919  	btf_mod->btf = btf;
16920  	btf_mod->module = NULL;
16921  
16922  	/* if we reference variables from a kernel module, bump its refcount */
16923  	if (btf_is_module(btf)) {
16924  		btf_mod->module = btf_try_get_module(btf);
16925  		if (!btf_mod->module) {
16926  			err = -ENXIO;
16927  			goto err_put;
16928  		}
16929  	}
16930  
16931  	env->used_btf_cnt++;
16932  
16933  	return 0;
16934  err_put:
16935  	btf_put(btf);
16936  	return err;
16937  }
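
/* A hedged sketch of the two-insn ldimm64 form handled above, roughly as
 * libbpf emits it for an "extern ... __ksym" variable (the symbol name and
 * layout details here are illustrative only):
 *
 *   insn[0]: code = BPF_LD | BPF_IMM | BPF_DW, src_reg = BPF_PSEUDO_BTF_ID,
 *            imm  = BTF id of the VAR/FUNC (e.g. "bpf_prog_active")
 *   insn[1]: imm  = BTF object FD, or 0 to mean vmlinux BTF
 *
 * On success the pair is rewritten in place so that insn[0].imm and
 * insn[1].imm hold the low and high 32 bits of the resolved kernel address.
 */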
16938  
16939  static bool is_tracing_prog_type(enum bpf_prog_type type)
16940  {
16941  	switch (type) {
16942  	case BPF_PROG_TYPE_KPROBE:
16943  	case BPF_PROG_TYPE_TRACEPOINT:
16944  	case BPF_PROG_TYPE_PERF_EVENT:
16945  	case BPF_PROG_TYPE_RAW_TRACEPOINT:
16946  	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
16947  		return true;
16948  	default:
16949  		return false;
16950  	}
16951  }
16952  
16953  static int check_map_prog_compatibility(struct bpf_verifier_env *env,
16954  					struct bpf_map *map,
16955  					struct bpf_prog *prog)
16956  
16957  {
16958  	enum bpf_prog_type prog_type = resolve_prog_type(prog);
16959  
16960  	if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
16961  	    btf_record_has_field(map->record, BPF_RB_ROOT)) {
16962  		if (is_tracing_prog_type(prog_type)) {
16963  			verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
16964  			return -EINVAL;
16965  		}
16966  	}
16967  
16968  	if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
16969  		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
16970  			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
16971  			return -EINVAL;
16972  		}
16973  
16974  		if (is_tracing_prog_type(prog_type)) {
16975  			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
16976  			return -EINVAL;
16977  		}
16978  	}
16979  
16980  	if (btf_record_has_field(map->record, BPF_TIMER)) {
16981  		if (is_tracing_prog_type(prog_type)) {
16982  			verbose(env, "tracing progs cannot use bpf_timer yet\n");
16983  			return -EINVAL;
16984  		}
16985  	}
16986  
16987  	if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
16988  	    !bpf_offload_prog_map_match(prog, map)) {
16989  		verbose(env, "offload device mismatch between prog and map\n");
16990  		return -EINVAL;
16991  	}
16992  
16993  	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
16994  		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
16995  		return -EINVAL;
16996  	}
16997  
16998  	if (prog->aux->sleepable)
16999  		switch (map->map_type) {
17000  		case BPF_MAP_TYPE_HASH:
17001  		case BPF_MAP_TYPE_LRU_HASH:
17002  		case BPF_MAP_TYPE_ARRAY:
17003  		case BPF_MAP_TYPE_PERCPU_HASH:
17004  		case BPF_MAP_TYPE_PERCPU_ARRAY:
17005  		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
17006  		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
17007  		case BPF_MAP_TYPE_HASH_OF_MAPS:
17008  		case BPF_MAP_TYPE_RINGBUF:
17009  		case BPF_MAP_TYPE_USER_RINGBUF:
17010  		case BPF_MAP_TYPE_INODE_STORAGE:
17011  		case BPF_MAP_TYPE_SK_STORAGE:
17012  		case BPF_MAP_TYPE_TASK_STORAGE:
17013  		case BPF_MAP_TYPE_CGRP_STORAGE:
17014  			break;
17015  		default:
17016  			verbose(env,
17017  				"Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
17018  			return -EINVAL;
17019  		}
17020  
17021  	return 0;
17022  }
17023  
17024  static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
17025  {
17026  	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
17027  		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
17028  }
17029  
17030  /* find and rewrite pseudo imm in ld_imm64 instructions:
17031   *
17032   * 1. if it accesses a map FD, replace it with the actual map pointer.
17033   * 2. if it accesses the btf_id of a VAR, replace it with a pointer to the var.
17034   *
17035   * NOTE: btf_vmlinux is required for converting pseudo btf_id.
17036   */
17037  static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
17038  {
17039  	struct bpf_insn *insn = env->prog->insnsi;
17040  	int insn_cnt = env->prog->len;
17041  	int i, j, err;
17042  
17043  	err = bpf_prog_calc_tag(env->prog);
17044  	if (err)
17045  		return err;
17046  
17047  	for (i = 0; i < insn_cnt; i++, insn++) {
17048  		if (BPF_CLASS(insn->code) == BPF_LDX &&
17049  		    ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
17050  		    insn->imm != 0)) {
17051  			verbose(env, "BPF_LDX uses reserved fields\n");
17052  			return -EINVAL;
17053  		}
17054  
17055  		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
17056  			struct bpf_insn_aux_data *aux;
17057  			struct bpf_map *map;
17058  			struct fd f;
17059  			u64 addr;
17060  			u32 fd;
17061  
17062  			if (i == insn_cnt - 1 || insn[1].code != 0 ||
17063  			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
17064  			    insn[1].off != 0) {
17065  				verbose(env, "invalid bpf_ld_imm64 insn\n");
17066  				return -EINVAL;
17067  			}
17068  
17069  			if (insn[0].src_reg == 0)
17070  				/* valid generic load 64-bit imm */
17071  				goto next_insn;
17072  
17073  			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
17074  				aux = &env->insn_aux_data[i];
17075  				err = check_pseudo_btf_id(env, insn, aux);
17076  				if (err)
17077  					return err;
17078  				goto next_insn;
17079  			}
17080  
17081  			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
17082  				aux = &env->insn_aux_data[i];
17083  				aux->ptr_type = PTR_TO_FUNC;
17084  				goto next_insn;
17085  			}
17086  
17087  			/* In the final convert_pseudo_ld_imm64() step, this is
17088  			 * converted into a regular 64-bit imm load insn.
17089  			 */
17090  			switch (insn[0].src_reg) {
17091  			case BPF_PSEUDO_MAP_VALUE:
17092  			case BPF_PSEUDO_MAP_IDX_VALUE:
17093  				break;
17094  			case BPF_PSEUDO_MAP_FD:
17095  			case BPF_PSEUDO_MAP_IDX:
17096  				if (insn[1].imm == 0)
17097  					break;
17098  				fallthrough;
17099  			default:
17100  				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
17101  				return -EINVAL;
17102  			}
17103  
17104  			switch (insn[0].src_reg) {
17105  			case BPF_PSEUDO_MAP_IDX_VALUE:
17106  			case BPF_PSEUDO_MAP_IDX:
17107  				if (bpfptr_is_null(env->fd_array)) {
17108  					verbose(env, "fd_idx without fd_array is invalid\n");
17109  					return -EPROTO;
17110  				}
17111  				if (copy_from_bpfptr_offset(&fd, env->fd_array,
17112  							    insn[0].imm * sizeof(fd),
17113  							    sizeof(fd)))
17114  					return -EFAULT;
17115  				break;
17116  			default:
17117  				fd = insn[0].imm;
17118  				break;
17119  			}
17120  
17121  			f = fdget(fd);
17122  			map = __bpf_map_get(f);
17123  			if (IS_ERR(map)) {
17124  				verbose(env, "fd %d is not pointing to valid bpf_map\n",
17125  					insn[0].imm);
17126  				return PTR_ERR(map);
17127  			}
17128  
17129  			err = check_map_prog_compatibility(env, map, env->prog);
17130  			if (err) {
17131  				fdput(f);
17132  				return err;
17133  			}
17134  
17135  			aux = &env->insn_aux_data[i];
17136  			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
17137  			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
17138  				addr = (unsigned long)map;
17139  			} else {
17140  				u32 off = insn[1].imm;
17141  
17142  				if (off >= BPF_MAX_VAR_OFF) {
17143  					verbose(env, "direct value offset of %u is not allowed\n", off);
17144  					fdput(f);
17145  					return -EINVAL;
17146  				}
17147  
17148  				if (!map->ops->map_direct_value_addr) {
17149  					verbose(env, "no direct value access support for this map type\n");
17150  					fdput(f);
17151  					return -EINVAL;
17152  				}
17153  
17154  				err = map->ops->map_direct_value_addr(map, &addr, off);
17155  				if (err) {
17156  					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
17157  						map->value_size, off);
17158  					fdput(f);
17159  					return err;
17160  				}
17161  
17162  				aux->map_off = off;
17163  				addr += off;
17164  			}
17165  
17166  			insn[0].imm = (u32)addr;
17167  			insn[1].imm = addr >> 32;
17168  
17169  			/* check whether we recorded this map already */
17170  			for (j = 0; j < env->used_map_cnt; j++) {
17171  				if (env->used_maps[j] == map) {
17172  					aux->map_index = j;
17173  					fdput(f);
17174  					goto next_insn;
17175  				}
17176  			}
17177  
17178  			if (env->used_map_cnt >= MAX_USED_MAPS) {
17179  				fdput(f);
17180  				return -E2BIG;
17181  			}
17182  
17183  			/* hold the map. If the program is rejected by the verifier,
17184  			 * the map will be released by release_maps() or it
17185  			 * will be used by the valid program until it's unloaded
17186  			 * and all maps are released in free_used_maps()
17187  			 */
17188  			bpf_map_inc(map);
17189  
17190  			aux->map_index = env->used_map_cnt;
17191  			env->used_maps[env->used_map_cnt++] = map;
17192  
17193  			if (bpf_map_is_cgroup_storage(map) &&
17194  			    bpf_cgroup_storage_assign(env->prog->aux, map)) {
17195  				verbose(env, "only one cgroup storage of each type is allowed\n");
17196  				fdput(f);
17197  				return -EBUSY;
17198  			}
17199  
17200  			fdput(f);
17201  next_insn:
17202  			insn++;
17203  			i++;
17204  			continue;
17205  		}
17206  
17207  		/* Basic sanity check before we invest more work here. */
17208  		if (!bpf_opcode_in_insntable(insn->code)) {
17209  			verbose(env, "unknown opcode %02x\n", insn->code);
17210  			return -EINVAL;
17211  		}
17212  	}
17213  
17214  	/* now all pseudo BPF_LD_IMM64 instructions load valid
17215  	 * 'struct bpf_map *' into a register instead of user map_fd.
17216  	 * These pointers will be used later by verifier to validate map access.
17217  	 */
17218  	return 0;
17219  }
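
/* A hedged example of the map-fd rewrite performed above (macro form as used
 * in BPF selftests; the fd value 4 is hypothetical):
 *
 *   before: BPF_LD_MAP_FD(BPF_REG_1, 4)
 *           ld_imm64 r1, src_reg = BPF_PSEUDO_MAP_FD, insn[0].imm = 4
 *   after:  insn[0].imm = lower 32 bits of the struct bpf_map pointer,
 *           insn[1].imm = upper 32 bits; src_reg stays BPF_PSEUDO_MAP_FD
 *           until convert_pseudo_ld_imm64() clears it.
 */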
17220  
17221  /* drop refcnt of maps used by the rejected program */
17222  static void release_maps(struct bpf_verifier_env *env)
17223  {
17224  	__bpf_free_used_maps(env->prog->aux, env->used_maps,
17225  			     env->used_map_cnt);
17226  }
17227  
17228  /* drop refcnt of maps used by the rejected program */
17229  static void release_btfs(struct bpf_verifier_env *env)
17230  {
17231  	__bpf_free_used_btfs(env->prog->aux, env->used_btfs,
17232  			     env->used_btf_cnt);
17233  }
17234  
17235  /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
17236  static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
17237  {
17238  	struct bpf_insn *insn = env->prog->insnsi;
17239  	int insn_cnt = env->prog->len;
17240  	int i;
17241  
17242  	for (i = 0; i < insn_cnt; i++, insn++) {
17243  		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
17244  			continue;
17245  		if (insn->src_reg == BPF_PSEUDO_FUNC)
17246  			continue;
17247  		insn->src_reg = 0;
17248  	}
17249  }
17250  
17251  /* single env->prog->insnsi[off] instruction was replaced with the range
17252   * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
17253   * [0, off) and [off, end) to new locations, so the patched range stays zero
17254   */
17255  static void adjust_insn_aux_data(struct bpf_verifier_env *env,
17256  				 struct bpf_insn_aux_data *new_data,
17257  				 struct bpf_prog *new_prog, u32 off, u32 cnt)
17258  {
17259  	struct bpf_insn_aux_data *old_data = env->insn_aux_data;
17260  	struct bpf_insn *insn = new_prog->insnsi;
17261  	u32 old_seen = old_data[off].seen;
17262  	u32 prog_len;
17263  	int i;
17264  
17265  	/* aux info at OFF always needs adjustment, no matter whether the fast
17266  	 * path (cnt == 1) is taken or not. There is no guarantee that the insn
17267  	 * at OFF is the original insn of the old prog.
17268  	 */
17269  	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
17270  
17271  	if (cnt == 1)
17272  		return;
17273  	prog_len = new_prog->len;
17274  
17275  	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
17276  	memcpy(new_data + off + cnt - 1, old_data + off,
17277  	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
17278  	for (i = off; i < off + cnt - 1; i++) {
17279  		/* Expand insni[off]'s seen count to the patched range. */
17280  		new_data[i].seen = old_seen;
17281  		new_data[i].zext_dst = insn_has_def32(env, insn + i);
17282  	}
17283  	env->insn_aux_data = new_data;
17284  	vfree(old_data);
17285  }
17286  
17287  static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
17288  {
17289  	int i;
17290  
17291  	if (len == 1)
17292  		return;
17293  	/* NOTE: fake 'exit' subprog should be updated as well. */
17294  	for (i = 0; i <= env->subprog_cnt; i++) {
17295  		if (env->subprog_info[i].start <= off)
17296  			continue;
17297  		env->subprog_info[i].start += len - 1;
17298  	}
17299  }
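
/* A small worked example with made-up numbers: if one insn at offset 10 is
 * patched into 3 insns (len == 3), every subprog whose start is > 10 shifts
 * by len - 1 == 2, while a subprog starting at exactly 10 keeps its start,
 * since the patch replaces its first instruction in place.
 */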
17300  
17301  static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
17302  {
17303  	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
17304  	int i, sz = prog->aux->size_poke_tab;
17305  	struct bpf_jit_poke_descriptor *desc;
17306  
17307  	for (i = 0; i < sz; i++) {
17308  		desc = &tab[i];
17309  		if (desc->insn_idx <= off)
17310  			continue;
17311  		desc->insn_idx += len - 1;
17312  	}
17313  }
17314  
17315  static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
17316  					    const struct bpf_insn *patch, u32 len)
17317  {
17318  	struct bpf_prog *new_prog;
17319  	struct bpf_insn_aux_data *new_data = NULL;
17320  
17321  	if (len > 1) {
17322  		new_data = vzalloc(array_size(env->prog->len + len - 1,
17323  					      sizeof(struct bpf_insn_aux_data)));
17324  		if (!new_data)
17325  			return NULL;
17326  	}
17327  
17328  	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
17329  	if (IS_ERR(new_prog)) {
17330  		if (PTR_ERR(new_prog) == -ERANGE)
17331  			verbose(env,
17332  				"insn %d cannot be patched due to 16-bit range\n",
17333  				env->insn_aux_data[off].orig_idx);
17334  		vfree(new_data);
17335  		return NULL;
17336  	}
17337  	adjust_insn_aux_data(env, new_data, new_prog, off, len);
17338  	adjust_subprog_starts(env, off, len);
17339  	adjust_poke_descs(new_prog, off, len);
17340  	return new_prog;
17341  }
17342  
17343  static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
17344  					      u32 off, u32 cnt)
17345  {
17346  	int i, j;
17347  
17348  	/* find first prog starting at or after off (first to remove) */
17349  	for (i = 0; i < env->subprog_cnt; i++)
17350  		if (env->subprog_info[i].start >= off)
17351  			break;
17352  	/* find first prog starting at or after off + cnt (first to stay) */
17353  	for (j = i; j < env->subprog_cnt; j++)
17354  		if (env->subprog_info[j].start >= off + cnt)
17355  			break;
17356  	/* if j doesn't start exactly at off + cnt, we are just removing
17357  	 * the front of the previous prog
17358  	 */
17359  	if (env->subprog_info[j].start != off + cnt)
17360  		j--;
17361  
17362  	if (j > i) {
17363  		struct bpf_prog_aux *aux = env->prog->aux;
17364  		int move;
17365  
17366  		/* move fake 'exit' subprog as well */
17367  		move = env->subprog_cnt + 1 - j;
17368  
17369  		memmove(env->subprog_info + i,
17370  			env->subprog_info + j,
17371  			sizeof(*env->subprog_info) * move);
17372  		env->subprog_cnt -= j - i;
17373  
17374  		/* remove func_info */
17375  		if (aux->func_info) {
17376  			move = aux->func_info_cnt - j;
17377  
17378  			memmove(aux->func_info + i,
17379  				aux->func_info + j,
17380  				sizeof(*aux->func_info) * move);
17381  			aux->func_info_cnt -= j - i;
17382  			/* func_info->insn_off is set after all code rewrites,
17383  			 * in adjust_btf_func() - no need to adjust
17384  			 */
17385  		}
17386  	} else {
17387  		/* convert i from "first prog to remove" to "first to adjust" */
17388  		if (env->subprog_info[i].start == off)
17389  			i++;
17390  	}
17391  
17392  	/* update fake 'exit' subprog as well */
17393  	for (; i <= env->subprog_cnt; i++)
17394  		env->subprog_info[i].start -= cnt;
17395  
17396  	return 0;
17397  }
17398  
17399  static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
17400  				      u32 cnt)
17401  {
17402  	struct bpf_prog *prog = env->prog;
17403  	u32 i, l_off, l_cnt, nr_linfo;
17404  	struct bpf_line_info *linfo;
17405  
17406  	nr_linfo = prog->aux->nr_linfo;
17407  	if (!nr_linfo)
17408  		return 0;
17409  
17410  	linfo = prog->aux->linfo;
17411  
17412  	/* find first line info to remove, count lines to be removed */
17413  	for (i = 0; i < nr_linfo; i++)
17414  		if (linfo[i].insn_off >= off)
17415  			break;
17416  
17417  	l_off = i;
17418  	l_cnt = 0;
17419  	for (; i < nr_linfo; i++)
17420  		if (linfo[i].insn_off < off + cnt)
17421  			l_cnt++;
17422  		else
17423  			break;
17424  
17425  	/* If the first live insn doesn't match the first live linfo, it must
17426  	 * "inherit" the last removed linfo.  prog is already modified, so
17427  	 * prog->len == off means no live instructions remain after it (tail removed).
17428  	 */
17429  	if (prog->len != off && l_cnt &&
17430  	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
17431  		l_cnt--;
17432  		linfo[--i].insn_off = off + cnt;
17433  	}
17434  
17435  	/* remove the line info entries which refer to the removed instructions */
17436  	if (l_cnt) {
17437  		memmove(linfo + l_off, linfo + i,
17438  			sizeof(*linfo) * (nr_linfo - i));
17439  
17440  		prog->aux->nr_linfo -= l_cnt;
17441  		nr_linfo = prog->aux->nr_linfo;
17442  	}
17443  
17444  	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
17445  	for (i = l_off; i < nr_linfo; i++)
17446  		linfo[i].insn_off -= cnt;
17447  
17448  	/* fix up all subprogs (incl. 'exit') which start >= off */
17449  	for (i = 0; i <= env->subprog_cnt; i++)
17450  		if (env->subprog_info[i].linfo_idx > l_off) {
17451  			/* program may have started in the removed region but
17452  			 * may not be fully removed
17453  			 */
17454  			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
17455  				env->subprog_info[i].linfo_idx -= l_cnt;
17456  			else
17457  				env->subprog_info[i].linfo_idx = l_off;
17458  		}
17459  
17460  	return 0;
17461  }
17462  
17463  static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
17464  {
17465  	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
17466  	unsigned int orig_prog_len = env->prog->len;
17467  	int err;
17468  
17469  	if (bpf_prog_is_offloaded(env->prog->aux))
17470  		bpf_prog_offload_remove_insns(env, off, cnt);
17471  
17472  	err = bpf_remove_insns(env->prog, off, cnt);
17473  	if (err)
17474  		return err;
17475  
17476  	err = adjust_subprog_starts_after_remove(env, off, cnt);
17477  	if (err)
17478  		return err;
17479  
17480  	err = bpf_adj_linfo_after_remove(env, off, cnt);
17481  	if (err)
17482  		return err;
17483  
17484  	memmove(aux_data + off,	aux_data + off + cnt,
17485  		sizeof(*aux_data) * (orig_prog_len - off - cnt));
17486  
17487  	return 0;
17488  }
17489  
17490  /* The verifier does more data flow analysis than llvm and will not
17491   * explore branches that are dead at run time. Malicious programs can
17492   * have dead code too. Therefore replace all dead at-run-time code
17493   * with 'ja -1'.
17494   *
17495   * Plain nops would not be optimal: e.g. if they sat at the end of the
17496   * program and, through another bug, we managed to jump there, then
17497   * we'd execute beyond program memory. Returning an exception
17498   * code also wouldn't work since we can have subprogs where the dead
17499   * code could be located.
17500   */
17501  static void sanitize_dead_code(struct bpf_verifier_env *env)
17502  {
17503  	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
17504  	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
17505  	struct bpf_insn *insn = env->prog->insnsi;
17506  	const int insn_cnt = env->prog->len;
17507  	int i;
17508  
17509  	for (i = 0; i < insn_cnt; i++) {
17510  		if (aux_data[i].seen)
17511  			continue;
17512  		memcpy(insn + i, &trap, sizeof(trap));
17513  		aux_data[i].zext_dst = false;
17514  	}
17515  }
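
/* As a hedged illustration of the trap above: a dead slot is overwritten with
 * BPF_JMP_IMM(BPF_JA, 0, 0, -1), an unconditional jump whose target is the
 * instruction itself (next insn = idx + off + 1 = idx), so even a buggy jump
 * into the region spins in place instead of running past the end of the
 * program.
 */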
17516  
17517  static bool insn_is_cond_jump(u8 code)
17518  {
17519  	u8 op;
17520  
17521  	op = BPF_OP(code);
17522  	if (BPF_CLASS(code) == BPF_JMP32)
17523  		return op != BPF_JA;
17524  
17525  	if (BPF_CLASS(code) != BPF_JMP)
17526  		return false;
17527  
17528  	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
17529  }
17530  
17531  static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
17532  {
17533  	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
17534  	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
17535  	struct bpf_insn *insn = env->prog->insnsi;
17536  	const int insn_cnt = env->prog->len;
17537  	int i;
17538  
17539  	for (i = 0; i < insn_cnt; i++, insn++) {
17540  		if (!insn_is_cond_jump(insn->code))
17541  			continue;
17542  
17543  		if (!aux_data[i + 1].seen)
17544  			ja.off = insn->off;
17545  		else if (!aux_data[i + 1 + insn->off].seen)
17546  			ja.off = 0;
17547  		else
17548  			continue;
17549  
17550  		if (bpf_prog_is_offloaded(env->prog->aux))
17551  			bpf_prog_offload_replace_insn(env, i, &ja);
17552  
17553  		memcpy(insn, &ja, sizeof(ja));
17554  	}
17555  }
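
/* A worked example (hypothetical offsets): for a conditional jump
 * "if r1 == 0 goto +5" at index i, a dead fall-through insn at i + 1 turns it
 * into "goto +5" (ja.off = insn->off), while a dead jump target at i + 1 + 5
 * turns it into "goto +0", i.e. an unconditional fall-through.
 */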
17556  
17557  static int opt_remove_dead_code(struct bpf_verifier_env *env)
17558  {
17559  	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
17560  	int insn_cnt = env->prog->len;
17561  	int i, err;
17562  
17563  	for (i = 0; i < insn_cnt; i++) {
17564  		int j;
17565  
17566  		j = 0;
17567  		while (i + j < insn_cnt && !aux_data[i + j].seen)
17568  			j++;
17569  		if (!j)
17570  			continue;
17571  
17572  		err = verifier_remove_insns(env, i, j);
17573  		if (err)
17574  			return err;
17575  		insn_cnt = env->prog->len;
17576  	}
17577  
17578  	return 0;
17579  }
17580  
17581  static int opt_remove_nops(struct bpf_verifier_env *env)
17582  {
17583  	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
17584  	struct bpf_insn *insn = env->prog->insnsi;
17585  	int insn_cnt = env->prog->len;
17586  	int i, err;
17587  
17588  	for (i = 0; i < insn_cnt; i++) {
17589  		if (memcmp(&insn[i], &ja, sizeof(ja)))
17590  			continue;
17591  
17592  		err = verifier_remove_insns(env, i, 1);
17593  		if (err)
17594  			return err;
17595  		insn_cnt--;
17596  		i--;
17597  	}
17598  
17599  	return 0;
17600  }
17601  
17602  static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
17603  					 const union bpf_attr *attr)
17604  {
17605  	struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
17606  	struct bpf_insn_aux_data *aux = env->insn_aux_data;
17607  	int i, patch_len, delta = 0, len = env->prog->len;
17608  	struct bpf_insn *insns = env->prog->insnsi;
17609  	struct bpf_prog *new_prog;
17610  	bool rnd_hi32;
17611  
17612  	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
17613  	zext_patch[1] = BPF_ZEXT_REG(0);
17614  	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
17615  	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
17616  	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
17617  	for (i = 0; i < len; i++) {
17618  		int adj_idx = i + delta;
17619  		struct bpf_insn insn;
17620  		int load_reg;
17621  
17622  		insn = insns[adj_idx];
17623  		load_reg = insn_def_regno(&insn);
17624  		if (!aux[adj_idx].zext_dst) {
17625  			u8 code, class;
17626  			u32 imm_rnd;
17627  
17628  			if (!rnd_hi32)
17629  				continue;
17630  
17631  			code = insn.code;
17632  			class = BPF_CLASS(code);
17633  			if (load_reg == -1)
17634  				continue;
17635  
17636  			/* NOTE: arg "reg" (the fourth one) is only used for
17637  			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
17638  			 *       here.
17639  			 */
17640  			if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
17641  				if (class == BPF_LD &&
17642  				    BPF_MODE(code) == BPF_IMM)
17643  					i++;
17644  				continue;
17645  			}
17646  
17647  			/* ctx load could be transformed into wider load. */
17648  			if (class == BPF_LDX &&
17649  			    aux[adj_idx].ptr_type == PTR_TO_CTX)
17650  				continue;
17651  
17652  			imm_rnd = get_random_u32();
17653  			rnd_hi32_patch[0] = insn;
17654  			rnd_hi32_patch[1].imm = imm_rnd;
17655  			rnd_hi32_patch[3].dst_reg = load_reg;
17656  			patch = rnd_hi32_patch;
17657  			patch_len = 4;
17658  			goto apply_patch_buffer;
17659  		}
17660  
17661  		/* Add in a zero-extend instruction if a) the JIT has requested
17662  		 * it or b) it's a CMPXCHG.
17663  		 *
17664  		 * The latter is because BPF_CMPXCHG always loads a value into
17665  		 * R0, and therefore always zero-extends. However some archs'
17666  		 * equivalent instruction only does this load when the
17667  		 * comparison is successful. This detail of CMPXCHG is
17668  		 * orthogonal to the general zero-extension behaviour of the
17669  		 * CPU, so it's treated independently of bpf_jit_needs_zext.
17670  		 */
17671  		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
17672  			continue;
17673  
17674  		/* Zero-extension is done by the caller. */
17675  		if (bpf_pseudo_kfunc_call(&insn))
17676  			continue;
17677  
17678  		if (WARN_ON(load_reg == -1)) {
17679  			verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
17680  			return -EFAULT;
17681  		}
17682  
17683  		zext_patch[0] = insn;
17684  		zext_patch[1].dst_reg = load_reg;
17685  		zext_patch[1].src_reg = load_reg;
17686  		patch = zext_patch;
17687  		patch_len = 2;
17688  apply_patch_buffer:
17689  		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
17690  		if (!new_prog)
17691  			return -ENOMEM;
17692  		env->prog = new_prog;
17693  		insns = new_prog->insnsi;
17694  		aux = env->insn_aux_data;
17695  		delta += patch_len - 1;
17696  	}
17697  
17698  	return 0;
17699  }
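
/* A hedged sketch of the zext patch above, for a JIT that requested it (the
 * register choice is arbitrary):
 *
 *   before: BPF_MOV32_REG(BPF_REG_2, BPF_REG_3)      // w2 = w3
 *   after:  BPF_MOV32_REG(BPF_REG_2, BPF_REG_3),
 *           BPF_ZEXT_REG(BPF_REG_2)                  // r2 = (u32) r2, explicit
 *
 * With BPF_F_TEST_RND_HI32 the opposite happens for 32-bit defs that do not
 * need zero extension: a random value is ORed into the upper 32 bits of the
 * destination to flush out code that silently relies on them being zero.
 */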
17700  
17701  /* convert load instructions that access fields of a context type into a
17702   * sequence of instructions that access fields of the underlying structure:
17703   *     struct __sk_buff    -> struct sk_buff
17704   *     struct bpf_sock_ops -> struct sock
17705   */
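/* For instance (a hedged sketch, with program type and field chosen
 * arbitrarily): a socket filter's load of __sk_buff->len,
 *
 *   r0 = *(u32 *)(r1 + offsetof(struct __sk_buff, len))
 *
 * is rewritten by the program type's convert_ctx_access callback into an
 * equivalent load from the real offset of len inside struct sk_buff, with
 * narrower loads widened and masked as handled further below.
 */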
17706  static int convert_ctx_accesses(struct bpf_verifier_env *env)
17707  {
17708  	const struct bpf_verifier_ops *ops = env->ops;
17709  	int i, cnt, size, ctx_field_size, delta = 0;
17710  	const int insn_cnt = env->prog->len;
17711  	struct bpf_insn insn_buf[16], *insn;
17712  	u32 target_size, size_default, off;
17713  	struct bpf_prog *new_prog;
17714  	enum bpf_access_type type;
17715  	bool is_narrower_load;
17716  
17717  	if (ops->gen_prologue || env->seen_direct_write) {
17718  		if (!ops->gen_prologue) {
17719  			verbose(env, "bpf verifier is misconfigured\n");
17720  			return -EINVAL;
17721  		}
17722  		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
17723  					env->prog);
17724  		if (cnt >= ARRAY_SIZE(insn_buf)) {
17725  			verbose(env, "bpf verifier is misconfigured\n");
17726  			return -EINVAL;
17727  		} else if (cnt) {
17728  			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
17729  			if (!new_prog)
17730  				return -ENOMEM;
17731  
17732  			env->prog = new_prog;
17733  			delta += cnt - 1;
17734  		}
17735  	}
17736  
17737  	if (bpf_prog_is_offloaded(env->prog->aux))
17738  		return 0;
17739  
17740  	insn = env->prog->insnsi + delta;
17741  
17742  	for (i = 0; i < insn_cnt; i++, insn++) {
17743  		bpf_convert_ctx_access_t convert_ctx_access;
17744  		u8 mode;
17745  
17746  		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
17747  		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
17748  		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
17749  		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
17750  		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
17751  		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
17752  		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
17753  			type = BPF_READ;
17754  		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
17755  			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
17756  			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
17757  			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
17758  			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
17759  			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
17760  			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
17761  			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
17762  			type = BPF_WRITE;
17763  		} else {
17764  			continue;
17765  		}
17766  
17767  		if (type == BPF_WRITE &&
17768  		    env->insn_aux_data[i + delta].sanitize_stack_spill) {
17769  			struct bpf_insn patch[] = {
17770  				*insn,
17771  				BPF_ST_NOSPEC(),
17772  			};
17773  
17774  			cnt = ARRAY_SIZE(patch);
17775  			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
17776  			if (!new_prog)
17777  				return -ENOMEM;
17778  
17779  			delta    += cnt - 1;
17780  			env->prog = new_prog;
17781  			insn      = new_prog->insnsi + i + delta;
17782  			continue;
17783  		}
17784  
17785  		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
17786  		case PTR_TO_CTX:
17787  			if (!ops->convert_ctx_access)
17788  				continue;
17789  			convert_ctx_access = ops->convert_ctx_access;
17790  			break;
17791  		case PTR_TO_SOCKET:
17792  		case PTR_TO_SOCK_COMMON:
17793  			convert_ctx_access = bpf_sock_convert_ctx_access;
17794  			break;
17795  		case PTR_TO_TCP_SOCK:
17796  			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
17797  			break;
17798  		case PTR_TO_XDP_SOCK:
17799  			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
17800  			break;
17801  		case PTR_TO_BTF_ID:
17802  		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
17803  		/* Unlike plain PTR_TO_BTF_ID, PTR_TO_BTF_ID | MEM_ALLOC always
17804  		 * has a valid lifetime and an active ref_obj_id, but the same
17805  		 * cannot be said once it is marked PTR_UNTRUSTED, hence we must
17806  		 * handle any faults for loads into such types. BPF_WRITE is
17807  		 * disallowed for this case.
17808  		 */
17809  		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
17810  			if (type == BPF_READ) {
17811  				if (BPF_MODE(insn->code) == BPF_MEM)
17812  					insn->code = BPF_LDX | BPF_PROBE_MEM |
17813  						     BPF_SIZE((insn)->code);
17814  				else
17815  					insn->code = BPF_LDX | BPF_PROBE_MEMSX |
17816  						     BPF_SIZE((insn)->code);
17817  				env->prog->aux->num_exentries++;
17818  			}
17819  			continue;
17820  		default:
17821  			continue;
17822  		}
17823  
17824  		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
17825  		size = BPF_LDST_BYTES(insn);
17826  		mode = BPF_MODE(insn->code);
17827  
17828  		/* If the read access is a narrower load of the field,
17829  		 * convert it to a 4/8-byte load to minimize program type specific
17830  		 * convert_ctx_access changes. If the conversion is successful,
17831  		 * we will apply a proper mask to the result.
17832  		 */
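		/* Example (illustrative only): a 1-byte read within a 4-byte
		 * field becomes a 4-byte read of the whole field; the code
		 * further below then shifts the result by the byte offset
		 * (endianness-aware, via bpf_ctx_narrow_access_offset()) and
		 * masks it down to the original access width.
		 */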
17833  		is_narrower_load = size < ctx_field_size;
17834  		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
17835  		off = insn->off;
17836  		if (is_narrower_load) {
17837  			u8 size_code;
17838  
17839  			if (type == BPF_WRITE) {
17840  				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
17841  				return -EINVAL;
17842  			}
17843  
17844  			size_code = BPF_H;
17845  			if (ctx_field_size == 4)
17846  				size_code = BPF_W;
17847  			else if (ctx_field_size == 8)
17848  				size_code = BPF_DW;
17849  
17850  			insn->off = off & ~(size_default - 1);
17851  			insn->code = BPF_LDX | BPF_MEM | size_code;
17852  		}
17853  
17854  		target_size = 0;
17855  		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
17856  					 &target_size);
17857  		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
17858  		    (ctx_field_size && !target_size)) {
17859  			verbose(env, "bpf verifier is misconfigured\n");
17860  			return -EINVAL;
17861  		}
17862  
17863  		if (is_narrower_load && size < target_size) {
17864  			u8 shift = bpf_ctx_narrow_access_offset(
17865  				off, size, size_default) * 8;
17866  			if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
17867  				verbose(env, "bpf verifier narrow ctx load misconfigured\n");
17868  				return -EINVAL;
17869  			}
17870  			if (ctx_field_size <= 4) {
17871  				if (shift)
17872  					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
17873  									insn->dst_reg,
17874  									shift);
17875  				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
17876  								(1 << size * 8) - 1);
17877  			} else {
17878  				if (shift)
17879  					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
17880  									insn->dst_reg,
17881  									shift);
17882  				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
17883  								(1ULL << size * 8) - 1);
17884  			}
17885  		}
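		/* For BPF_MEMSX loads the rewrite above used a plain BPF_MEM
		 * load, so re-apply the sign extension by emitting a
		 * sign-extending ALU64 MOV (off = size * 8) on the destination
		 * register.
		 */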
17886  		if (mode == BPF_MEMSX)
17887  			insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
17888  						       insn->dst_reg, insn->dst_reg,
17889  						       size * 8, 0);
17890  
17891  		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17892  		if (!new_prog)
17893  			return -ENOMEM;
17894  
17895  		delta += cnt - 1;
17896  
17897  		/* keep walking new program and skip insns we just inserted */
17898  		env->prog = new_prog;
17899  		insn      = new_prog->insnsi + i + delta;
17900  	}
17901  
17902  	return 0;
17903  }
17904  
17905  static int jit_subprogs(struct bpf_verifier_env *env)
17906  {
17907  	struct bpf_prog *prog = env->prog, **func, *tmp;
17908  	int i, j, subprog_start, subprog_end = 0, len, subprog;
17909  	struct bpf_map *map_ptr;
17910  	struct bpf_insn *insn;
17911  	void *old_bpf_func;
17912  	int err, num_exentries;
17913  
17914  	if (env->subprog_cnt <= 1)
17915  		return 0;
17916  
17917  	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
17918  		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
17919  			continue;
17920  
17921  		/* Upon error here we cannot fall back to interpreter but
17922  		 * need a hard reject of the program. Thus -EFAULT is
17923  		 * propagated in any case.
17924  		 */
17925  		subprog = find_subprog(env, i + insn->imm + 1);
17926  		if (subprog < 0) {
17927  			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
17928  				  i + insn->imm + 1);
17929  			return -EFAULT;
17930  		}
17931  		/* temporarily remember subprog id inside insn instead of
17932  		 * aux_data, since next loop will split up all insns into funcs
17933  		 */
17934  		insn->off = subprog;
17935  		/* remember original imm in case JIT fails and fallback
17936  		 * to interpreter will be needed
17937  		 */
17938  		env->insn_aux_data[i].call_imm = insn->imm;
17939  		/* point imm to __bpf_call_base+1 from JITs point of view */
17940  		insn->imm = 1;
17941  		if (bpf_pseudo_func(insn))
17942  			/* jit (e.g. x86_64) may emit fewer instructions
17943  			 * if it learns a u32 imm is the same as a u64 imm.
17944  			 * Force a non-zero value here.
17945  			 */
17946  			insn[1].imm = 1;
17947  	}
17948  
17949  	err = bpf_prog_alloc_jited_linfo(prog);
17950  	if (err)
17951  		goto out_undo_insn;
17952  
17953  	err = -ENOMEM;
17954  	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
17955  	if (!func)
17956  		goto out_undo_insn;
17957  
17958  	for (i = 0; i < env->subprog_cnt; i++) {
17959  		subprog_start = subprog_end;
17960  		subprog_end = env->subprog_info[i + 1].start;
17961  
17962  		len = subprog_end - subprog_start;
17963  		/* bpf_prog_run() doesn't call subprogs directly,
17964  		 * hence main prog stats include the runtime of subprogs.
17965  		 * subprogs don't have IDs and are not reachable via prog_get_next_id;
17966  		 * func[i]->stats will never be accessed and stays NULL.
17967  		 */
17968  		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
17969  		if (!func[i])
17970  			goto out_free;
17971  		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
17972  		       len * sizeof(struct bpf_insn));
17973  		func[i]->type = prog->type;
17974  		func[i]->len = len;
17975  		if (bpf_prog_calc_tag(func[i]))
17976  			goto out_free;
17977  		func[i]->is_func = 1;
17978  		func[i]->aux->func_idx = i;
17979  		/* Below members will be freed only at prog->aux */
17980  		func[i]->aux->btf = prog->aux->btf;
17981  		func[i]->aux->func_info = prog->aux->func_info;
17982  		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
17983  		func[i]->aux->poke_tab = prog->aux->poke_tab;
17984  		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
17985  
17986  		for (j = 0; j < prog->aux->size_poke_tab; j++) {
17987  			struct bpf_jit_poke_descriptor *poke;
17988  
17989  			poke = &prog->aux->poke_tab[j];
17990  			if (poke->insn_idx < subprog_end &&
17991  			    poke->insn_idx >= subprog_start)
17992  				poke->aux = func[i]->aux;
17993  		}
17994  
17995  		func[i]->aux->name[0] = 'F';
17996  		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
17997  		func[i]->jit_requested = 1;
17998  		func[i]->blinding_requested = prog->blinding_requested;
17999  		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
18000  		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
18001  		func[i]->aux->linfo = prog->aux->linfo;
18002  		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
18003  		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
18004  		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
18005  		num_exentries = 0;
18006  		insn = func[i]->insnsi;
18007  		for (j = 0; j < func[i]->len; j++, insn++) {
18008  			if (BPF_CLASS(insn->code) == BPF_LDX &&
18009  			    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
18010  			     BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
18011  				num_exentries++;
18012  		}
18013  		func[i]->aux->num_exentries = num_exentries;
18014  		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
18015  		func[i] = bpf_int_jit_compile(func[i]);
18016  		if (!func[i]->jited) {
18017  			err = -ENOTSUPP;
18018  			goto out_free;
18019  		}
18020  		cond_resched();
18021  	}
18022  
18023  	/* at this point all bpf functions were successfully JITed
18024  	 * now populate all bpf_calls with correct addresses and
18025  	 * run last pass of JIT
18026  	 */
18027  	for (i = 0; i < env->subprog_cnt; i++) {
18028  		insn = func[i]->insnsi;
18029  		for (j = 0; j < func[i]->len; j++, insn++) {
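			/* A bpf_pseudo_func insn is a BPF_LD_IMM64 spanning two
			 * insns: store the JITed callee address split into its
			 * low and high 32-bit halves across the pair.
			 */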
18030  			if (bpf_pseudo_func(insn)) {
18031  				subprog = insn->off;
18032  				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
18033  				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
18034  				continue;
18035  			}
18036  			if (!bpf_pseudo_call(insn))
18037  				continue;
18038  			subprog = insn->off;
18039  			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
18040  		}
18041  
18042  		/* we use the aux data to keep a list of the start addresses
18043  		 * of the JITed images for each function in the program
18044  		 *
18045  		 * for some architectures, such as powerpc64, the imm field
18046  		 * might not be large enough to hold the offset of the start
18047  		 * address of the callee's JITed image from __bpf_call_base
18048  		 *
18049  		 * in such cases, we can lookup the start address of a callee
18050  		 * by using its subprog id, available from the off field of
18051  		 * the call instruction, as an index for this list
18052  		 */
18053  		func[i]->aux->func = func;
18054  		func[i]->aux->func_cnt = env->subprog_cnt;
18055  	}
18056  	for (i = 0; i < env->subprog_cnt; i++) {
18057  		old_bpf_func = func[i]->bpf_func;
18058  		tmp = bpf_int_jit_compile(func[i]);
18059  		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
18060  			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
18061  			err = -ENOTSUPP;
18062  			goto out_free;
18063  		}
18064  		cond_resched();
18065  	}
18066  
18067  	/* finally lock prog and jit images for all functions and
18068  	 * populate kallsyms. Begin at the first subprogram, since
18069  	 * bpf_prog_load will add the kallsyms for the main program.
18070  	 */
18071  	for (i = 1; i < env->subprog_cnt; i++) {
18072  		bpf_prog_lock_ro(func[i]);
18073  		bpf_prog_kallsyms_add(func[i]);
18074  	}
18075  
18076  	/* Last step: make the now unused interpreter insns from the main
18077  	 * prog consistent for later dump requests, so that they look the
18078  	 * same as if they had only been interpreted.
18079  	 */
18080  	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
18081  		if (bpf_pseudo_func(insn)) {
18082  			insn[0].imm = env->insn_aux_data[i].call_imm;
18083  			insn[1].imm = insn->off;
18084  			insn->off = 0;
18085  			continue;
18086  		}
18087  		if (!bpf_pseudo_call(insn))
18088  			continue;
18089  		insn->off = env->insn_aux_data[i].call_imm;
18090  		subprog = find_subprog(env, i + insn->off + 1);
18091  		insn->imm = subprog;
18092  	}
18093  
18094  	prog->jited = 1;
18095  	prog->bpf_func = func[0]->bpf_func;
18096  	prog->jited_len = func[0]->jited_len;
18097  	prog->aux->extable = func[0]->aux->extable;
18098  	prog->aux->num_exentries = func[0]->aux->num_exentries;
18099  	prog->aux->func = func;
18100  	prog->aux->func_cnt = env->subprog_cnt;
18101  	bpf_prog_jit_attempt_done(prog);
18102  	return 0;
18103  out_free:
18104  	/* We failed JIT'ing, so at this point we need to unregister poke
18105  	 * descriptors from subprogs, so that the kernel does not attempt to
18106  	 * patch them anymore as we're freeing the subprog JIT memory.
18107  	 */
18108  	for (i = 0; i < prog->aux->size_poke_tab; i++) {
18109  		map_ptr = prog->aux->poke_tab[i].tail_call.map;
18110  		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
18111  	}
18112  	/* At this point we're guaranteed that poke descriptors are not
18113  	 * live anymore. We can just unlink the descriptor table as it's
18114  	 * released with the main prog.
18115  	 */
18116  	for (i = 0; i < env->subprog_cnt; i++) {
18117  		if (!func[i])
18118  			continue;
18119  		func[i]->aux->poke_tab = NULL;
18120  		bpf_jit_free(func[i]);
18121  	}
18122  	kfree(func);
18123  out_undo_insn:
18124  	/* cleanup main prog to be interpreted */
18125  	prog->jit_requested = 0;
18126  	prog->blinding_requested = 0;
18127  	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
18128  		if (!bpf_pseudo_call(insn))
18129  			continue;
18130  		insn->off = 0;
18131  		insn->imm = env->insn_aux_data[i].call_imm;
18132  	}
18133  	bpf_prog_jit_attempt_done(prog);
18134  	return err;
18135  }
18136  
18137  static int fixup_call_args(struct bpf_verifier_env *env)
18138  {
18139  #ifndef CONFIG_BPF_JIT_ALWAYS_ON
18140  	struct bpf_prog *prog = env->prog;
18141  	struct bpf_insn *insn = prog->insnsi;
18142  	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
18143  	int i, depth;
18144  #endif
18145  	int err = 0;
18146  
18147  	if (env->prog->jit_requested &&
18148  	    !bpf_prog_is_offloaded(env->prog->aux)) {
18149  		err = jit_subprogs(env);
18150  		if (err == 0)
18151  			return 0;
18152  		if (err == -EFAULT)
18153  			return err;
18154  	}
18155  #ifndef CONFIG_BPF_JIT_ALWAYS_ON
18156  	if (has_kfunc_call) {
18157  		verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
18158  		return -EINVAL;
18159  	}
18160  	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
18161  		/* When JIT fails the progs with bpf2bpf calls and tail_calls
18162  		 * have to be rejected, since interpreter doesn't support them yet.
18163  		 */
18164  		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
18165  		return -EINVAL;
18166  	}
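	/* JIT was not used or failed (other than -EFAULT): fall back to the
	 * interpreter by encoding each callee's stack depth into its pseudo
	 * call insn via bpf_patch_call_args().
	 */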
18167  	for (i = 0; i < prog->len; i++, insn++) {
18168  		if (bpf_pseudo_func(insn)) {
18169  			/* When JIT fails the progs with callback calls
18170  			 * have to be rejected, since interpreter doesn't support them yet.
18171  			 */
18172  			verbose(env, "callbacks are not allowed in non-JITed programs\n");
18173  			return -EINVAL;
18174  		}
18175  
18176  		if (!bpf_pseudo_call(insn))
18177  			continue;
18178  		depth = get_callee_stack_depth(env, insn, i);
18179  		if (depth < 0)
18180  			return depth;
18181  		bpf_patch_call_args(insn, depth);
18182  	}
18183  	err = 0;
18184  #endif
18185  	return err;
18186  }
18187  
18188  /* replace a generic kfunc with a specialized version if necessary */
18189  static void specialize_kfunc(struct bpf_verifier_env *env,
18190  			     u32 func_id, u16 offset, unsigned long *addr)
18191  {
18192  	struct bpf_prog *prog = env->prog;
18193  	bool seen_direct_write;
18194  	void *xdp_kfunc;
18195  	bool is_rdonly;
18196  
18197  	if (bpf_dev_bound_kfunc_id(func_id)) {
18198  		xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
18199  		if (xdp_kfunc) {
18200  			*addr = (unsigned long)xdp_kfunc;
18201  			return;
18202  		}
18203  		/* fall back to the default kfunc when not supported by the netdev */
18204  	}
18205  
18206  	if (offset)
18207  		return;
18208  
18209  	if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
18210  		seen_direct_write = env->seen_direct_write;
18211  		is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
18212  
18213  		if (is_rdonly)
18214  			*addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
18215  
18216  		/* restore env->seen_direct_write to its original value, since
18217  		 * may_access_direct_pkt_data mutates it
18218  		 */
18219  		env->seen_direct_write = seen_direct_write;
18220  	}
18221  }
18222  
18223  static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
18224  					    u16 struct_meta_reg,
18225  					    u16 node_offset_reg,
18226  					    struct bpf_insn *insn,
18227  					    struct bpf_insn *insn_buf,
18228  					    int *cnt)
18229  {
18230  	struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
18231  	struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
18232  
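	/* Emit: struct_meta_reg = &kptr_struct_meta (BPF_LD_IMM64, two insns),
	 * node_offset_reg = insert_off, followed by the original kfunc call.
	 */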
18233  	insn_buf[0] = addr[0];
18234  	insn_buf[1] = addr[1];
18235  	insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
18236  	insn_buf[3] = *insn;
18237  	*cnt = 4;
18238  }
18239  
18240  static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
18241  			    struct bpf_insn *insn_buf, int insn_idx, int *cnt)
18242  {
18243  	const struct bpf_kfunc_desc *desc;
18244  
18245  	if (!insn->imm) {
18246  		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
18247  		return -EINVAL;
18248  	}
18249  
18250  	*cnt = 0;
18251  
18252  	/* insn->imm has the btf func_id. Replace it with an offset relative to
18253  	 * __bpf_call_base, unless the JIT needs to call functions that are
18254  	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
18255  	 */
18256  	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
18257  	if (!desc) {
18258  		verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
18259  			insn->imm);
18260  		return -EFAULT;
18261  	}
18262  
18263  	if (!bpf_jit_supports_far_kfunc_call())
18264  		insn->imm = BPF_CALL_IMM(desc->addr);
18265  	if (insn->off)
18266  		return 0;
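	/* For bpf_obj_new_impl(), materialize the arguments the program leaves
	 * for the verifier to fill in: R1 = object size, R2 = &btf_struct_meta,
	 * followed by the original call.
	 */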
18267  	if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
18268  		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
18269  		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
18270  		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
18271  
18272  		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
18273  		insn_buf[1] = addr[0];
18274  		insn_buf[2] = addr[1];
18275  		insn_buf[3] = *insn;
18276  		*cnt = 4;
18277  	} else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
18278  		   desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
18279  		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
18280  		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
18281  
18282  		if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
18283  		    !kptr_struct_meta) {
18284  			verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
18285  				insn_idx);
18286  			return -EFAULT;
18287  		}
18288  
18289  		insn_buf[0] = addr[0];
18290  		insn_buf[1] = addr[1];
18291  		insn_buf[2] = *insn;
18292  		*cnt = 3;
18293  	} else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
18294  		   desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
18295  		   desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
18296  		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
18297  		int struct_meta_reg = BPF_REG_3;
18298  		int node_offset_reg = BPF_REG_4;
18299  
18300  		/* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
18301  		if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
18302  			struct_meta_reg = BPF_REG_4;
18303  			node_offset_reg = BPF_REG_5;
18304  		}
18305  
18306  		if (!kptr_struct_meta) {
18307  			verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
18308  				insn_idx);
18309  			return -EFAULT;
18310  		}
18311  
18312  		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
18313  						node_offset_reg, insn, insn_buf, cnt);
18314  	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
18315  		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
18316  		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
18317  		*cnt = 1;
18318  	}
18319  	return 0;
18320  }
18321  
18322  /* Do various post-verification rewrites in a single program pass.
18323   * These rewrites simplify JIT and interpreter implementations.
18324   */
18325  static int do_misc_fixups(struct bpf_verifier_env *env)
18326  {
18327  	struct bpf_prog *prog = env->prog;
18328  	enum bpf_attach_type eatype = prog->expected_attach_type;
18329  	enum bpf_prog_type prog_type = resolve_prog_type(prog);
18330  	struct bpf_insn *insn = prog->insnsi;
18331  	const struct bpf_func_proto *fn;
18332  	const int insn_cnt = prog->len;
18333  	const struct bpf_map_ops *ops;
18334  	struct bpf_insn_aux_data *aux;
18335  	struct bpf_insn insn_buf[16];
18336  	struct bpf_prog *new_prog;
18337  	struct bpf_map *map_ptr;
18338  	int i, ret, cnt, delta = 0;
18339  
18340  	for (i = 0; i < insn_cnt; i++, insn++) {
18341  		/* Make divide-by-zero exceptions impossible. */
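		/* BPF semantics: dst /= 0 yields dst = 0, dst %= 0 leaves dst
		 * unchanged (truncated to 32 bits for ALU32). The patchlets
		 * below add a runtime check of the divisor so the JITed code
		 * never executes a native division by zero.
		 */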
18342  		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
18343  		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
18344  		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
18345  		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
18346  			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
18347  			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
18348  			struct bpf_insn *patchlet;
18349  			struct bpf_insn chk_and_div[] = {
18350  				/* [R,W]x div 0 -> 0 */
18351  				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
18352  					     BPF_JNE | BPF_K, insn->src_reg,
18353  					     0, 2, 0),
18354  				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
18355  				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
18356  				*insn,
18357  			};
18358  			struct bpf_insn chk_and_mod[] = {
18359  				/* [R,W]x mod 0 -> [R,W]x */
18360  				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
18361  					     BPF_JEQ | BPF_K, insn->src_reg,
18362  					     0, 1 + (is64 ? 0 : 1), 0),
18363  				*insn,
18364  				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
18365  				BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
18366  			};
18367  
18368  			patchlet = isdiv ? chk_and_div : chk_and_mod;
18369  			cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
18370  				      ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
18371  
18372  			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
18373  			if (!new_prog)
18374  				return -ENOMEM;
18375  
18376  			delta    += cnt - 1;
18377  			env->prog = prog = new_prog;
18378  			insn      = new_prog->insnsi + i + delta;
18379  			continue;
18380  		}
18381  
18382  		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
18383  		if (BPF_CLASS(insn->code) == BPF_LD &&
18384  		    (BPF_MODE(insn->code) == BPF_ABS ||
18385  		     BPF_MODE(insn->code) == BPF_IND)) {
18386  			cnt = env->ops->gen_ld_abs(insn, insn_buf);
18387  			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
18388  				verbose(env, "bpf verifier is misconfigured\n");
18389  				return -EINVAL;
18390  			}
18391  
18392  			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18393  			if (!new_prog)
18394  				return -ENOMEM;
18395  
18396  			delta    += cnt - 1;
18397  			env->prog = prog = new_prog;
18398  			insn      = new_prog->insnsi + i + delta;
18399  			continue;
18400  		}
18401  
18402  		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
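		/* Sketch of the sanitization below: compute a mask in BPF_REG_AX
		 * from aux->alu_limit such that the mask is all-ones when the
		 * offset register is within the verified limit and zero
		 * otherwise, then AND it with the offset so an out-of-range
		 * (speculative) offset cannot move the pointer out of bounds.
		 */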
18403  		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
18404  		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
18405  			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
18406  			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
18407  			struct bpf_insn *patch = &insn_buf[0];
18408  			bool issrc, isneg, isimm;
18409  			u32 off_reg;
18410  
18411  			aux = &env->insn_aux_data[i + delta];
18412  			if (!aux->alu_state ||
18413  			    aux->alu_state == BPF_ALU_NON_POINTER)
18414  				continue;
18415  
18416  			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
18417  			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
18418  				BPF_ALU_SANITIZE_SRC;
18419  			isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
18420  
18421  			off_reg = issrc ? insn->src_reg : insn->dst_reg;
18422  			if (isimm) {
18423  				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
18424  			} else {
18425  				if (isneg)
18426  					*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
18427  				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
18428  				*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
18429  				*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
18430  				*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
18431  				*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
18432  				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
18433  			}
18434  			if (!issrc)
18435  				*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
18436  			insn->src_reg = BPF_REG_AX;
18437  			if (isneg)
18438  				insn->code = insn->code == code_add ?
18439  					     code_sub : code_add;
18440  			*patch++ = *insn;
18441  			if (issrc && isneg && !isimm)
18442  				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
18443  			cnt = patch - insn_buf;
18444  
18445  			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18446  			if (!new_prog)
18447  				return -ENOMEM;
18448  
18449  			delta    += cnt - 1;
18450  			env->prog = prog = new_prog;
18451  			insn      = new_prog->insnsi + i + delta;
18452  			continue;
18453  		}
18454  
18455  		if (insn->code != (BPF_JMP | BPF_CALL))
18456  			continue;
18457  		if (insn->src_reg == BPF_PSEUDO_CALL)
18458  			continue;
18459  		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
18460  			ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
18461  			if (ret)
18462  				return ret;
18463  			if (cnt == 0)
18464  				continue;
18465  
18466  			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18467  			if (!new_prog)
18468  				return -ENOMEM;
18469  
18470  			delta	 += cnt - 1;
18471  			env->prog = prog = new_prog;
18472  			insn	  = new_prog->insnsi + i + delta;
18473  			continue;
18474  		}
18475  
18476  		if (insn->imm == BPF_FUNC_get_route_realm)
18477  			prog->dst_needed = 1;
18478  		if (insn->imm == BPF_FUNC_get_prandom_u32)
18479  			bpf_user_rnd_init_once();
18480  		if (insn->imm == BPF_FUNC_override_return)
18481  			prog->kprobe_override = 1;
18482  		if (insn->imm == BPF_FUNC_tail_call) {
18483  			/* If we tail call into other programs, we
18484  			 * cannot make any assumptions since they can
18485  			 * be replaced dynamically during runtime in
18486  			 * the program array.
18487  			 */
18488  			prog->cb_access = 1;
18489  			if (!allow_tail_call_in_subprogs(env))
18490  				prog->aux->stack_depth = MAX_BPF_STACK;
18491  			prog->aux->max_pkt_offset = MAX_PACKET_OFF;
18492  
18493  			/* mark bpf_tail_call as a different opcode to avoid a
18494  			 * conditional branch in the interpreter for every normal
18495  			 * call and to prevent accidental JITing by a JIT compiler
18496  			 * that doesn't support bpf_tail_call yet
18497  			 */
18498  			insn->imm = 0;
18499  			insn->code = BPF_JMP | BPF_TAIL_CALL;
18500  
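			/* If the map pointer and key are constant and the map
			 * is not poisoned, record a poke descriptor (its index
			 * is stored in insn->imm as ret + 1) so the JIT can emit
			 * a direct jump that the tail-call map patches at
			 * runtime when programs are added or removed.
			 */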
18501  			aux = &env->insn_aux_data[i + delta];
18502  			if (env->bpf_capable && !prog->blinding_requested &&
18503  			    prog->jit_requested &&
18504  			    !bpf_map_key_poisoned(aux) &&
18505  			    !bpf_map_ptr_poisoned(aux) &&
18506  			    !bpf_map_ptr_unpriv(aux)) {
18507  				struct bpf_jit_poke_descriptor desc = {
18508  					.reason = BPF_POKE_REASON_TAIL_CALL,
18509  					.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
18510  					.tail_call.key = bpf_map_key_immediate(aux),
18511  					.insn_idx = i + delta,
18512  				};
18513  
18514  				ret = bpf_jit_add_poke_descriptor(prog, &desc);
18515  				if (ret < 0) {
18516  					verbose(env, "adding tail call poke descriptor failed\n");
18517  					return ret;
18518  				}
18519  
18520  				insn->imm = ret + 1;
18521  				continue;
18522  			}
18523  
18524  			if (!bpf_map_ptr_unpriv(aux))
18525  				continue;
18526  
18527  			/* instead of changing every JIT dealing with tail_call
18528  			 * emit two extra insns:
18529  			 * if (index >= max_entries) goto out;
18530  			 * index &= array->index_mask;
18531  			 * to avoid out-of-bounds cpu speculation
18532  			 */
18533  			if (bpf_map_ptr_poisoned(aux)) {
18534  				verbose(env, "tail_call abusing map_ptr\n");
18535  				return -EINVAL;
18536  			}
18537  
18538  			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
18539  			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
18540  						  map_ptr->max_entries, 2);
18541  			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
18542  						    container_of(map_ptr,
18543  								 struct bpf_array,
18544  								 map)->index_mask);
18545  			insn_buf[2] = *insn;
18546  			cnt = 3;
18547  			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18548  			if (!new_prog)
18549  				return -ENOMEM;
18550  
18551  			delta    += cnt - 1;
18552  			env->prog = prog = new_prog;
18553  			insn      = new_prog->insnsi + i + delta;
18554  			continue;
18555  		}
18556  
18557  		if (insn->imm == BPF_FUNC_timer_set_callback) {
18558  			/* The verifier will process callback_fn as many times as necessary
18559  			 * with different maps and the register states prepared by
18560  			 * set_timer_callback_state will be accurate.
18561  			 *
18562  			 * The following use case is valid:
18563  			 *   map1 is shared by prog1, prog2, prog3.
18564  			 *   prog1 calls bpf_timer_init for some map1 elements
18565  			 *   prog2 calls bpf_timer_set_callback for some map1 elements.
18566  			 *     Those that were not bpf_timer_init-ed will return -EINVAL.
18567  			 *   prog3 calls bpf_timer_start for some map1 elements.
18568  			 *     Those that were not both bpf_timer_init-ed and
18569  			 *     bpf_timer_set_callback-ed will return -EINVAL.
18570  			 */
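			/* Pass prog->aux as a hidden third argument (R3) so the
			 * helper can associate the callback with this program.
			 */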
18571  			struct bpf_insn ld_addrs[2] = {
18572  				BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
18573  			};
18574  
18575  			insn_buf[0] = ld_addrs[0];
18576  			insn_buf[1] = ld_addrs[1];
18577  			insn_buf[2] = *insn;
18578  			cnt = 3;
18579  
18580  			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18581  			if (!new_prog)
18582  				return -ENOMEM;
18583  
18584  			delta    += cnt - 1;
18585  			env->prog = prog = new_prog;
18586  			insn      = new_prog->insnsi + i + delta;
18587  			goto patch_call_imm;
18588  		}
18589  
18590  		if (is_storage_get_function(insn->imm)) {
18591  			if (!env->prog->aux->sleepable ||
18592  			    env->insn_aux_data[i + delta].storage_get_func_atomic)
18593  				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
18594  			else
18595  				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
18596  			insn_buf[1] = *insn;
18597  			cnt = 2;
18598  
18599  			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18600  			if (!new_prog)
18601  				return -ENOMEM;
18602  
18603  			delta += cnt - 1;
18604  			env->prog = prog = new_prog;
18605  			insn = new_prog->insnsi + i + delta;
18606  			goto patch_call_imm;
18607  		}
18608  
18609  		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
18610  		 * and other inlining handlers are currently limited to 64 bit
18611  		 * only.
18612  		 */
18613  		if (prog->jit_requested && BITS_PER_LONG == 64 &&
18614  		    (insn->imm == BPF_FUNC_map_lookup_elem ||
18615  		     insn->imm == BPF_FUNC_map_update_elem ||
18616  		     insn->imm == BPF_FUNC_map_delete_elem ||
18617  		     insn->imm == BPF_FUNC_map_push_elem   ||
18618  		     insn->imm == BPF_FUNC_map_pop_elem    ||
18619  		     insn->imm == BPF_FUNC_map_peek_elem   ||
18620  		     insn->imm == BPF_FUNC_redirect_map    ||
18621  		     insn->imm == BPF_FUNC_for_each_map_elem ||
18622  		     insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
18623  			aux = &env->insn_aux_data[i + delta];
18624  			if (bpf_map_ptr_poisoned(aux))
18625  				goto patch_call_imm;
18626  
18627  			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
18628  			ops = map_ptr->ops;
18629  			if (insn->imm == BPF_FUNC_map_lookup_elem &&
18630  			    ops->map_gen_lookup) {
18631  				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
18632  				if (cnt == -EOPNOTSUPP)
18633  					goto patch_map_ops_generic;
18634  				if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
18635  					verbose(env, "bpf verifier is misconfigured\n");
18636  					return -EINVAL;
18637  				}
18638  
18639  				new_prog = bpf_patch_insn_data(env, i + delta,
18640  							       insn_buf, cnt);
18641  				if (!new_prog)
18642  					return -ENOMEM;
18643  
18644  				delta    += cnt - 1;
18645  				env->prog = prog = new_prog;
18646  				insn      = new_prog->insnsi + i + delta;
18647  				continue;
18648  			}
18649  
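			/* For the remaining map helpers, assert at build time
			 * that the map ops have helper-compatible signatures,
			 * then (below) patch the call to invoke the map op
			 * directly instead of going through the generic helper.
			 */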
18650  			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
18651  				     (void *(*)(struct bpf_map *map, void *key))NULL));
18652  			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
18653  				     (long (*)(struct bpf_map *map, void *key))NULL));
18654  			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
18655  				     (long (*)(struct bpf_map *map, void *key, void *value,
18656  					      u64 flags))NULL));
18657  			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
18658  				     (long (*)(struct bpf_map *map, void *value,
18659  					      u64 flags))NULL));
18660  			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
18661  				     (long (*)(struct bpf_map *map, void *value))NULL));
18662  			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
18663  				     (long (*)(struct bpf_map *map, void *value))NULL));
18664  			BUILD_BUG_ON(!__same_type(ops->map_redirect,
18665  				     (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
18666  			BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
18667  				     (long (*)(struct bpf_map *map,
18668  					      bpf_callback_t callback_fn,
18669  					      void *callback_ctx,
18670  					      u64 flags))NULL));
18671  			BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
18672  				     (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
18673  
18674  patch_map_ops_generic:
18675  			switch (insn->imm) {
18676  			case BPF_FUNC_map_lookup_elem:
18677  				insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
18678  				continue;
18679  			case BPF_FUNC_map_update_elem:
18680  				insn->imm = BPF_CALL_IMM(ops->map_update_elem);
18681  				continue;
18682  			case BPF_FUNC_map_delete_elem:
18683  				insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
18684  				continue;
18685  			case BPF_FUNC_map_push_elem:
18686  				insn->imm = BPF_CALL_IMM(ops->map_push_elem);
18687  				continue;
18688  			case BPF_FUNC_map_pop_elem:
18689  				insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
18690  				continue;
18691  			case BPF_FUNC_map_peek_elem:
18692  				insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
18693  				continue;
18694  			case BPF_FUNC_redirect_map:
18695  				insn->imm = BPF_CALL_IMM(ops->map_redirect);
18696  				continue;
18697  			case BPF_FUNC_for_each_map_elem:
18698  				insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
18699  				continue;
18700  			case BPF_FUNC_map_lookup_percpu_elem:
18701  				insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
18702  				continue;
18703  			}
18704  
18705  			goto patch_call_imm;
18706  		}
18707  
18708  		/* Implement bpf_jiffies64 inline. */
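		/* Replace the helper call with a BPF_LD_IMM64 of &jiffies
		 * followed by a 64-bit load, i.e. a direct read of the jiffies
		 * variable.
		 */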
18709  		if (prog->jit_requested && BITS_PER_LONG == 64 &&
18710  		    insn->imm == BPF_FUNC_jiffies64) {
18711  			struct bpf_insn ld_jiffies_addr[2] = {
18712  				BPF_LD_IMM64(BPF_REG_0,
18713  					     (unsigned long)&jiffies),
18714  			};
18715  
18716  			insn_buf[0] = ld_jiffies_addr[0];
18717  			insn_buf[1] = ld_jiffies_addr[1];
18718  			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
18719  						  BPF_REG_0, 0);
18720  			cnt = 3;
18721  
18722  			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
18723  						       cnt);
18724  			if (!new_prog)
18725  				return -ENOMEM;
18726  
18727  			delta    += cnt - 1;
18728  			env->prog = prog = new_prog;
18729  			insn      = new_prog->insnsi + i + delta;
18730  			continue;
18731  		}
18732  
18733  		/* Implement bpf_get_func_arg inline. */
18734  		if (prog_type == BPF_PROG_TYPE_TRACING &&
18735  		    insn->imm == BPF_FUNC_get_func_arg) {
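			/* Inline roughly as:
			 *   if (n >= nr_args) r0 = -EINVAL;
			 *   else { *(u64 *)r3 = ctx[n]; r0 = 0; }
			 * where nr_args is stashed by the trampoline at ctx - 8.
			 */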
18736  			/* Load nr_args from ctx - 8 */
18737  			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
18738  			insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
18739  			insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
18740  			insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
18741  			insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
18742  			insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
18743  			insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
18744  			insn_buf[7] = BPF_JMP_A(1);
18745  			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
18746  			cnt = 9;
18747  
18748  			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18749  			if (!new_prog)
18750  				return -ENOMEM;
18751  
18752  			delta    += cnt - 1;
18753  			env->prog = prog = new_prog;
18754  			insn      = new_prog->insnsi + i + delta;
18755  			continue;
18756  		}
18757  
18758  		/* Implement bpf_get_func_ret inline. */
18759  		if (prog_type == BPF_PROG_TYPE_TRACING &&
18760  		    insn->imm == BPF_FUNC_get_func_ret) {
18761  			if (eatype == BPF_TRACE_FEXIT ||
18762  			    eatype == BPF_MODIFY_RETURN) {
18763  				/* Load nr_args from ctx - 8 */
18764  				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
18765  				insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
18766  				insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
18767  				insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
18768  				insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
18769  				insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
18770  				cnt = 6;
18771  			} else {
18772  				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
18773  				cnt = 1;
18774  			}
18775  
18776  			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18777  			if (!new_prog)
18778  				return -ENOMEM;
18779  
18780  			delta    += cnt - 1;
18781  			env->prog = prog = new_prog;
18782  			insn      = new_prog->insnsi + i + delta;
18783  			continue;
18784  		}
18785  
18786  		/* Implement get_func_arg_cnt inline. */
18787  		if (prog_type == BPF_PROG_TYPE_TRACING &&
18788  		    insn->imm == BPF_FUNC_get_func_arg_cnt) {
18789  			/* Load nr_args from ctx - 8 */
18790  			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
18791  
18792  			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
18793  			if (!new_prog)
18794  				return -ENOMEM;
18795  
18796  			env->prog = prog = new_prog;
18797  			insn      = new_prog->insnsi + i + delta;
18798  			continue;
18799  		}
18800  
18801  		/* Implement bpf_get_func_ip inline. */
18802  		if (prog_type == BPF_PROG_TYPE_TRACING &&
18803  		    insn->imm == BPF_FUNC_get_func_ip) {
18804  			/* Load IP address from ctx - 16 */
18805  			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
18806  
18807  			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
18808  			if (!new_prog)
18809  				return -ENOMEM;
18810  
18811  			env->prog = prog = new_prog;
18812  			insn      = new_prog->insnsi + i + delta;
18813  			continue;
18814  		}
18815  
18816  patch_call_imm:
18817  		fn = env->ops->get_func_proto(insn->imm, env->prog);
18818  		/* all functions that have a prototype and that the verifier
18819  		 * allowed programs to call must be real in-kernel functions
18820  		 */
18821  		if (!fn->func) {
18822  			verbose(env,
18823  				"kernel subsystem misconfigured func %s#%d\n",
18824  				func_id_name(insn->imm), insn->imm);
18825  			return -EFAULT;
18826  		}
18827  		insn->imm = fn->func - __bpf_call_base;
18828  	}
18829  
18830  	/* Since poke tab is now finalized, publish aux to tracker. */
18831  	for (i = 0; i < prog->aux->size_poke_tab; i++) {
18832  		map_ptr = prog->aux->poke_tab[i].tail_call.map;
18833  		if (!map_ptr->ops->map_poke_track ||
18834  		    !map_ptr->ops->map_poke_untrack ||
18835  		    !map_ptr->ops->map_poke_run) {
18836  			verbose(env, "bpf verifier is misconfigured\n");
18837  			return -EINVAL;
18838  		}
18839  
18840  		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
18841  		if (ret < 0) {
18842  			verbose(env, "tracking tail call prog failed\n");
18843  			return ret;
18844  		}
18845  	}
18846  
18847  	sort_kfunc_descs_by_imm_off(env->prog);
18848  
18849  	return 0;
18850  }
18851  
18852  static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
18853  					int position,
18854  					s32 stack_base,
18855  					u32 callback_subprogno,
18856  					u32 *cnt)
18857  {
18858  	s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
18859  	s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
18860  	s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
18861  	int reg_loop_max = BPF_REG_6;
18862  	int reg_loop_cnt = BPF_REG_7;
18863  	int reg_loop_ctx = BPF_REG_8;
18864  
18865  	struct bpf_prog *new_prog;
18866  	u32 callback_start;
18867  	u32 call_insn_offset;
18868  	s32 callback_offset;
18869  
18870  	/* This represents an inlined version of bpf_iter.c:bpf_loop;
18871  	 * be careful to keep this code and that function in sync.
18872  	 */
18873  	struct bpf_insn insn_buf[] = {
18874  		/* Return error and jump to the end of the patch if
18875  		 * expected number of iterations is too big.
18876  		 */
18877  		BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
18878  		BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
18879  		BPF_JMP_IMM(BPF_JA, 0, 0, 16),
18880  		/* spill R6, R7, R8 to use these as loop vars */
18881  		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
18882  		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
18883  		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
18884  		/* initialize loop vars */
18885  		BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
18886  		BPF_MOV32_IMM(reg_loop_cnt, 0),
18887  		BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
18888  		/* loop header,
18889  		 * if reg_loop_cnt >= reg_loop_max skip the loop body
18890  		 */
18891  		BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
18892  		/* callback call,
18893  		 * correct callback offset would be set after patching
18894  		 */
18895  		BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
18896  		BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
18897  		BPF_CALL_REL(0),
18898  		/* increment loop counter */
18899  		BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
18900  		/* jump to loop header if callback returned 0 */
18901  		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
18902  		/* return value of bpf_loop,
18903  		 * set R0 to the number of iterations
18904  		 */
18905  		BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
18906  		/* restore original values of R6, R7, R8 */
18907  		BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
18908  		BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
18909  		BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
18910  	};
18911  
18912  	*cnt = ARRAY_SIZE(insn_buf);
18913  	new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
18914  	if (!new_prog)
18915  		return new_prog;
18916  
18917  	/* callback start is known only after patching */
18918  	callback_start = env->subprog_info[callback_subprogno].start;
18919  	/* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
18920  	call_insn_offset = position + 12;
18921  	callback_offset = callback_start - call_insn_offset - 1;
18922  	new_prog->insnsi[call_insn_offset].imm = callback_offset;
18923  
18924  	return new_prog;
18925  }
18926  
18927  static bool is_bpf_loop_call(struct bpf_insn *insn)
18928  {
18929  	return insn->code == (BPF_JMP | BPF_CALL) &&
18930  		insn->src_reg == 0 &&
18931  		insn->imm == BPF_FUNC_loop;
18932  }
18933  
18934  /* For all sub-programs in the program (including main) check
18935   * insn_aux_data to see if there are bpf_loop calls that require
18936   * inlining. If such calls are found, they are replaced with a
18937   * sequence of instructions produced by the `inline_bpf_loop` function and
18938   * the subprog stack_depth is increased by the size of 3 registers.
18939   * This stack space is used to spill the values of R6, R7 and R8.  These
18940   * registers are used to store the loop bound, counter and context
18941   * variables.
18942   */
18943  static int optimize_bpf_loop(struct bpf_verifier_env *env)
18944  {
18945  	struct bpf_subprog_info *subprogs = env->subprog_info;
18946  	int i, cur_subprog = 0, cnt, delta = 0;
18947  	struct bpf_insn *insn = env->prog->insnsi;
18948  	int insn_cnt = env->prog->len;
18949  	u16 stack_depth = subprogs[cur_subprog].stack_depth;
18950  	u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
18951  	u16 stack_depth_extra = 0;
18952  
18953  	for (i = 0; i < insn_cnt; i++, insn++) {
18954  		struct bpf_loop_inline_state *inline_state =
18955  			&env->insn_aux_data[i + delta].loop_inline_state;
18956  
18957  		if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
18958  			struct bpf_prog *new_prog;
18959  
18960  			stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
18961  			new_prog = inline_bpf_loop(env,
18962  						   i + delta,
18963  						   -(stack_depth + stack_depth_extra),
18964  						   inline_state->callback_subprogno,
18965  						   &cnt);
18966  			if (!new_prog)
18967  				return -ENOMEM;
18968  
18969  			delta     += cnt - 1;
18970  			env->prog  = new_prog;
18971  			insn       = new_prog->insnsi + i + delta;
18972  		}
18973  
18974  		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
18975  			subprogs[cur_subprog].stack_depth += stack_depth_extra;
18976  			cur_subprog++;
18977  			stack_depth = subprogs[cur_subprog].stack_depth;
18978  			stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
18979  			stack_depth_extra = 0;
18980  		}
18981  	}
18982  
18983  	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
18984  
18985  	return 0;
18986  }
18987  
18988  static void free_states(struct bpf_verifier_env *env)
18989  {
18990  	struct bpf_verifier_state_list *sl, *sln;
18991  	int i;
18992  
18993  	sl = env->free_list;
18994  	while (sl) {
18995  		sln = sl->next;
18996  		free_verifier_state(&sl->state, false);
18997  		kfree(sl);
18998  		sl = sln;
18999  	}
19000  	env->free_list = NULL;
19001  
19002  	if (!env->explored_states)
19003  		return;
19004  
19005  	for (i = 0; i < state_htab_size(env); i++) {
19006  		sl = env->explored_states[i];
19007  
19008  		while (sl) {
19009  			sln = sl->next;
19010  			free_verifier_state(&sl->state, false);
19011  			kfree(sl);
19012  			sl = sln;
19013  		}
19014  		env->explored_states[i] = NULL;
19015  	}
19016  }
19017  
19018  static int do_check_common(struct bpf_verifier_env *env, int subprog)
19019  {
19020  	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
19021  	struct bpf_verifier_state *state;
19022  	struct bpf_reg_state *regs;
19023  	int ret, i;
19024  
19025  	env->prev_linfo = NULL;
19026  	env->pass_cnt++;
19027  
19028  	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
19029  	if (!state)
19030  		return -ENOMEM;
19031  	state->curframe = 0;
19032  	state->speculative = false;
19033  	state->branches = 1;
19034  	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
19035  	if (!state->frame[0]) {
19036  		kfree(state);
19037  		return -ENOMEM;
19038  	}
19039  	env->cur_state = state;
19040  	init_func_state(env, state->frame[0],
19041  			BPF_MAIN_FUNC /* callsite */,
19042  			0 /* frameno */,
19043  			subprog);
19044  	state->first_insn_idx = env->subprog_info[subprog].start;
19045  	state->last_insn_idx = -1;
19046  
19047  	regs = state->frame[state->curframe]->regs;
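	/* For separately verified subprogs and EXT programs, argument register
	 * types are derived from BTF below; for the main program only R1 (the
	 * context pointer) is set up.
	 */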
19048  	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
19049  		ret = btf_prepare_func_args(env, subprog, regs);
19050  		if (ret)
19051  			goto out;
19052  		for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
19053  			if (regs[i].type == PTR_TO_CTX)
19054  				mark_reg_known_zero(env, regs, i);
19055  			else if (regs[i].type == SCALAR_VALUE)
19056  				mark_reg_unknown(env, regs, i);
19057  			else if (base_type(regs[i].type) == PTR_TO_MEM) {
19058  				const u32 mem_size = regs[i].mem_size;
19059  
19060  				mark_reg_known_zero(env, regs, i);
19061  				regs[i].mem_size = mem_size;
19062  				regs[i].id = ++env->id_gen;
19063  			}
19064  		}
19065  	} else {
19066  		/* 1st arg to a function */
19067  		regs[BPF_REG_1].type = PTR_TO_CTX;
19068  		mark_reg_known_zero(env, regs, BPF_REG_1);
19069  		ret = btf_check_subprog_arg_match(env, subprog, regs);
19070  		if (ret == -EFAULT)
19071  			/* unlikely verifier bug. abort.
19072  			 * ret == 0 and ret < 0 are sadly acceptable for the
19073  			 * main() function due to backward compatibility.
19074  			 * E.g. a socket filter program may be written as:
19075  			 * int bpf_prog(struct pt_regs *ctx)
19076  			 * and never dereference that ctx in the program.
19077  			 * 'struct pt_regs' is a type mismatch for a socket
19078  			 * filter, which should be using 'struct __sk_buff'.
19079  			 */
19080  			goto out;
19081  	}
19082  
19083  	ret = do_check(env);
19084  out:
19085  	/* check for NULL is necessary, since cur_state can be freed inside
19086  	 * do_check() under memory pressure.
19087  	 */
19088  	if (env->cur_state) {
19089  		free_verifier_state(env->cur_state, true);
19090  		env->cur_state = NULL;
19091  	}
19092  	while (!pop_stack(env, NULL, NULL, false));
19093  	if (!ret && pop_log)
19094  		bpf_vlog_reset(&env->log, 0);
19095  	free_states(env);
19096  	return ret;
19097  }
19098  
19099  /* Verify all global functions in a BPF program one by one based on their BTF.
19100   * All global functions must pass verification. Otherwise the whole program is rejected.
19101   * Consider:
19102   * int bar(int);
19103   * int foo(int f)
19104   * {
19105   *    return bar(f);
19106   * }
19107   * int bar(int b)
19108   * {
19109   *    ...
19110   * }
19111   * foo() will be verified first for R1=any_scalar_value. During verification it
19112   * will be assumed that bar() has already been verified successfully and the call
19113   * to bar() from foo() will be checked for type match only. Later bar() will be verified
19114   * independently to check that it's safe for R1=any_scalar_value.
19115   */
19116  static int do_check_subprogs(struct bpf_verifier_env *env)
19117  {
19118  	struct bpf_prog_aux *aux = env->prog->aux;
19119  	int i, ret;
19120  
19121  	if (!aux->func_info)
19122  		return 0;
19123  
19124  	for (i = 1; i < env->subprog_cnt; i++) {
19125  		if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
19126  			continue;
19127  		env->insn_idx = env->subprog_info[i].start;
19128  		WARN_ON_ONCE(env->insn_idx == 0);
19129  		ret = do_check_common(env, i);
19130  		if (ret) {
19131  			return ret;
19132  		} else if (env->log.level & BPF_LOG_LEVEL) {
19133  			verbose(env,
19134  				"Func#%d is safe for any args that match its prototype\n",
19135  				i);
19136  		}
19137  	}
19138  	return 0;
19139  }
19140  
19141  static int do_check_main(struct bpf_verifier_env *env)
19142  {
19143  	int ret;
19144  
19145  	env->insn_idx = 0;
19146  	ret = do_check_common(env, 0);
19147  	if (!ret)
19148  		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
19149  	return ret;
19150  }
19151  
19152  
19153  static void print_verification_stats(struct bpf_verifier_env *env)
19154  {
19155  	int i;
19156  
19157  	if (env->log.level & BPF_LOG_STATS) {
19158  		verbose(env, "verification time %lld usec\n",
19159  			div_u64(env->verification_time, 1000));
19160  		verbose(env, "stack depth ");
19161  		for (i = 0; i < env->subprog_cnt; i++) {
19162  			u32 depth = env->subprog_info[i].stack_depth;
19163  
19164  			verbose(env, "%d", depth);
19165  			if (i + 1 < env->subprog_cnt)
19166  				verbose(env, "+");
19167  		}
19168  		verbose(env, "\n");
19169  	}
19170  	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
19171  		"total_states %d peak_states %d mark_read %d\n",
19172  		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
19173  		env->max_states_per_insn, env->total_states,
19174  		env->peak_states, env->longest_mark_read_walk);
19175  }
19176  
19177  static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
19178  {
19179  	const struct btf_type *t, *func_proto;
19180  	const struct bpf_struct_ops *st_ops;
19181  	const struct btf_member *member;
19182  	struct bpf_prog *prog = env->prog;
19183  	u32 btf_id, member_idx;
19184  	const char *mname;
19185  
19186  	if (!prog->gpl_compatible) {
19187  		verbose(env, "struct ops programs must have a GPL compatible license\n");
19188  		return -EINVAL;
19189  	}
19190  
19191  	btf_id = prog->aux->attach_btf_id;
19192  	st_ops = bpf_struct_ops_find(btf_id);
19193  	if (!st_ops) {
19194  		verbose(env, "attach_btf_id %u is not a supported struct\n",
19195  			btf_id);
19196  		return -ENOTSUPP;
19197  	}
19198  
19199  	t = st_ops->type;
19200  	member_idx = prog->expected_attach_type;
19201  	if (member_idx >= btf_type_vlen(t)) {
19202  		verbose(env, "attach to invalid member idx %u of struct %s\n",
19203  			member_idx, st_ops->name);
19204  		return -EINVAL;
19205  	}
19206  
19207  	member = &btf_type_member(t)[member_idx];
19208  	mname = btf_name_by_offset(btf_vmlinux, member->name_off);
19209  	func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
19210  					       NULL);
19211  	if (!func_proto) {
19212  		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
19213  			mname, member_idx, st_ops->name);
19214  		return -EINVAL;
19215  	}
19216  
19217  	if (st_ops->check_member) {
19218  		int err = st_ops->check_member(t, member, prog);
19219  
19220  		if (err) {
19221  			verbose(env, "attach to unsupported member %s of struct %s\n",
19222  				mname, st_ops->name);
19223  			return err;
19224  		}
19225  	}
19226  
19227  	prog->aux->attach_func_proto = func_proto;
19228  	prog->aux->attach_func_name = mname;
19229  	env->ops = st_ops->verifier_ops;
19230  
19231  	return 0;
19232  }
19233  #define SECURITY_PREFIX "security_"
19234  
19235  static int check_attach_modify_return(unsigned long addr, const char *func_name)
19236  {
19237  	if (within_error_injection_list(addr) ||
19238  	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
19239  		return 0;
19240  
19241  	return -EINVAL;
19242  }
19243  
19244  /* list of non-sleepable functions that are otherwise on
19245   * ALLOW_ERROR_INJECTION list
19246   */
19247  BTF_SET_START(btf_non_sleepable_error_inject)
19248  /* The three functions below can be called from both sleepable and non-sleepable
19249   * contexts. Assume non-sleepable from the bpf safety point of view.
19250   */
19251  BTF_ID(func, __filemap_add_folio)
19252  BTF_ID(func, should_fail_alloc_page)
19253  BTF_ID(func, should_failslab)
19254  BTF_SET_END(btf_non_sleepable_error_inject)
19255  
19256  static int check_non_sleepable_error_inject(u32 btf_id)
19257  {
19258  	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
19259  }
19260  
19261  int bpf_check_attach_target(struct bpf_verifier_log *log,
19262  			    const struct bpf_prog *prog,
19263  			    const struct bpf_prog *tgt_prog,
19264  			    u32 btf_id,
19265  			    struct bpf_attach_target_info *tgt_info)
19266  {
19267  	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
19268  	const char prefix[] = "btf_trace_";
19269  	int ret = 0, subprog = -1, i;
19270  	const struct btf_type *t;
19271  	bool conservative = true;
19272  	const char *tname;
19273  	struct btf *btf;
19274  	long addr = 0;
19275  	struct module *mod = NULL;
19276  
19277  	if (!btf_id) {
19278  		bpf_log(log, "Tracing programs must provide btf_id\n");
19279  		return -EINVAL;
19280  	}
19281  	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
19282  	if (!btf) {
19283  		bpf_log(log,
19284  			"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
19285  		return -EINVAL;
19286  	}
19287  	t = btf_type_by_id(btf, btf_id);
19288  	if (!t) {
19289  		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
19290  		return -EINVAL;
19291  	}
19292  	tname = btf_name_by_offset(btf, t->name_off);
19293  	if (!tname) {
19294  		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
19295  		return -EINVAL;
19296  	}
19297  	if (tgt_prog) {
19298  		struct bpf_prog_aux *aux = tgt_prog->aux;
19299  
19300  		if (bpf_prog_is_dev_bound(prog->aux) &&
19301  		    !bpf_prog_dev_bound_match(prog, tgt_prog)) {
19302  			bpf_log(log, "Target program bound device mismatch\n");
19303  			return -EINVAL;
19304  		}
19305  
19306  		for (i = 0; i < aux->func_info_cnt; i++)
19307  			if (aux->func_info[i].type_id == btf_id) {
19308  				subprog = i;
19309  				break;
19310  			}
19311  		if (subprog == -1) {
19312  			bpf_log(log, "Subprog %s doesn't exist\n", tname);
19313  			return -EINVAL;
19314  		}
19315  		conservative = aux->func_info_aux[subprog].unreliable;
19316  		if (prog_extension) {
19317  			if (conservative) {
19318  				bpf_log(log,
19319  					"Cannot replace static functions\n");
19320  				return -EINVAL;
19321  			}
19322  			if (!prog->jit_requested) {
19323  				bpf_log(log,
19324  					"Extension programs should be JITed\n");
19325  				return -EINVAL;
19326  			}
19327  		}
19328  		if (!tgt_prog->jited) {
19329  			bpf_log(log, "Can attach to only JITed progs\n");
19330  			return -EINVAL;
19331  		}
19332  		if (tgt_prog->type == prog->type) {
19333  			/* Cannot fentry/fexit another fentry/fexit program.
19334  			 * Cannot attach program extension to another extension.
19335  			 * It's ok to attach fentry/fexit to extension program.
19336  			 */
19337  			bpf_log(log, "Cannot recursively attach\n");
19338  			return -EINVAL;
19339  		}
19340  		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
19341  		    prog_extension &&
19342  		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
19343  		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
19344  			/* Program extensions can extend all program types except
19345  			 * fentry/fexit, for the following reason. Fentry/fexit
19346  			 * programs are used for performance analysis and stats,
19347  			 * and can be attached to any program type except
19348  			 * themselves. When an extension program replaces, say,
19349  			 * an XDP function, it must remain possible to analyze
19350  			 * the performance of all functions: both the original
19351  			 * XDP program and its extension. Hence attaching
19352  			 * fentry/fexit to BPF_PROG_TYPE_EXT is allowed. If
19353  			 * extending fentry/fexit were allowed, it would be
19354  			 * possible to create a long call chain
19355  			 * fentry->extension->fentry->extension beyond any
19356  			 * reasonable stack size. Hence extending fentry/fexit
19357  			 * is not allowed.
19358  			 */
19359  			bpf_log(log, "Cannot extend fentry/fexit\n");
19360  			return -EINVAL;
19361  		}
19362  	} else {
19363  		if (prog_extension) {
19364  			bpf_log(log, "Cannot replace kernel functions\n");
19365  			return -EINVAL;
19366  		}
19367  	}
19368  
19369  	switch (prog->expected_attach_type) {
19370  	case BPF_TRACE_RAW_TP:
19371  		if (tgt_prog) {
19372  			bpf_log(log,
19373  				"Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
19374  			return -EINVAL;
19375  		}
19376  		if (!btf_type_is_typedef(t)) {
19377  			bpf_log(log, "attach_btf_id %u is not a typedef\n",
19378  				btf_id);
19379  			return -EINVAL;
19380  		}
19381  		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
19382  			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
19383  				btf_id, tname);
19384  			return -EINVAL;
19385  		}
19386  		tname += sizeof(prefix) - 1;
19387  		t = btf_type_by_id(btf, t->type);
19388  		if (!btf_type_is_ptr(t))
19389  			/* should never happen in a valid vmlinux build */
19390  			return -EINVAL;
19391  		t = btf_type_by_id(btf, t->type);
19392  		if (!btf_type_is_func_proto(t))
19393  			/* should never happen in a valid vmlinux build */
19394  			return -EINVAL;
19395  
19396  		break;
19397  	case BPF_TRACE_ITER:
19398  		if (!btf_type_is_func(t)) {
19399  			bpf_log(log, "attach_btf_id %u is not a function\n",
19400  				btf_id);
19401  			return -EINVAL;
19402  		}
19403  		t = btf_type_by_id(btf, t->type);
19404  		if (!btf_type_is_func_proto(t))
19405  			return -EINVAL;
19406  		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
19407  		if (ret)
19408  			return ret;
19409  		break;
19410  	default:
19411  		if (!prog_extension)
19412  			return -EINVAL;
19413  		fallthrough;
19414  	case BPF_MODIFY_RETURN:
19415  	case BPF_LSM_MAC:
19416  	case BPF_LSM_CGROUP:
19417  	case BPF_TRACE_FENTRY:
19418  	case BPF_TRACE_FEXIT:
19419  		if (!btf_type_is_func(t)) {
19420  			bpf_log(log, "attach_btf_id %u is not a function\n",
19421  				btf_id);
19422  			return -EINVAL;
19423  		}
19424  		if (prog_extension &&
19425  		    btf_check_type_match(log, prog, btf, t))
19426  			return -EINVAL;
19427  		t = btf_type_by_id(btf, t->type);
19428  		if (!btf_type_is_func_proto(t))
19429  			return -EINVAL;
19430  
19431  		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
19432  		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
19433  		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
19434  			return -EINVAL;
19435  
19436  		if (tgt_prog && conservative)
19437  			t = NULL;
19438  
19439  		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
19440  		if (ret < 0)
19441  			return ret;
19442  
19443  		if (tgt_prog) {
19444  			if (subprog == 0)
19445  				addr = (long) tgt_prog->bpf_func;
19446  			else
19447  				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
19448  		} else {
19449  			if (btf_is_module(btf)) {
19450  				mod = btf_try_get_module(btf);
19451  				if (mod)
19452  					addr = find_kallsyms_symbol_value(mod, tname);
19453  				else
19454  					addr = 0;
19455  			} else {
19456  				addr = kallsyms_lookup_name(tname);
19457  			}
19458  			if (!addr) {
19459  				module_put(mod);
19460  				bpf_log(log,
19461  					"The address of function %s cannot be found\n",
19462  					tname);
19463  				return -ENOENT;
19464  			}
19465  		}
19466  
19467  		if (prog->aux->sleepable) {
19468  			ret = -EINVAL;
19469  			switch (prog->type) {
19470  			case BPF_PROG_TYPE_TRACING:
19471  
19472  				/* fentry/fexit/fmod_ret progs can be sleepable if they are attached
19473  				 * to a function on the ALLOW_ERROR_INJECTION list and not in the denylist.
19474  				 */
19475  				if (!check_non_sleepable_error_inject(btf_id) &&
19476  				    within_error_injection_list(addr))
19477  					ret = 0;
19478  				/* fentry/fexit/fmod_ret progs can also be sleepable if they are
19479  				 * in the fmodret id set with the KF_SLEEPABLE flag.
19480  				 */
19481  				else {
19482  					u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
19483  										prog);
19484  
19485  					if (flags && (*flags & KF_SLEEPABLE))
19486  						ret = 0;
19487  				}
19488  				break;
19489  			case BPF_PROG_TYPE_LSM:
19490  				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
19491  				 * Only some of them are sleepable.
19492  				 */
19493  				if (bpf_lsm_is_sleepable_hook(btf_id))
19494  					ret = 0;
19495  				break;
19496  			default:
19497  				break;
19498  			}
19499  			if (ret) {
19500  				module_put(mod);
19501  				bpf_log(log, "%s is not sleepable\n", tname);
19502  				return ret;
19503  			}
19504  		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
19505  			if (tgt_prog) {
19506  				module_put(mod);
19507  				bpf_log(log, "can't modify return codes of BPF programs\n");
19508  				return -EINVAL;
19509  			}
19510  			ret = -EINVAL;
19511  			if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
19512  			    !check_attach_modify_return(addr, tname))
19513  				ret = 0;
19514  			if (ret) {
19515  				module_put(mod);
19516  				bpf_log(log, "%s() is not modifiable\n", tname);
19517  				return ret;
19518  			}
19519  		}
19520  
19521  		break;
19522  	}
19523  	tgt_info->tgt_addr = addr;
19524  	tgt_info->tgt_name = tname;
19525  	tgt_info->tgt_type = t;
19526  	tgt_info->tgt_mod = mod;
19527  	return 0;
19528  }
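/* Illustrative example (hedged sketch, not from this file): for BPF_TRACE_RAW_TP the
 * attach_btf_id is expected to point at a "btf_trace_" typedef, and the code above
 * walks TYPEDEF -> PTR -> FUNC_PROTO while stripping that prefix from the name.
 * E.g. for the sched_switch tracepoint:
 *
 *	TYPEDEF "btf_trace_sched_switch"
 *	  -> PTR
 *	    -> FUNC_PROTO (the tracepoint's argument signature)
 *
 * so tgt_info->tgt_name ends up as "sched_switch" and tgt_info->tgt_type as the
 * FUNC_PROTO. With libbpf such a program is typically declared as
 * SEC("tp_btf/sched_switch").
 */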
19529  
19530  BTF_SET_START(btf_id_deny)
19531  BTF_ID_UNUSED
19532  #ifdef CONFIG_SMP
19533  BTF_ID(func, migrate_disable)
19534  BTF_ID(func, migrate_enable)
19535  #endif
19536  #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
19537  BTF_ID(func, rcu_read_unlock_strict)
19538  #endif
19539  #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
19540  BTF_ID(func, preempt_count_add)
19541  BTF_ID(func, preempt_count_sub)
19542  #endif
19543  #ifdef CONFIG_PREEMPT_RCU
19544  BTF_ID(func, __rcu_read_lock)
19545  BTF_ID(func, __rcu_read_unlock)
19546  #endif
19547  BTF_SET_END(btf_id_deny)
19548  
19549  static bool can_be_sleepable(struct bpf_prog *prog)
19550  {
19551  	if (prog->type == BPF_PROG_TYPE_TRACING) {
19552  		switch (prog->expected_attach_type) {
19553  		case BPF_TRACE_FENTRY:
19554  		case BPF_TRACE_FEXIT:
19555  		case BPF_MODIFY_RETURN:
19556  		case BPF_TRACE_ITER:
19557  			return true;
19558  		default:
19559  			return false;
19560  		}
19561  	}
19562  	return prog->type == BPF_PROG_TYPE_LSM ||
19563  	       prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
19564  	       prog->type == BPF_PROG_TYPE_STRUCT_OPS;
19565  }
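/* Illustrative note (hedged sketch, not from this file): with libbpf, sleepable
 * variants of the program types accepted above are usually requested via a ".s"
 * section suffix, which sets BPF_F_SLEEPABLE at load time, e.g.:
 *
 *	SEC("fentry.s/do_unlinkat")
 *	int BPF_PROG(trace_unlink_sleepable, int dfd, struct filename *name)
 *	{
 *		return 0;
 *	}
 *
 * or SEC("lsm.s/file_open") for a sleepable LSM program.
 */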
19566  
19567  static int check_attach_btf_id(struct bpf_verifier_env *env)
19568  {
19569  	struct bpf_prog *prog = env->prog;
19570  	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
19571  	struct bpf_attach_target_info tgt_info = {};
19572  	u32 btf_id = prog->aux->attach_btf_id;
19573  	struct bpf_trampoline *tr;
19574  	int ret;
19575  	u64 key;
19576  
19577  	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
19578  		if (prog->aux->sleepable)
19579  			/* attach_btf_id checked to be zero already */
19580  			return 0;
19581  		verbose(env, "Syscall programs can only be sleepable\n");
19582  		return -EINVAL;
19583  	}
19584  
19585  	if (prog->aux->sleepable && !can_be_sleepable(prog)) {
19586  		verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
19587  		return -EINVAL;
19588  	}
19589  
19590  	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
19591  		return check_struct_ops_btf_id(env);
19592  
19593  	if (prog->type != BPF_PROG_TYPE_TRACING &&
19594  	    prog->type != BPF_PROG_TYPE_LSM &&
19595  	    prog->type != BPF_PROG_TYPE_EXT)
19596  		return 0;
19597  
19598  	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
19599  	if (ret)
19600  		return ret;
19601  
19602  	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
19603  		/* To make an freplace program equivalent to its target, it needs to
19604  		 * inherit env->ops and expected_attach_type for the rest of the
19605  		 * verification.
19606  		 */
19607  		env->ops = bpf_verifier_ops[tgt_prog->type];
19608  		prog->expected_attach_type = tgt_prog->expected_attach_type;
19609  	}
19610  
19611  	/* store info about the attachment target that will be used later */
19612  	prog->aux->attach_func_proto = tgt_info.tgt_type;
19613  	prog->aux->attach_func_name = tgt_info.tgt_name;
19614  	prog->aux->mod = tgt_info.tgt_mod;
19615  
19616  	if (tgt_prog) {
19617  		prog->aux->saved_dst_prog_type = tgt_prog->type;
19618  		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
19619  	}
19620  
19621  	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
19622  		prog->aux->attach_btf_trace = true;
19623  		return 0;
19624  	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
19625  		if (!bpf_iter_prog_supported(prog))
19626  			return -EINVAL;
19627  		return 0;
19628  	}
19629  
19630  	if (prog->type == BPF_PROG_TYPE_LSM) {
19631  		ret = bpf_lsm_verify_prog(&env->log, prog);
19632  		if (ret < 0)
19633  			return ret;
19634  	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
19635  		   btf_id_set_contains(&btf_id_deny, btf_id)) {
19636  		return -EINVAL;
19637  	}
19638  
19639  	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
19640  	tr = bpf_trampoline_get(key, &tgt_info);
19641  	if (!tr)
19642  		return -ENOMEM;
19643  
19644  	prog->aux->dst_trampoline = tr;
19645  	return 0;
19646  }
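/* Illustrative example (names are hypothetical, sketch only): for an EXT (freplace)
 * program the target is another BPF program's function rather than a kernel symbol.
 * With libbpf the target is usually selected like:
 *
 *	SEC("freplace/xdp_subprog")
 *	int new_xdp_subprog(struct xdp_md *ctx)
 *	{
 *		return XDP_PASS;
 *	}
 *
 * plus bpf_program__set_attach_target(prog, tgt_prog_fd, "xdp_subprog") at load time,
 * which fills dst_prog/attach_btf_id, i.e. the tgt_prog/btf_id pair resolved above.
 */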
19647  
19648  struct btf *bpf_get_btf_vmlinux(void)
19649  {
19650  	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
19651  		mutex_lock(&bpf_verifier_lock);
19652  		if (!btf_vmlinux)
19653  			btf_vmlinux = btf_parse_vmlinux();
19654  		mutex_unlock(&bpf_verifier_lock);
19655  	}
19656  	return btf_vmlinux;
19657  }
19658  
19659  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
19660  {
19661  	u64 start_time = ktime_get_ns();
19662  	struct bpf_verifier_env *env;
19663  	int i, len, ret = -EINVAL, err;
19664  	u32 log_true_size;
19665  	bool is_priv;
19666  
19667  	/* no program is valid */
19668  	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
19669  		return -EINVAL;
19670  
19671  	/* 'struct bpf_verifier_env' can be global, but since it's not small,
19672  	 * allocate/free it every time bpf_check() is called
19673  	 */
19674  	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
19675  	if (!env)
19676  		return -ENOMEM;
19677  
19678  	env->bt.env = env;
19679  
19680  	len = (*prog)->len;
19681  	env->insn_aux_data =
19682  		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
19683  	ret = -ENOMEM;
19684  	if (!env->insn_aux_data)
19685  		goto err_free_env;
19686  	for (i = 0; i < len; i++)
19687  		env->insn_aux_data[i].orig_idx = i;
19688  	env->prog = *prog;
19689  	env->ops = bpf_verifier_ops[env->prog->type];
19690  	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
19691  	is_priv = bpf_capable();
19692  
19693  	bpf_get_btf_vmlinux();
19694  
19695  	/* grab the mutex to protect a few globals used by the verifier */
19696  	if (!is_priv)
19697  		mutex_lock(&bpf_verifier_lock);
19698  
19699  	/* the user could have requested verbose verifier output
19700  	 * and supplied a buffer to store the verification trace
19701  	 */
19702  	ret = bpf_vlog_init(&env->log, attr->log_level,
19703  			    (char __user *) (unsigned long) attr->log_buf,
19704  			    attr->log_size);
19705  	if (ret)
19706  		goto err_unlock;
19707  
19708  	mark_verifier_state_clean(env);
19709  
19710  	if (IS_ERR(btf_vmlinux)) {
19711  		/* Either gcc, pahole or the kernel is broken. */
19712  		verbose(env, "in-kernel BTF is malformed\n");
19713  		ret = PTR_ERR(btf_vmlinux);
19714  		goto skip_full_check;
19715  	}
19716  
19717  	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
19718  	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
19719  		env->strict_alignment = true;
19720  	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
19721  		env->strict_alignment = false;
19722  
19723  	env->allow_ptr_leaks = bpf_allow_ptr_leaks();
19724  	env->allow_uninit_stack = bpf_allow_uninit_stack();
19725  	env->bypass_spec_v1 = bpf_bypass_spec_v1();
19726  	env->bypass_spec_v4 = bpf_bypass_spec_v4();
19727  	env->bpf_capable = bpf_capable();
19728  
19729  	if (is_priv)
19730  		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
19731  
19732  	env->explored_states = kvcalloc(state_htab_size(env),
19733  				       sizeof(struct bpf_verifier_state_list *),
19734  				       GFP_USER);
19735  	ret = -ENOMEM;
19736  	if (!env->explored_states)
19737  		goto skip_full_check;
19738  
19739  	ret = add_subprog_and_kfunc(env);
19740  	if (ret < 0)
19741  		goto skip_full_check;
19742  
19743  	ret = check_subprogs(env);
19744  	if (ret < 0)
19745  		goto skip_full_check;
19746  
19747  	ret = check_btf_info(env, attr, uattr);
19748  	if (ret < 0)
19749  		goto skip_full_check;
19750  
19751  	ret = check_attach_btf_id(env);
19752  	if (ret)
19753  		goto skip_full_check;
19754  
19755  	ret = resolve_pseudo_ldimm64(env);
19756  	if (ret < 0)
19757  		goto skip_full_check;
19758  
19759  	if (bpf_prog_is_offloaded(env->prog->aux)) {
19760  		ret = bpf_prog_offload_verifier_prep(env->prog);
19761  		if (ret)
19762  			goto skip_full_check;
19763  	}
19764  
19765  	ret = check_cfg(env);
19766  	if (ret < 0)
19767  		goto skip_full_check;
19768  
19769  	ret = do_check_subprogs(env);
19770  	ret = ret ?: do_check_main(env);
19771  
19772  	if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
19773  		ret = bpf_prog_offload_finalize(env);
19774  
19775  skip_full_check:
19776  	kvfree(env->explored_states);
19777  
19778  	if (ret == 0)
19779  		ret = check_max_stack_depth(env);
19780  
19781  	/* instruction rewrites happen after this point */
19782  	if (ret == 0)
19783  		ret = optimize_bpf_loop(env);
19784  
19785  	if (is_priv) {
19786  		if (ret == 0)
19787  			opt_hard_wire_dead_code_branches(env);
19788  		if (ret == 0)
19789  			ret = opt_remove_dead_code(env);
19790  		if (ret == 0)
19791  			ret = opt_remove_nops(env);
19792  	} else {
19793  		if (ret == 0)
19794  			sanitize_dead_code(env);
19795  	}
19796  
19797  	if (ret == 0)
19798  		/* program is valid, convert *(u32*)(ctx + off) accesses */
19799  		ret = convert_ctx_accesses(env);
19800  
19801  	if (ret == 0)
19802  		ret = do_misc_fixups(env);
19803  
19804  	/* Do the 32-bit optimization after insn patching is done, so that
19805  	 * patched insns can be handled correctly.
19806  	 */
19807  	if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
19808  		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
19809  		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
19810  								     : false;
19811  	}
19812  
19813  	if (ret == 0)
19814  		ret = fixup_call_args(env);
19815  
19816  	env->verification_time = ktime_get_ns() - start_time;
19817  	print_verification_stats(env);
19818  	env->prog->aux->verified_insns = env->insn_processed;
19819  
19820  	/* preserve original error even if log finalization is successful */
19821  	err = bpf_vlog_finalize(&env->log, &log_true_size);
19822  	if (err)
19823  		ret = err;
19824  
19825  	if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
19826  	    copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
19827  				  &log_true_size, sizeof(log_true_size))) {
19828  		ret = -EFAULT;
19829  		goto err_release_maps;
19830  	}
19831  
19832  	if (ret)
19833  		goto err_release_maps;
19834  
19835  	if (env->used_map_cnt) {
19836  		/* if program passed verifier, update used_maps in bpf_prog_info */
19837  		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
19838  							  sizeof(env->used_maps[0]),
19839  							  GFP_KERNEL);
19840  
19841  		if (!env->prog->aux->used_maps) {
19842  			ret = -ENOMEM;
19843  			goto err_release_maps;
19844  		}
19845  
19846  		memcpy(env->prog->aux->used_maps, env->used_maps,
19847  		       sizeof(env->used_maps[0]) * env->used_map_cnt);
19848  		env->prog->aux->used_map_cnt = env->used_map_cnt;
19849  	}
19850  	if (env->used_btf_cnt) {
19851  		/* if program passed verifier, update used_btfs in bpf_prog_aux */
19852  		env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
19853  							  sizeof(env->used_btfs[0]),
19854  							  GFP_KERNEL);
19855  		if (!env->prog->aux->used_btfs) {
19856  			ret = -ENOMEM;
19857  			goto err_release_maps;
19858  		}
19859  
19860  		memcpy(env->prog->aux->used_btfs, env->used_btfs,
19861  		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
19862  		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
19863  	}
19864  	if (env->used_map_cnt || env->used_btf_cnt) {
19865  		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
19866  		 * bpf_ld_imm64 instructions
19867  		 */
19868  		convert_pseudo_ld_imm64(env);
19869  	}
19870  
19871  	adjust_btf_func(env);
19872  
19873  err_release_maps:
19874  	if (!env->prog->aux->used_maps)
19875  		/* if we didn't copy map pointers into bpf_prog_info, release
19876  		 * them now. Otherwise free_used_maps() will release them.
19877  		 */
19878  		release_maps(env);
19879  	if (!env->prog->aux->used_btfs)
19880  		release_btfs(env);
19881  
19882  	/* Extension progs temporarily inherit the attach_type of their targets
19883  	 * for verification purposes, so set it back to zero before returning.
19884  	 */
19885  	if (env->prog->type == BPF_PROG_TYPE_EXT)
19886  		env->prog->expected_attach_type = 0;
19887  
19888  	*prog = env->prog;
19889  err_unlock:
19890  	if (!is_priv)
19891  		mutex_unlock(&bpf_verifier_lock);
19892  	vfree(env->insn_aux_data);
19893  err_free_env:
19894  	kfree(env);
19895  	return ret;
19896  }
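/* Illustrative usage (hedged sketch, not from this file): bpf_check() is reached from
 * the BPF_PROG_LOAD path of the bpf(2) syscall. A minimal userspace program that
 * exercises it end to end, assuming the uapi bpf headers and the BPF_MOV64_IMM /
 * BPF_EXIT_INSN helper macros (as in tools/include/linux/filter.h), might be:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_MOV64_IMM(BPF_REG_0, 0),	// r0 = 0 (return value)
 *		BPF_EXIT_INSN(),		// exit
 *	};
 *	union bpf_attr attr = {
 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insns     = (__u64)(unsigned long)insns,
 *		.insn_cnt  = 2,
 *		.license   = (__u64)(unsigned long)"GPL",
 *	};
 *	int fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *
 * On failure the syscall returns -1 and errno carries the negated error code computed
 * by the verifier above.
 */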
19897