diff options
Diffstat (limited to 'include/linux')
58 files changed, 2900 insertions, 383 deletions
diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index 77bc5522e61c..4226379a232d 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -1208,15 +1208,21 @@ arch_atomic_inc_and_test(atomic_t *v) #define arch_atomic_inc_and_test arch_atomic_inc_and_test #endif +#ifndef arch_atomic_add_negative_relaxed +#ifdef arch_atomic_add_negative +#define arch_atomic_add_negative_acquire arch_atomic_add_negative +#define arch_atomic_add_negative_release arch_atomic_add_negative +#define arch_atomic_add_negative_relaxed arch_atomic_add_negative +#endif /* arch_atomic_add_negative */ + #ifndef arch_atomic_add_negative /** - * arch_atomic_add_negative - add and test if negative + * arch_atomic_add_negative - Add and test if negative * @i: integer value to add * @v: pointer of type atomic_t * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. + * Atomically adds @i to @v and returns true if the result is negative, + * or false when the result is greater than or equal to zero. */ static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) @@ -1226,6 +1232,95 @@ arch_atomic_add_negative(int i, atomic_t *v) #define arch_atomic_add_negative arch_atomic_add_negative #endif +#ifndef arch_atomic_add_negative_acquire +/** + * arch_atomic_add_negative_acquire - Add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true if the result is negative, + * or false when the result is greater than or equal to zero. + */ +static __always_inline bool +arch_atomic_add_negative_acquire(int i, atomic_t *v) +{ + return arch_atomic_add_return_acquire(i, v) < 0; +} +#define arch_atomic_add_negative_acquire arch_atomic_add_negative_acquire +#endif + +#ifndef arch_atomic_add_negative_release +/** + * arch_atomic_add_negative_release - Add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true if the result is negative, + * or false when the result is greater than or equal to zero. + */ +static __always_inline bool +arch_atomic_add_negative_release(int i, atomic_t *v) +{ + return arch_atomic_add_return_release(i, v) < 0; +} +#define arch_atomic_add_negative_release arch_atomic_add_negative_release +#endif + +#ifndef arch_atomic_add_negative_relaxed +/** + * arch_atomic_add_negative_relaxed - Add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true if the result is negative, + * or false when the result is greater than or equal to zero. + */ +static __always_inline bool +arch_atomic_add_negative_relaxed(int i, atomic_t *v) +{ + return arch_atomic_add_return_relaxed(i, v) < 0; +} +#define arch_atomic_add_negative_relaxed arch_atomic_add_negative_relaxed +#endif + +#else /* arch_atomic_add_negative_relaxed */ + +#ifndef arch_atomic_add_negative_acquire +static __always_inline bool +arch_atomic_add_negative_acquire(int i, atomic_t *v) +{ + bool ret = arch_atomic_add_negative_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define arch_atomic_add_negative_acquire arch_atomic_add_negative_acquire +#endif + +#ifndef arch_atomic_add_negative_release +static __always_inline bool +arch_atomic_add_negative_release(int i, atomic_t *v) +{ + __atomic_release_fence(); + return arch_atomic_add_negative_relaxed(i, v); +} +#define arch_atomic_add_negative_release arch_atomic_add_negative_release +#endif + +#ifndef arch_atomic_add_negative +static __always_inline bool +arch_atomic_add_negative(int i, atomic_t *v) +{ + bool ret; + __atomic_pre_full_fence(); + ret = arch_atomic_add_negative_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define arch_atomic_add_negative arch_atomic_add_negative +#endif + +#endif /* arch_atomic_add_negative_relaxed */ + #ifndef arch_atomic_fetch_add_unless /** * arch_atomic_fetch_add_unless - add unless the number is already a given value @@ -2329,15 +2424,21 @@ arch_atomic64_inc_and_test(atomic64_t *v) #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test #endif +#ifndef arch_atomic64_add_negative_relaxed +#ifdef arch_atomic64_add_negative +#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative +#define arch_atomic64_add_negative_release arch_atomic64_add_negative +#define arch_atomic64_add_negative_relaxed arch_atomic64_add_negative +#endif /* arch_atomic64_add_negative */ + #ifndef arch_atomic64_add_negative /** - * arch_atomic64_add_negative - add and test if negative + * arch_atomic64_add_negative - Add and test if negative * @i: integer value to add * @v: pointer of type atomic64_t * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. + * Atomically adds @i to @v and returns true if the result is negative, + * or false when the result is greater than or equal to zero. */ static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) @@ -2347,6 +2448,95 @@ arch_atomic64_add_negative(s64 i, atomic64_t *v) #define arch_atomic64_add_negative arch_atomic64_add_negative #endif +#ifndef arch_atomic64_add_negative_acquire +/** + * arch_atomic64_add_negative_acquire - Add and test if negative + * @i: integer value to add + * @v: pointer of type atomic64_t + * + * Atomically adds @i to @v and returns true if the result is negative, + * or false when the result is greater than or equal to zero. + */ +static __always_inline bool +arch_atomic64_add_negative_acquire(s64 i, atomic64_t *v) +{ + return arch_atomic64_add_return_acquire(i, v) < 0; +} +#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative_acquire +#endif + +#ifndef arch_atomic64_add_negative_release +/** + * arch_atomic64_add_negative_release - Add and test if negative + * @i: integer value to add + * @v: pointer of type atomic64_t + * + * Atomically adds @i to @v and returns true if the result is negative, + * or false when the result is greater than or equal to zero. + */ +static __always_inline bool +arch_atomic64_add_negative_release(s64 i, atomic64_t *v) +{ + return arch_atomic64_add_return_release(i, v) < 0; +} +#define arch_atomic64_add_negative_release arch_atomic64_add_negative_release +#endif + +#ifndef arch_atomic64_add_negative_relaxed +/** + * arch_atomic64_add_negative_relaxed - Add and test if negative + * @i: integer value to add + * @v: pointer of type atomic64_t + * + * Atomically adds @i to @v and returns true if the result is negative, + * or false when the result is greater than or equal to zero. + */ +static __always_inline bool +arch_atomic64_add_negative_relaxed(s64 i, atomic64_t *v) +{ + return arch_atomic64_add_return_relaxed(i, v) < 0; +} +#define arch_atomic64_add_negative_relaxed arch_atomic64_add_negative_relaxed +#endif + +#else /* arch_atomic64_add_negative_relaxed */ + +#ifndef arch_atomic64_add_negative_acquire +static __always_inline bool +arch_atomic64_add_negative_acquire(s64 i, atomic64_t *v) +{ + bool ret = arch_atomic64_add_negative_relaxed(i, v); + __atomic_acquire_fence(); + return ret; +} +#define arch_atomic64_add_negative_acquire arch_atomic64_add_negative_acquire +#endif + +#ifndef arch_atomic64_add_negative_release +static __always_inline bool +arch_atomic64_add_negative_release(s64 i, atomic64_t *v) +{ + __atomic_release_fence(); + return arch_atomic64_add_negative_relaxed(i, v); +} +#define arch_atomic64_add_negative_release arch_atomic64_add_negative_release +#endif + +#ifndef arch_atomic64_add_negative +static __always_inline bool +arch_atomic64_add_negative(s64 i, atomic64_t *v) +{ + bool ret; + __atomic_pre_full_fence(); + ret = arch_atomic64_add_negative_relaxed(i, v); + __atomic_post_full_fence(); + return ret; +} +#define arch_atomic64_add_negative arch_atomic64_add_negative +#endif + +#endif /* arch_atomic64_add_negative_relaxed */ + #ifndef arch_atomic64_fetch_add_unless /** * arch_atomic64_fetch_add_unless - add unless the number is already a given value @@ -2456,4 +2646,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// b5e87bdd5ede61470c29f7a7e4de781af3770f09 +// 00071fffa021cec66f6290d706d69c91df87bade diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index 7a139ec030b0..0496816738ca 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -592,6 +592,28 @@ atomic_add_negative(int i, atomic_t *v) return arch_atomic_add_negative(i, v); } +static __always_inline bool +atomic_add_negative_acquire(int i, atomic_t *v) +{ + instrument_atomic_read_write(v, sizeof(*v)); + return arch_atomic_add_negative_acquire(i, v); +} + +static __always_inline bool +atomic_add_negative_release(int i, atomic_t *v) +{ + kcsan_release(); + instrument_atomic_read_write(v, sizeof(*v)); + return arch_atomic_add_negative_release(i, v); +} + +static __always_inline bool +atomic_add_negative_relaxed(int i, atomic_t *v) +{ + instrument_atomic_read_write(v, sizeof(*v)); + return arch_atomic_add_negative_relaxed(i, v); +} + static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) { @@ -1211,6 +1233,28 @@ atomic64_add_negative(s64 i, atomic64_t *v) return arch_atomic64_add_negative(i, v); } +static __always_inline bool +atomic64_add_negative_acquire(s64 i, atomic64_t *v) +{ + instrument_atomic_read_write(v, sizeof(*v)); + return arch_atomic64_add_negative_acquire(i, v); +} + +static __always_inline bool +atomic64_add_negative_release(s64 i, atomic64_t *v) +{ + kcsan_release(); + instrument_atomic_read_write(v, sizeof(*v)); + return arch_atomic64_add_negative_release(i, v); +} + +static __always_inline bool +atomic64_add_negative_relaxed(s64 i, atomic64_t *v) +{ + instrument_atomic_read_write(v, sizeof(*v)); + return arch_atomic64_add_negative_relaxed(i, v); +} + static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { @@ -1830,6 +1874,28 @@ atomic_long_add_negative(long i, atomic_long_t *v) return arch_atomic_long_add_negative(i, v); } +static __always_inline bool +atomic_long_add_negative_acquire(long i, atomic_long_t *v) +{ + instrument_atomic_read_write(v, sizeof(*v)); + return arch_atomic_long_add_negative_acquire(i, v); +} + +static __always_inline bool +atomic_long_add_negative_release(long i, atomic_long_t *v) +{ + kcsan_release(); + instrument_atomic_read_write(v, sizeof(*v)); + return arch_atomic_long_add_negative_release(i, v); +} + +static __always_inline bool +atomic_long_add_negative_relaxed(long i, atomic_long_t *v) +{ + instrument_atomic_read_write(v, sizeof(*v)); + return arch_atomic_long_add_negative_relaxed(i, v); +} + static __always_inline long atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) { @@ -2083,4 +2149,4 @@ atomic_long_dec_if_positive(atomic_long_t *v) }) #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// 764f741eb77a7ad565dc8d99ce2837d5542e8aee +// 1b485de9cbaa4900de59e14ee2084357eaeb1c3a diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h index 800b8c35992d..2fc51ba66beb 100644 --- a/include/linux/atomic/atomic-long.h +++ b/include/linux/atomic/atomic-long.h @@ -479,6 +479,24 @@ arch_atomic_long_add_negative(long i, atomic_long_t *v) return arch_atomic64_add_negative(i, v); } +static __always_inline bool +arch_atomic_long_add_negative_acquire(long i, atomic_long_t *v) +{ + return arch_atomic64_add_negative_acquire(i, v); +} + +static __always_inline bool +arch_atomic_long_add_negative_release(long i, atomic_long_t *v) +{ + return arch_atomic64_add_negative_release(i, v); +} + +static __always_inline bool +arch_atomic_long_add_negative_relaxed(long i, atomic_long_t *v) +{ + return arch_atomic64_add_negative_relaxed(i, v); +} + static __always_inline long arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) { @@ -973,6 +991,24 @@ arch_atomic_long_add_negative(long i, atomic_long_t *v) return arch_atomic_add_negative(i, v); } +static __always_inline bool +arch_atomic_long_add_negative_acquire(long i, atomic_long_t *v) +{ + return arch_atomic_add_negative_acquire(i, v); +} + +static __always_inline bool +arch_atomic_long_add_negative_release(long i, atomic_long_t *v) +{ + return arch_atomic_add_negative_release(i, v); +} + +static __always_inline bool +arch_atomic_long_add_negative_relaxed(long i, atomic_long_t *v) +{ + return arch_atomic_add_negative_relaxed(i, v); +} + static __always_inline long arch_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) { @@ -1011,4 +1047,4 @@ arch_atomic_long_dec_if_positive(atomic_long_t *v) #endif /* CONFIG_64BIT */ #endif /* _LINUX_ATOMIC_LONG_H */ -// e8f0e08ff072b74d180eabe2ad001282b38c2c88 +// a194c07d7d2f4b0e178d3c118c919775d5d65f50 diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 520b238abd5a..e53ceee1df37 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -96,11 +96,11 @@ struct bpf_map_ops { /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); - int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); - int (*map_delete_elem)(struct bpf_map *map, void *key); - int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); - int (*map_pop_elem)(struct bpf_map *map, void *value); - int (*map_peek_elem)(struct bpf_map *map, void *value); + long (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); + long (*map_delete_elem)(struct bpf_map *map, void *key); + long (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); + long (*map_pop_elem)(struct bpf_map *map, void *value); + long (*map_peek_elem)(struct bpf_map *map, void *value); void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); /* funcs called by prog_array and perf_event_array map */ @@ -139,7 +139,7 @@ struct bpf_map_ops { struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner); /* Misc helpers.*/ - int (*map_redirect)(struct bpf_map *map, u64 key, u64 flags); + long (*map_redirect)(struct bpf_map *map, u64 key, u64 flags); /* map_meta_equal must be implemented for maps that can be * used as an inner map. It is a runtime check to ensure @@ -157,10 +157,12 @@ struct bpf_map_ops { int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee); - int (*map_for_each_callback)(struct bpf_map *map, + long (*map_for_each_callback)(struct bpf_map *map, bpf_callback_t callback_fn, void *callback_ctx, u64 flags); + u64 (*map_mem_usage)(const struct bpf_map *map); + /* BTF id of struct allocated by map_alloc */ int *map_btf_id; @@ -185,11 +187,17 @@ enum btf_field_type { BPF_RB_NODE = (1 << 7), BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | BPF_RB_NODE | BPF_RB_ROOT, + BPF_REFCOUNT = (1 << 8), }; +typedef void (*btf_dtor_kfunc_t)(void *); + struct btf_field_kptr { struct btf *btf; struct module *module; + /* dtor used if btf_is_kernel(btf), otherwise the type is + * program-allocated, dtor is NULL, and __bpf_obj_drop_impl is used + */ btf_dtor_kfunc_t dtor; u32 btf_id; }; @@ -203,6 +211,7 @@ struct btf_field_graph_root { struct btf_field { u32 offset; + u32 size; enum btf_field_type type; union { struct btf_field_kptr kptr; @@ -215,15 +224,10 @@ struct btf_record { u32 field_mask; int spin_lock_off; int timer_off; + int refcount_off; struct btf_field fields[]; }; -struct btf_field_offs { - u32 cnt; - u32 field_off[BTF_FIELDS_MAX]; - u8 field_sz[BTF_FIELDS_MAX]; -}; - struct bpf_map { /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). @@ -250,7 +254,6 @@ struct bpf_map { struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; - struct btf_field_offs *field_offs; /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. */ @@ -292,6 +295,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "bpf_rb_root"; case BPF_RB_NODE: return "bpf_rb_node"; + case BPF_REFCOUNT: + return "bpf_refcount"; default: WARN_ON_ONCE(1); return "unknown"; @@ -316,6 +321,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_rb_root); case BPF_RB_NODE: return sizeof(struct bpf_rb_node); + case BPF_REFCOUNT: + return sizeof(struct bpf_refcount); default: WARN_ON_ONCE(1); return 0; @@ -340,12 +347,42 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_rb_root); case BPF_RB_NODE: return __alignof__(struct bpf_rb_node); + case BPF_REFCOUNT: + return __alignof__(struct bpf_refcount); default: WARN_ON_ONCE(1); return 0; } } +static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) +{ + memset(addr, 0, field->size); + + switch (field->type) { + case BPF_REFCOUNT: + refcount_set((refcount_t *)addr, 1); + break; + case BPF_RB_NODE: + RB_CLEAR_NODE((struct rb_node *)addr); + break; + case BPF_LIST_HEAD: + case BPF_LIST_NODE: + INIT_LIST_HEAD((struct list_head *)addr); + break; + case BPF_RB_ROOT: + /* RB_ROOT_CACHED 0-inits, no need to do anything after memset */ + case BPF_SPIN_LOCK: + case BPF_TIMER: + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + break; + default: + WARN_ON_ONCE(1); + return; + } +} + static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_field_type type) { if (IS_ERR_OR_NULL(rec)) @@ -353,14 +390,14 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f return rec->field_mask & type; } -static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj) +static inline void bpf_obj_init(const struct btf_record *rec, void *obj) { int i; - if (!foffs) + if (IS_ERR_OR_NULL(rec)) return; - for (i = 0; i < foffs->cnt; i++) - memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]); + for (i = 0; i < rec->cnt; i++) + bpf_obj_init_field(&rec->fields[i], obj + rec->fields[i].offset); } /* 'dst' must be a temporary buffer and should not point to memory that is being @@ -372,7 +409,7 @@ static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj) */ static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { - bpf_obj_init(map->field_offs, dst); + bpf_obj_init(map->record, dst); } /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and @@ -392,14 +429,14 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) } /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */ -static inline void bpf_obj_memcpy(struct btf_field_offs *foffs, +static inline void bpf_obj_memcpy(struct btf_record *rec, void *dst, void *src, u32 size, bool long_memcpy) { u32 curr_off = 0; int i; - if (likely(!foffs)) { + if (IS_ERR_OR_NULL(rec)) { if (long_memcpy) bpf_long_memcpy(dst, src, round_up(size, 8)); else @@ -407,49 +444,49 @@ static inline void bpf_obj_memcpy(struct btf_field_offs *foffs, return; } - for (i = 0; i < foffs->cnt; i++) { - u32 next_off = foffs->field_off[i]; + for (i = 0; i < rec->cnt; i++) { + u32 next_off = rec->fields[i].offset; u32 sz = next_off - curr_off; memcpy(dst + curr_off, src + curr_off, sz); - curr_off += foffs->field_sz[i] + sz; + curr_off += rec->fields[i].size + sz; } memcpy(dst + curr_off, src + curr_off, size - curr_off); } static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { - bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, false); + bpf_obj_memcpy(map->record, dst, src, map->value_size, false); } static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src) { - bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, true); + bpf_obj_memcpy(map->record, dst, src, map->value_size, true); } -static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32 size) +static inline void bpf_obj_memzero(struct btf_record *rec, void *dst, u32 size) { u32 curr_off = 0; int i; - if (likely(!foffs)) { + if (IS_ERR_OR_NULL(rec)) { memset(dst, 0, size); return; } - for (i = 0; i < foffs->cnt; i++) { - u32 next_off = foffs->field_off[i]; + for (i = 0; i < rec->cnt; i++) { + u32 next_off = rec->fields[i].offset; u32 sz = next_off - curr_off; memset(dst + curr_off, 0, sz); - curr_off += foffs->field_sz[i] + sz; + curr_off += rec->fields[i].size + sz; } memset(dst + curr_off, 0, size - curr_off); } static inline void zero_map_value(struct bpf_map *map, void *dst) { - bpf_obj_memzero(map->field_offs, dst, map->value_size); + bpf_obj_memzero(map->record, dst, map->value_size); } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, @@ -607,11 +644,18 @@ enum bpf_type_flag { */ NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to sk_buff */ + DYNPTR_TYPE_SKB = BIT(15 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to xdp_buff */ + DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; -#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF) +#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ + | DYNPTR_TYPE_XDP) /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -879,8 +923,7 @@ struct bpf_verifier_ops { struct bpf_prog *prog, u32 *target_size); int (*btf_struct_access)(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag); + int off, int size); }; struct bpf_prog_offload_ops { @@ -1089,6 +1132,7 @@ struct bpf_trampoline { struct bpf_attach_target_info { struct btf_func_model fmodel; long tgt_addr; + struct module *tgt_mod; const char *tgt_name; const struct btf_type *tgt_type; }; @@ -1124,6 +1168,37 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( return bpf_func(ctx, insnsi); } +/* the implementation of the opaque uapi struct bpf_dynptr */ +struct bpf_dynptr_kern { + void *data; + /* Size represents the number of usable bytes of dynptr data. + * If for example the offset is at 4 for a local dynptr whose data is + * of type u64, the number of usable bytes is 4. + * + * The upper 8 bits are reserved. It is as follows: + * Bits 0 - 23 = size + * Bits 24 - 30 = dynptr type + * Bit 31 = whether dynptr is read-only + */ + u32 size; + u32 offset; +} __aligned(8); + +enum bpf_dynptr_type { + BPF_DYNPTR_TYPE_INVALID, + /* Points to memory that is local to the bpf program */ + BPF_DYNPTR_TYPE_LOCAL, + /* Underlying data is a ringbuf record */ + BPF_DYNPTR_TYPE_RINGBUF, + /* Underlying data is a sk_buff */ + BPF_DYNPTR_TYPE_SKB, + /* Underlying data is a xdp_buff */ + BPF_DYNPTR_TYPE_XDP, +}; + +int bpf_dynptr_check_size(u32 size); +u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr); + #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); @@ -1361,6 +1436,7 @@ struct bpf_prog_aux { * main prog always has linfo_idx == 0 */ u32 linfo_idx; + struct module *mod; u32 num_exentries; struct exception_table_entry *extable; union { @@ -1429,6 +1505,8 @@ struct bpf_link_ops { void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); int (*fill_link_info)(const struct bpf_link *link, struct bpf_link_info *info); + int (*update_map)(struct bpf_link *link, struct bpf_map *new_map, + struct bpf_map *old_map); }; struct bpf_tramp_link { @@ -1471,6 +1549,8 @@ struct bpf_struct_ops { void *kdata, const void *udata); int (*reg)(void *kdata); void (*unreg)(void *kdata); + int (*update)(void *kdata, void *old_kdata); + int (*validate)(void *kdata); const struct btf_type *type; const struct btf_type *value_type; const char *name; @@ -1505,6 +1585,7 @@ static inline void bpf_module_put(const void *data, struct module *owner) else module_put(owner); } +int bpf_struct_ops_link_create(union bpf_attr *attr); #ifdef CONFIG_NET /* Define it here to avoid the use of forward declaration */ @@ -1545,6 +1626,11 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, { return -EINVAL; } +static inline int bpf_struct_ops_link_create(union bpf_attr *attr) +{ + return -EOPNOTSUPP; +} + #endif #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) @@ -1577,8 +1663,12 @@ struct bpf_array { #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 33 -/* Maximum number of loops for bpf_loop */ -#define BPF_MAX_LOOPS BIT(23) +/* Maximum number of loops for bpf_loop and bpf_iter_num. + * It's enum to expose it (and thus make it discoverable) through BTF. + */ +enum { + BPF_MAX_LOOPS = 8 * 1024 * 1024, +}; #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ BPF_F_RDONLY_PROG | \ @@ -1881,7 +1971,7 @@ void bpf_prog_free_id(struct bpf_prog *prog); void bpf_map_free_id(struct bpf_map *map); struct btf_field *btf_record_find(const struct btf_record *rec, - u32 offset, enum btf_field_type type); + u32 offset, u32 field_mask); void btf_record_free(struct btf_record *rec); void bpf_map_free_record(struct bpf_map *map); struct btf_record *btf_record_dup(const struct btf_record *rec); @@ -1894,6 +1984,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); +struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); @@ -2114,7 +2205,7 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size, size_t actual_size); /* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr); +int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size); #ifndef CONFIG_BPF_JIT_ALWAYS_ON void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); @@ -2173,6 +2264,9 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_nf(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@ -2202,7 +2296,7 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size, int btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag); + u32 *next_btf_id, enum bpf_type_flag *flag, const char **field_name); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, const struct btf *need_btf, u32 need_type_id, @@ -2234,6 +2328,9 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); const struct btf_func_model * bpf_jit_find_kfunc_model(const struct bpf_prog *prog, const struct bpf_insn *insn); +int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, + u16 btf_fd_idx, u8 **func_addr); + struct bpf_core_ctx { struct bpf_verifier_log *log; const struct btf *btf; @@ -2241,7 +2338,7 @@ struct bpf_core_ctx { bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off); + const char *field_name, u32 btf_id, const char *suffix); bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log, const struct btf *reg_btf, u32 reg_id, @@ -2266,6 +2363,11 @@ static inline bool has_current_bpf_ctx(void) } void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog); + +void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size); +void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); +void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2451,7 +2553,8 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id) static inline int btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag) + u32 *next_btf_id, enum bpf_type_flag *flag, + const char **field_name) { return -EACCES; } @@ -2478,6 +2581,13 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog, return NULL; } +static inline int +bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, + u16 btf_fd_idx, u8 **func_addr) +{ + return -ENOTSUPP; +} + static inline bool unprivileged_ebpf_enabled(void) { return false; @@ -2495,6 +2605,19 @@ static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog) static inline void bpf_cgrp_storage_free(struct cgroup *cgroup) { } + +static inline void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size) +{ +} + +static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) +{ +} + +static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr) +{ +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@ -2566,6 +2689,7 @@ static inline bool bpf_map_is_offloaded(struct bpf_map *map) struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr); void bpf_map_offload_map_free(struct bpf_map *map); +u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map); int bpf_prog_test_run_syscall(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); @@ -2637,6 +2761,11 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) { } +static inline u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map) +{ + return 0; +} + static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) @@ -2801,6 +2930,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size); +int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, + struct bpf_dynptr_kern *ptr); #else static inline bool bpf_sock_common_is_valid_access(int off, int size, enum bpf_access_type type, @@ -2822,6 +2953,11 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, { return 0; } +static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, + struct bpf_dynptr_kern *ptr) +{ + return -EOPNOTSUPP; +} #endif #ifdef CONFIG_INET @@ -2913,36 +3049,6 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 num_args, struct bpf_bprintf_data *data); void bpf_bprintf_cleanup(struct bpf_bprintf_data *data); -/* the implementation of the opaque uapi struct bpf_dynptr */ -struct bpf_dynptr_kern { - void *data; - /* Size represents the number of usable bytes of dynptr data. - * If for example the offset is at 4 for a local dynptr whose data is - * of type u64, the number of usable bytes is 4. - * - * The upper 8 bits are reserved. It is as follows: - * Bits 0 - 23 = size - * Bits 24 - 30 = dynptr type - * Bit 31 = whether dynptr is read-only - */ - u32 size; - u32 offset; -} __aligned(8); - -enum bpf_dynptr_type { - BPF_DYNPTR_TYPE_INVALID, - /* Points to memory that is local to the bpf program */ - BPF_DYNPTR_TYPE_LOCAL, - /* Underlying data is a kernel-produced ringbuf record */ - BPF_DYNPTR_TYPE_RINGBUF, -}; - -void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, - enum bpf_dynptr_type type, u32 offset, u32 size); -void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); -int bpf_dynptr_check_size(u32 size); -u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr); - #ifdef CONFIG_BPF_LSM void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); void bpf_cgroup_atype_put(int cgroup_atype); diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 6d37a40cd90e..173ec7f43ed1 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -13,6 +13,7 @@ #include <linux/list.h> #include <linux/hash.h> #include <linux/types.h> +#include <linux/bpf_mem_alloc.h> #include <uapi/linux/btf.h> #define BPF_LOCAL_STORAGE_CACHE_SIZE 16 @@ -55,6 +56,9 @@ struct bpf_local_storage_map { u32 bucket_log; u16 elem_size; u16 cache_idx; + struct bpf_mem_alloc selem_ma; + struct bpf_mem_alloc storage_ma; + bool bpf_ma; }; struct bpf_local_storage_data { @@ -83,6 +87,7 @@ struct bpf_local_storage_elem { struct bpf_local_storage { struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE]; + struct bpf_local_storage_map __rcu *smap; struct hlist_head list; /* List of bpf_local_storage_elem */ void *owner; /* The object that owns the above "list" of * bpf_local_storage_elem. @@ -121,14 +126,15 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr); struct bpf_map * bpf_local_storage_map_alloc(union bpf_attr *attr, - struct bpf_local_storage_cache *cache); + struct bpf_local_storage_cache *cache, + bool bpf_ma); struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, bool cacheit_lockit); -bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage); +void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); void bpf_local_storage_map_free(struct bpf_map *map, struct bpf_local_storage_cache *cache, @@ -142,17 +148,19 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem); -void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu); +void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now); void bpf_selem_link_map(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem *selem); -void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem); - struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, bool charge_mem, gfp_t gfp_flags); +void bpf_selem_free(struct bpf_local_storage_elem *selem, + struct bpf_local_storage_map *smap, + bool reuse_now); + int bpf_local_storage_alloc(void *owner, struct bpf_local_storage_map *smap, @@ -163,6 +171,6 @@ struct bpf_local_storage_data * bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, void *value, u64 map_flags, gfp_t gfp_flags); -void bpf_local_storage_free_rcu(struct rcu_head *rcu); +u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map); #endif /* _BPF_LOCAL_STORAGE_H */ diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h index 3e164b8efaa9..3929be5743f4 100644 --- a/include/linux/bpf_mem_alloc.h +++ b/include/linux/bpf_mem_alloc.h @@ -14,6 +14,13 @@ struct bpf_mem_alloc { struct work_struct work; }; +/* 'size != 0' is for bpf_mem_alloc which manages fixed-size objects. + * Alloc and free are done with bpf_mem_cache_{alloc,free}(). + * + * 'size = 0' is for bpf_mem_alloc which manages many fixed-size objects. + * Alloc and free are done with bpf_mem_{alloc,free}() and the size of + * the returned object is given by the size argument of bpf_mem_alloc(). + */ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu); void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma); @@ -24,5 +31,7 @@ void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr); /* kmem_cache_alloc/free equivalent: */ void *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma); void bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr); +void bpf_mem_cache_raw_free(void *ptr); +void *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags); #endif /* _BPF_MEM_ALLOC_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index d4ee3ccd3753..fc0d6f32c687 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -79,6 +79,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm, #endif BPF_PROG_TYPE(BPF_PROG_TYPE_SYSCALL, bpf_syscall, void *, void *) +#ifdef CONFIG_NETFILTER_BPF_LINK +BPF_PROG_TYPE(BPF_PROG_TYPE_NETFILTER, netfilter, + struct bpf_nf_ctx, struct bpf_nf_ctx) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index cf1bb1cf4a7b..3dd29a53b711 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -59,6 +59,14 @@ struct bpf_active_lock { u32 id; }; +#define ITER_PREFIX "bpf_iter_" + +enum bpf_iter_state { + BPF_ITER_STATE_INVALID, /* for non-first slot */ + BPF_ITER_STATE_ACTIVE, + BPF_ITER_STATE_DRAINED, +}; + struct bpf_reg_state { /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; @@ -103,6 +111,18 @@ struct bpf_reg_state { bool first_slot; } dynptr; + /* For bpf_iter stack slots */ + struct { + /* BTF container and BTF type ID describing + * struct bpf_iter_<type> of an iterator state + */ + struct btf *btf; + u32 btf_id; + /* packing following two fields to fit iter state into 16 bytes */ + enum bpf_iter_state state:2; + int depth:30; + } iter; + /* Max size from any of the above. */ struct { unsigned long raw1; @@ -141,6 +161,8 @@ struct bpf_reg_state { * same reference to the socket, to determine proper reference freeing. * For stack slots that are dynptrs, this is used to track references to * the dynptr to determine proper reference freeing. + * Similarly to dynptrs, we use ID to track "belonging" of a reference + * to a specific instance of bpf_iter. */ u32 id; /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned @@ -211,9 +233,11 @@ enum bpf_stack_slot_type { * is stored in bpf_stack_state->spilled_ptr.dynptr.type */ STACK_DYNPTR, + STACK_ITER, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ + #define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern) #define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE) @@ -440,7 +464,12 @@ struct bpf_insn_aux_data { */ struct bpf_loop_inline_state loop_inline_state; }; - u64 obj_new_size; /* remember the size of type passed to bpf_obj_new to rewrite R1 */ + union { + /* remember the size of type passed to bpf_obj_new to rewrite R1 */ + u64 obj_new_size; + /* remember the offset of node field within type to rewrite */ + u64 insert_off; + }; struct btf_struct_meta *kptr_struct_meta; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ @@ -448,12 +477,17 @@ struct bpf_insn_aux_data { bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ + bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */ u8 alu_state; /* used in combination with alu_limit */ /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ - bool prune_point; bool jmp_point; + bool prune_point; + /* ensure we check state equivalence and save state checkpoint and + * this instruction, regardless of any heuristics + */ + bool force_checkpoint; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ @@ -462,39 +496,36 @@ struct bpf_insn_aux_data { #define BPF_VERIFIER_TMP_LOG_SIZE 1024 struct bpf_verifier_log { - u32 level; - char kbuf[BPF_VERIFIER_TMP_LOG_SIZE]; + /* Logical start and end positions of a "log window" of the verifier log. + * start_pos == 0 means we haven't truncated anything. + * Once truncation starts to happen, start_pos + len_total == end_pos, + * except during log reset situations, in which (end_pos - start_pos) + * might get smaller than len_total (see bpf_vlog_reset()). + * Generally, (end_pos - start_pos) gives number of useful data in + * user log buffer. + */ + u64 start_pos; + u64 end_pos; char __user *ubuf; - u32 len_used; + u32 level; u32 len_total; + u32 len_max; + char kbuf[BPF_VERIFIER_TMP_LOG_SIZE]; }; -static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) -{ - return log->len_used >= log->len_total - 1; -} - #define BPF_LOG_LEVEL1 1 #define BPF_LOG_LEVEL2 2 #define BPF_LOG_STATS 4 +#define BPF_LOG_FIXED 8 #define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2) -#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS) +#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS | BPF_LOG_FIXED) #define BPF_LOG_KERNEL (BPF_LOG_MASK + 1) /* kernel internal flag */ #define BPF_LOG_MIN_ALIGNMENT 8U #define BPF_LOG_ALIGNMENT 40U static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) { - return log && - ((log->level && log->ubuf && !bpf_verifier_log_full(log)) || - log->level == BPF_LOG_KERNEL); -} - -static inline bool -bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) -{ - return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 && - log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK); + return log && log->level; } #define BPF_MAX_SUBPROGS 256 @@ -537,7 +568,6 @@ struct bpf_verifier_env { bool bypass_spec_v1; bool bypass_spec_v4; bool seen_direct_write; - bool rcu_tag_supported; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; @@ -575,7 +605,7 @@ struct bpf_verifier_env { u32 scratched_regs; /* Same as scratched_regs but for stack slots */ u64 scratched_stack_slots; - u32 prev_log_len, prev_insn_print_len; + u64 prev_log_pos, prev_insn_print_pos; /* buffer used in reg_type_str() to generate reg_type string */ char type_str_buf[TYPE_STR_BUF_LEN]; }; @@ -586,6 +616,10 @@ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...); __printf(2, 3) void bpf_log(struct bpf_verifier_log *log, const char *fmt, ...); +int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level, + char __user *log_buf, u32 log_size); +void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos); +int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual); static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { @@ -616,9 +650,6 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, enum bpf_arg_type arg_type); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); -struct bpf_call_arg_meta; -int process_dynptr_func(struct bpf_verifier_env *env, int regno, - enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta); /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, diff --git a/include/linux/btf.h b/include/linux/btf.h index 49e0fe6d8274..508199e38415 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -18,7 +18,6 @@ #define KF_ACQUIRE (1 << 0) /* kfunc is an acquire function */ #define KF_RELEASE (1 << 1) /* kfunc is a release function */ #define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */ -#define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */ /* Trusted arguments are those which are guaranteed to be valid when passed to * the kfunc. It is used to enforce that pointers obtained from either acquire * kfuncs, or from the main kernel on a tracepoint or struct_ops callback @@ -70,7 +69,11 @@ #define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ #define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ #define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ -#define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */ +#define KF_RCU (1 << 7) /* kfunc takes either rcu or trusted pointer arguments */ +/* only one of KF_ITER_{NEW,NEXT,DESTROY} could be specified per kfunc */ +#define KF_ITER_NEW (1 << 8) /* kfunc implements BPF iter constructor */ +#define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */ +#define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ /* * Tag marking a kernel function as a kfunc. This is meant to minimize the @@ -109,7 +112,6 @@ struct btf_id_dtor_kfunc { struct btf_struct_meta { u32 btf_id; struct btf_record *record; - struct btf_field_offs *field_offs; }; struct btf_struct_metas { @@ -117,13 +119,11 @@ struct btf_struct_metas { struct btf_struct_meta types[]; }; -typedef void (*btf_dtor_kfunc_t)(void *); - extern const struct file_operations btf_fops; void btf_get(struct btf *btf); void btf_put(struct btf *btf); -int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr); +int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz); struct btf *btf_get_by_fd(int fd); int btf_get_info_by_fd(const struct btf *btf, const union bpf_attr *attr, @@ -205,7 +205,6 @@ int btf_find_timer(const struct btf *btf, const struct btf_type *t); struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size); int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec); -struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 3a4f7cd882ca..00950cc03bff 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -204,7 +204,7 @@ extern struct btf_id_set8 name; #else -#define BTF_ID_LIST(name) static u32 __maybe_unused name[16]; +#define BTF_ID_LIST(name) static u32 __maybe_unused name[64]; #define BTF_ID(prefix, name) #define BTF_ID_FLAGS(prefix, name, ...) #define BTF_ID_UNUSED diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h index be8aea04d023..cae324d10965 100644 --- a/include/linux/cpu_rmap.h +++ b/include/linux/cpu_rmap.h @@ -16,14 +16,13 @@ * struct cpu_rmap - CPU affinity reverse-map * @refcount: kref for object * @size: Number of objects to be reverse-mapped - * @used: Number of objects added * @obj: Pointer to array of object pointers * @near: For each CPU, the index and distance to the nearest object, * based on affinity masks */ struct cpu_rmap { struct kref refcount; - u16 size, used; + u16 size; void **obj; struct { u16 index; @@ -61,6 +60,7 @@ static inline struct cpu_rmap *alloc_irq_cpu_rmap(unsigned int size) } extern void free_irq_cpu_rmap(struct cpu_rmap *rmap); +int irq_cpu_rmap_remove(struct cpu_rmap *rmap, int irq); extern int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq); #endif /* __LINUX_CPU_RMAP_H */ diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 07e547c02fd8..325af611909f 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -305,10 +305,8 @@ struct dccp_sock { struct timer_list dccps_xmit_timer; }; -static inline struct dccp_sock *dccp_sk(const struct sock *sk) -{ - return (struct dccp_sock *)sk; -} +#define dccp_sk(ptr) container_of_const(ptr, struct dccp_sock, \ + dccps_inet_connection.icsk_inet.sk) static inline const char *dccp_role(const struct sock *sk) { diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 2792185dda22..62b61527bcc4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -75,6 +75,8 @@ enum { * @tx_push: The flag of tx push mode * @rx_push: The flag of rx push mode * @cqe_size: Size of TX/RX completion queue event + * @tx_push_buf_len: Size of TX push buffer + * @tx_push_buf_max_len: Maximum allowed size of TX push buffer */ struct kernel_ethtool_ringparam { u32 rx_buf_len; @@ -82,6 +84,8 @@ struct kernel_ethtool_ringparam { u8 tx_push; u8 rx_push; u32 cqe_size; + u32 tx_push_buf_len; + u32 tx_push_buf_max_len; }; /** @@ -90,12 +94,14 @@ struct kernel_ethtool_ringparam { * @ETHTOOL_RING_USE_CQE_SIZE: capture for setting cqe_size * @ETHTOOL_RING_USE_TX_PUSH: capture for setting tx_push * @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push + * @ETHTOOL_RING_USE_TX_PUSH_BUF_LEN: capture for setting tx_push_buf_len */ enum ethtool_supported_ring_param { - ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), - ETHTOOL_RING_USE_CQE_SIZE = BIT(1), - ETHTOOL_RING_USE_TX_PUSH = BIT(2), - ETHTOOL_RING_USE_RX_PUSH = BIT(3), + ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), + ETHTOOL_RING_USE_CQE_SIZE = BIT(1), + ETHTOOL_RING_USE_TX_PUSH = BIT(2), + ETHTOOL_RING_USE_RX_PUSH = BIT(3), + ETHTOOL_RING_USE_TX_PUSH_BUF_LEN = BIT(4), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) @@ -705,6 +711,7 @@ struct ethtool_mm_stats { * @get_dump_data: Get dump data. * @set_dump: Set dump specific flags to the device. * @get_ts_info: Get the time stamping and PTP hardware clock capabilities. + * It may be called with RCU, or rtnl or reference on the device. * Drivers supporting transmit time stamps in software should set this to * ethtool_op_get_ts_info(). * @get_module_info: Get the size and type of the eeprom contained within diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index 17003b385756..fae0dfb9a9c8 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -39,6 +39,7 @@ void ethtool_aggregate_pause_stats(struct net_device *dev, struct ethtool_pause_stats *pause_stats); void ethtool_aggregate_rmon_stats(struct net_device *dev, struct ethtool_rmon_stats *rmon_stats); +bool ethtool_dev_mm_supported(struct net_device *dev); #else static inline int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd) @@ -112,5 +113,10 @@ ethtool_aggregate_rmon_stats(struct net_device *dev, { } +static inline bool ethtool_dev_mm_supported(struct net_device *dev) +{ + return false; +} + #endif /* IS_ENABLED(CONFIG_ETHTOOL_NETLINK) */ #endif /* _LINUX_ETHTOOL_NETLINK_H_ */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 1727898f1641..bbce89937fde 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -571,8 +571,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); extern struct mutex nf_conn_btf_access_lock; extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag); + int off, int size); typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx, const struct bpf_insn *insnsi, @@ -921,6 +920,7 @@ void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); +bool bpf_jit_supports_far_kfunc_call(void); bool bpf_helper_changes_pkt_data(void *func); static inline bool bpf_dump_raw_ok(const struct cred *cred) @@ -1504,9 +1504,9 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, } #endif /* IS_ENABLED(CONFIG_IPV6) */ -static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index, - u64 flags, const u64 flag_mask, - void *lookup_elem(struct bpf_map *map, u32 key)) +static __always_inline long __bpf_xdp_redirect_map(struct bpf_map *map, u64 index, + u64 flags, const u64 flag_mask, + void *lookup_elem(struct bpf_map *map, u32 key)) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX; @@ -1542,4 +1542,50 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index return XDP_REDIRECT; } +#ifdef CONFIG_NET +int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len); +int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, + u32 len, u64 flags); +int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); +int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); +void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len); +void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, + void *buf, unsigned long len, bool flush); +#else /* CONFIG_NET */ +static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, + void *to, u32 len) +{ + return -EOPNOTSUPP; +} + +static inline int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, + const void *from, u32 len, u64 flags) +{ + return -EOPNOTSUPP; +} + +static inline int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, + void *buf, u32 len) +{ + return -EOPNOTSUPP; +} + +static inline int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, + void *buf, u32 len) +{ + return -EOPNOTSUPP; +} + +static inline void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) +{ + return NULL; +} + +static inline void *bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, + unsigned long len, bool flush) +{ + return NULL; +} +#endif /* CONFIG_NET */ + #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 2463bdd2a382..c4cf296e7eaf 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -9,7 +9,7 @@ * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net> * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (c) 2018 - 2022 Intel Corporation + * Copyright (c) 2018 - 2023 Intel Corporation */ #ifndef LINUX_IEEE80211_H @@ -783,20 +783,6 @@ static inline bool ieee80211_is_any_nullfunc(__le16 fc) } /** - * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU - * @fc: frame control field in little-endian byteorder - */ -static inline bool ieee80211_is_bufferable_mmpdu(__le16 fc) -{ - /* IEEE 802.11-2012, definition of "bufferable management frame"; - * note that this ignores the IBSS special case. */ - return ieee80211_is_mgmt(fc) && - (ieee80211_is_action(fc) || - ieee80211_is_disassoc(fc) || - ieee80211_is_deauth(fc)); -} - -/** * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set * @seq_ctrl: frame sequence control bytes in little-endian byteorder */ @@ -3557,11 +3543,6 @@ enum ieee80211_unprotected_wnm_actioncode { WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE = 1, }; -/* Public action codes */ -enum ieee80211_public_actioncode { - WLAN_PUBLIC_ACTION_FTM_RESPONSE = 33, -}; - /* Security key length */ enum ieee80211_key_len { WLAN_KEY_LEN_WEP40 = 5, @@ -3653,7 +3634,7 @@ enum ieee80211_pub_actioncode { WLAN_PUB_ACTION_NETWORK_CHANNEL_CONTROL = 30, WLAN_PUB_ACTION_WHITE_SPACE_MAP_ANN = 31, WLAN_PUB_ACTION_FTM_REQUEST = 32, - WLAN_PUB_ACTION_FTM = 33, + WLAN_PUB_ACTION_FTM_RESPONSE = 33, WLAN_PUB_ACTION_FILS_DISCOVERY = 34, }; @@ -4138,6 +4119,44 @@ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr) } /** + * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU + * @skb: the skb to check, starting with the 802.11 header + */ +static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + __le16 fc = mgmt->frame_control; + + /* + * IEEE 802.11 REVme D2.0 definition of bufferable MMPDU; + * note that this ignores the IBSS special case. + */ + if (!ieee80211_is_mgmt(fc)) + return false; + + if (ieee80211_is_disassoc(fc) || ieee80211_is_deauth(fc)) + return true; + + if (!ieee80211_is_action(fc)) + return false; + + if (skb->len < offsetofend(typeof(*mgmt), u.action.u.ftm.action_code)) + return true; + + /* action frame - additionally check for non-bufferable FTM */ + + if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC && + mgmt->u.action.category != WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION) + return true; + + if (mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_REQUEST || + mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_RESPONSE) + return false; + + return true; +} + +/** * _ieee80211_is_robust_mgmt_frame - check if frame is a robust management frame * @hdr: the frame (buffer must include at least the first octet of payload) */ @@ -4383,7 +4402,7 @@ static inline bool ieee80211_is_ftm(struct sk_buff *skb) return false; if (mgmt->u.action.u.ftm.action_code == - WLAN_PUBLIC_ACTION_FTM_RESPONSE && + WLAN_PUB_ACTION_FTM_RESPONSE && skb->len >= offsetofend(typeof(*mgmt), u.action.u.ftm)) return true; diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 1668ac4d7adc..3ff96ae31bf6 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -60,6 +60,7 @@ struct br_ip_list { #define BR_TX_FWD_OFFLOAD BIT(20) #define BR_PORT_LOCKED BIT(21) #define BR_PORT_MAB BIT(22) +#define BR_NEIGH_VLAN_SUPPRESS BIT(23) #define BR_DEFAULT_AGEING_TIME (300 * HZ) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 6864b89ef868..0f40f379d75c 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -62,6 +62,14 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) return (struct vlan_ethhdr *)skb_mac_header(skb); } +/* Prefer this version in TX path, instead of + * skb_reset_mac_header() + vlan_eth_hdr() + */ +static inline struct vlan_ethhdr *skb_vlan_eth_hdr(const struct sk_buff *skb) +{ + return (struct vlan_ethhdr *)skb->data; +} + #define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ #define VLAN_PRIO_SHIFT 13 #define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator / Drop Eligible Indicator */ @@ -351,7 +359,8 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, /* Move the mac header sans proto to the beginning of the new header. */ if (likely(mac_len > ETH_TLEN)) memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN); - skb->mac_header -= VLAN_HLEN; + if (skb_mac_header_was_set(skb)) + skb->mac_header -= VLAN_HLEN; veth = (struct vlan_ethhdr *)(skb->data + mac_len - ETH_HLEN); @@ -528,7 +537,7 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, */ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { - struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data; + struct vlan_ethhdr *veth = skb_vlan_eth_hdr(skb); if (!eth_type_vlan(veth->h_vlan_proto)) return -EINVAL; @@ -677,6 +686,27 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, } /** + * vlan_remove_tag - remove outer VLAN tag from payload + * @skb: skbuff to remove tag from + * @vlan_tci: buffer to store value + * + * Expects the skb to contain a VLAN tag in the payload, and to have skb->data + * pointing at the MAC header. + * + * Returns a new pointer to skb->data, or NULL on failure to pull. + */ +static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) +{ + struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); + + *vlan_tci = ntohs(vhdr->h_vlan_TCI); + + memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); + vlan_set_encap_proto(skb, vhdr); + return __skb_pull(skb, VLAN_HLEN); +} + +/** * skb_vlan_tagged - check if skb is vlan tagged. * @skb: skbuff to query * @@ -712,7 +742,7 @@ static inline bool skb_vlan_tagged_multi(struct sk_buff *skb) if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN))) return false; - veh = (struct vlan_ethhdr *)skb->data; + veh = skb_vlan_eth_hdr(skb); protocol = veh->h_vlan_encapsulated_proto; } diff --git a/include/linux/igmp.h b/include/linux/igmp.h index b19d3284551f..ebf4349a53af 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -122,7 +122,7 @@ extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, sockptr_t optval, sockptr_t optlen); extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, sockptr_t optval, size_t offset); -extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, +extern int ip_mc_sf_allow(const struct sock *sk, __be32 local, __be32 rmt, int dif, int sdif); extern void ip_mc_init_dev(struct in_device *); extern void ip_mc_destroy_dev(struct in_device *); diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 37dfdcfcdd54..839247a4f48e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -336,10 +336,7 @@ static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk) return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL; } -static inline struct raw6_sock *raw6_sk(const struct sock *sk) -{ - return (struct raw6_sock *)sk; -} +#define raw6_sk(ptr) container_of_const(ptr, struct raw6_sock, inet.sk) #define ipv6_only_sock(sk) (sk->sk_ipv6only) #define ipv6_sk_rxinfo(sk) ((sk)->sk_family == PF_INET6 && \ diff --git a/include/linux/leds.h b/include/linux/leds.h index d71201a968b6..aa48e643f655 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -82,7 +82,15 @@ struct led_init_data { bool devname_mandatory; }; +#if IS_ENABLED(CONFIG_NEW_LEDS) enum led_default_state led_init_default_state_get(struct fwnode_handle *fwnode); +#else +static inline enum led_default_state +led_init_default_state_get(struct fwnode_handle *fwnode) +{ + return LEDS_DEFSTATE_OFF; +} +#endif struct led_hw_trigger_type { int dummy; @@ -217,9 +225,19 @@ static inline int led_classdev_register(struct device *parent, return led_classdev_register_ext(parent, led_cdev, NULL); } +#if IS_ENABLED(CONFIG_LEDS_CLASS) int devm_led_classdev_register_ext(struct device *parent, struct led_classdev *led_cdev, struct led_init_data *init_data); +#else +static inline int +devm_led_classdev_register_ext(struct device *parent, + struct led_classdev *led_cdev, + struct led_init_data *init_data) +{ + return 0; +} +#endif static inline int devm_led_classdev_register(struct device *parent, struct led_classdev *led_cdev) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 1db19a9d26e3..c0af74efd3cb 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -443,6 +443,8 @@ enum { MLX5_OPCODE_UMR = 0x25, + MLX5_OPCODE_FLOW_TBL_ACCESS = 0x2c, + MLX5_OPCODE_ACCESS_ASO = 0x2d, }; @@ -1367,6 +1369,12 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ESW_INGRESS_ACL_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_ingress.cap) +#define MLX5_CAP_ESW_FT_FIELD_SUPPORT_2(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE(mdev, ft_field_support_2_esw_fdb.cap) + +#define MLX5_CAP_ESW_FT_FIELD_SUPPORT_2_MAX(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, ft_field_support_2_esw_fdb.cap) + #define MLX5_CAP_ESW(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->caps.hca[MLX5_CAP_ESWITCH]->cur, cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7e225e41d55b..a4c4f737f9c1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -134,6 +134,7 @@ enum { MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, + MLX5_REG_MTMP = 0x900A, MLX5_REG_MCIA = 0x9014, MLX5_REG_MFRL = 0x9028, MLX5_REG_MLCR = 0x902b, @@ -438,6 +439,7 @@ struct mlx5_core_health { struct work_struct report_work; struct devlink_health_reporter *fw_reporter; struct devlink_health_reporter *fw_fatal_reporter; + struct devlink_health_reporter *vnic_reporter; struct delayed_work update_fw_log_ts_work; }; @@ -731,6 +733,7 @@ struct mlx5_fw_tracer; struct mlx5_vxlan; struct mlx5_geneve; struct mlx5_hv_vhca; +struct mlx5_thermal; #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) @@ -749,6 +752,7 @@ enum { struct mlx5_profile { u64 mask; u8 log_max_qp; + u8 num_cmd_caches; struct { int size; int limit; @@ -808,6 +812,7 @@ struct mlx5_core_dev { struct mlx5_rsc_dump *rsc_dump; u32 vsc_addr; struct mlx5_hv_vhca *hv_vhca; + struct mlx5_thermal *thermal; }; struct mlx5_db { @@ -1303,4 +1308,10 @@ enum { MLX5_OCTWORD = 16, }; +struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev, + irqreturn_t (*handler)(int, void *), + const struct irq_affinity_desc *affdesc, + const char *name); +void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map); + #endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 66d76e97a087..b42696d74c9f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -69,7 +69,7 @@ enum { MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2 = 0x20, - MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION = 0x25, + MLX5_SET_HCA_CAP_OP_MOD_PORT_SELECTION = 0x25, }; enum { @@ -78,12 +78,15 @@ enum { enum { MLX5_OBJ_TYPE_SW_ICM = 0x0008, + MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT = 0x23, }; enum { MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM), MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11), MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q = (1ULL << 13), + MLX5_GENERAL_OBJ_TYPES_CAP_HEADER_MODIFY_ARGUMENT = + (1ULL << MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT), MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD = (1ULL << 39), }; @@ -321,6 +324,10 @@ enum { MLX5_FT_NIC_TX_RDMA_2_NIC_TX = BIT(1), }; +enum { + MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT = 0x1, +}; + struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_dmac[0x1]; u8 outer_smac[0x1]; @@ -404,10 +411,13 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 metadata_reg_c_0[0x1]; }; +/* Table 2170 - Flow Table Fields Supported 2 Format */ struct mlx5_ifc_flow_table_fields_supported_2_bits { u8 reserved_at_0[0xe]; u8 bth_opcode[0x1]; - u8 reserved_at_f[0x11]; + u8 reserved_at_f[0x1]; + u8 tunnel_header_0_1[0x1]; + u8 reserved_at_11[0xf]; u8 reserved_at_20[0x60]; }; @@ -453,9 +463,11 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 max_ft_level[0x8]; u8 reformat_add_esp_trasport[0x1]; - u8 reserved_at_41[0x2]; + u8 reformat_l2_to_l3_esp_tunnel[0x1]; + u8 reserved_at_42[0x1]; u8 reformat_del_esp_trasport[0x1]; - u8 reserved_at_44[0x2]; + u8 reformat_l3_esp_tunnel_to_l2[0x1]; + u8 reserved_at_45[0x1]; u8 execute_aso[0x1]; u8 reserved_at_47[0x19]; @@ -877,7 +889,12 @@ enum { struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 fdb_to_vport_reg_c_id[0x8]; - u8 reserved_at_8[0xd]; + u8 reserved_at_8[0x5]; + u8 fdb_uplink_hairpin[0x1]; + u8 fdb_multi_path_any_table_limit_regc[0x1]; + u8 reserved_at_f[0x3]; + u8 fdb_multi_path_any_table[0x1]; + u8 reserved_at_13[0x2]; u8 fdb_modify_header_fwd_to_table[0x1]; u8 fdb_ipv4_ttl_modify[0x1]; u8 flow_source[0x1]; @@ -895,7 +912,13 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress; - u8 reserved_at_800[0x1000]; + u8 reserved_at_800[0xC00]; + + struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_esw_fdb; + + struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_bitmask_support_2_esw_fdb; + + u8 reserved_at_1500[0x300]; u8 sw_steering_fdb_action_drop_icm_address_rx[0x40]; @@ -1913,7 +1936,14 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_750[0x4]; u8 max_dynamic_vf_msix_table_size[0xc]; - u8 reserved_at_760[0x20]; + u8 reserved_at_760[0x3]; + u8 log_max_num_header_modify_argument[0x5]; + u8 reserved_at_768[0x4]; + u8 log_header_modify_argument_granularity[0x4]; + u8 reserved_at_770[0x3]; + u8 log_header_modify_argument_max_alloc[0x5]; + u8 reserved_at_778[0x8]; + u8 vhca_tunnel_commands[0x40]; u8 match_definer_format_supported[0x40]; }; @@ -6347,6 +6377,18 @@ struct mlx5_ifc_general_obj_out_cmd_hdr_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_modify_header_arg_bits { + u8 reserved_at_0[0x80]; + + u8 reserved_at_80[0x8]; + u8 access_pd[0x18]; +}; + +struct mlx5_ifc_create_modify_header_arg_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_modify_header_arg_bits arg; +}; + struct mlx5_ifc_create_match_definer_in_bits { struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; @@ -6590,7 +6632,9 @@ enum mlx5_reformat_ctx_type { MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3, MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4, MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4 = 0x5, + MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL = 0x6, MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT = 0x8, + MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2 = 0x9, MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb, MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf, MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10, @@ -10869,6 +10913,31 @@ struct mlx5_ifc_mrtc_reg_bits { u8 time_l[0x20]; }; +struct mlx5_ifc_mtmp_reg_bits { + u8 reserved_at_0[0x14]; + u8 sensor_index[0xc]; + + u8 reserved_at_20[0x10]; + u8 temperature[0x10]; + + u8 mte[0x1]; + u8 mtr[0x1]; + u8 reserved_at_42[0xe]; + u8 max_temperature[0x10]; + + u8 tee[0x2]; + u8 reserved_at_62[0xe]; + u8 temp_threshold_hi[0x10]; + + u8 reserved_at_80[0x10]; + u8 temp_threshold_lo[0x10]; + + u8 reserved_at_a0[0x20]; + + u8 sensor_name_hi[0x20]; + u8 sensor_name_lo[0x20]; +}; + union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_bufferx_reg_bits bufferx_reg; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; @@ -10931,6 +11000,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mfrl_reg_bits mfrl_reg; struct mlx5_ifc_mtutc_reg_bits mtutc_reg; struct mlx5_ifc_mrtc_reg_bits mrtc_reg; + struct mlx5_ifc_mtmp_reg_bits mtmp_reg; u8 reserved_at_0[0x60e0]; }; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index e96ee1e348cb..98b2e1e149f9 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -141,6 +141,12 @@ enum mlx5_ptys_width { MLX5_PTYS_WIDTH_12X = 1 << 4, }; +struct mlx5_port_eth_proto { + u32 cap; + u32 admin; + u32 oper; +}; + #define MLX5E_PROT_MASK(link_mode) (1U << link_mode) #define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ (ext ? MLX5_GET(reg, out, ext_##field) : \ @@ -218,4 +224,14 @@ int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state); int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state); int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio); int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio); + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5_port_eth_proto *eproto); +bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev); +u32 mlx5_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, + bool force_legacy); +u32 mlx5_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, + bool force_legacy); +int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); + #endif /* __MLX5_PORT_H__ */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index df55fbb65717..bd53cf4be7bd 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -499,6 +499,16 @@ struct mlx5_stride_block_ctrl_seg { __be16 num_entries; }; +struct mlx5_wqe_flow_update_ctrl_seg { + __be32 flow_idx_update; + __be32 dest_handle; + u8 reserved0[40]; +}; + +struct mlx5_wqe_header_modify_argument_update_seg { + u8 argument_list[64]; +}; + struct mlx5_core_qp { struct mlx5_core_rsc_common common; /* must be first */ void (*event) (struct mlx5_core_qp *, int); diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 0e4ef9c5127a..c653accdc7fd 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -74,10 +74,13 @@ #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 #define SDIO_DEVICE_ID_BROADCOM_43364 0xa9a4 #define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 -#define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43439 0xa9af +#define SDIO_DEVICE_ID_BROADCOM_43439 0xa9af #define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf #define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43752 0xaae8 +#define SDIO_VENDOR_ID_CYPRESS 0x04b4 +#define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43439 0xbd3d + #define SDIO_VENDOR_ID_MARVELL 0x02df #define SDIO_DEVICE_ID_MARVELL_LIBERTAS 0x9103 #define SDIO_DEVICE_ID_MARVELL_8688_WLAN 0x9104 @@ -112,6 +115,15 @@ #define SDIO_VENDOR_ID_MICROCHIP_WILC 0x0296 #define SDIO_DEVICE_ID_MICROCHIP_WILC1000 0x5347 +#define SDIO_VENDOR_ID_REALTEK 0x024c +#define SDIO_DEVICE_ID_REALTEK_RTW8723BS 0xb723 +#define SDIO_DEVICE_ID_REALTEK_RTW8821BS 0xb821 +#define SDIO_DEVICE_ID_REALTEK_RTW8822BS 0xb822 +#define SDIO_DEVICE_ID_REALTEK_RTW8821CS 0xc821 +#define SDIO_DEVICE_ID_REALTEK_RTW8822CS 0xc822 +#define SDIO_DEVICE_ID_REALTEK_RTW8723DS 0xd723 +#define SDIO_DEVICE_ID_REALTEK_RTW8821DS 0xd821 + #define SDIO_VENDOR_ID_SIANO 0x039a #define SDIO_DEVICE_ID_SIANO_NOVA_B0 0x0201 #define SDIO_DEVICE_ID_SIANO_NICE 0x0202 diff --git a/include/linux/module.h b/include/linux/module.h index e65a71b04490..3730ed99e5f7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -608,14 +608,6 @@ static inline bool within_module(unsigned long addr, const struct module *mod) /* Search for module by name: must be in a RCU-sched critical section. */ struct module *find_module(const char *name); -/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if - symnum out of range. */ -int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, - char *name, char *module_name, int *exported); - -/* Look for this name: can be of form module:name. */ -unsigned long module_kallsyms_lookup_name(const char *name); - extern void __noreturn __module_put_and_kthread_exit(struct module *mod, long code); #define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code) @@ -662,17 +654,6 @@ static inline void __module_get(struct module *module) /* Dereference module function descriptor */ void *dereference_module_function_descriptor(struct module *mod, void *ptr); -/* For kallsyms to ask for address resolution. namebuf should be at - * least KSYM_NAME_LEN long: a pointer to namebuf is returned if - * found, otherwise NULL. */ -const char *module_address_lookup(unsigned long addr, - unsigned long *symbolsize, - unsigned long *offset, - char **modname, const unsigned char **modbuildid, - char *namebuf); -int lookup_module_symbol_name(unsigned long addr, char *symname); -int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); - int register_module_notifier(struct notifier_block *nb); int unregister_module_notifier(struct notifier_block *nb); @@ -763,39 +744,6 @@ static inline void module_put(struct module *module) #define module_name(mod) "kernel" -/* For kallsyms to ask for address resolution. NULL means not found. */ -static inline const char *module_address_lookup(unsigned long addr, - unsigned long *symbolsize, - unsigned long *offset, - char **modname, - const unsigned char **modbuildid, - char *namebuf) -{ - return NULL; -} - -static inline int lookup_module_symbol_name(unsigned long addr, char *symname) -{ - return -ERANGE; -} - -static inline int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name) -{ - return -ERANGE; -} - -static inline int module_get_kallsym(unsigned int symnum, unsigned long *value, - char *type, char *name, - char *module_name, int *exported) -{ - return -ERANGE; -} - -static inline unsigned long module_kallsyms_lookup_name(const char *name) -{ - return 0; -} - static inline int register_module_notifier(struct notifier_block *nb) { /* no events will happen anyway, so this can always succeed */ @@ -891,7 +839,36 @@ int module_kallsyms_on_each_symbol(const char *modname, int (*fn)(void *, const char *, struct module *, unsigned long), void *data); -#else + +/* For kallsyms to ask for address resolution. namebuf should be at + * least KSYM_NAME_LEN long: a pointer to namebuf is returned if + * found, otherwise NULL. + */ +const char *module_address_lookup(unsigned long addr, + unsigned long *symbolsize, + unsigned long *offset, + char **modname, const unsigned char **modbuildid, + char *namebuf); +int lookup_module_symbol_name(unsigned long addr, char *symname); +int lookup_module_symbol_attrs(unsigned long addr, + unsigned long *size, + unsigned long *offset, + char *modname, + char *name); + +/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if + * symnum out of range. + */ +int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, + char *name, char *module_name, int *exported); + +/* Look for this name: can be of form module:name. */ +unsigned long module_kallsyms_lookup_name(const char *name); + +unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name); + +#else /* CONFIG_MODULES && CONFIG_KALLSYMS */ + static inline int module_kallsyms_on_each_symbol(const char *modname, int (*fn)(void *, const char *, struct module *, unsigned long), @@ -899,6 +876,50 @@ static inline int module_kallsyms_on_each_symbol(const char *modname, { return -EOPNOTSUPP; } + +/* For kallsyms to ask for address resolution. NULL means not found. */ +static inline const char *module_address_lookup(unsigned long addr, + unsigned long *symbolsize, + unsigned long *offset, + char **modname, + const unsigned char **modbuildid, + char *namebuf) +{ + return NULL; +} + +static inline int lookup_module_symbol_name(unsigned long addr, char *symname) +{ + return -ERANGE; +} + +static inline int lookup_module_symbol_attrs(unsigned long addr, + unsigned long *size, + unsigned long *offset, + char *modname, + char *name) +{ + return -ERANGE; +} + +static inline int module_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name, + char *module_name, int *exported) +{ + return -ERANGE; +} + +static inline unsigned long module_kallsyms_lookup_name(const char *name) +{ + return 0; +} + +static inline unsigned long find_kallsyms_symbol_value(struct module *mod, + const char *name) +{ + return 0; +} + #endif /* CONFIG_MODULES && CONFIG_KALLSYMS */ #endif /* _LINUX_MODULE_H */ diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h new file mode 100644 index 000000000000..fd67f3cc0c4b --- /dev/null +++ b/include/linux/net_tstamp.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_NET_TIMESTAMPING_H_ +#define _LINUX_NET_TIMESTAMPING_H_ + +#include <uapi/linux/net_tstamp.h> + +/** + * struct kernel_hwtstamp_config - Kernel copy of struct hwtstamp_config + * + * @flags: see struct hwtstamp_config + * @tx_type: see struct hwtstamp_config + * @rx_filter: see struct hwtstamp_config + * + * Prefer using this structure for in-kernel processing of hardware + * timestamping configuration, over the inextensible struct hwtstamp_config + * exposed to the %SIOCGHWTSTAMP and %SIOCSHWTSTAMP ioctl UAPI. + */ +struct kernel_hwtstamp_config { + int flags; + int tx_type; + int rx_filter; +}; + +static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kernel_cfg, + const struct hwtstamp_config *cfg) +{ + kernel_cfg->flags = cfg->flags; + kernel_cfg->tx_type = cfg->tx_type; + kernel_cfg->rx_filter = cfg->rx_filter; +} + +#endif /* _LINUX_NET_TIMESTAMPING_H_ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c35f04f636f1..08fbd4622ccf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -52,6 +52,7 @@ #include <linux/rbtree.h> #include <net/net_trackers.h> #include <net/net_debug.h> +#include <net/dropreason-core.h> struct netpoll_info; struct device; @@ -359,18 +360,22 @@ struct napi_struct { unsigned long gro_bitmask; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL + /* CPU actively polling if netpoll is configured */ int poll_owner; #endif + /* CPU on which NAPI has been scheduled for processing */ + int list_owner; struct net_device *dev; struct gro_list gro_hash[GRO_HASH_BUCKETS]; struct sk_buff *skb; struct list_head rx_list; /* Pending GRO_NORMAL skbs */ int rx_count; /* length of rx_list */ + unsigned int napi_id; struct hrtimer timer; + struct task_struct *thread; + /* control-path-only fields follow */ struct list_head dev_list; struct hlist_node napi_hash_node; - unsigned int napi_id; - struct task_struct *thread; }; enum { @@ -508,15 +513,18 @@ static inline bool napi_reschedule(struct napi_struct *napi) return false; } -bool napi_complete_done(struct napi_struct *n, int work_done); /** - * napi_complete - NAPI processing complete - * @n: NAPI context + * napi_complete_done - NAPI processing complete + * @n: NAPI context + * @work_done: number of packets processed * - * Mark NAPI processing as complete. - * Consider using napi_complete_done() instead. + * Mark NAPI processing as complete. Should only be called if poll budget + * has not been completely consumed. + * Prefer over napi_complete(). * Return false if device should avoid rearming interrupts. */ +bool napi_complete_done(struct napi_struct *n, int work_done); + static inline bool napi_complete(struct napi_struct *n) { return napi_complete_done(n, 0); @@ -1308,6 +1316,17 @@ struct netdev_net_notifier { * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * + * int (*ndo_mdb_add)(struct net_device *dev, struct nlattr *tb[], + * u16 nlmsg_flags, struct netlink_ext_ack *extack); + * Adds an MDB entry to dev. + * int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[], + * struct netlink_ext_ack *extack); + * Deletes the MDB entry from dev. + * int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, + * struct netlink_callback *cb); + * Dumps MDB entries from dev. The first argument (marker) in the netlink + * callback is used by core rtnetlink code. + * * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags, struct netlink_ext_ack *extack) * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, @@ -1570,6 +1589,16 @@ struct net_device_ops { const unsigned char *addr, u16 vid, u32 portid, u32 seq, struct netlink_ext_ack *extack); + int (*ndo_mdb_add)(struct net_device *dev, + struct nlattr *tb[], + u16 nlmsg_flags, + struct netlink_ext_ack *extack); + int (*ndo_mdb_del)(struct net_device *dev, + struct nlattr *tb[], + struct netlink_ext_ack *extack); + int (*ndo_mdb_dump)(struct net_device *dev, + struct sk_buff *skb, + struct netlink_callback *cb); int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, @@ -2463,6 +2492,7 @@ static inline struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, unsigned int index) { + DEBUG_NET_WARN_ON_ONCE(index >= dev->num_tx_queues); return &dev->_tx[index]; } @@ -2958,7 +2988,8 @@ netdev_notifier_info_to_extack(const struct netdev_notifier_info *info) } int call_netdevice_notifiers(unsigned long val, struct net_device *dev); - +int call_netdevice_notifiers_info(unsigned long val, + struct netdev_notifier_info *info); extern rwlock_t dev_base_lock; /* Device list lock */ @@ -3163,6 +3194,10 @@ struct softnet_data { #ifdef CONFIG_RPS struct softnet_data *rps_ipi_list; #endif + + bool in_net_rx_action; + bool in_napi_threaded_poll; + #ifdef CONFIG_NET_FLOW_LIMIT struct sd_flow_limit __rcu *flow_limit; #endif @@ -3307,6 +3342,7 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { + /* Must be an atomic op see netif_txq_try_stop() */ set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } @@ -3503,7 +3539,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, * netdev_tx_sent_queue will miss the update and cause the queue to * be stopped forever */ - smp_mb(); + smp_mb(); /* NOTE: netdev_txq_completed_mb() assumes this exists */ if (unlikely(dql_avail(&dev_queue->dql) < 0)) return; @@ -3807,13 +3843,8 @@ static inline unsigned int get_netdev_rx_queue_index( int netif_get_num_default_rss_queues(void); -enum skb_free_reason { - SKB_REASON_CONSUMED, - SKB_REASON_DROPPED, -}; - -void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason); -void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason); +void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason); +void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason); /* * It is not allowed to call kfree_skb() or consume_skb() from hardware @@ -3836,22 +3867,22 @@ void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason); */ static inline void dev_kfree_skb_irq(struct sk_buff *skb) { - __dev_kfree_skb_irq(skb, SKB_REASON_DROPPED); + dev_kfree_skb_irq_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); } static inline void dev_consume_skb_irq(struct sk_buff *skb) { - __dev_kfree_skb_irq(skb, SKB_REASON_CONSUMED); + dev_kfree_skb_irq_reason(skb, SKB_CONSUMED); } static inline void dev_kfree_skb_any(struct sk_buff *skb) { - __dev_kfree_skb_any(skb, SKB_REASON_DROPPED); + dev_kfree_skb_any_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); } static inline void dev_consume_skb_any(struct sk_buff *skb) { - __dev_kfree_skb_any(skb, SKB_REASON_CONSUMED); + dev_kfree_skb_any_reason(skb, SKB_CONSUMED); } u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index c8e03bcaecaa..0762444e3767 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -80,6 +80,7 @@ typedef unsigned int nf_hookfn(void *priv, enum nf_hook_ops_type { NF_HOOK_OP_UNDEFINED, NF_HOOK_OP_NF_TABLES, + NF_HOOK_OP_BPF, }; struct nf_hook_ops { diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 241e005f290a..e9a9ab34a7cc 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -45,7 +45,6 @@ struct nfnetlink_subsystem { int (*commit)(struct net *net, struct sk_buff *skb); int (*abort)(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action); - void (*cleanup)(struct net *net); bool (*valid_genid)(struct net *net, u32 genid); }; diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 48314ade1506..7834c0be2831 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -197,6 +197,8 @@ static inline int nf_cookie_v6_check(const struct ipv6hdr *iph, __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); +int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen); + int ipv6_netfilter_init(void); void ipv6_netfilter_fini(void); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index c43ac7690eca..19c0791ed9d5 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -50,7 +50,6 @@ struct netlink_kernel_cfg { struct mutex *cb_mutex; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); - bool (*compare)(struct net *net, struct sock *sk); }; struct sock *__netlink_kernel_create(struct net *net, int unit, @@ -162,9 +161,31 @@ struct netlink_ext_ack { } \ } while (0) +#define NL_SET_ERR_MSG_ATTR_POL_FMT(extack, attr, pol, fmt, args...) do { \ + struct netlink_ext_ack *__extack = (extack); \ + \ + if (!__extack) \ + break; \ + \ + if (snprintf(__extack->_msg_buf, NETLINK_MAX_FMTMSG_LEN, \ + "%s" fmt "%s", "", ##args, "") >= \ + NETLINK_MAX_FMTMSG_LEN) \ + net_warn_ratelimited("%s" fmt "%s", "truncated extack: ", \ + ##args, "\n"); \ + \ + do_trace_netlink_extack(__extack->_msg_buf); \ + \ + __extack->_msg = __extack->_msg_buf; \ + __extack->bad_attr = (attr); \ + __extack->policy = (pol); \ +} while (0) + #define NL_SET_ERR_MSG_ATTR(extack, attr, msg) \ NL_SET_ERR_MSG_ATTR_POL(extack, attr, NULL, msg) +#define NL_SET_ERR_MSG_ATTR_FMT(extack, attr, msg, args...) \ + NL_SET_ERR_MSG_ATTR_POL_FMT(extack, attr, NULL, msg, ##args) + #define NL_SET_ERR_ATTR_MISS(extack, nest, type) do { \ struct netlink_ext_ack *__extack = (extack); \ \ diff --git a/include/linux/pcs/pcs-mtk-lynxi.h b/include/linux/pcs/pcs-mtk-lynxi.h new file mode 100644 index 000000000000..be3b4ab32f4a --- /dev/null +++ b/include/linux/pcs/pcs-mtk-lynxi.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_PCS_MTK_LYNXI_H +#define __LINUX_PCS_MTK_LYNXI_H + +#include <linux/phylink.h> +#include <linux/regmap.h> + +#define MTK_SGMII_FLAG_PN_SWAP BIT(0) +struct phylink_pcs *mtk_pcs_lynxi_create(struct device *dev, + struct regmap *regmap, + u32 ana_rgc3, u32 flags); +void mtk_pcs_lynxi_destroy(struct phylink_pcs *pcs); +#endif diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h new file mode 100644 index 000000000000..98a60ce87b92 --- /dev/null +++ b/include/linux/pds/pds_adminq.h @@ -0,0 +1,647 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2023 Advanced Micro Devices, Inc */ + +#ifndef _PDS_CORE_ADMINQ_H_ +#define _PDS_CORE_ADMINQ_H_ + +#define PDSC_ADMINQ_MAX_POLL_INTERVAL 256 + +enum pds_core_adminq_flags { + PDS_AQ_FLAG_FASTPOLL = BIT(1), /* completion poll at 1ms */ +}; + +/* + * enum pds_core_adminq_opcode - AdminQ command opcodes + * These commands are only processed on AdminQ, not available in devcmd + */ +enum pds_core_adminq_opcode { + PDS_AQ_CMD_NOP = 0, + + /* Client control */ + PDS_AQ_CMD_CLIENT_REG = 6, + PDS_AQ_CMD_CLIENT_UNREG = 7, + PDS_AQ_CMD_CLIENT_CMD = 8, + + /* LIF commands */ + PDS_AQ_CMD_LIF_IDENTIFY = 20, + PDS_AQ_CMD_LIF_INIT = 21, + PDS_AQ_CMD_LIF_RESET = 22, + PDS_AQ_CMD_LIF_GETATTR = 23, + PDS_AQ_CMD_LIF_SETATTR = 24, + PDS_AQ_CMD_LIF_SETPHC = 25, + + PDS_AQ_CMD_RX_MODE_SET = 30, + PDS_AQ_CMD_RX_FILTER_ADD = 31, + PDS_AQ_CMD_RX_FILTER_DEL = 32, + + /* Queue commands */ + PDS_AQ_CMD_Q_IDENTIFY = 39, + PDS_AQ_CMD_Q_INIT = 40, + PDS_AQ_CMD_Q_CONTROL = 41, + + /* SR/IOV commands */ + PDS_AQ_CMD_VF_GETATTR = 60, + PDS_AQ_CMD_VF_SETATTR = 61, +}; + +/* + * enum pds_core_notifyq_opcode - NotifyQ event codes + */ +enum pds_core_notifyq_opcode { + PDS_EVENT_LINK_CHANGE = 1, + PDS_EVENT_RESET = 2, + PDS_EVENT_XCVR = 5, + PDS_EVENT_CLIENT = 6, +}; + +#define PDS_COMP_COLOR_MASK 0x80 + +/** + * struct pds_core_notifyq_event - Generic event reporting structure + * @eid: event number + * @ecode: event code + * + * This is the generic event report struct from which the other + * actual events will be formed. + */ +struct pds_core_notifyq_event { + __le64 eid; + __le16 ecode; +}; + +/** + * struct pds_core_link_change_event - Link change event notification + * @eid: event number + * @ecode: event code = PDS_EVENT_LINK_CHANGE + * @link_status: link up/down, with error bits + * @link_speed: speed of the network link + * + * Sent when the network link state changes between UP and DOWN + */ +struct pds_core_link_change_event { + __le64 eid; + __le16 ecode; + __le16 link_status; + __le32 link_speed; /* units of 1Mbps: e.g. 10000 = 10Gbps */ +}; + +/** + * struct pds_core_reset_event - Reset event notification + * @eid: event number + * @ecode: event code = PDS_EVENT_RESET + * @reset_code: reset type + * @state: 0=pending, 1=complete, 2=error + * + * Sent when the NIC or some subsystem is going to be or + * has been reset. + */ +struct pds_core_reset_event { + __le64 eid; + __le16 ecode; + u8 reset_code; + u8 state; +}; + +/** + * struct pds_core_client_event - Client event notification + * @eid: event number + * @ecode: event code = PDS_EVENT_CLIENT + * @client_id: client to sent event to + * @client_event: wrapped event struct for the client + * + * Sent when an event needs to be passed on to a client + */ +struct pds_core_client_event { + __le64 eid; + __le16 ecode; + __le16 client_id; + u8 client_event[54]; +}; + +/** + * struct pds_core_notifyq_cmd - Placeholder for building qcq + * @data: anonymous field for building the qcq + */ +struct pds_core_notifyq_cmd { + __le32 data; /* Not used but needed for qcq structure */ +}; + +/* + * union pds_core_notifyq_comp - Overlay of notifyq event structures + */ +union pds_core_notifyq_comp { + struct { + __le64 eid; + __le16 ecode; + }; + struct pds_core_notifyq_event event; + struct pds_core_link_change_event link_change; + struct pds_core_reset_event reset; + u8 data[64]; +}; + +#define PDS_DEVNAME_LEN 32 +/** + * struct pds_core_client_reg_cmd - Register a new client with DSC + * @opcode: opcode PDS_AQ_CMD_CLIENT_REG + * @rsvd: word boundary padding + * @devname: text name of client device + * @vif_type: what type of device (enum pds_core_vif_types) + * + * Tell the DSC of the new client, and receive a client_id from DSC. + */ +struct pds_core_client_reg_cmd { + u8 opcode; + u8 rsvd[3]; + char devname[PDS_DEVNAME_LEN]; + u8 vif_type; +}; + +/** + * struct pds_core_client_reg_comp - Client registration completion + * @status: Status of the command (enum pdc_core_status_code) + * @rsvd: Word boundary padding + * @comp_index: Index in the descriptor ring for which this is the completion + * @client_id: New id assigned by DSC + * @rsvd1: Word boundary padding + * @color: Color bit + */ +struct pds_core_client_reg_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + __le16 client_id; + u8 rsvd1[9]; + u8 color; +}; + +/** + * struct pds_core_client_unreg_cmd - Unregister a client from DSC + * @opcode: opcode PDS_AQ_CMD_CLIENT_UNREG + * @rsvd: word boundary padding + * @client_id: id of client being removed + * + * Tell the DSC this client is going away and remove its context + * This uses the generic completion. + */ +struct pds_core_client_unreg_cmd { + u8 opcode; + u8 rsvd; + __le16 client_id; +}; + +/** + * struct pds_core_client_request_cmd - Pass along a wrapped client AdminQ cmd + * @opcode: opcode PDS_AQ_CMD_CLIENT_CMD + * @rsvd: word boundary padding + * @client_id: id of client being removed + * @client_cmd: the wrapped client command + * + * Proxy post an adminq command for the client. + * This uses the generic completion. + */ +struct pds_core_client_request_cmd { + u8 opcode; + u8 rsvd; + __le16 client_id; + u8 client_cmd[60]; +}; + +#define PDS_CORE_MAX_FRAGS 16 + +#define PDS_CORE_QCQ_F_INITED BIT(0) +#define PDS_CORE_QCQ_F_SG BIT(1) +#define PDS_CORE_QCQ_F_INTR BIT(2) +#define PDS_CORE_QCQ_F_TX_STATS BIT(3) +#define PDS_CORE_QCQ_F_RX_STATS BIT(4) +#define PDS_CORE_QCQ_F_NOTIFYQ BIT(5) +#define PDS_CORE_QCQ_F_CMB_RINGS BIT(6) +#define PDS_CORE_QCQ_F_CORE BIT(7) + +enum pds_core_lif_type { + PDS_CORE_LIF_TYPE_DEFAULT = 0, +}; + +/** + * union pds_core_lif_config - LIF configuration + * @state: LIF state (enum pds_core_lif_state) + * @rsvd: Word boundary padding + * @name: LIF name + * @rsvd2: Word boundary padding + * @features: LIF features active (enum pds_core_hw_features) + * @queue_count: Queue counts per queue-type + * @words: Full union buffer size + */ +union pds_core_lif_config { + struct { + u8 state; + u8 rsvd[3]; + char name[PDS_CORE_IFNAMSIZ]; + u8 rsvd2[12]; + __le64 features; + __le32 queue_count[PDS_CORE_QTYPE_MAX]; + } __packed; + __le32 words[64]; +}; + +/** + * struct pds_core_lif_status - LIF status register + * @eid: most recent NotifyQ event id + * @rsvd: full struct size + */ +struct pds_core_lif_status { + __le64 eid; + u8 rsvd[56]; +}; + +/** + * struct pds_core_lif_info - LIF info structure + * @config: LIF configuration structure + * @status: LIF status structure + */ +struct pds_core_lif_info { + union pds_core_lif_config config; + struct pds_core_lif_status status; +}; + +/** + * struct pds_core_lif_identity - LIF identity information (type-specific) + * @features: LIF features (see enum pds_core_hw_features) + * @version: Identify structure version + * @hw_index: LIF hardware index + * @rsvd: Word boundary padding + * @max_nb_sessions: Maximum number of sessions supported + * @rsvd2: buffer padding + * @config: LIF config struct with features, q counts + */ +struct pds_core_lif_identity { + __le64 features; + u8 version; + u8 hw_index; + u8 rsvd[2]; + __le32 max_nb_sessions; + u8 rsvd2[120]; + union pds_core_lif_config config; +}; + +/** + * struct pds_core_lif_identify_cmd - Get LIF identity info command + * @opcode: Opcode PDS_AQ_CMD_LIF_IDENTIFY + * @type: LIF type (enum pds_core_lif_type) + * @client_id: Client identifier + * @ver: Version of identify returned by device + * @rsvd: Word boundary padding + * @ident_pa: DMA address to receive identity info + * + * Firmware will copy LIF identity data (struct pds_core_lif_identity) + * into the buffer address given. + */ +struct pds_core_lif_identify_cmd { + u8 opcode; + u8 type; + __le16 client_id; + u8 ver; + u8 rsvd[3]; + __le64 ident_pa; +}; + +/** + * struct pds_core_lif_identify_comp - LIF identify command completion + * @status: Status of the command (enum pds_core_status_code) + * @ver: Version of identify returned by device + * @bytes: Bytes copied into the buffer + * @rsvd: Word boundary padding + * @color: Color bit + */ +struct pds_core_lif_identify_comp { + u8 status; + u8 ver; + __le16 bytes; + u8 rsvd[11]; + u8 color; +}; + +/** + * struct pds_core_lif_init_cmd - LIF init command + * @opcode: Opcode PDS_AQ_CMD_LIF_INIT + * @type: LIF type (enum pds_core_lif_type) + * @client_id: Client identifier + * @rsvd: Word boundary padding + * @info_pa: Destination address for LIF info (struct pds_core_lif_info) + */ +struct pds_core_lif_init_cmd { + u8 opcode; + u8 type; + __le16 client_id; + __le32 rsvd; + __le64 info_pa; +}; + +/** + * struct pds_core_lif_init_comp - LIF init command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word boundary padding + * @hw_index: Hardware index of the initialized LIF + * @rsvd1: Word boundary padding + * @color: Color bit + */ +struct pds_core_lif_init_comp { + u8 status; + u8 rsvd; + __le16 hw_index; + u8 rsvd1[11]; + u8 color; +}; + +/** + * struct pds_core_lif_reset_cmd - LIF reset command + * Will reset only the specified LIF. + * @opcode: Opcode PDS_AQ_CMD_LIF_RESET + * @rsvd: Word boundary padding + * @client_id: Client identifier + */ +struct pds_core_lif_reset_cmd { + u8 opcode; + u8 rsvd; + __le16 client_id; +}; + +/** + * enum pds_core_lif_attr - List of LIF attributes + * @PDS_CORE_LIF_ATTR_STATE: LIF state attribute + * @PDS_CORE_LIF_ATTR_NAME: LIF name attribute + * @PDS_CORE_LIF_ATTR_FEATURES: LIF features attribute + * @PDS_CORE_LIF_ATTR_STATS_CTRL: LIF statistics control attribute + */ +enum pds_core_lif_attr { + PDS_CORE_LIF_ATTR_STATE = 0, + PDS_CORE_LIF_ATTR_NAME = 1, + PDS_CORE_LIF_ATTR_FEATURES = 4, + PDS_CORE_LIF_ATTR_STATS_CTRL = 6, +}; + +/** + * struct pds_core_lif_setattr_cmd - Set LIF attributes on the NIC + * @opcode: Opcode PDS_AQ_CMD_LIF_SETATTR + * @attr: Attribute type (enum pds_core_lif_attr) + * @client_id: Client identifier + * @state: LIF state (enum pds_core_lif_state) + * @name: The name string, 0 terminated + * @features: Features (enum pds_core_hw_features) + * @stats_ctl: Stats control commands (enum pds_core_stats_ctl_cmd) + * @rsvd: Command Buffer padding + */ +struct pds_core_lif_setattr_cmd { + u8 opcode; + u8 attr; + __le16 client_id; + union { + u8 state; + char name[PDS_CORE_IFNAMSIZ]; + __le64 features; + u8 stats_ctl; + u8 rsvd[60]; + } __packed; +}; + +/** + * struct pds_core_lif_setattr_comp - LIF set attr command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word boundary padding + * @comp_index: Index in the descriptor ring for which this is the completion + * @features: Features (enum pds_core_hw_features) + * @rsvd2: Word boundary padding + * @color: Color bit + */ +struct pds_core_lif_setattr_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + union { + __le64 features; + u8 rsvd2[11]; + } __packed; + u8 color; +}; + +/** + * struct pds_core_lif_getattr_cmd - Get LIF attributes from the NIC + * @opcode: Opcode PDS_AQ_CMD_LIF_GETATTR + * @attr: Attribute type (enum pds_core_lif_attr) + * @client_id: Client identifier + */ +struct pds_core_lif_getattr_cmd { + u8 opcode; + u8 attr; + __le16 client_id; +}; + +/** + * struct pds_core_lif_getattr_comp - LIF get attr command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word boundary padding + * @comp_index: Index in the descriptor ring for which this is the completion + * @state: LIF state (enum pds_core_lif_state) + * @name: LIF name string, 0 terminated + * @features: Features (enum pds_core_hw_features) + * @rsvd2: Word boundary padding + * @color: Color bit + */ +struct pds_core_lif_getattr_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + union { + u8 state; + __le64 features; + u8 rsvd2[11]; + } __packed; + u8 color; +}; + +/** + * union pds_core_q_identity - Queue identity information + * @version: Queue type version that can be used with FW + * @supported: Bitfield of queue versions, first bit = ver 0 + * @rsvd: Word boundary padding + * @features: Queue features + * @desc_sz: Descriptor size + * @comp_sz: Completion descriptor size + * @rsvd2: Word boundary padding + */ +struct pds_core_q_identity { + u8 version; + u8 supported; + u8 rsvd[6]; +#define PDS_CORE_QIDENT_F_CQ 0x01 /* queue has completion ring */ + __le64 features; + __le16 desc_sz; + __le16 comp_sz; + u8 rsvd2[6]; +}; + +/** + * struct pds_core_q_identify_cmd - queue identify command + * @opcode: Opcode PDS_AQ_CMD_Q_IDENTIFY + * @type: Logical queue type (enum pds_core_logical_qtype) + * @client_id: Client identifier + * @ver: Highest queue type version that the driver supports + * @rsvd: Word boundary padding + * @ident_pa: DMA address to receive the data (struct pds_core_q_identity) + */ +struct pds_core_q_identify_cmd { + u8 opcode; + u8 type; + __le16 client_id; + u8 ver; + u8 rsvd[3]; + __le64 ident_pa; +}; + +/** + * struct pds_core_q_identify_comp - queue identify command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word boundary padding + * @comp_index: Index in the descriptor ring for which this is the completion + * @ver: Queue type version that can be used with FW + * @rsvd1: Word boundary padding + * @color: Color bit + */ +struct pds_core_q_identify_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 ver; + u8 rsvd1[10]; + u8 color; +}; + +/** + * struct pds_core_q_init_cmd - Queue init command + * @opcode: Opcode PDS_AQ_CMD_Q_INIT + * @type: Logical queue type + * @client_id: Client identifier + * @ver: Queue type version + * @rsvd: Word boundary padding + * @index: (LIF, qtype) relative admin queue index + * @intr_index: Interrupt control register index, or Event queue index + * @pid: Process ID + * @flags: + * IRQ: Interrupt requested on completion + * ENA: Enable the queue. If ENA=0 the queue is initialized + * but remains disabled, to be later enabled with the + * Queue Enable command. If ENA=1, then queue is + * initialized and then enabled. + * @cos: Class of service for this queue + * @ring_size: Queue ring size, encoded as a log2(size), in + * number of descriptors. The actual ring size is + * (1 << ring_size). For example, to select a ring size + * of 64 descriptors write ring_size = 6. The minimum + * ring_size value is 2 for a ring of 4 descriptors. + * The maximum ring_size value is 12 for a ring of 4k + * descriptors. Values of ring_size <2 and >12 are + * reserved. + * @ring_base: Queue ring base address + * @cq_ring_base: Completion queue ring base address + */ +struct pds_core_q_init_cmd { + u8 opcode; + u8 type; + __le16 client_id; + u8 ver; + u8 rsvd[3]; + __le32 index; + __le16 pid; + __le16 intr_index; + __le16 flags; +#define PDS_CORE_QINIT_F_IRQ 0x01 /* Request interrupt on completion */ +#define PDS_CORE_QINIT_F_ENA 0x02 /* Enable the queue */ + u8 cos; +#define PDS_CORE_QSIZE_MIN_LG2 2 +#define PDS_CORE_QSIZE_MAX_LG2 12 + u8 ring_size; + __le64 ring_base; + __le64 cq_ring_base; +} __packed; + +/** + * struct pds_core_q_init_comp - Queue init command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word boundary padding + * @comp_index: Index in the descriptor ring for which this is the completion + * @hw_index: Hardware Queue ID + * @hw_type: Hardware Queue type + * @rsvd2: Word boundary padding + * @color: Color + */ +struct pds_core_q_init_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + __le32 hw_index; + u8 hw_type; + u8 rsvd2[6]; + u8 color; +}; + +union pds_core_adminq_cmd { + u8 opcode; + u8 bytes[64]; + + struct pds_core_client_reg_cmd client_reg; + struct pds_core_client_unreg_cmd client_unreg; + struct pds_core_client_request_cmd client_request; + + struct pds_core_lif_identify_cmd lif_ident; + struct pds_core_lif_init_cmd lif_init; + struct pds_core_lif_reset_cmd lif_reset; + struct pds_core_lif_setattr_cmd lif_setattr; + struct pds_core_lif_getattr_cmd lif_getattr; + + struct pds_core_q_identify_cmd q_ident; + struct pds_core_q_init_cmd q_init; +}; + +union pds_core_adminq_comp { + struct { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 rsvd2[11]; + u8 color; + }; + u32 words[4]; + + struct pds_core_client_reg_comp client_reg; + + struct pds_core_lif_identify_comp lif_ident; + struct pds_core_lif_init_comp lif_init; + struct pds_core_lif_setattr_comp lif_setattr; + struct pds_core_lif_getattr_comp lif_getattr; + + struct pds_core_q_identify_comp q_ident; + struct pds_core_q_init_comp q_init; +}; + +#ifndef __CHECKER__ +static_assert(sizeof(union pds_core_adminq_cmd) == 64); +static_assert(sizeof(union pds_core_adminq_comp) == 16); +static_assert(sizeof(union pds_core_notifyq_comp) == 64); +#endif /* __CHECKER__ */ + +/* The color bit is a 'done' bit for the completion descriptors + * where the meaning alternates between '1' and '0' for alternating + * passes through the completion descriptor ring. + */ +static inline bool pdsc_color_match(u8 color, bool done_color) +{ + return (!!(color & PDS_COMP_COLOR_MASK)) == done_color; +} + +struct pdsc; +int pdsc_adminq_post(struct pdsc *pdsc, + union pds_core_adminq_cmd *cmd, + union pds_core_adminq_comp *comp, + bool fast_poll); + +#endif /* _PDS_CORE_ADMINQ_H_ */ diff --git a/include/linux/pds/pds_auxbus.h b/include/linux/pds/pds_auxbus.h new file mode 100644 index 000000000000..214ef12302d0 --- /dev/null +++ b/include/linux/pds/pds_auxbus.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2023 Advanced Micro Devices, Inc */ + +#ifndef _PDSC_AUXBUS_H_ +#define _PDSC_AUXBUS_H_ + +#include <linux/auxiliary_bus.h> + +struct pds_auxiliary_dev { + struct auxiliary_device aux_dev; + struct pci_dev *vf_pdev; + u16 client_id; +}; + +int pds_client_adminq_cmd(struct pds_auxiliary_dev *padev, + union pds_core_adminq_cmd *req, + size_t req_len, + union pds_core_adminq_comp *resp, + u64 flags); +#endif /* _PDSC_AUXBUS_H_ */ diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h new file mode 100644 index 000000000000..060331486d50 --- /dev/null +++ b/include/linux/pds/pds_common.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */ +/* Copyright(c) 2023 Advanced Micro Devices, Inc. */ + +#ifndef _PDS_COMMON_H_ +#define _PDS_COMMON_H_ + +#define PDS_CORE_DRV_NAME "pds_core" + +/* the device's internal addressing uses up to 52 bits */ +#define PDS_CORE_ADDR_LEN 52 +#define PDS_CORE_ADDR_MASK (BIT_ULL(PDS_ADDR_LEN) - 1) +#define PDS_PAGE_SIZE 4096 + +enum pds_core_driver_type { + PDS_DRIVER_LINUX = 1, + PDS_DRIVER_WIN = 2, + PDS_DRIVER_DPDK = 3, + PDS_DRIVER_FREEBSD = 4, + PDS_DRIVER_IPXE = 5, + PDS_DRIVER_ESXI = 6, +}; + +enum pds_core_vif_types { + PDS_DEV_TYPE_CORE = 0, + PDS_DEV_TYPE_VDPA = 1, + PDS_DEV_TYPE_VFIO = 2, + PDS_DEV_TYPE_ETH = 3, + PDS_DEV_TYPE_RDMA = 4, + PDS_DEV_TYPE_LM = 5, + + /* new ones added before this line */ + PDS_DEV_TYPE_MAX = 16 /* don't change - used in struct size */ +}; + +#define PDS_DEV_TYPE_CORE_STR "Core" +#define PDS_DEV_TYPE_VDPA_STR "vDPA" +#define PDS_DEV_TYPE_VFIO_STR "VFio" +#define PDS_DEV_TYPE_ETH_STR "Eth" +#define PDS_DEV_TYPE_RDMA_STR "RDMA" +#define PDS_DEV_TYPE_LM_STR "LM" + +#define PDS_CORE_IFNAMSIZ 16 + +/** + * enum pds_core_logical_qtype - Logical Queue Types + * @PDS_CORE_QTYPE_ADMINQ: Administrative Queue + * @PDS_CORE_QTYPE_NOTIFYQ: Notify Queue + * @PDS_CORE_QTYPE_RXQ: Receive Queue + * @PDS_CORE_QTYPE_TXQ: Transmit Queue + * @PDS_CORE_QTYPE_EQ: Event Queue + * @PDS_CORE_QTYPE_MAX: Max queue type supported + */ +enum pds_core_logical_qtype { + PDS_CORE_QTYPE_ADMINQ = 0, + PDS_CORE_QTYPE_NOTIFYQ = 1, + PDS_CORE_QTYPE_RXQ = 2, + PDS_CORE_QTYPE_TXQ = 3, + PDS_CORE_QTYPE_EQ = 4, + + PDS_CORE_QTYPE_MAX = 16 /* don't change - used in struct size */ +}; + +int pdsc_register_notify(struct notifier_block *nb); +void pdsc_unregister_notify(struct notifier_block *nb); +void *pdsc_get_pf_struct(struct pci_dev *vf_pdev); +int pds_client_register(struct pci_dev *pf_pdev, char *devname); +int pds_client_unregister(struct pci_dev *pf_pdev, u16 client_id); +#endif /* _PDS_COMMON_H_ */ diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h new file mode 100644 index 000000000000..e838a2b90440 --- /dev/null +++ b/include/linux/pds/pds_core_if.h @@ -0,0 +1,571 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */ +/* Copyright(c) 2023 Advanced Micro Devices, Inc. */ + +#ifndef _PDS_CORE_IF_H_ +#define _PDS_CORE_IF_H_ + +#define PCI_VENDOR_ID_PENSANDO 0x1dd8 +#define PCI_DEVICE_ID_PENSANDO_CORE_PF 0x100c +#define PCI_DEVICE_ID_VIRTIO_NET_TRANS 0x1000 +#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003 +#define PCI_DEVICE_ID_PENSANDO_VDPA_VF 0x100b +#define PDS_CORE_BARS_MAX 4 +#define PDS_CORE_PCI_BAR_DBELL 1 + +/* Bar0 */ +#define PDS_CORE_DEV_INFO_SIGNATURE 0x44455649 /* 'DEVI' */ +#define PDS_CORE_BAR0_SIZE 0x8000 +#define PDS_CORE_BAR0_DEV_INFO_REGS_OFFSET 0x0000 +#define PDS_CORE_BAR0_DEV_CMD_REGS_OFFSET 0x0800 +#define PDS_CORE_BAR0_DEV_CMD_DATA_REGS_OFFSET 0x0c00 +#define PDS_CORE_BAR0_INTR_STATUS_OFFSET 0x1000 +#define PDS_CORE_BAR0_INTR_CTRL_OFFSET 0x2000 +#define PDS_CORE_DEV_CMD_DONE 0x00000001 + +#define PDS_CORE_DEVCMD_TIMEOUT 5 + +#define PDS_CORE_CLIENT_ID 0 +#define PDS_CORE_ASIC_TYPE_CAPRI 0 + +/* + * enum pds_core_cmd_opcode - Device commands + */ +enum pds_core_cmd_opcode { + /* Core init */ + PDS_CORE_CMD_NOP = 0, + PDS_CORE_CMD_IDENTIFY = 1, + PDS_CORE_CMD_RESET = 2, + PDS_CORE_CMD_INIT = 3, + + PDS_CORE_CMD_FW_DOWNLOAD = 4, + PDS_CORE_CMD_FW_CONTROL = 5, + + /* SR/IOV commands */ + PDS_CORE_CMD_VF_GETATTR = 60, + PDS_CORE_CMD_VF_SETATTR = 61, + PDS_CORE_CMD_VF_CTRL = 62, + + /* Add commands before this line */ + PDS_CORE_CMD_MAX, + PDS_CORE_CMD_COUNT +}; + +/* + * enum pds_core_status_code - Device command return codes + */ +enum pds_core_status_code { + PDS_RC_SUCCESS = 0, /* Success */ + PDS_RC_EVERSION = 1, /* Incorrect version for request */ + PDS_RC_EOPCODE = 2, /* Invalid cmd opcode */ + PDS_RC_EIO = 3, /* I/O error */ + PDS_RC_EPERM = 4, /* Permission denied */ + PDS_RC_EQID = 5, /* Bad qid */ + PDS_RC_EQTYPE = 6, /* Bad qtype */ + PDS_RC_ENOENT = 7, /* No such element */ + PDS_RC_EINTR = 8, /* operation interrupted */ + PDS_RC_EAGAIN = 9, /* Try again */ + PDS_RC_ENOMEM = 10, /* Out of memory */ + PDS_RC_EFAULT = 11, /* Bad address */ + PDS_RC_EBUSY = 12, /* Device or resource busy */ + PDS_RC_EEXIST = 13, /* object already exists */ + PDS_RC_EINVAL = 14, /* Invalid argument */ + PDS_RC_ENOSPC = 15, /* No space left or alloc failure */ + PDS_RC_ERANGE = 16, /* Parameter out of range */ + PDS_RC_BAD_ADDR = 17, /* Descriptor contains a bad ptr */ + PDS_RC_DEV_CMD = 18, /* Device cmd attempted on AdminQ */ + PDS_RC_ENOSUPP = 19, /* Operation not supported */ + PDS_RC_ERROR = 29, /* Generic error */ + PDS_RC_ERDMA = 30, /* Generic RDMA error */ + PDS_RC_EVFID = 31, /* VF ID does not exist */ + PDS_RC_BAD_FW = 32, /* FW file is invalid or corrupted */ + PDS_RC_ECLIENT = 33, /* No such client id */ +}; + +/** + * struct pds_core_drv_identity - Driver identity information + * @drv_type: Driver type (enum pds_core_driver_type) + * @os_dist: OS distribution, numeric format + * @os_dist_str: OS distribution, string format + * @kernel_ver: Kernel version, numeric format + * @kernel_ver_str: Kernel version, string format + * @driver_ver_str: Driver version, string format + */ +struct pds_core_drv_identity { + __le32 drv_type; + __le32 os_dist; + char os_dist_str[128]; + __le32 kernel_ver; + char kernel_ver_str[32]; + char driver_ver_str[32]; +}; + +#define PDS_DEV_TYPE_MAX 16 +/** + * struct pds_core_dev_identity - Device identity information + * @version: Version of device identify + * @type: Identify type (0 for now) + * @state: Device state + * @rsvd: Word boundary padding + * @nlifs: Number of LIFs provisioned + * @nintrs: Number of interrupts provisioned + * @ndbpgs_per_lif: Number of doorbell pages per LIF + * @intr_coal_mult: Interrupt coalescing multiplication factor + * Scale user-supplied interrupt coalescing + * value in usecs to device units using: + * device units = usecs * mult / div + * @intr_coal_div: Interrupt coalescing division factor + * Scale user-supplied interrupt coalescing + * value in usecs to device units using: + * device units = usecs * mult / div + * @vif_types: How many of each VIF device type is supported + */ +struct pds_core_dev_identity { + u8 version; + u8 type; + u8 state; + u8 rsvd; + __le32 nlifs; + __le32 nintrs; + __le32 ndbpgs_per_lif; + __le32 intr_coal_mult; + __le32 intr_coal_div; + __le16 vif_types[PDS_DEV_TYPE_MAX]; +}; + +#define PDS_CORE_IDENTITY_VERSION_1 1 + +/** + * struct pds_core_dev_identify_cmd - Driver/device identify command + * @opcode: Opcode PDS_CORE_CMD_IDENTIFY + * @ver: Highest version of identify supported by driver + * + * Expects to find driver identification info (struct pds_core_drv_identity) + * in cmd_regs->data. Driver should keep the devcmd interface locked + * while preparing the driver info. + */ +struct pds_core_dev_identify_cmd { + u8 opcode; + u8 ver; +}; + +/** + * struct pds_core_dev_identify_comp - Device identify command completion + * @status: Status of the command (enum pds_core_status_code) + * @ver: Version of identify returned by device + * + * Device identification info (struct pds_core_dev_identity) can be found + * in cmd_regs->data. Driver should keep the devcmd interface locked + * while reading the results. + */ +struct pds_core_dev_identify_comp { + u8 status; + u8 ver; +}; + +/** + * struct pds_core_dev_reset_cmd - Device reset command + * @opcode: Opcode PDS_CORE_CMD_RESET + * + * Resets and clears all LIFs, VDevs, and VIFs on the device. + */ +struct pds_core_dev_reset_cmd { + u8 opcode; +}; + +/** + * struct pds_core_dev_reset_comp - Reset command completion + * @status: Status of the command (enum pds_core_status_code) + */ +struct pds_core_dev_reset_comp { + u8 status; +}; + +/* + * struct pds_core_dev_init_data - Pointers and info needed for the Core + * initialization PDS_CORE_CMD_INIT command. The in and out structs are + * overlays on the pds_core_dev_cmd_regs.data space for passing data down + * to the firmware on init, and then returning initialization results. + */ +struct pds_core_dev_init_data_in { + __le64 adminq_q_base; + __le64 adminq_cq_base; + __le64 notifyq_cq_base; + __le32 flags; + __le16 intr_index; + u8 adminq_ring_size; + u8 notifyq_ring_size; +}; + +struct pds_core_dev_init_data_out { + __le32 core_hw_index; + __le32 adminq_hw_index; + __le32 notifyq_hw_index; + u8 adminq_hw_type; + u8 notifyq_hw_type; +}; + +/** + * struct pds_core_dev_init_cmd - Core device initialize + * @opcode: opcode PDS_CORE_CMD_INIT + * + * Initializes the core device and sets up the AdminQ and NotifyQ. + * Expects to find initialization data (struct pds_core_dev_init_data_in) + * in cmd_regs->data. Driver should keep the devcmd interface locked + * while preparing the driver info. + */ +struct pds_core_dev_init_cmd { + u8 opcode; +}; + +/** + * struct pds_core_dev_init_comp - Core init completion + * @status: Status of the command (enum pds_core_status_code) + * + * Initialization result data (struct pds_core_dev_init_data_in) + * is found in cmd_regs->data. + */ +struct pds_core_dev_init_comp { + u8 status; +}; + +/** + * struct pds_core_fw_download_cmd - Firmware download command + * @opcode: opcode + * @rsvd: Word boundary padding + * @addr: DMA address of the firmware buffer + * @offset: offset of the firmware buffer within the full image + * @length: number of valid bytes in the firmware buffer + */ +struct pds_core_fw_download_cmd { + u8 opcode; + u8 rsvd[3]; + __le32 offset; + __le64 addr; + __le32 length; +}; + +/** + * struct pds_core_fw_download_comp - Firmware download completion + * @status: Status of the command (enum pds_core_status_code) + */ +struct pds_core_fw_download_comp { + u8 status; +}; + +/** + * enum pds_core_fw_control_oper - FW control operations + * @PDS_CORE_FW_INSTALL_ASYNC: Install firmware asynchronously + * @PDS_CORE_FW_INSTALL_STATUS: Firmware installation status + * @PDS_CORE_FW_ACTIVATE_ASYNC: Activate firmware asynchronously + * @PDS_CORE_FW_ACTIVATE_STATUS: Firmware activate status + * @PDS_CORE_FW_UPDATE_CLEANUP: Cleanup any firmware update leftovers + * @PDS_CORE_FW_GET_BOOT: Return current active firmware slot + * @PDS_CORE_FW_SET_BOOT: Set active firmware slot for next boot + * @PDS_CORE_FW_GET_LIST: Return list of installed firmware images + */ +enum pds_core_fw_control_oper { + PDS_CORE_FW_INSTALL_ASYNC = 0, + PDS_CORE_FW_INSTALL_STATUS = 1, + PDS_CORE_FW_ACTIVATE_ASYNC = 2, + PDS_CORE_FW_ACTIVATE_STATUS = 3, + PDS_CORE_FW_UPDATE_CLEANUP = 4, + PDS_CORE_FW_GET_BOOT = 5, + PDS_CORE_FW_SET_BOOT = 6, + PDS_CORE_FW_GET_LIST = 7, +}; + +enum pds_core_fw_slot { + PDS_CORE_FW_SLOT_INVALID = 0, + PDS_CORE_FW_SLOT_A = 1, + PDS_CORE_FW_SLOT_B = 2, + PDS_CORE_FW_SLOT_GOLD = 3, +}; + +/** + * struct pds_core_fw_control_cmd - Firmware control command + * @opcode: opcode + * @rsvd: Word boundary padding + * @oper: firmware control operation (enum pds_core_fw_control_oper) + * @slot: slot to operate on (enum pds_core_fw_slot) + */ +struct pds_core_fw_control_cmd { + u8 opcode; + u8 rsvd[3]; + u8 oper; + u8 slot; +}; + +/** + * struct pds_core_fw_control_comp - Firmware control copletion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word alignment space + * @slot: Slot number (enum pds_core_fw_slot) + * @rsvd1: Struct padding + * @color: Color bit + */ +struct pds_core_fw_control_comp { + u8 status; + u8 rsvd[3]; + u8 slot; + u8 rsvd1[10]; + u8 color; +}; + +struct pds_core_fw_name_info { +#define PDS_CORE_FWSLOT_BUFLEN 8 +#define PDS_CORE_FWVERS_BUFLEN 32 + char slotname[PDS_CORE_FWSLOT_BUFLEN]; + char fw_version[PDS_CORE_FWVERS_BUFLEN]; +}; + +struct pds_core_fw_list_info { +#define PDS_CORE_FWVERS_LIST_LEN 16 + u8 num_fw_slots; + struct pds_core_fw_name_info fw_names[PDS_CORE_FWVERS_LIST_LEN]; +} __packed; + +enum pds_core_vf_attr { + PDS_CORE_VF_ATTR_SPOOFCHK = 1, + PDS_CORE_VF_ATTR_TRUST = 2, + PDS_CORE_VF_ATTR_MAC = 3, + PDS_CORE_VF_ATTR_LINKSTATE = 4, + PDS_CORE_VF_ATTR_VLAN = 5, + PDS_CORE_VF_ATTR_RATE = 6, + PDS_CORE_VF_ATTR_STATSADDR = 7, +}; + +/** + * enum pds_core_vf_link_status - Virtual Function link status + * @PDS_CORE_VF_LINK_STATUS_AUTO: Use link state of the uplink + * @PDS_CORE_VF_LINK_STATUS_UP: Link always up + * @PDS_CORE_VF_LINK_STATUS_DOWN: Link always down + */ +enum pds_core_vf_link_status { + PDS_CORE_VF_LINK_STATUS_AUTO = 0, + PDS_CORE_VF_LINK_STATUS_UP = 1, + PDS_CORE_VF_LINK_STATUS_DOWN = 2, +}; + +/** + * struct pds_core_vf_setattr_cmd - Set VF attributes on the NIC + * @opcode: Opcode + * @attr: Attribute type (enum pds_core_vf_attr) + * @vf_index: VF index + * @macaddr: mac address + * @vlanid: vlan ID + * @maxrate: max Tx rate in Mbps + * @spoofchk: enable address spoof checking + * @trust: enable VF trust + * @linkstate: set link up or down + * @stats: stats addr struct + * @stats.pa: set DMA address for VF stats + * @stats.len: length of VF stats space + * @pad: force union to specific size + */ +struct pds_core_vf_setattr_cmd { + u8 opcode; + u8 attr; + __le16 vf_index; + union { + u8 macaddr[6]; + __le16 vlanid; + __le32 maxrate; + u8 spoofchk; + u8 trust; + u8 linkstate; + struct { + __le64 pa; + __le32 len; + } stats; + u8 pad[60]; + } __packed; +}; + +struct pds_core_vf_setattr_comp { + u8 status; + u8 attr; + __le16 vf_index; + __le16 comp_index; + u8 rsvd[9]; + u8 color; +}; + +/** + * struct pds_core_vf_getattr_cmd - Get VF attributes from the NIC + * @opcode: Opcode + * @attr: Attribute type (enum pds_core_vf_attr) + * @vf_index: VF index + */ +struct pds_core_vf_getattr_cmd { + u8 opcode; + u8 attr; + __le16 vf_index; +}; + +struct pds_core_vf_getattr_comp { + u8 status; + u8 attr; + __le16 vf_index; + union { + u8 macaddr[6]; + __le16 vlanid; + __le32 maxrate; + u8 spoofchk; + u8 trust; + u8 linkstate; + __le64 stats_pa; + u8 pad[11]; + } __packed; + u8 color; +}; + +enum pds_core_vf_ctrl_opcode { + PDS_CORE_VF_CTRL_START_ALL = 0, + PDS_CORE_VF_CTRL_START = 1, +}; + +/** + * struct pds_core_vf_ctrl_cmd - VF control command + * @opcode: Opcode for the command + * @ctrl_opcode: VF control operation type + * @vf_index: VF Index. It is unused if op START_ALL is used. + */ + +struct pds_core_vf_ctrl_cmd { + u8 opcode; + u8 ctrl_opcode; + __le16 vf_index; +}; + +/** + * struct pds_core_vf_ctrl_comp - VF_CTRL command completion. + * @status: Status of the command (enum pds_core_status_code) + */ +struct pds_core_vf_ctrl_comp { + u8 status; +}; + +/* + * union pds_core_dev_cmd - Overlay of core device command structures + */ +union pds_core_dev_cmd { + u8 opcode; + u32 words[16]; + + struct pds_core_dev_identify_cmd identify; + struct pds_core_dev_init_cmd init; + struct pds_core_dev_reset_cmd reset; + struct pds_core_fw_download_cmd fw_download; + struct pds_core_fw_control_cmd fw_control; + + struct pds_core_vf_setattr_cmd vf_setattr; + struct pds_core_vf_getattr_cmd vf_getattr; + struct pds_core_vf_ctrl_cmd vf_ctrl; +}; + +/* + * union pds_core_dev_comp - Overlay of core device completion structures + */ +union pds_core_dev_comp { + u8 status; + u8 bytes[16]; + + struct pds_core_dev_identify_comp identify; + struct pds_core_dev_reset_comp reset; + struct pds_core_dev_init_comp init; + struct pds_core_fw_download_comp fw_download; + struct pds_core_fw_control_comp fw_control; + + struct pds_core_vf_setattr_comp vf_setattr; + struct pds_core_vf_getattr_comp vf_getattr; + struct pds_core_vf_ctrl_comp vf_ctrl; +}; + +/** + * struct pds_core_dev_hwstamp_regs - Hardware current timestamp registers + * @tick_low: Low 32 bits of hardware timestamp + * @tick_high: High 32 bits of hardware timestamp + */ +struct pds_core_dev_hwstamp_regs { + u32 tick_low; + u32 tick_high; +}; + +/** + * struct pds_core_dev_info_regs - Device info register format (read-only) + * @signature: Signature value of 0x44455649 ('DEVI') + * @version: Current version of info + * @asic_type: Asic type + * @asic_rev: Asic revision + * @fw_status: Firmware status + * bit 0 - 1 = fw running + * bit 4-7 - 4 bit generation number, changes on fw restart + * @fw_heartbeat: Firmware heartbeat counter + * @serial_num: Serial number + * @fw_version: Firmware version + * @oprom_regs: oprom_regs to store oprom debug enable/disable and bmp + * @rsvd_pad1024: Struct padding + * @hwstamp: Hardware current timestamp registers + * @rsvd_pad2048: Struct padding + */ +struct pds_core_dev_info_regs { +#define PDS_CORE_DEVINFO_FWVERS_BUFLEN 32 +#define PDS_CORE_DEVINFO_SERIAL_BUFLEN 32 + u32 signature; + u8 version; + u8 asic_type; + u8 asic_rev; +#define PDS_CORE_FW_STS_F_STOPPED 0x00 +#define PDS_CORE_FW_STS_F_RUNNING 0x01 +#define PDS_CORE_FW_STS_F_GENERATION 0xF0 + u8 fw_status; + __le32 fw_heartbeat; + char fw_version[PDS_CORE_DEVINFO_FWVERS_BUFLEN]; + char serial_num[PDS_CORE_DEVINFO_SERIAL_BUFLEN]; + u8 oprom_regs[32]; /* reserved */ + u8 rsvd_pad1024[916]; + struct pds_core_dev_hwstamp_regs hwstamp; /* on 1k boundary */ + u8 rsvd_pad2048[1016]; +} __packed; + +/** + * struct pds_core_dev_cmd_regs - Device command register format (read-write) + * @doorbell: Device Cmd Doorbell, write-only + * Write a 1 to signal device to process cmd + * @done: Command completed indicator, poll for completion + * bit 0 == 1 when command is complete + * @cmd: Opcode-specific command bytes + * @comp: Opcode-specific response bytes + * @rsvd: Struct padding + * @data: Opcode-specific side-data + */ +struct pds_core_dev_cmd_regs { + u32 doorbell; + u32 done; + union pds_core_dev_cmd cmd; + union pds_core_dev_comp comp; + u8 rsvd[48]; + u32 data[478]; +} __packed; + +/** + * struct pds_core_dev_regs - Device register format for bar 0 page 0 + * @info: Device info registers + * @devcmd: Device command registers + */ +struct pds_core_dev_regs { + struct pds_core_dev_info_regs info; + struct pds_core_dev_cmd_regs devcmd; +} __packed; + +#ifndef __CHECKER__ +static_assert(sizeof(struct pds_core_drv_identity) <= 1912); +static_assert(sizeof(struct pds_core_dev_identity) <= 1912); +static_assert(sizeof(union pds_core_dev_cmd) == 64); +static_assert(sizeof(union pds_core_dev_comp) == 16); +static_assert(sizeof(struct pds_core_dev_info_regs) == 2048); +static_assert(sizeof(struct pds_core_dev_cmd_regs) == 2048); +static_assert(sizeof(struct pds_core_dev_regs) == 4096); +#endif /* __CHECKER__ */ + +#endif /* _PDS_CORE_IF_H_ */ diff --git a/include/linux/pds/pds_intr.h b/include/linux/pds/pds_intr.h new file mode 100644 index 000000000000..56277c37248c --- /dev/null +++ b/include/linux/pds/pds_intr.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */ +/* Copyright(c) 2023 Advanced Micro Devices, Inc. */ + +#ifndef _PDS_INTR_H_ +#define _PDS_INTR_H_ + +/* + * Interrupt control register + * @coal_init: Coalescing timer initial value, in + * device units. Use @identity->intr_coal_mult + * and @identity->intr_coal_div to convert from + * usecs to device units: + * + * coal_init = coal_usecs * coal_mutl / coal_div + * + * When an interrupt is sent the interrupt + * coalescing timer current value + * (@coalescing_curr) is initialized with this + * value and begins counting down. No more + * interrupts are sent until the coalescing + * timer reaches 0. When @coalescing_init=0 + * interrupt coalescing is effectively disabled + * and every interrupt assert results in an + * interrupt. Reset value: 0 + * @mask: Interrupt mask. When @mask=1 the interrupt + * resource will not send an interrupt. When + * @mask=0 the interrupt resource will send an + * interrupt if an interrupt event is pending + * or on the next interrupt assertion event. + * Reset value: 1 + * @credits: Interrupt credits. This register indicates + * how many interrupt events the hardware has + * sent. When written by software this + * register atomically decrements @int_credits + * by the value written. When @int_credits + * becomes 0 then the "pending interrupt" bit + * in the Interrupt Status register is cleared + * by the hardware and any pending but unsent + * interrupts are cleared. + * !!!IMPORTANT!!! This is a signed register. + * @flags: Interrupt control flags + * @unmask -- When this bit is written with a 1 + * the interrupt resource will set mask=0. + * @coal_timer_reset -- When this + * bit is written with a 1 the + * @coalescing_curr will be reloaded with + * @coalescing_init to reset the coalescing + * timer. + * @mask_on_assert: Automatically mask on assertion. When + * @mask_on_assert=1 the interrupt resource + * will set @mask=1 whenever an interrupt is + * sent. When using interrupts in Legacy + * Interrupt mode the driver must select + * @mask_on_assert=0 for proper interrupt + * operation. + * @coalescing_curr: Coalescing timer current value, in + * microseconds. When this value reaches 0 + * the interrupt resource is again eligible to + * send an interrupt. If an interrupt event + * is already pending when @coalescing_curr + * reaches 0 the pending interrupt will be + * sent, otherwise an interrupt will be sent + * on the next interrupt assertion event. + */ +struct pds_core_intr { + u32 coal_init; + u32 mask; + u16 credits; + u16 flags; +#define PDS_CORE_INTR_F_UNMASK 0x0001 +#define PDS_CORE_INTR_F_TIMER_RESET 0x0002 + u32 mask_on_assert; + u32 coalescing_curr; + u32 rsvd6[3]; +}; + +#ifndef __CHECKER__ +static_assert(sizeof(struct pds_core_intr) == 32); +#endif /* __CHECKER__ */ + +#define PDS_CORE_INTR_CTRL_REGS_MAX 2048 +#define PDS_CORE_INTR_CTRL_COAL_MAX 0x3F +#define PDS_CORE_INTR_INDEX_NOT_ASSIGNED -1 + +struct pds_core_intr_status { + u32 status[2]; +}; + +/** + * enum pds_core_intr_mask_vals - valid values for mask and mask_assert. + * @PDS_CORE_INTR_MASK_CLEAR: unmask interrupt. + * @PDS_CORE_INTR_MASK_SET: mask interrupt. + */ +enum pds_core_intr_mask_vals { + PDS_CORE_INTR_MASK_CLEAR = 0, + PDS_CORE_INTR_MASK_SET = 1, +}; + +/** + * enum pds_core_intr_credits_bits - Bitwise composition of credits values. + * @PDS_CORE_INTR_CRED_COUNT: bit mask of credit count, no shift needed. + * @PDS_CORE_INTR_CRED_COUNT_SIGNED: bit mask of credit count, including sign bit. + * @PDS_CORE_INTR_CRED_UNMASK: unmask the interrupt. + * @PDS_CORE_INTR_CRED_RESET_COALESCE: reset the coalesce timer. + * @PDS_CORE_INTR_CRED_REARM: unmask the and reset the timer. + */ +enum pds_core_intr_credits_bits { + PDS_CORE_INTR_CRED_COUNT = 0x7fffu, + PDS_CORE_INTR_CRED_COUNT_SIGNED = 0xffffu, + PDS_CORE_INTR_CRED_UNMASK = 0x10000u, + PDS_CORE_INTR_CRED_RESET_COALESCE = 0x20000u, + PDS_CORE_INTR_CRED_REARM = (PDS_CORE_INTR_CRED_UNMASK | + PDS_CORE_INTR_CRED_RESET_COALESCE), +}; + +static inline void +pds_core_intr_coal_init(struct pds_core_intr __iomem *intr_ctrl, u32 coal) +{ + iowrite32(coal, &intr_ctrl->coal_init); +} + +static inline void +pds_core_intr_mask(struct pds_core_intr __iomem *intr_ctrl, u32 mask) +{ + iowrite32(mask, &intr_ctrl->mask); +} + +static inline void +pds_core_intr_credits(struct pds_core_intr __iomem *intr_ctrl, + u32 cred, u32 flags) +{ + if (WARN_ON_ONCE(cred > PDS_CORE_INTR_CRED_COUNT)) { + cred = ioread32(&intr_ctrl->credits); + cred &= PDS_CORE_INTR_CRED_COUNT_SIGNED; + } + + iowrite32(cred | flags, &intr_ctrl->credits); +} + +static inline void +pds_core_intr_clean_flags(struct pds_core_intr __iomem *intr_ctrl, u32 flags) +{ + u32 cred; + + cred = ioread32(&intr_ctrl->credits); + cred &= PDS_CORE_INTR_CRED_COUNT_SIGNED; + cred |= flags; + iowrite32(cred, &intr_ctrl->credits); +} + +static inline void +pds_core_intr_clean(struct pds_core_intr __iomem *intr_ctrl) +{ + pds_core_intr_clean_flags(intr_ctrl, PDS_CORE_INTR_CRED_RESET_COALESCE); +} + +static inline void +pds_core_intr_mask_assert(struct pds_core_intr __iomem *intr_ctrl, u32 mask) +{ + iowrite32(mask, &intr_ctrl->mask_on_assert); +} + +#endif /* _PDS_INTR_H_ */ diff --git a/include/linux/phy.h b/include/linux/phy.h index db7c0bd67559..c5a0dc829714 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -14,6 +14,7 @@ #include <linux/compiler.h> #include <linux/spinlock.h> #include <linux/ethtool.h> +#include <linux/leds.h> #include <linux/linkmode.h> #include <linux/netlink.h> #include <linux/mdio.h> @@ -600,6 +601,7 @@ struct macsec_ops; * @phy_num_led_triggers: Number of triggers in @phy_led_triggers * @led_link_trigger: LED trigger for link up/down * @last_triggered: last LED trigger for link speed + * @leds: list of PHY LED structures * @master_slave_set: User requested master/slave configuration * @master_slave_get: Current master/slave advertisement * @master_slave_state: Current master/slave configuration @@ -699,6 +701,7 @@ struct phy_device { struct phy_led_trigger *led_link_trigger; #endif + struct list_head leds; /* * Interrupt number for this PHY @@ -835,6 +838,23 @@ struct phy_plca_status { }; /** + * struct phy_led: An LED driven by the PHY + * + * @list: List of LEDs + * @phydev: PHY this LED is attached to + * @led_cdev: Standard LED class structure + * @index: Number of the LED + */ +struct phy_led { + struct list_head list; + struct phy_device *phydev; + struct led_classdev led_cdev; + u8 index; +}; + +#define to_phy_led(d) container_of(d, struct phy_led, led_cdev) + +/** * struct phy_driver - Driver structure for a particular PHY type * * @mdiodrv: Data common to all MDIO devices @@ -1056,6 +1076,27 @@ struct phy_driver { /** @get_plca_status: Return the current PLCA status info */ int (*get_plca_status)(struct phy_device *dev, struct phy_plca_status *plca_st); + + /** + * @led_brightness_set: Set a PHY LED brightness. Index + * indicates which of the PHYs led should be set. Value + * follows the standard LED class meaning, e.g. LED_OFF, + * LED_HALF, LED_FULL. + */ + int (*led_brightness_set)(struct phy_device *dev, + u8 index, enum led_brightness value); + + /** + * @led_blink_set: Set a PHY LED brightness. Index indicates + * which of the PHYs led should be configured to blink. Delays + * are in milliseconds and if both are zero then a sensible + * default should be chosen. The call should adjust the + * timings in that case and if it can't match the values + * specified exactly. + */ + int (*led_blink_set)(struct phy_device *dev, u8 index, + unsigned long *delay_on, + unsigned long *delay_off); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) @@ -1130,16 +1171,15 @@ static inline int phy_read(struct phy_device *phydev, u32 regnum) #define phy_read_poll_timeout(phydev, regnum, val, cond, sleep_us, \ timeout_us, sleep_before_read) \ ({ \ - int __ret = read_poll_timeout(phy_read, val, (cond) || val < 0, \ + int __ret = read_poll_timeout(phy_read, val, val < 0 || (cond), \ sleep_us, timeout_us, sleep_before_read, phydev, regnum); \ - if (val < 0) \ + if (val < 0) \ __ret = val; \ if (__ret) \ phydev_err(phydev, "%s failed: %d\n", __func__, __ret); \ __ret; \ }) - /** * __phy_read - convenience function for reading a given PHY register * @phydev: the phy_device struct diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 637698ed5cb6..71755c66c162 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -93,7 +93,6 @@ static inline bool phylink_autoneg_inband(unsigned int mode) * the medium link mode (@speed and @duplex) and the speed/duplex of the phy * interface mode (@interface) are different. * @link: true if the link is up. - * @an_enabled: true if autonegotiation is enabled/desired. * @an_complete: true if autonegotiation has completed. */ struct phylink_link_state { @@ -105,7 +104,6 @@ struct phylink_link_state { int pause; int rate_matching; unsigned int link:1; - unsigned int an_enabled:1; unsigned int an_complete:1; }; diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h deleted file mode 100644 index 9e75ac8d19be..000000000000 --- a/include/linux/platform_data/nfcmrvl.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2015, Marvell International Ltd. - * - * This software file (the "File") is distributed by Marvell International - * Ltd. under the terms of the GNU General Public License Version 2, June 1991 - * (the "License"). You may use, redistribute and/or modify this File in - * accordance with the terms and conditions of the License, a copy of which - * is available on the worldwide web at - * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. - * - * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE - * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE - * ARE EXPRESSLY DISCLAIMED. The License provides additional details about - * this warranty disclaimer. - */ - -#ifndef _NFCMRVL_PTF_H_ -#define _NFCMRVL_PTF_H_ - -struct nfcmrvl_platform_data { - /* - * Generic - */ - - /* GPIO that is wired to RESET_N signal */ - int reset_n_io; - /* Tell if transport is muxed in HCI one */ - unsigned int hci_muxed; - - /* - * UART specific - */ - - /* Tell if UART needs flow control at init */ - unsigned int flow_control; - /* Tell if firmware supports break control for power management */ - unsigned int break_control; - - - /* - * I2C specific - */ - - unsigned int irq; - unsigned int irq_polarity; -}; - -#endif /* _NFCMRVL_PTF_H_ */ diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h index c2e28deef33a..746fd67c3480 100644 --- a/include/linux/ptp_kvm.h +++ b/include/linux/ptp_kvm.h @@ -14,6 +14,7 @@ struct timespec64; struct clocksource; int kvm_arch_ptp_init(void); +void kvm_arch_ptp_exit(void); int kvm_arch_ptp_get_clock(struct timespec64 *ts); int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec, struct clocksource **cs); diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h new file mode 100644 index 000000000000..2c8bfd0f1b6b --- /dev/null +++ b/include/linux/rcuref.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _LINUX_RCUREF_H +#define _LINUX_RCUREF_H + +#include <linux/atomic.h> +#include <linux/bug.h> +#include <linux/limits.h> +#include <linux/lockdep.h> +#include <linux/preempt.h> +#include <linux/rcupdate.h> + +#define RCUREF_ONEREF 0x00000000U +#define RCUREF_MAXREF 0x7FFFFFFFU +#define RCUREF_SATURATED 0xA0000000U +#define RCUREF_RELEASED 0xC0000000U +#define RCUREF_DEAD 0xE0000000U +#define RCUREF_NOREF 0xFFFFFFFFU + +/** + * rcuref_init - Initialize a rcuref reference count with the given reference count + * @ref: Pointer to the reference count + * @cnt: The initial reference count typically '1' + */ +static inline void rcuref_init(rcuref_t *ref, unsigned int cnt) +{ + atomic_set(&ref->refcnt, cnt - 1); +} + +/** + * rcuref_read - Read the number of held reference counts of a rcuref + * @ref: Pointer to the reference count + * + * Return: The number of held references (0 ... N) + */ +static inline unsigned int rcuref_read(rcuref_t *ref) +{ + unsigned int c = atomic_read(&ref->refcnt); + + /* Return 0 if within the DEAD zone. */ + return c >= RCUREF_RELEASED ? 0 : c + 1; +} + +extern __must_check bool rcuref_get_slowpath(rcuref_t *ref); + +/** + * rcuref_get - Acquire one reference on a rcuref reference count + * @ref: Pointer to the reference count + * + * Similar to atomic_inc_not_zero() but saturates at RCUREF_MAXREF. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See documentation in lib/rcuref.c + * + * Return: + * False if the attempt to acquire a reference failed. This happens + * when the last reference has been put already + * + * True if a reference was successfully acquired + */ +static inline __must_check bool rcuref_get(rcuref_t *ref) +{ + /* + * Unconditionally increase the reference count. The saturation and + * dead zones provide enough tolerance for this. + */ + if (likely(!atomic_add_negative_relaxed(1, &ref->refcnt))) + return true; + + /* Handle the cases inside the saturation and dead zones */ + return rcuref_get_slowpath(ref); +} + +extern __must_check bool rcuref_put_slowpath(rcuref_t *ref); + +/* + * Internal helper. Do not invoke directly. + */ +static __always_inline __must_check bool __rcuref_put(rcuref_t *ref) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && preemptible(), + "suspicious rcuref_put_rcusafe() usage"); + /* + * Unconditionally decrease the reference count. The saturation and + * dead zones provide enough tolerance for this. + */ + if (likely(!atomic_add_negative_release(-1, &ref->refcnt))) + return false; + + /* + * Handle the last reference drop and cases inside the saturation + * and dead zones. + */ + return rcuref_put_slowpath(ref); +} + +/** + * rcuref_put_rcusafe -- Release one reference for a rcuref reference count RCU safe + * @ref: Pointer to the reference count + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides an acquire ordering on success such that free() + * must come after. + * + * Can be invoked from contexts, which guarantee that no grace period can + * happen which would free the object concurrently if the decrement drops + * the last reference and the slowpath races against a concurrent get() and + * put() pair. rcu_read_lock()'ed and atomic contexts qualify. + * + * Return: + * True if this was the last reference with no future references + * possible. This signals the caller that it can safely release the + * object which is protected by the reference counter. + * + * False if there are still active references or the put() raced + * with a concurrent get()/put() pair. Caller is not allowed to + * release the protected object. + */ +static inline __must_check bool rcuref_put_rcusafe(rcuref_t *ref) +{ + return __rcuref_put(ref); +} + +/** + * rcuref_put -- Release one reference for a rcuref reference count + * @ref: Pointer to the reference count + * + * Can be invoked from any context. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides an acquire ordering on success such that free() + * must come after. + * + * Return: + * + * True if this was the last reference with no future references + * possible. This signals the caller that it can safely schedule the + * object, which is protected by the reference counter, for + * deconstruction. + * + * False if there are still active references or the put() raced + * with a concurrent get()/put() pair. Caller is not allowed to + * deconstruct the protected object. + */ +static inline __must_check bool rcuref_put(rcuref_t *ref) +{ + bool released; + + preempt_disable(); + released = __rcuref_put(ref); + preempt_enable(); + return released; +} + +#endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index b6e6378dcbbd..3d6cf306cd55 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -63,16 +63,6 @@ static inline bool lockdep_rtnl_is_held(void) rcu_dereference_check(p, lockdep_rtnl_is_held()) /** - * rcu_dereference_bh_rtnl - rcu_dereference_bh with debug checking - * @p: The pointer to read, prior to dereference - * - * Do an rcu_dereference_bh(p), but check caller either holds rcu_read_lock_bh() - * or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference_bh() - */ -#define rcu_dereference_bh_rtnl(p) \ - rcu_dereference_bh_check(p, lockdep_rtnl_is_held()) - -/** * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing * diff --git a/include/linux/sched.h b/include/linux/sched.h index e1e605b1255b..3f5395ae86bc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1318,11 +1318,6 @@ struct task_struct { struct tlbflush_unmap_batch tlb_ubc; - union { - refcount_t rcu_users; - struct rcu_head rcu; - }; - /* Cache last used pipe for splice(): */ struct pipe_inode_info *splice_pipe; @@ -1459,6 +1454,8 @@ struct task_struct { unsigned long saved_state_change; # endif #endif + struct rcu_head rcu; + refcount_t rcu_users; int pagefault_disabled; #ifdef CONFIG_MMU struct task_struct *oom_reaper_list; diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 358dc08e0831..836a7e200f39 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -222,7 +222,7 @@ struct sctp_datahdr { __be16 stream; __be16 ssn; __u32 ppid; - __u8 payload[]; + /* __u8 payload[]; */ }; struct sctp_data_chunk { @@ -270,7 +270,7 @@ struct sctp_inithdr { __be16 num_outbound_streams; __be16 num_inbound_streams; __be32 initial_tsn; - __u8 params[]; + /* __u8 params[]; */ }; struct sctp_init_chunk { @@ -385,7 +385,7 @@ struct sctp_sackhdr { __be32 a_rwnd; __be16 num_gap_ack_blocks; __be16 num_dup_tsns; - union sctp_sack_variable variable[]; + /* union sctp_sack_variable variable[]; */ }; struct sctp_sack_chunk { @@ -443,7 +443,7 @@ struct sctp_shutdown_chunk { struct sctp_errhdr { __be16 cause; __be16 length; - __u8 variable[]; + /* __u8 variable[]; */ }; struct sctp_operr_chunk { @@ -603,7 +603,7 @@ struct sctp_fwdtsn_skip { struct sctp_fwdtsn_hdr { __be32 new_cum_tsn; - struct sctp_fwdtsn_skip skip[]; + /* struct sctp_fwdtsn_skip skip[]; */ }; struct sctp_fwdtsn_chunk { @@ -620,7 +620,7 @@ struct sctp_ifwdtsn_skip { struct sctp_ifwdtsn_hdr { __be32 new_cum_tsn; - struct sctp_ifwdtsn_skip skip[]; + /* struct sctp_ifwdtsn_skip skip[]; */ }; struct sctp_ifwdtsn_chunk { @@ -667,7 +667,7 @@ struct sctp_addip_param { struct sctp_addiphdr { __be32 serial; - __u8 params[]; + /* __u8 params[]; */ }; struct sctp_addip_chunk { @@ -727,7 +727,7 @@ struct sctp_addip_chunk { struct sctp_authhdr { __be16 shkey_id; __be16 hmac_id; - __u8 hmac[]; + /* __u8 hmac[]; */ }; struct sctp_auth_chunk { @@ -742,7 +742,7 @@ struct sctp_infox { struct sctp_reconf_chunk { struct sctp_chunkhdr chunk_hdr; - __u8 params[]; + /* __u8 params[]; */ }; struct sctp_strreset_outreq { diff --git a/include/linux/serdev.h b/include/linux/serdev.h index 5f6bfe4f6d95..f5f97fa25e8a 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -93,6 +93,7 @@ struct serdev_controller_ops { void (*wait_until_sent)(struct serdev_controller *, long); int (*get_tiocm)(struct serdev_controller *); int (*set_tiocm)(struct serdev_controller *, unsigned int, unsigned int); + int (*break_ctl)(struct serdev_controller *ctrl, unsigned int break_state); }; /** @@ -203,6 +204,7 @@ int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_ void serdev_device_wait_until_sent(struct serdev_device *, long); int serdev_device_get_tiocm(struct serdev_device *); int serdev_device_set_tiocm(struct serdev_device *, int, int); +int serdev_device_break_ctl(struct serdev_device *serdev, int break_state); void serdev_device_write_wakeup(struct serdev_device *); int serdev_device_write(struct serdev_device *, const unsigned char *, size_t, long); void serdev_device_write_flush(struct serdev_device *); @@ -250,11 +252,15 @@ static inline int serdev_device_write_buf(struct serdev_device *serdev, static inline void serdev_device_wait_until_sent(struct serdev_device *sdev, long timeout) {} static inline int serdev_device_get_tiocm(struct serdev_device *serdev) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int serdev_device_set_tiocm(struct serdev_device *serdev, int set, int clear) { - return -ENOTSUPP; + return -EOPNOTSUPP; +} +static inline int serdev_device_break_ctl(struct serdev_device *serdev, int break_state) +{ + return -EOPNOTSUPP; } static inline int serdev_device_write(struct serdev_device *sdev, const unsigned char *buf, size_t count, unsigned long timeout) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index dbcaac8b6966..738776ab8838 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -37,7 +37,7 @@ #include <linux/netfilter/nf_conntrack_common.h> #endif #include <net/net_debug.h> -#include <net/dropreason.h> +#include <net/dropreason-core.h> /** * DOC: skb checksums @@ -346,18 +346,12 @@ struct sk_buff_head { struct sk_buff; -/* To allow 64K frame to be packed as single skb without frag_list we - * require 64K/PAGE_SIZE pages plus 1 additional page to allow for - * buffers which do not start on a page boundary. - * - * Since GRO uses frags we allocate at least 16 regardless of page - * size. - */ -#if (65536/PAGE_SIZE + 1) < 16 -#define MAX_SKB_FRAGS 16UL -#else -#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1) +#ifndef CONFIG_MAX_SKB_FRAGS +# define CONFIG_MAX_SKB_FRAGS 17 #endif + +#define MAX_SKB_FRAGS CONFIG_MAX_SKB_FRAGS + extern int sysctl_max_skb_frags; /* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to @@ -811,7 +805,6 @@ typedef unsigned char *sk_buff_data_t; * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) - * @scm_io_uring: SKB holds io_uring registered files * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required @@ -942,38 +935,44 @@ struct sk_buff { /* public: */ __u8 pkt_type:3; /* see PKT_TYPE_MAX */ __u8 ignore_df:1; - __u8 nf_trace:1; + __u8 dst_pending_confirm:1; __u8 ip_summed:2; __u8 ooo_okay:1; + /* private: */ + __u8 __mono_tc_offset[0]; + /* public: */ + __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ +#ifdef CONFIG_NET_CLS_ACT + __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ + __u8 tc_skip_classify:1; +#endif + __u8 remcsum_offload:1; + __u8 csum_complete_sw:1; + __u8 csum_level:2; + __u8 inner_protocol_type:1; + __u8 l4_hash:1; __u8 sw_hash:1; +#ifdef CONFIG_WIRELESS __u8 wifi_acked_valid:1; __u8 wifi_acked:1; +#endif __u8 no_fcs:1; /* Indicates the inner headers are valid in the skbuff. */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; __u8 csum_valid:1; - - /* private: */ - __u8 __pkt_vlan_present_offset[0]; - /* public: */ - __u8 remcsum_offload:1; - __u8 csum_complete_sw:1; - __u8 csum_level:2; - __u8 dst_pending_confirm:1; - __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ -#ifdef CONFIG_NET_CLS_ACT - __u8 tc_skip_classify:1; - __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ -#endif #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif +#if IS_ENABLED(CONFIG_IP_VS) __u8 ipvs_property:1; - __u8 inner_protocol_type:1; +#endif +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES) + __u8 nf_trace:1; +#endif #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; __u8 offload_l3_fwd_mark:1; @@ -989,13 +988,16 @@ struct sk_buff { __u8 decrypted:1; #endif __u8 slow_gro:1; +#if IS_ENABLED(CONFIG_IP_SCTP) __u8 csum_not_inet:1; - __u8 scm_io_uring:1; +#endif #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ #endif + u16 alloc_cpu; + union { __wsum csum; struct { @@ -1019,7 +1021,6 @@ struct sk_buff { unsigned int sender_cpu; }; #endif - u16 alloc_cpu; #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; #endif @@ -1075,13 +1076,13 @@ struct sk_buff { * around, you also must adapt these constants. */ #ifdef __BIG_ENDIAN_BITFIELD -#define TC_AT_INGRESS_MASK (1 << 0) -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 2) +#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7) +#define TC_AT_INGRESS_MASK (1 << 6) #else -#define TC_AT_INGRESS_MASK (1 << 7) -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 5) +#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0) +#define TC_AT_INGRESS_MASK (1 << 1) #endif -#define PKT_VLAN_PRESENT_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) +#define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset) #ifdef __KERNEL__ /* @@ -1196,6 +1197,15 @@ static inline unsigned int skb_napi_id(const struct sk_buff *skb) #endif } +static inline bool skb_wifi_acked_valid(const struct sk_buff *skb) +{ +#ifdef CONFIG_WIRELESS + return skb->wifi_acked_valid; +#else + return 0; +#endif +} + /** * skb_unref - decrement the skb's reference count * @skb: buffer @@ -3243,7 +3253,7 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, void napi_consume_skb(struct sk_buff *skb, int budget); void napi_skb_free_stolen_head(struct sk_buff *skb); -void __kfree_skb_defer(struct sk_buff *skb); +void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason); /** * __dev_alloc_pages - allocate page for network Rx @@ -3395,6 +3405,18 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } +static inline void +napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe) +{ + struct page *page = skb_frag_page(frag); + +#ifdef CONFIG_PAGE_POOL + if (recycle && page_pool_return_skb_page(page, napi_safe)) + return; +#endif + put_page(page); +} + /** * __skb_frag_unref - release a reference on a paged fragment. * @frag: the paged fragment @@ -3405,13 +3427,7 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) */ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) { - struct page *page = skb_frag_page(frag); - -#ifdef CONFIG_PAGE_POOL - if (recycle && page_pool_return_skb_page(page)) - return; -#endif - put_page(page); + napi_frag_unref(frag, recycle, false); } /** @@ -5050,9 +5066,30 @@ static inline void skb_reset_redirect(struct sk_buff *skb) skb->redirected = 0; } +static inline void skb_set_redirected_noclear(struct sk_buff *skb, + bool from_ingress) +{ + skb->redirected = 1; +#ifdef CONFIG_NET_REDIRECT + skb->from_ingress = from_ingress; +#endif +} + static inline bool skb_csum_is_sctp(struct sk_buff *skb) { +#if IS_ENABLED(CONFIG_IP_SCTP) return skb->csum_not_inet; +#else + return 0; +#endif +} + +static inline void skb_reset_csum_not_inet(struct sk_buff *skb) +{ + skb->ip_summed = CHECKSUM_NONE; +#if IS_ENABLED(CONFIG_IP_SCTP) + skb->csum_not_inet = 0; +#endif } static inline void skb_set_kcov_handle(struct sk_buff *skb, @@ -5072,12 +5109,12 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb) #endif } -#ifdef CONFIG_PAGE_POOL static inline void skb_mark_for_recycle(struct sk_buff *skb) { +#ifdef CONFIG_PAGE_POOL skb->pp_recycle = 1; -} #endif +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/include/linux/smscphy.h b/include/linux/smscphy.h index 1a136271ba6a..e1c88627755a 100644 --- a/include/linux/smscphy.h +++ b/include/linux/smscphy.h @@ -28,4 +28,14 @@ #define MII_LAN83C185_MODE_POWERDOWN 0xC0 /* Power Down mode */ #define MII_LAN83C185_MODE_ALL 0xE0 /* All capable mode */ +int smsc_phy_config_intr(struct phy_device *phydev); +irqreturn_t smsc_phy_handle_interrupt(struct phy_device *phydev); +int smsc_phy_config_init(struct phy_device *phydev); +int lan87xx_read_status(struct phy_device *phydev); +int smsc_phy_get_tunable(struct phy_device *phydev, + struct ethtool_tunable *tuna, void *data); +int smsc_phy_set_tunable(struct phy_device *phydev, + struct ethtool_tunable *tuna, const void *data); +int smsc_phy_probe(struct phy_device *phydev); + #endif /* __LINUX_SMSCPHY_H__ */ diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index fd0b0605cf90..b2b28180dff7 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -6,6 +6,7 @@ #include <linux/regmap.h> #include <linux/pci.h> #include <linux/skbuff.h> +#include <linux/netdevice.h> #define MTK_WED_TX_QUEUES 2 #define MTK_WED_RX_QUEUES 2 @@ -179,6 +180,8 @@ struct mtk_wed_ops { u32 (*irq_get)(struct mtk_wed_device *dev, u32 mask); void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask); + int (*setup_tc)(struct mtk_wed_device *wed, struct net_device *dev, + enum tc_setup_type type, void *type_data); }; extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops; @@ -237,6 +240,8 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev) (_dev)->ops->msg_update(_dev, _id, _msg, _len) #define mtk_wed_device_stop(_dev) (_dev)->ops->stop(_dev) #define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev) +#define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) \ + (_dev)->ops->setup_tc(_dev, _netdev, _type, _type_data) #else static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) { @@ -255,6 +260,7 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) #define mtk_wed_device_update_msg(_dev, _id, _msg, _len) -ENODEV #define mtk_wed_device_stop(_dev) do {} while (0) #define mtk_wed_device_dma_reset(_dev) do {} while (0) +#define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) -EOPNOTSUPP #endif #endif diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index a2414c187483..225751a8fd8e 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -186,6 +186,24 @@ struct stmmac_safety_feature_cfg { u32 tmouten; }; +/* Addresses that may be customized by a platform */ +struct dwmac4_addrs { + u32 dma_chan; + u32 dma_chan_offset; + u32 mtl_chan; + u32 mtl_chan_offset; + u32 mtl_ets_ctrl; + u32 mtl_ets_ctrl_offset; + u32 mtl_txq_weight; + u32 mtl_txq_weight_offset; + u32 mtl_send_slp_cred; + u32 mtl_send_slp_cred_offset; + u32 mtl_high_cred; + u32 mtl_high_cred_offset; + u32 mtl_low_cred; + u32 mtl_low_cred_offset; +}; + struct plat_stmmacenet_data { int bus_id; int phy_addr; @@ -223,6 +241,7 @@ struct plat_stmmacenet_data { struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; void (*fix_mac_speed)(void *priv, unsigned int speed); + int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); void (*speed_mode_2500)(struct net_device *ndev, void *priv); @@ -273,5 +292,6 @@ struct plat_stmmacenet_data { bool use_phy_wol; bool sph_disable; bool serdes_up_after_phy_linkup; + const struct dwmac4_addrs *dwmac4_addrs; }; #endif diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ca7f05a130d2..b4c08ac86983 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -472,10 +472,12 @@ enum tsq_flags { TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), }; -static inline struct tcp_sock *tcp_sk(const struct sock *sk) -{ - return (struct tcp_sock *)sk; -} +#define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) + +/* Variant of tcp_sk() upgrading a const sock to a read/write tcp socket. + * Used in context of (lockless) tcp listeners. + */ +#define tcp_sk_rw(ptr) container_of(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) struct tcp_timewait_sock { struct inet_timewait_sock tw_sk; diff --git a/include/linux/types.h b/include/linux/types.h index ea8cf60a8a79..688fb943556a 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -175,6 +175,12 @@ typedef struct { } atomic64_t; #endif +typedef struct { + atomic_t refcnt; +} rcuref_t; + +#define RCUREF_INIT(i) { .refcnt = ATOMIC_INIT(i - 1) } + struct list_head { struct list_head *next, *prev; }; diff --git a/include/linux/udp.h b/include/linux/udp.h index a2892e151644..43c1fb2d2c21 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -97,10 +97,7 @@ struct udp_sock { #define UDP_MAX_SEGMENTS (1 << 6UL) -static inline struct udp_sock *udp_sk(const struct sock *sk) -{ - return (struct udp_sock *)sk; -} +#define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 3f9c16611306..c58453699ee9 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -245,4 +245,5 @@ u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); void virtio_transport_deliver_tap_pkt(struct sk_buff *skb); int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *list); +int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t read_actor); #endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 24d76500b1cc..01fa15506286 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -64,11 +64,21 @@ struct wwan_port_ops { poll_table *wait); }; +/** struct wwan_port_caps - The WWAN port capbilities + * @frag_len: WWAN port TX fragments length + * @headroom_len: WWAN port TX fragments reserved headroom length + */ +struct wwan_port_caps { + size_t frag_len; + unsigned int headroom_len; +}; + /** * wwan_create_port - Add a new WWAN port * @parent: Device to use as parent and shared by all WWAN ports * @type: WWAN port type * @ops: WWAN port operations + * @caps: WWAN port capabilities * @drvdata: Pointer to caller driver data * * Allocate and register a new WWAN port. The port will be automatically exposed @@ -86,6 +96,7 @@ struct wwan_port_ops { struct wwan_port *wwan_create_port(struct device *parent, enum wwan_port_type type, const struct wwan_port_ops *ops, + struct wwan_port_caps *caps, void *drvdata); /** |