diff options
author | Linus Torvalds | 2020-10-16 15:22:41 -0700 |
---|---|---|
committer | Linus Torvalds | 2020-10-16 15:22:41 -0700 |
commit | fad70111d57e0b728b587eabc6f9f9b5240faa17 (patch) | |
tree | c9397c51ace72383e91c789e85a82f8e3eb4ad3d /include/trace | |
parent | 7a3dadedc82e340f8292f64e7bfa964c525009c0 (diff) | |
parent | 7530d3eb3dcf1a30750e8e7f1f88b782b96b72b8 (diff) |
Merge tag 'afs-fixes-20201016' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
Pull afs updates from David Howells:
"A collection of fixes to fix afs_cell struct refcounting, thereby
fixing a slew of related syzbot bugs:
- Fix the cell tree in the netns to use an rwsem rather than RCU.
There seem to be some problems deriving from the use of RCU and a
seqlock to walk the rbtree, but it's not entirely clear what since
there are several different failures being seen.
Changing things to use an rwsem instead makes it more robust. The
extra performance derived from using RCU isn't necessary in this
case since the only time we're looking up a cell is during mount or
when cells are being manually added.
- Fix the refcounting by splitting the usage counter into a memory
refcount and an active users counter. The usage counter was doing
double duty, keeping track of whether a cell is still in use and
keeping track of when it needs to be destroyed - but this makes the
clean up tricky. Separating these out simplifies the logic.
- Fix purging a cell that has an alias. A cell alias pins the cell
it's an alias of, but the alias is always later in the list. Trying
to purge in a single pass causes rmmod to hang in such a case.
- Fix cell removal. If a cell's manager is requeued whilst it's
removing itself, the manager will run again and re-remove itself,
causing problems in various places. Follow Hillf Danton's
suggestion to insert a more terminal state that causes the manager
to do nothing post-removal.
In additional to the above, two other changes:
- Add a tracepoint for the cell refcount and active users count. This
helped with debugging the above and may be useful again in future.
- Downgrade an assertion to a print when a still-active server is
seen during purging. This was happening as a consequence of
incomplete cell removal before the servers were cleaned up"
* tag 'afs-fixes-20201016' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
afs: Don't assert on unpurgeable server records
afs: Add tracing for cell refcount and active user count
afs: Fix cell removal
afs: Fix cell purging with aliases
afs: Fix cell refcounting by splitting the usage counter
afs: Fix rapid cell addition/removal by not using RCU on cells tree
Diffstat (limited to 'include/trace')
-rw-r--r-- | include/trace/events/afs.h | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 5f0c1cf1ea13..8eb49231c6bb 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -40,6 +40,7 @@ enum afs_server_trace { afs_server_trace_get_new_cbi, afs_server_trace_get_probe, afs_server_trace_give_up_cb, + afs_server_trace_purging, afs_server_trace_put_call, afs_server_trace_put_cbi, afs_server_trace_put_find_rsq, @@ -50,6 +51,7 @@ enum afs_server_trace { afs_server_trace_update, }; + enum afs_volume_trace { afs_volume_trace_alloc, afs_volume_trace_free, @@ -67,6 +69,46 @@ enum afs_volume_trace { afs_volume_trace_remove, }; +enum afs_cell_trace { + afs_cell_trace_alloc, + afs_cell_trace_free, + afs_cell_trace_get_queue_dns, + afs_cell_trace_get_queue_manage, + afs_cell_trace_get_queue_new, + afs_cell_trace_get_vol, + afs_cell_trace_insert, + afs_cell_trace_manage, + afs_cell_trace_put_candidate, + afs_cell_trace_put_destroy, + afs_cell_trace_put_queue_fail, + afs_cell_trace_put_queue_work, + afs_cell_trace_put_vol, + afs_cell_trace_see_source, + afs_cell_trace_see_ws, + afs_cell_trace_unuse_alias, + afs_cell_trace_unuse_check_alias, + afs_cell_trace_unuse_delete, + afs_cell_trace_unuse_fc, + afs_cell_trace_unuse_lookup, + afs_cell_trace_unuse_mntpt, + afs_cell_trace_unuse_no_pin, + afs_cell_trace_unuse_parse, + afs_cell_trace_unuse_pin, + afs_cell_trace_unuse_probe, + afs_cell_trace_unuse_sbi, + afs_cell_trace_unuse_ws, + afs_cell_trace_use_alias, + afs_cell_trace_use_check_alias, + afs_cell_trace_use_fc, + afs_cell_trace_use_fc_alias, + afs_cell_trace_use_lookup, + afs_cell_trace_use_mntpt, + afs_cell_trace_use_pin, + afs_cell_trace_use_probe, + afs_cell_trace_use_sbi, + afs_cell_trace_wait, +}; + enum afs_fs_operation { afs_FS_FetchData = 130, /* AFS Fetch file data */ afs_FS_FetchACL = 131, /* AFS Fetch file ACL */ @@ -270,6 +312,7 @@ enum afs_cb_break_reason { EM(afs_server_trace_get_new_cbi, "GET cbi ") \ EM(afs_server_trace_get_probe, "GET probe") \ EM(afs_server_trace_give_up_cb, "giveup-cb") \ + EM(afs_server_trace_purging, "PURGE ") \ EM(afs_server_trace_put_call, "PUT call ") \ EM(afs_server_trace_put_cbi, "PUT cbi ") \ EM(afs_server_trace_put_find_rsq, "PUT f-rsq") \ @@ -295,6 +338,44 @@ enum afs_cb_break_reason { EM(afs_volume_trace_put_validate_fc, "PUT fc-validat") \ E_(afs_volume_trace_remove, "REMOVE ") +#define afs_cell_traces \ + EM(afs_cell_trace_alloc, "ALLOC ") \ + EM(afs_cell_trace_free, "FREE ") \ + EM(afs_cell_trace_get_queue_dns, "GET q-dns ") \ + EM(afs_cell_trace_get_queue_manage, "GET q-mng ") \ + EM(afs_cell_trace_get_queue_new, "GET q-new ") \ + EM(afs_cell_trace_get_vol, "GET vol ") \ + EM(afs_cell_trace_insert, "INSERT ") \ + EM(afs_cell_trace_manage, "MANAGE ") \ + EM(afs_cell_trace_put_candidate, "PUT candid") \ + EM(afs_cell_trace_put_destroy, "PUT destry") \ + EM(afs_cell_trace_put_queue_work, "PUT q-work") \ + EM(afs_cell_trace_put_queue_fail, "PUT q-fail") \ + EM(afs_cell_trace_put_vol, "PUT vol ") \ + EM(afs_cell_trace_see_source, "SEE source") \ + EM(afs_cell_trace_see_ws, "SEE ws ") \ + EM(afs_cell_trace_unuse_alias, "UNU alias ") \ + EM(afs_cell_trace_unuse_check_alias, "UNU chk-al") \ + EM(afs_cell_trace_unuse_delete, "UNU delete") \ + EM(afs_cell_trace_unuse_fc, "UNU fc ") \ + EM(afs_cell_trace_unuse_lookup, "UNU lookup") \ + EM(afs_cell_trace_unuse_mntpt, "UNU mntpt ") \ + EM(afs_cell_trace_unuse_parse, "UNU parse ") \ + EM(afs_cell_trace_unuse_pin, "UNU pin ") \ + EM(afs_cell_trace_unuse_probe, "UNU probe ") \ + EM(afs_cell_trace_unuse_sbi, "UNU sbi ") \ + EM(afs_cell_trace_unuse_ws, "UNU ws ") \ + EM(afs_cell_trace_use_alias, "USE alias ") \ + EM(afs_cell_trace_use_check_alias, "USE chk-al") \ + EM(afs_cell_trace_use_fc, "USE fc ") \ + EM(afs_cell_trace_use_fc_alias, "USE fc-al ") \ + EM(afs_cell_trace_use_lookup, "USE lookup") \ + EM(afs_cell_trace_use_mntpt, "USE mntpt ") \ + EM(afs_cell_trace_use_pin, "USE pin ") \ + EM(afs_cell_trace_use_probe, "USE probe ") \ + EM(afs_cell_trace_use_sbi, "USE sbi ") \ + E_(afs_cell_trace_wait, "WAIT ") + #define afs_fs_operations \ EM(afs_FS_FetchData, "FS.FetchData") \ EM(afs_FS_FetchStatus, "FS.FetchStatus") \ @@ -483,6 +564,7 @@ enum afs_cb_break_reason { afs_call_traces; afs_server_traces; +afs_cell_traces; afs_fs_operations; afs_vl_operations; afs_edit_dir_ops; @@ -1358,6 +1440,33 @@ TRACE_EVENT(afs_volume, __entry->ref) ); +TRACE_EVENT(afs_cell, + TP_PROTO(unsigned int cell_debug_id, int usage, int active, + enum afs_cell_trace reason), + + TP_ARGS(cell_debug_id, usage, active, reason), + + TP_STRUCT__entry( + __field(unsigned int, cell ) + __field(int, usage ) + __field(int, active ) + __field(int, reason ) + ), + + TP_fast_assign( + __entry->cell = cell_debug_id; + __entry->usage = usage; + __entry->active = active; + __entry->reason = reason; + ), + + TP_printk("L=%08x %s u=%d a=%d", + __entry->cell, + __print_symbolic(__entry->reason, afs_cell_traces), + __entry->usage, + __entry->active) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ |