diff options
Diffstat (limited to 'fs/nfsd/nfscache.c')
-rw-r--r-- | fs/nfsd/nfscache.c | 214 |
1 files changed, 113 insertions, 101 deletions
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index ff9567633245..122f69185ef5 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -27,8 +27,12 @@ */ #define TARGET_BUCKET_SIZE 64 -static struct hlist_head * cache_hash; -static struct list_head lru_head; +struct nfsd_drc_bucket { + struct list_head lru_head; + spinlock_t cache_lock; +}; + +static struct nfsd_drc_bucket *drc_hashtbl; static struct kmem_cache *drc_slab; /* max number of entries allowed in the cache */ @@ -36,6 +40,7 @@ static unsigned int max_drc_entries; /* number of significant bits in the hash value */ static unsigned int maskbits; +static unsigned int drc_hashsize; /* * Stats and other tracking of on the duplicate reply cache. All of these and @@ -43,7 +48,7 @@ static unsigned int maskbits; */ /* total number of entries */ -static unsigned int num_drc_entries; +static atomic_t num_drc_entries; /* cache misses due only to checksum comparison failures */ static unsigned int payload_misses; @@ -75,7 +80,6 @@ static struct shrinker nfsd_reply_cache_shrinker = { * A cache entry is "single use" if c_state == RC_INPROG * Otherwise, it when accessing _prev or _next, the lock must be held. */ -static DEFINE_SPINLOCK(cache_lock); static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); /* @@ -116,6 +120,12 @@ nfsd_hashsize(unsigned int limit) return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); } +static u32 +nfsd_cache_hash(__be32 xid) +{ + return hash_32(be32_to_cpu(xid), maskbits); +} + static struct svc_cacherep * nfsd_reply_cache_alloc(void) { @@ -126,7 +136,6 @@ nfsd_reply_cache_alloc(void) rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; INIT_LIST_HEAD(&rp->c_lru); - INIT_HLIST_NODE(&rp->c_hash); } return rp; } @@ -138,29 +147,27 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp) drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); } - if (!hlist_unhashed(&rp->c_hash)) - hlist_del(&rp->c_hash); list_del(&rp->c_lru); - --num_drc_entries; + atomic_dec(&num_drc_entries); drc_mem_usage -= sizeof(*rp); kmem_cache_free(drc_slab, rp); } static void -nfsd_reply_cache_free(struct svc_cacherep *rp) +nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) { - spin_lock(&cache_lock); + spin_lock(&b->cache_lock); nfsd_reply_cache_free_locked(rp); - spin_unlock(&cache_lock); + spin_unlock(&b->cache_lock); } int nfsd_reply_cache_init(void) { unsigned int hashsize; + unsigned int i; - INIT_LIST_HEAD(&lru_head); max_drc_entries = nfsd_cache_size_limit(); - num_drc_entries = 0; + atomic_set(&num_drc_entries, 0); hashsize = nfsd_hashsize(max_drc_entries); maskbits = ilog2(hashsize); @@ -170,9 +177,14 @@ int nfsd_reply_cache_init(void) if (!drc_slab) goto out_nomem; - cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL); - if (!cache_hash) + drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL); + if (!drc_hashtbl) goto out_nomem; + for (i = 0; i < hashsize; i++) { + INIT_LIST_HEAD(&drc_hashtbl[i].lru_head); + spin_lock_init(&drc_hashtbl[i].cache_lock); + } + drc_hashsize = hashsize; return 0; out_nomem: @@ -184,17 +196,22 @@ out_nomem: void nfsd_reply_cache_shutdown(void) { struct svc_cacherep *rp; + unsigned int i; unregister_shrinker(&nfsd_reply_cache_shrinker); cancel_delayed_work_sync(&cache_cleaner); - while (!list_empty(&lru_head)) { - rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); - nfsd_reply_cache_free_locked(rp); + for (i = 0; i < drc_hashsize; i++) { + struct list_head *head = &drc_hashtbl[i].lru_head; + while (!list_empty(head)) { + rp = list_first_entry(head, struct svc_cacherep, c_lru); + nfsd_reply_cache_free_locked(rp); + } } - kfree (cache_hash); - cache_hash = NULL; + kfree (drc_hashtbl); + drc_hashtbl = NULL; + drc_hashsize = 0; if (drc_slab) { kmem_cache_destroy(drc_slab); @@ -207,61 +224,63 @@ void nfsd_reply_cache_shutdown(void) * not already scheduled. */ static void -lru_put_end(struct svc_cacherep *rp) +lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) { rp->c_timestamp = jiffies; - list_move_tail(&rp->c_lru, &lru_head); + list_move_tail(&rp->c_lru, &b->lru_head); schedule_delayed_work(&cache_cleaner, RC_EXPIRE); } -/* - * Move a cache entry from one hash list to another - */ -static void -hash_refile(struct svc_cacherep *rp) -{ - hlist_del_init(&rp->c_hash); - /* - * No point in byte swapping c_xid since we're just using it to pick - * a hash bucket. - */ - hlist_add_head(&rp->c_hash, cache_hash + - hash_32((__force u32)rp->c_xid, maskbits)); -} - -/* - * Walk the LRU list and prune off entries that are older than RC_EXPIRE. - * Also prune the oldest ones when the total exceeds the max number of entries. - */ static long -prune_cache_entries(void) +prune_bucket(struct nfsd_drc_bucket *b) { struct svc_cacherep *rp, *tmp; long freed = 0; - list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { + list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) { /* * Don't free entries attached to calls that are still * in-progress, but do keep scanning the list. */ if (rp->c_state == RC_INPROG) continue; - if (num_drc_entries <= max_drc_entries && + if (atomic_read(&num_drc_entries) <= max_drc_entries && time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; nfsd_reply_cache_free_locked(rp); freed++; } + return freed; +} + +/* + * Walk the LRU list and prune off entries that are older than RC_EXPIRE. + * Also prune the oldest ones when the total exceeds the max number of entries. + */ +static long +prune_cache_entries(void) +{ + unsigned int i; + long freed = 0; + bool cancel = true; + + for (i = 0; i < drc_hashsize; i++) { + struct nfsd_drc_bucket *b = &drc_hashtbl[i]; + + if (list_empty(&b->lru_head)) + continue; + spin_lock(&b->cache_lock); + freed += prune_bucket(b); + if (!list_empty(&b->lru_head)) + cancel = false; + spin_unlock(&b->cache_lock); + } /* - * Conditionally rearm the job. If we cleaned out the list, then - * cancel any pending run (since there won't be any work to do). - * Otherwise, we rearm the job or modify the existing one to run in - * RC_EXPIRE since we just ran the pruner. + * Conditionally rearm the job to run in RC_EXPIRE since we just + * ran the pruner. */ - if (list_empty(&lru_head)) - cancel_delayed_work(&cache_cleaner); - else + if (!cancel) mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); return freed; } @@ -269,32 +288,19 @@ prune_cache_entries(void) static void cache_cleaner_func(struct work_struct *unused) { - spin_lock(&cache_lock); prune_cache_entries(); - spin_unlock(&cache_lock); } static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - unsigned long num; - - spin_lock(&cache_lock); - num = num_drc_entries; - spin_unlock(&cache_lock); - - return num; + return atomic_read(&num_drc_entries); } static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { - unsigned long freed; - - spin_lock(&cache_lock); - freed = prune_cache_entries(); - spin_unlock(&cache_lock); - return freed; + return prune_cache_entries(); } /* * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes @@ -332,20 +338,24 @@ nfsd_cache_csum(struct svc_rqst *rqstp) static bool nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) { - /* Check RPC header info first */ - if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc || - rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers || - rqstp->rq_arg.len != rp->c_len || - !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || - rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) + /* Check RPC XID first */ + if (rqstp->rq_xid != rp->c_xid) return false; - /* compare checksum of NFS data */ if (csum != rp->c_csum) { ++payload_misses; return false; } + /* Other discriminators */ + if (rqstp->rq_proc != rp->c_proc || + rqstp->rq_prot != rp->c_prot || + rqstp->rq_vers != rp->c_vers || + rqstp->rq_arg.len != rp->c_len || + !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || + rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) + return false; + return true; } @@ -355,18 +365,14 @@ nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) * NULL on failure. */ static struct svc_cacherep * -nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) +nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp, + __wsum csum) { struct svc_cacherep *rp, *ret = NULL; - struct hlist_head *rh; + struct list_head *rh = &b->lru_head; unsigned int entries = 0; - /* - * No point in byte swapping rq_xid since we're just using it to pick - * a hash bucket. - */ - rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)]; - hlist_for_each_entry(rp, rh, c_hash) { + list_for_each_entry(rp, rh, c_lru) { ++entries; if (nfsd_cache_match(rqstp, csum, rp)) { ret = rp; @@ -377,11 +383,12 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) /* tally hash chain length stats */ if (entries > longest_chain) { longest_chain = entries; - longest_chain_cachesize = num_drc_entries; + longest_chain_cachesize = atomic_read(&num_drc_entries); } else if (entries == longest_chain) { /* prefer to keep the smallest cachesize possible here */ - longest_chain_cachesize = min(longest_chain_cachesize, - num_drc_entries); + longest_chain_cachesize = min_t(unsigned int, + longest_chain_cachesize, + atomic_read(&num_drc_entries)); } return ret; @@ -403,6 +410,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) vers = rqstp->rq_vers, proc = rqstp->rq_proc; __wsum csum; + u32 hash = nfsd_cache_hash(xid); + struct nfsd_drc_bucket *b = &drc_hashtbl[hash]; unsigned long age; int type = rqstp->rq_cachetype; int rtn = RC_DOIT; @@ -420,16 +429,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) * preallocate an entry. */ rp = nfsd_reply_cache_alloc(); - spin_lock(&cache_lock); + spin_lock(&b->cache_lock); if (likely(rp)) { - ++num_drc_entries; + atomic_inc(&num_drc_entries); drc_mem_usage += sizeof(*rp); } /* go ahead and prune the cache */ - prune_cache_entries(); + prune_bucket(b); - found = nfsd_cache_search(rqstp, csum); + found = nfsd_cache_search(b, rqstp, csum); if (found) { if (likely(rp)) nfsd_reply_cache_free_locked(rp); @@ -454,8 +463,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rp->c_len = rqstp->rq_arg.len; rp->c_csum = csum; - hash_refile(rp); - lru_put_end(rp); + lru_put_end(b, rp); /* release any buffer */ if (rp->c_type == RC_REPLBUFF) { @@ -465,14 +473,14 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) } rp->c_type = RC_NOCACHE; out: - spin_unlock(&cache_lock); + spin_unlock(&b->cache_lock); return rtn; found_entry: nfsdstats.rchits++; /* We found a matching entry which is either in progress or done. */ age = jiffies - rp->c_timestamp; - lru_put_end(rp); + lru_put_end(b, rp); rtn = RC_DROPIT; /* Request being processed or excessive rexmits */ @@ -527,18 +535,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; + u32 hash; + struct nfsd_drc_bucket *b; int len; size_t bufsize = 0; if (!rp) return; + hash = nfsd_cache_hash(rp->c_xid); + b = &drc_hashtbl[hash]; + len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { - nfsd_reply_cache_free(rp); + nfsd_reply_cache_free(b, rp); return; } @@ -553,23 +566,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) bufsize = len << 2; cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); if (!cachv->iov_base) { - nfsd_reply_cache_free(rp); + nfsd_reply_cache_free(b, rp); return; } cachv->iov_len = bufsize; memcpy(cachv->iov_base, statp, bufsize); break; case RC_NOCACHE: - nfsd_reply_cache_free(rp); + nfsd_reply_cache_free(b, rp); return; } - spin_lock(&cache_lock); + spin_lock(&b->cache_lock); drc_mem_usage += bufsize; - lru_put_end(rp); + lru_put_end(b, rp); rp->c_secure = rqstp->rq_secure; rp->c_type = cachetype; rp->c_state = RC_DONE; - spin_unlock(&cache_lock); + spin_unlock(&b->cache_lock); return; } @@ -600,9 +613,9 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) */ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) { - spin_lock(&cache_lock); seq_printf(m, "max entries: %u\n", max_drc_entries); - seq_printf(m, "num entries: %u\n", num_drc_entries); + seq_printf(m, "num entries: %u\n", + atomic_read(&num_drc_entries)); seq_printf(m, "hash buckets: %u\n", 1 << maskbits); seq_printf(m, "mem usage: %u\n", drc_mem_usage); seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); @@ -611,7 +624,6 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "payload misses: %u\n", payload_misses); seq_printf(m, "longest chain len: %u\n", longest_chain); seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); - spin_unlock(&cache_lock); return 0; } |