85f0379aa0
To prevent the master of an rsb from changing rapidly, an unused rsb is kept on the "toss list" for a period of time to be reused. The toss list was being cleared completely for each recovery, which is unnecessary. Much of the benefit of the toss list can be maintained if nodes keep rsb's in their toss list that they are the master of. These rsb's need to be included when the resource directory is rebuilt during recovery. Signed-off-by: David Teigland <teigland@redhat.com>
421 lines
9.6 KiB
C
421 lines
9.6 KiB
C
/******************************************************************************
|
|
*******************************************************************************
|
|
**
|
|
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
|
|
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
|
|
**
|
|
** This copyrighted material is made available to anyone wishing to use,
|
|
** modify, copy, or redistribute it subject to the terms and conditions
|
|
** of the GNU General Public License v.2.
|
|
**
|
|
*******************************************************************************
|
|
******************************************************************************/
|
|
|
|
#include "dlm_internal.h"
|
|
#include "lockspace.h"
|
|
#include "member.h"
|
|
#include "lowcomms.h"
|
|
#include "rcom.h"
|
|
#include "config.h"
|
|
#include "memory.h"
|
|
#include "recover.h"
|
|
#include "util.h"
|
|
#include "lock.h"
|
|
#include "dir.h"
|
|
|
|
|
|
static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de)
|
|
{
|
|
spin_lock(&ls->ls_recover_list_lock);
|
|
list_add(&de->list, &ls->ls_recover_list);
|
|
spin_unlock(&ls->ls_recover_list_lock);
|
|
}
|
|
|
|
static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len)
|
|
{
|
|
int found = 0;
|
|
struct dlm_direntry *de;
|
|
|
|
spin_lock(&ls->ls_recover_list_lock);
|
|
list_for_each_entry(de, &ls->ls_recover_list, list) {
|
|
if (de->length == len) {
|
|
list_del(&de->list);
|
|
de->master_nodeid = 0;
|
|
memset(de->name, 0, len);
|
|
found = 1;
|
|
break;
|
|
}
|
|
}
|
|
spin_unlock(&ls->ls_recover_list_lock);
|
|
|
|
if (!found)
|
|
de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL);
|
|
return de;
|
|
}
|
|
|
|
void dlm_clear_free_entries(struct dlm_ls *ls)
|
|
{
|
|
struct dlm_direntry *de;
|
|
|
|
spin_lock(&ls->ls_recover_list_lock);
|
|
while (!list_empty(&ls->ls_recover_list)) {
|
|
de = list_entry(ls->ls_recover_list.next, struct dlm_direntry,
|
|
list);
|
|
list_del(&de->list);
|
|
kfree(de);
|
|
}
|
|
spin_unlock(&ls->ls_recover_list_lock);
|
|
}
|
|
|
|
/*
|
|
* We use the upper 16 bits of the hash value to select the directory node.
|
|
* Low bits are used for distribution of rsb's among hash buckets on each node.
|
|
*
|
|
* To give the exact range wanted (0 to num_nodes-1), we apply a modulus of
|
|
* num_nodes to the hash value. This value in the desired range is used as an
|
|
* offset into the sorted list of nodeid's to give the particular nodeid.
|
|
*/
|
|
|
|
int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
|
|
{
|
|
struct list_head *tmp;
|
|
struct dlm_member *memb = NULL;
|
|
uint32_t node, n = 0;
|
|
int nodeid;
|
|
|
|
if (ls->ls_num_nodes == 1) {
|
|
nodeid = dlm_our_nodeid();
|
|
goto out;
|
|
}
|
|
|
|
if (ls->ls_node_array) {
|
|
node = (hash >> 16) % ls->ls_total_weight;
|
|
nodeid = ls->ls_node_array[node];
|
|
goto out;
|
|
}
|
|
|
|
/* make_member_array() failed to kmalloc ls_node_array... */
|
|
|
|
node = (hash >> 16) % ls->ls_num_nodes;
|
|
|
|
list_for_each(tmp, &ls->ls_nodes) {
|
|
if (n++ != node)
|
|
continue;
|
|
memb = list_entry(tmp, struct dlm_member, list);
|
|
break;
|
|
}
|
|
|
|
DLM_ASSERT(memb , printk("num_nodes=%u n=%u node=%u\n",
|
|
ls->ls_num_nodes, n, node););
|
|
nodeid = memb->nodeid;
|
|
out:
|
|
return nodeid;
|
|
}
|
|
|
|
int dlm_dir_nodeid(struct dlm_rsb *r)
|
|
{
|
|
return dlm_hash2nodeid(r->res_ls, r->res_hash);
|
|
}
|
|
|
|
static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len)
|
|
{
|
|
uint32_t val;
|
|
|
|
val = jhash(name, len, 0);
|
|
val &= (ls->ls_dirtbl_size - 1);
|
|
|
|
return val;
|
|
}
|
|
|
|
static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de)
|
|
{
|
|
uint32_t bucket;
|
|
|
|
bucket = dir_hash(ls, de->name, de->length);
|
|
list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
|
|
}
|
|
|
|
static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name,
|
|
int namelen, uint32_t bucket)
|
|
{
|
|
struct dlm_direntry *de;
|
|
|
|
list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) {
|
|
if (de->length == namelen && !memcmp(name, de->name, namelen))
|
|
goto out;
|
|
}
|
|
de = NULL;
|
|
out:
|
|
return de;
|
|
}
|
|
|
|
void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen)
|
|
{
|
|
struct dlm_direntry *de;
|
|
uint32_t bucket;
|
|
|
|
bucket = dir_hash(ls, name, namelen);
|
|
|
|
write_lock(&ls->ls_dirtbl[bucket].lock);
|
|
|
|
de = search_bucket(ls, name, namelen, bucket);
|
|
|
|
if (!de) {
|
|
log_error(ls, "remove fr %u none", nodeid);
|
|
goto out;
|
|
}
|
|
|
|
if (de->master_nodeid != nodeid) {
|
|
log_error(ls, "remove fr %u ID %u", nodeid, de->master_nodeid);
|
|
goto out;
|
|
}
|
|
|
|
list_del(&de->list);
|
|
kfree(de);
|
|
out:
|
|
write_unlock(&ls->ls_dirtbl[bucket].lock);
|
|
}
|
|
|
|
void dlm_dir_clear(struct dlm_ls *ls)
|
|
{
|
|
struct list_head *head;
|
|
struct dlm_direntry *de;
|
|
int i;
|
|
|
|
DLM_ASSERT(list_empty(&ls->ls_recover_list), );
|
|
|
|
for (i = 0; i < ls->ls_dirtbl_size; i++) {
|
|
write_lock(&ls->ls_dirtbl[i].lock);
|
|
head = &ls->ls_dirtbl[i].list;
|
|
while (!list_empty(head)) {
|
|
de = list_entry(head->next, struct dlm_direntry, list);
|
|
list_del(&de->list);
|
|
put_free_de(ls, de);
|
|
}
|
|
write_unlock(&ls->ls_dirtbl[i].lock);
|
|
}
|
|
}
|
|
|
|
int dlm_recover_directory(struct dlm_ls *ls)
|
|
{
|
|
struct dlm_member *memb;
|
|
struct dlm_direntry *de;
|
|
char *b, *last_name = NULL;
|
|
int error = -ENOMEM, last_len, count = 0;
|
|
uint16_t namelen;
|
|
|
|
log_debug(ls, "dlm_recover_directory");
|
|
|
|
if (dlm_no_directory(ls))
|
|
goto out_status;
|
|
|
|
dlm_dir_clear(ls);
|
|
|
|
last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL);
|
|
if (!last_name)
|
|
goto out;
|
|
|
|
list_for_each_entry(memb, &ls->ls_nodes, list) {
|
|
memset(last_name, 0, DLM_RESNAME_MAXLEN);
|
|
last_len = 0;
|
|
|
|
for (;;) {
|
|
error = dlm_recovery_stopped(ls);
|
|
if (error)
|
|
goto out_free;
|
|
|
|
error = dlm_rcom_names(ls, memb->nodeid,
|
|
last_name, last_len);
|
|
if (error)
|
|
goto out_free;
|
|
|
|
schedule();
|
|
|
|
/*
|
|
* pick namelen/name pairs out of received buffer
|
|
*/
|
|
|
|
b = ls->ls_recover_buf + sizeof(struct dlm_rcom);
|
|
|
|
for (;;) {
|
|
memcpy(&namelen, b, sizeof(uint16_t));
|
|
namelen = be16_to_cpu(namelen);
|
|
b += sizeof(uint16_t);
|
|
|
|
/* namelen of 0xFFFFF marks end of names for
|
|
this node; namelen of 0 marks end of the
|
|
buffer */
|
|
|
|
if (namelen == 0xFFFF)
|
|
goto done;
|
|
if (!namelen)
|
|
break;
|
|
|
|
error = -ENOMEM;
|
|
de = get_free_de(ls, namelen);
|
|
if (!de)
|
|
goto out_free;
|
|
|
|
de->master_nodeid = memb->nodeid;
|
|
de->length = namelen;
|
|
last_len = namelen;
|
|
memcpy(de->name, b, namelen);
|
|
memcpy(last_name, b, namelen);
|
|
b += namelen;
|
|
|
|
add_entry_to_hash(ls, de);
|
|
count++;
|
|
}
|
|
}
|
|
done:
|
|
;
|
|
}
|
|
|
|
out_status:
|
|
error = 0;
|
|
dlm_set_recover_status(ls, DLM_RS_DIR);
|
|
log_debug(ls, "dlm_recover_directory %d entries", count);
|
|
out_free:
|
|
kfree(last_name);
|
|
out:
|
|
dlm_clear_free_entries(ls);
|
|
return error;
|
|
}
|
|
|
|
static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
|
|
int namelen, int *r_nodeid)
|
|
{
|
|
struct dlm_direntry *de, *tmp;
|
|
uint32_t bucket;
|
|
|
|
bucket = dir_hash(ls, name, namelen);
|
|
|
|
write_lock(&ls->ls_dirtbl[bucket].lock);
|
|
de = search_bucket(ls, name, namelen, bucket);
|
|
if (de) {
|
|
*r_nodeid = de->master_nodeid;
|
|
write_unlock(&ls->ls_dirtbl[bucket].lock);
|
|
if (*r_nodeid == nodeid)
|
|
return -EEXIST;
|
|
return 0;
|
|
}
|
|
|
|
write_unlock(&ls->ls_dirtbl[bucket].lock);
|
|
|
|
de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL);
|
|
if (!de)
|
|
return -ENOMEM;
|
|
|
|
de->master_nodeid = nodeid;
|
|
de->length = namelen;
|
|
memcpy(de->name, name, namelen);
|
|
|
|
write_lock(&ls->ls_dirtbl[bucket].lock);
|
|
tmp = search_bucket(ls, name, namelen, bucket);
|
|
if (tmp) {
|
|
kfree(de);
|
|
de = tmp;
|
|
} else {
|
|
list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
|
|
}
|
|
*r_nodeid = de->master_nodeid;
|
|
write_unlock(&ls->ls_dirtbl[bucket].lock);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Public entry point for a directory lookup: forwards to get_entry() and
 * passes its result straight back to the caller.
 */
int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
		   int *r_nodeid)
{
	int rv;

	rv = get_entry(ls, nodeid, name, namelen, r_nodeid);
	return rv;
}
|
|
|
|
static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
|
|
{
|
|
struct dlm_rsb *r;
|
|
|
|
down_read(&ls->ls_root_sem);
|
|
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
|
|
if (len == r->res_length && !memcmp(name, r->res_name, len)) {
|
|
up_read(&ls->ls_root_sem);
|
|
return r;
|
|
}
|
|
}
|
|
up_read(&ls->ls_root_sem);
|
|
return NULL;
|
|
}
|
|
|
|
/* Find the rsb where we left off (or start again), then send rsb names
|
|
for rsb's we're master of and whose directory node matches the requesting
|
|
node. inbuf is the rsb name last sent, inlen is the name's length */
|
|
|
|
void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
|
|
char *outbuf, int outlen, int nodeid)
|
|
{
|
|
struct list_head *list;
|
|
struct dlm_rsb *r;
|
|
int offset = 0, dir_nodeid;
|
|
uint16_t be_namelen;
|
|
|
|
down_read(&ls->ls_root_sem);
|
|
|
|
if (inlen > 1) {
|
|
r = find_rsb_root(ls, inbuf, inlen);
|
|
if (!r) {
|
|
inbuf[inlen - 1] = '\0';
|
|
log_error(ls, "copy_master_names from %d start %d %s",
|
|
nodeid, inlen, inbuf);
|
|
goto out;
|
|
}
|
|
list = r->res_root_list.next;
|
|
} else {
|
|
list = ls->ls_root_list.next;
|
|
}
|
|
|
|
for (offset = 0; list != &ls->ls_root_list; list = list->next) {
|
|
r = list_entry(list, struct dlm_rsb, res_root_list);
|
|
if (r->res_nodeid)
|
|
continue;
|
|
|
|
dir_nodeid = dlm_dir_nodeid(r);
|
|
if (dir_nodeid != nodeid)
|
|
continue;
|
|
|
|
/*
|
|
* The block ends when we can't fit the following in the
|
|
* remaining buffer space:
|
|
* namelen (uint16_t) +
|
|
* name (r->res_length) +
|
|
* end-of-block record 0x0000 (uint16_t)
|
|
*/
|
|
|
|
if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
|
|
/* Write end-of-block record */
|
|
be_namelen = 0;
|
|
memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
|
|
offset += sizeof(uint16_t);
|
|
goto out;
|
|
}
|
|
|
|
be_namelen = cpu_to_be16(r->res_length);
|
|
memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
|
|
offset += sizeof(uint16_t);
|
|
memcpy(outbuf + offset, r->res_name, r->res_length);
|
|
offset += r->res_length;
|
|
}
|
|
|
|
/*
|
|
* If we've reached the end of the list (and there's room) write a
|
|
* terminating record.
|
|
*/
|
|
|
|
if ((list == &ls->ls_root_list) &&
|
|
(offset + sizeof(uint16_t) <= outlen)) {
|
|
be_namelen = 0xFFFF;
|
|
memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
|
|
offset += sizeof(uint16_t);
|
|
}
|
|
|
|
out:
|
|
up_read(&ls->ls_root_sem);
|
|
}
|
|
|