Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (48 commits)
  ocfs2: Avoid to evaluate xattr block flags again.
  ocfs2/cluster: Release debugfs file elapsed_time_in_ms
  ocfs2: Add a mount option "coherency=*" to handle cluster coherency for O_DIRECT writes.
  Initialize max_slots early
  When I tried to compile I got the following warning: fs/ocfs2/slot_map.c: In function ‘ocfs2_init_slot_info’: fs/ocfs2/slot_map.c:360: warning: ‘bytes’ may be used uninitialized in this function fs/ocfs2/slot_map.c:360: note: ‘bytes’ was declared here. Compiler: gcc version 4.4.3 (GCC) on Mandriva. I'm not sure why this warning occurs; I think the compiler doesn't know that the variable "bytes" is initialized when it is passed by reference to ocfs2_slot_map_physical_size, so it emits that ugly warning. However, simply initializing the "bytes" variable to 0 will fix it.
  ocfs2: validate bg_free_bits_count after update
  ocfs2/cluster: Bump up dlm protocol to version 1.1
  ocfs2/cluster: Show per region heartbeat elapsed time
  ocfs2/cluster: Add mlogs for heartbeat up/down events
  ocfs2/cluster: Create debugfs dir/files for each region
  ocfs2/cluster: Create debugfs files for live, quorum and failed region bitmaps
  ocfs2/cluster: Maintain bitmap of failed regions
  ocfs2/cluster: Maintain bitmap of quorum regions
  ocfs2/cluster: Track bitmap of live heartbeat regions
  ocfs2/cluster: Track number of global heartbeat regions
  ocfs2/cluster: Maintain live node bitmap per heartbeat region
  ocfs2/cluster: Reorganize o2hb debugfs init
  ocfs2/cluster: Check slots for unconfigured live nodes
  ocfs2/cluster: Print messages when adding/removing nodes
  ocfs2/cluster: Print messages when adding/removing heartbeat regions
  ...
This commit is contained in:
Linus Torvalds
2010-10-21 19:01:34 -07:00
39 changed files with 1894 additions and 173 deletions
+7
View File
@@ -87,3 +87,10 @@ dir_resv_level= (*) By default, directory reservations will scale with file
reservations - users should rarely need to change this
value. If allocation reservations are turned off, this
option will have no effect.
coherency=full (*) Disallow concurrent O_DIRECT writes; the cluster inode
lock will be taken to force other nodes to drop their caches,
therefore full cluster coherency is guaranteed even
for O_DIRECT writes.
coherency=buffered Allow concurrent O_DIRECT writes without an EX lock among
nodes, which gains high performance at the risk of getting
stale data on other nodes.
+2 -2
View File
@@ -1849,8 +1849,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount;
}
if (le32_to_cpu(es->s_blocks_count) >
(sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
if (generic_check_addressable(sb->s_blocksize_bits,
le32_to_cpu(es->s_blocks_count))) {
ext3_msg(sb, KERN_ERR,
"error: filesystem is too large to mount safely");
if (sizeof(sector_t) < 8)
+3 -5
View File
@@ -2831,15 +2831,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache.
*/
if ((ext4_blocks_count(es) >
(sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) ||
(ext4_blocks_count(es) >
(pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) {
ret = generic_check_addressable(sb->s_blocksize_bits,
ext4_blocks_count(es));
if (ret) {
ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely on this system");
if (sizeof(sector_t) < 8)
ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
ret = -EFBIG;
goto failed_mount;
}
+4
View File
@@ -1371,6 +1371,10 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat,
if (!compat && !ro && !incompat)
return 1;
/* Load journal superblock if it is not loaded yet. */
if (journal->j_format_version == 0 &&
journal_get_superblock(journal) != 0)
return 0;
if (journal->j_format_version == 1)
return 0;
+29
View File
@@ -913,6 +913,35 @@ int generic_file_fsync(struct file *file, int datasync)
}
EXPORT_SYMBOL(generic_file_fsync);
/**
 * generic_check_addressable - Check addressability of file system
 * @blocksize_bits:	log of file system block size
 * @num_blocks:		number of blocks in file system
 *
 * Determine whether a file system with @num_blocks blocks (and a
 * block size of 2**@blocksize_bits) is addressable by the sector_t
 * and page cache of the system.
 *
 * Return 0 if it is addressable (a file system with zero blocks is
 * reported as addressable), -EINVAL if @blocksize_bits is smaller than 9
 * or larger than PAGE_CACHE_SHIFT, and -EFBIG if the last block or the
 * last page index does not fit.
 */
int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks)
{
	u64 last_fs_block;
	u64 last_fs_page;

	if (unlikely(num_blocks == 0))
		return 0;

	if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT))
		return -EINVAL;

	/*
	 * Only derive the last block/page once the arguments are known to
	 * be sane: with blocksize_bits > PAGE_CACHE_SHIFT the shift count
	 * below would be out of range, which is undefined behaviour.
	 */
	last_fs_block = num_blocks - 1;
	last_fs_page = last_fs_block >> (PAGE_CACHE_SHIFT - blocksize_bits);

	if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) ||
	    (last_fs_page > (pgoff_t)(~0ULL)))
		return -EFBIG;

	return 0;
}
EXPORT_SYMBOL(generic_check_addressable);
/*
* No-op implementation of ->fsync for in-memory filesystems.
*/
+5 -4
View File
@@ -883,8 +883,8 @@ struct ocfs2_write_ctxt {
* out in so that future reads from that region will get
* zero's.
*/
struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
unsigned int w_num_pages;
struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
struct page *w_target_page;
/*
@@ -1642,7 +1642,8 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
return ret;
}
int ocfs2_write_begin_nolock(struct address_space *mapping,
int ocfs2_write_begin_nolock(struct file *filp,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page)
@@ -1692,7 +1693,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
mlog_errno(ret);
goto out;
} else if (ret == 1) {
ret = ocfs2_refcount_cow(inode, di_bh,
ret = ocfs2_refcount_cow(inode, filp, di_bh,
wc->w_cpos, wc->w_clen, UINT_MAX);
if (ret) {
mlog_errno(ret);
@@ -1854,7 +1855,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
*/
down_write(&OCFS2_I(inode)->ip_alloc_sem);
ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep,
ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep,
fsdata, di_bh, NULL);
if (ret) {
mlog_errno(ret);
+2 -1
View File
@@ -48,7 +48,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
int ocfs2_write_begin_nolock(struct address_space *mapping,
int ocfs2_write_begin_nolock(struct file *filp,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page);
File diff suppressed because it is too large Load Diff
+4
View File
@@ -31,6 +31,8 @@
#define O2HB_REGION_TIMEOUT_MS 2000
#define O2HB_MAX_REGION_NAME_LEN 32
/* number of changes to be seen as live */
#define O2HB_LIVE_THRESHOLD 2
/* number of equal samples to be seen as dead */
@@ -81,5 +83,7 @@ int o2hb_check_node_heartbeating(u8 node_num);
int o2hb_check_node_heartbeating_from_callback(u8 node_num);
int o2hb_check_local_node_heartbeating(void);
void o2hb_stop_all_regions(void);
int o2hb_get_all_regions(char *region_uuids, u8 numregions);
int o2hb_global_heartbeat_active(void);
#endif /* O2CLUSTER_HEARTBEAT_H */
+2 -1
View File
@@ -119,7 +119,8 @@
#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
#define ML_NOTICE 0x0000000200000000ULL /* sent to KERN_NOTICE */
#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */
#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
#define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */
#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
+5
View File
@@ -711,6 +711,8 @@ static struct config_item *o2nm_node_group_make_item(struct config_group *group,
config_item_init_type_name(&node->nd_item, name, &o2nm_node_type);
spin_lock_init(&node->nd_lock);
mlog(ML_CLUSTER, "o2nm: Registering node %s\n", name);
return &node->nd_item;
}
@@ -744,6 +746,9 @@ static void o2nm_node_group_drop_item(struct config_group *group,
}
write_unlock(&cluster->cl_nodes_lock);
mlog(ML_CLUSTER, "o2nm: Unregistered node %s\n",
config_item_name(&node->nd_item));
config_item_put(item);
}
+6
View File
@@ -36,4 +36,10 @@
/* host name, group name, cluster name all 64 bytes */
#define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN
/*
* Maximum number of global heartbeat regions allowed.
* **CAUTION** Changing this number will break dlm compatibility.
*/
#define O2NM_MAX_REGIONS 32
#endif /* _OCFS2_NODEMANAGER_H */
+5
View File
@@ -1696,6 +1696,9 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num,
{
o2quo_hb_down(node_num);
if (!node)
return;
if (node_num != o2nm_this_node())
o2net_disconnect_node(node);
@@ -1709,6 +1712,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
o2quo_hb_up(node_num);
BUG_ON(!node);
/* ensure an immediate connect attempt */
nn->nn_last_connect_attempt = jiffies -
(msecs_to_jiffies(o2net_reconnect_delay()) + 1);
+29 -4
View File
@@ -40,6 +40,14 @@
#include "inode.h"
#include "super.h"
void ocfs2_dentry_attach_gen(struct dentry *dentry)
{
unsigned long gen =
OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
BUG_ON(dentry->d_inode);
dentry->d_fsdata = (void *)gen;
}
static int ocfs2_dentry_revalidate(struct dentry *dentry,
struct nameidata *nd)
@@ -51,11 +59,20 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
mlog_entry("(0x%p, '%.*s')\n", dentry,
dentry->d_name.len, dentry->d_name.name);
/* Never trust a negative dentry - force a new lookup. */
/* For a negative dentry -
* check the generation number of the parent and compare with the
* one stored in the dentry (d_fsdata).
*/
if (inode == NULL) {
mlog(0, "negative dentry: %.*s\n", dentry->d_name.len,
dentry->d_name.name);
goto bail;
unsigned long gen = (unsigned long) dentry->d_fsdata;
unsigned long pgen =
OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
mlog(0, "negative dentry: %.*s parent gen: %lu "
"dentry gen: %lu\n",
dentry->d_name.len, dentry->d_name.name, pgen, gen);
if (gen != pgen)
goto bail;
goto valid;
}
BUG_ON(!osb);
@@ -96,6 +113,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
goto bail;
}
valid:
ret = 1;
bail:
@@ -227,6 +245,12 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
if (!inode)
return 0;
if (!dentry->d_inode && dentry->d_fsdata) {
/* Converting a negative dentry to positive
Clear dentry->d_fsdata */
dentry->d_fsdata = dl = NULL;
}
if (dl) {
mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
" \"%.*s\": old parent: %llu, new: %llu\n",
@@ -452,6 +476,7 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
out:
iput(inode);
ocfs2_dentry_attach_gen(dentry);
}
/*
+1
View File
@@ -64,5 +64,6 @@ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
struct inode *old_dir, struct inode *new_dir);
extern spinlock_t dentry_attach_lock;
void ocfs2_dentry_attach_gen(struct dentry *dentry);
#endif /* OCFS2_DCACHE_H */
+28 -1
View File
@@ -445,7 +445,9 @@ enum {
DLM_LOCK_REQUEST_MSG, /* 515 */
DLM_RECO_DATA_DONE_MSG, /* 516 */
DLM_BEGIN_RECO_MSG, /* 517 */
DLM_FINALIZE_RECO_MSG /* 518 */
DLM_FINALIZE_RECO_MSG, /* 518 */
DLM_QUERY_REGION, /* 519 */
DLM_QUERY_NODEINFO, /* 520 */
};
struct dlm_reco_node_data
@@ -727,6 +729,31 @@ struct dlm_cancel_join
u8 domain[O2NM_MAX_NAME_LEN];
};
/*
 * Payload of a DLM_QUERY_REGION message: the sender's list of heartbeat
 * regions, compared by the receiver against its own list.
 */
struct dlm_query_region {
/* node number of the sender */
u8 qr_node;
/* number of region names present in qr_regions; 0 with local heartbeat */
u8 qr_numregions;
/* length of the domain name held in qr_domain */
u8 qr_namelen;
u8 pad1;
/* domain name */
u8 qr_domain[O2NM_MAX_NAME_LEN];
/* region names, packed back to back, O2HB_MAX_REGION_NAME_LEN bytes each */
u8 qr_regions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_REGIONS];
};
/* One configured node as carried in a DLM_QUERY_NODEINFO message. */
struct dlm_node_info {
/* node number */
u8 ni_nodenum;
u8 pad1;
/* copied from the node's nd_ipv4_port; presumably network byte order
 * (it is passed through ntohs() when logged) - TODO confirm */
u16 ni_ipv4_port;
/* copied from the node's nd_ipv4_address */
u32 ni_ipv4_address;
};
/*
 * Payload of a DLM_QUERY_NODEINFO message: the sender's view of the
 * configured nodes, compared by the receiver against its own.
 */
struct dlm_query_nodeinfo {
/* node number of the sender */
u8 qn_nodenum;
/* number of valid entries in qn_nodes */
u8 qn_numnodes;
/* length of the domain name held in qn_domain */
u8 qn_namelen;
u8 pad1;
/* domain name */
u8 qn_domain[O2NM_MAX_NAME_LEN];
/* node descriptions; only the first qn_numnodes entries are filled in */
struct dlm_node_info qn_nodes[O2NM_MAX_NODES];
};
struct dlm_exit_domain
{
u8 node_idx;
+7 -5
View File
@@ -493,7 +493,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
struct hlist_head *bucket;
struct hlist_node *list;
int i, out = 0;
unsigned long total = 0, longest = 0, bktcnt;
unsigned long total = 0, longest = 0, bucket_count = 0;
out += snprintf(db->buf + out, db->len - out,
"Dumping MLEs for Domain: %s\n", dlm->name);
@@ -505,13 +505,13 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
mle = hlist_entry(list, struct dlm_master_list_entry,
master_hash_node);
++total;
++bktcnt;
++bucket_count;
if (db->len - out < 200)
continue;
out += dump_mle(mle, db->buf + out, db->len - out);
}
longest = max(longest, bktcnt);
bktcnt = 0;
longest = max(longest, bucket_count);
bucket_count = 0;
}
spin_unlock(&dlm->master_lock);
@@ -782,7 +782,9 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
/* Domain: xxxxxxxxxx Key: 0xdfbac769 */
out += snprintf(db->buf + out, db->len - out,
"Domain: %s Key: 0x%08x\n", dlm->name, dlm->key);
"Domain: %s Key: 0x%08x Protocol: %d.%d\n",
dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major,
dlm->dlm_locking_proto.pv_minor);
/* Thread Pid: xxx Node: xxx State: xxxxx */
out += snprintf(db->buf + out, db->len - out,
+399 -1
View File
@@ -128,10 +128,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
* will have a negotiated version with the same major number and a minor
* number equal or smaller. The dlm_ctxt->dlm_locking_proto field should
* be used to determine what a running domain is actually using.
*
* New in version 1.1:
* - Message DLM_QUERY_REGION added to support global heartbeat
* - Message DLM_QUERY_NODEINFO added to allow online node removes
*/
static const struct dlm_protocol_version dlm_protocol = {
.pv_major = 1,
.pv_minor = 0,
.pv_minor = 1,
};
#define DLM_DOMAIN_BACKOFF_MS 200
@@ -142,6 +146,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data);
static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data);
static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
void *data, void **ret_data);
static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data);
static int dlm_protocol_compare(struct dlm_protocol_version *existing,
@@ -921,6 +927,370 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
return 0;
}
/*
 * dlm_match_regions - compare a joining node's heartbeat regions with ours
 * @dlm: domain the query was sent for
 * @qr:  DLM_QUERY_REGION payload received from the joining node
 *
 * Both nodes must agree on whether global heartbeat is in use and, when
 * it is, every local region must be known to the joining node and vice
 * versa.
 *
 * Returns 0 when the configurations match, -EINVAL on any mismatch or
 * when the message claims more regions than it can carry, and -ENOMEM
 * when the scratch buffer for the local region list cannot be allocated.
 */
static int dlm_match_regions(struct dlm_ctxt *dlm,
			     struct dlm_query_region *qr)
{
	char *local = NULL, *remote = qr->qr_regions;
	char *l, *r;
	int localnr, i, j, foundit;
	int status = 0;

	if (!o2hb_global_heartbeat_active()) {
		if (qr->qr_numregions) {
			mlog(ML_ERROR, "Domain %s: Joining node %d has global "
			     "heartbeat enabled but local node %d does not\n",
			     qr->qr_domain, qr->qr_node, dlm->node_num);
			status = -EINVAL;
		}
		goto bail;
	}

	if (o2hb_global_heartbeat_active() && !qr->qr_numregions) {
		mlog(ML_ERROR, "Domain %s: Local node %d has global "
		     "heartbeat enabled but joining node %d does not\n",
		     qr->qr_domain, dlm->node_num, qr->qr_node);
		status = -EINVAL;
		goto bail;
	}

	/* qr_regions only has room for O2NM_MAX_REGIONS names */
	if (qr->qr_numregions > O2NM_MAX_REGIONS) {
		mlog(ML_ERROR, "Domain %s: Joining node %d sent %d regions, "
		     "max supported is %d\n",
		     qr->qr_domain, qr->qr_node, qr->qr_numregions,
		     O2NM_MAX_REGIONS);
		status = -EINVAL;
		goto bail;
	}

	r = remote;
	for (i = 0; i < qr->qr_numregions; ++i) {
		mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r);
		r += O2HB_MAX_REGION_NAME_LEN;
	}

	/*
	 * NOTE(review): dlm_query_region_handler() calls this with
	 * dlm_domain_lock and dlm->spinlock held, so a GFP_KERNEL
	 * allocation (which may sleep) looks unsafe here - confirm and
	 * consider a non-sleeping allocation or allocating in the caller
	 * before the locks are taken.
	 */
	local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
	if (!local) {
		status = -ENOMEM;
		goto bail;
	}

	localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS);

	/* compare local regions with remote */
	l = local;
	for (i = 0; i < localnr; ++i) {
		foundit = 0;
		r = remote;
		/* strictly less-than: slot qr_numregions is past the list */
		for (j = 0; j < qr->qr_numregions; ++j) {
			if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) {
				foundit = 1;
				break;
			}
			r += O2HB_MAX_REGION_NAME_LEN;
		}
		if (!foundit) {
			status = -EINVAL;
			mlog(ML_ERROR, "Domain %s: Region '%.*s' registered "
			     "in local node %d but not in joining node %d\n",
			     qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, l,
			     dlm->node_num, qr->qr_node);
			goto bail;
		}
		l += O2HB_MAX_REGION_NAME_LEN;
	}

	/* compare remote with local regions */
	r = remote;
	for (i = 0; i < qr->qr_numregions; ++i) {
		foundit = 0;
		l = local;
		for (j = 0; j < localnr; ++j) {
			if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) {
				foundit = 1;
				break;
			}
			l += O2HB_MAX_REGION_NAME_LEN;
		}
		if (!foundit) {
			status = -EINVAL;
			mlog(ML_ERROR, "Domain %s: Region '%.*s' registered "
			     "in joining node %d but not in local node %d\n",
			     qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, r,
			     qr->qr_node, dlm->node_num);
			goto bail;
		}
		r += O2HB_MAX_REGION_NAME_LEN;
	}

bail:
	/* kfree(NULL) is a no-op, so early exits need no special casing */
	kfree(local);
	return status;
}
/*
 * Send this node's heartbeat region list, as a DLM_QUERY_REGION message,
 * to every node set in @node_map except ourselves.
 *
 * Returns 0 when @node_map is empty or every contacted node answered 0.
 * Otherwise returns -ENOMEM if the message could not be allocated, or the
 * first non-zero send error / remote status; no further nodes are
 * contacted after that.
 */
static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map)
{
	struct dlm_query_region *qr;
	int status, ret = 0, i;
	char *p;

	/* nobody to send to */
	if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
		return ret;

	qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL);
	if (!qr) {
		ret = -ENOMEM;
		mlog_errno(ret);
		return ret;
	}

	qr->qr_node = dlm->node_num;
	qr->qr_namelen = strlen(dlm->name);
	memcpy(qr->qr_domain, dlm->name, qr->qr_namelen);

	/* if local hb, the numregions will be zero */
	if (o2hb_global_heartbeat_active())
		qr->qr_numregions = o2hb_get_all_regions(qr->qr_regions,
							 O2NM_MAX_REGIONS);

	p = qr->qr_regions;
	for (i = 0; i < qr->qr_numregions; ++i) {
		mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, p);
		p += O2HB_MAX_REGION_NAME_LEN;
	}

	for (i = find_next_bit(node_map, O2NM_MAX_NODES, 0);
	     i < O2NM_MAX_NODES;
	     i = find_next_bit(node_map, O2NM_MAX_NODES, i + 1)) {
		if (i == dlm->node_num)
			continue;

		mlog(0, "Sending regions to node %d\n", i);

		ret = o2net_send_message(DLM_QUERY_REGION, DLM_MOD_KEY, qr,
					 sizeof(struct dlm_query_region),
					 i, &status);
		/* the send worked, so the remote handler's verdict counts */
		if (ret >= 0)
			ret = status;
		if (ret) {
			mlog(ML_ERROR, "Region mismatch %d, node %d\n",
			     ret, i);
			break;
		}
	}

	kfree(qr);
	return ret;
}
/*
 * Network handler for DLM_QUERY_REGION.
 *
 * Returns -EINVAL when the named domain is not found, when the sender is
 * not the node currently recorded as joining, or when the domain runs dlm
 * protocol 1.0; otherwise returns the result of dlm_match_regions().
 */
static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
				    void *data, void **ret_data)
{
	struct dlm_query_region *qr = (struct dlm_query_region *) msg->buf;
	struct dlm_ctxt *dlm;
	int status = -EINVAL;

	mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node,
	     qr->qr_domain);

	spin_lock(&dlm_domain_lock);

	dlm = __dlm_lookup_domain_full(qr->qr_domain, qr->qr_namelen);
	if (!dlm) {
		mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
		     "before join domain\n", qr->qr_node, qr->qr_domain);
		goto out_domain;
	}

	spin_lock(&dlm->spinlock);

	if (dlm->joining_node != qr->qr_node) {
		mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
		     "but joining node is %d\n", qr->qr_node, qr->qr_domain,
		     dlm->joining_node);
		goto out_dlm;
	}

	/* Support for global heartbeat was added in 1.1 */
	if (dlm->dlm_locking_proto.pv_major == 1 &&
	    dlm->dlm_locking_proto.pv_minor == 0) {
		mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
		     "but active dlm protocol is %d.%d\n", qr->qr_node,
		     qr->qr_domain, dlm->dlm_locking_proto.pv_major,
		     dlm->dlm_locking_proto.pv_minor);
		goto out_dlm;
	}

	status = dlm_match_regions(dlm, qr);

out_dlm:
	spin_unlock(&dlm->spinlock);
out_domain:
	spin_unlock(&dlm_domain_lock);

	return status;
}
/*
 * dlm_match_nodes - compare a joining node's node configuration with ours
 * @dlm: domain the query was sent for
 * @qn:  DLM_QUERY_NODEINFO payload received from the joining node
 *
 * For every node number below O2NM_MAX_NODES, the node must either be
 * unknown on both sides or be configured on both sides with the same
 * number, IPv4 port and IPv4 address.
 *
 * Returns 0 when the two views agree and -EINVAL at the first mismatch.
 */
static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn)
{
	struct o2nm_node *local;
	struct dlm_node_info *remote;
	int i, j;
	int status = 0;

	for (j = 0; j < qn->qn_numnodes; ++j)
		mlog(0, "Node %3d, %pI4:%u\n", qn->qn_nodes[j].ni_nodenum,
		     &(qn->qn_nodes[j].ni_ipv4_address),
		     ntohs(qn->qn_nodes[j].ni_ipv4_port));

	for (i = 0; i < O2NM_MAX_NODES && !status; ++i) {
		local = o2nm_get_node_by_num(i);

		remote = NULL;
		for (j = 0; j < qn->qn_numnodes; ++j) {
			if (qn->qn_nodes[j].ni_nodenum == i) {
				remote = &(qn->qn_nodes[j]);
				break;
			}
		}

		/* unknown on both sides: nothing to compare, nothing to put */
		if (!local && !remote)
			continue;

		if (remote && !local) {
			status = -EINVAL;
			mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) "
			     "registered in joining node %d but not in "
			     "local node %d\n", qn->qn_domain,
			     remote->ni_nodenum,
			     &(remote->ni_ipv4_address),
			     ntohs(remote->ni_ipv4_port),
			     qn->qn_nodenum, dlm->node_num);
		} else if (local && !remote) {
			status = -EINVAL;
			mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) "
			     "registered in local node %d but not in "
			     "joining node %d\n", qn->qn_domain,
			     local->nd_num, &(local->nd_ipv4_address),
			     ntohs(local->nd_ipv4_port),
			     dlm->node_num, qn->qn_nodenum);
		} else if ((remote->ni_nodenum != local->nd_num) ||
			   (remote->ni_ipv4_port != local->nd_ipv4_port) ||
			   (remote->ni_ipv4_address != local->nd_ipv4_address)) {
			/* known on both sides but described differently */
			status = -EINVAL;
		}

		/* drop the reference taken by o2nm_get_node_by_num() */
		if (local)
			o2nm_node_put(local);
	}

	return status;
}
static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map)
{
struct dlm_query_nodeinfo *qn = NULL;
struct o2nm_node *node;
int ret = 0, status, count, i;
if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
goto bail;
qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL);
if (!qn) {
ret = -ENOMEM;
mlog_errno(ret);
goto bail;
}
for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) {
node = o2nm_get_node_by_num(i);
if (!node)
continue;
qn->qn_nodes[count].ni_nodenum = node->nd_num;
qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port;
qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address;
mlog(0, "Node %3d, %pI4:%u\n", node->nd_num,
&(node->nd_ipv4_address), ntohs(node->nd_ipv4_port));
++count;
o2nm_node_put(node);
}
qn->qn_nodenum = dlm->node_num;
qn->qn_numnodes = count;
qn->qn_namelen = strlen(dlm->name);
memcpy(qn->qn_domain, dlm->name, qn->qn_namelen);
i = -1;
while ((i = find_next_bit(node_map, O2NM_MAX_NODES,
i + 1)) < O2NM_MAX_NODES) {
if (i == dlm->node_num)
continue;
mlog(0, "Sending nodeinfo to node %d\n", i);
ret = o2net_send_message(DLM_QUERY_NODEINFO, DLM_MOD_KEY,
qn, sizeof(struct dlm_query_nodeinfo),
i, &status);
if (ret >= 0)
ret = status;
if (ret) {
mlog(ML_ERROR, "node mismatch %d, node %d\n", ret, i);
break;
}
}
bail:
kfree(qn);
return ret;
}
/*
 * Network handler for DLM_QUERY_NODEINFO.
 *
 * Returns -EINVAL when the named domain is not found, when the sender is
 * not the node currently recorded as joining, or when the domain runs dlm
 * protocol 1.0; otherwise returns the result of dlm_match_nodes().
 */
static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
				      void *data, void **ret_data)
{
	struct dlm_query_nodeinfo *qn = (struct dlm_query_nodeinfo *) msg->buf;
	struct dlm_ctxt *dlm;
	int status = -EINVAL;

	mlog(0, "Node %u queries nodes on domain %s\n", qn->qn_nodenum,
	     qn->qn_domain);

	spin_lock(&dlm_domain_lock);

	dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen);
	if (!dlm) {
		mlog(ML_ERROR, "Node %d queried nodes on domain %s before "
		     "join domain\n", qn->qn_nodenum, qn->qn_domain);
		goto out_domain;
	}

	spin_lock(&dlm->spinlock);

	if (dlm->joining_node != qn->qn_nodenum) {
		mlog(ML_ERROR, "Node %d queried nodes on domain %s but "
		     "joining node is %d\n", qn->qn_nodenum, qn->qn_domain,
		     dlm->joining_node);
		goto out_dlm;
	}

	/* Support for node query was added in 1.1 */
	if (dlm->dlm_locking_proto.pv_major == 1 &&
	    dlm->dlm_locking_proto.pv_minor == 0) {
		mlog(ML_ERROR, "Node %d queried nodes on domain %s "
		     "but active dlm protocol is %d.%d\n", qn->qn_nodenum,
		     qn->qn_domain, dlm->dlm_locking_proto.pv_major,
		     dlm->dlm_locking_proto.pv_minor);
		goto out_dlm;
	}

	status = dlm_match_nodes(dlm, qn);

out_dlm:
	spin_unlock(&dlm->spinlock);
out_domain:
	spin_unlock(&dlm_domain_lock);

	return status;
}
static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data)
{
@@ -1241,6 +1611,20 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
set_bit(dlm->node_num, dlm->domain_map);
spin_unlock(&dlm->spinlock);
/* Support for global heartbeat and node info was added in 1.1 */
if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) {
status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map);
if (status) {
mlog_errno(status);
goto bail;
}
status = dlm_send_regions(dlm, ctxt->yes_resp_map);
if (status) {
mlog_errno(status);
goto bail;
}
}
dlm_send_join_asserts(dlm, ctxt->yes_resp_map);
/* Joined state *must* be set before the joining node
@@ -1807,7 +2191,21 @@ static int dlm_register_net_handlers(void)
sizeof(struct dlm_cancel_join),
dlm_cancel_join_handler,
NULL, NULL, &dlm_join_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_QUERY_REGION, DLM_MOD_KEY,
sizeof(struct dlm_query_region),
dlm_query_region_handler,
NULL, NULL, &dlm_join_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_QUERY_NODEINFO, DLM_MOD_KEY,
sizeof(struct dlm_query_nodeinfo),
dlm_query_nodeinfo_handler,
NULL, NULL, &dlm_join_handlers);
bail:
if (status < 0)
dlm_unregister_net_handlers();
+8
View File
@@ -3635,10 +3635,18 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
{
struct inode *inode;
struct address_space *mapping;
struct ocfs2_inode_info *oi;
inode = ocfs2_lock_res_inode(lockres);
mapping = inode->i_mapping;
if (S_ISDIR(inode->i_mode)) {
oi = OCFS2_I(inode);
oi->ip_dir_lock_gen++;
mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
goto out;
}
if (!S_ISREG(inode->i_mode))
goto out;
+40 -33
View File
@@ -64,12 +64,6 @@
#include "buffer_head_io.h"
static int ocfs2_sync_inode(struct inode *inode)
{
filemap_fdatawrite(inode->i_mapping);
return sync_mapping_buffers(inode->i_mapping);
}
static int ocfs2_init_file_private(struct inode *inode, struct file *file)
{
struct ocfs2_file_private *fp;
@@ -180,16 +174,12 @@ static int ocfs2_sync_file(struct file *file, int datasync)
{
int err = 0;
journal_t *journal;
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = file->f_mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync,
dentry->d_name.len, dentry->d_name.name);
err = ocfs2_sync_inode(dentry->d_inode);
if (err)
goto bail;
mlog_entry("(0x%p, %d, 0x%p, '%.*s')\n", file, datasync,
file->f_path.dentry, file->f_path.dentry->d_name.len,
file->f_path.dentry->d_name.name);
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
/*
@@ -370,7 +360,7 @@ static int ocfs2_cow_file_pos(struct inode *inode,
if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
goto out;
return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1);
out:
return status;
@@ -913,8 +903,8 @@ static int ocfs2_zero_extend_get_range(struct inode *inode,
zero_clusters = last_cpos - zero_cpos;
if (needs_cow) {
rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
UINT_MAX);
rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos,
zero_clusters, UINT_MAX);
if (rc) {
mlog_errno(rc);
goto out;
@@ -2062,6 +2052,7 @@ out:
}
static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
struct file *file,
loff_t pos, size_t count,
int *meta_level)
{
@@ -2079,7 +2070,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
*meta_level = 1;
ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX);
if (ret)
mlog_errno(ret);
out:
@@ -2087,7 +2078,7 @@ out:
return ret;
}
static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
static int ocfs2_prepare_inode_for_write(struct file *file,
loff_t *ppos,
size_t count,
int appending,
@@ -2095,6 +2086,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
int *has_refcount)
{
int ret = 0, meta_level = 0;
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
loff_t saved_pos, end;
@@ -2150,6 +2142,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
meta_level = -1;
ret = ocfs2_prepare_inode_for_refcount(inode,
file,
saved_pos,
count,
&meta_level);
@@ -2232,6 +2225,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_path.dentry->d_inode;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
int full_coherency = !(osb->s_mount_opt &
OCFS2_MOUNT_COHERENCY_BUFFERED);
mlog_entry("(0x%p, %u, '%.*s')\n", file,
(unsigned int)nr_segs,
@@ -2255,16 +2250,39 @@ relock:
have_alloc_sem = 1;
}
/* concurrent O_DIRECT writes are allowed */
rw_level = !direct_io;
/*
* Concurrent O_DIRECT writes are allowed with
* mount_option "coherency=buffered".
*/
rw_level = (!direct_io || full_coherency);
ret = ocfs2_rw_lock(inode, rw_level);
if (ret < 0) {
mlog_errno(ret);
goto out_sems;
}
/*
* O_DIRECT writes with "coherency=full" need to take EX cluster
* inode_lock to guarantee coherency.
*/
if (direct_io && full_coherency) {
/*
* We need to take and drop the inode lock to force
* other nodes to drop their caches. Buffered I/O
* already does this in write_begin().
*/
ret = ocfs2_inode_lock(inode, NULL, 1);
if (ret < 0) {
mlog_errno(ret);
goto out_sems;
}
ocfs2_inode_unlock(inode, 1);
}
can_do_direct = direct_io;
ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,
ret = ocfs2_prepare_inode_for_write(file, ppos,
iocb->ki_left, appending,
&can_do_direct, &has_refcount);
if (ret < 0) {
@@ -2312,17 +2330,6 @@ relock:
written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
ppos, count, ocount);
if (written < 0) {
/*
* direct write may have instantiated a few
* blocks outside i_size. Trim these off again.
* Don't need i_size_read because we hold i_mutex.
*
* XXX(truncate): this looks buggy because ocfs2 did not
* actually implement ->truncate. Take a look at
* the new truncate sequence and update this accordingly
*/
if (*ppos + count > inode->i_size)
truncate_setsize(inode, inode->i_size);
ret = written;
goto out_dio;
}
@@ -2394,7 +2401,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
{
int ret;
ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
ret = ocfs2_prepare_inode_for_write(out, &sd->pos,
sd->total_len, 0, NULL, NULL);
if (ret < 0) {
mlog_errno(ret);

Some files were not shown because too many files have changed in this diff Show More