You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge branch 'for-4.5/drivers' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
"This is the block driver pull request for 4.5, with the exception of
NVMe, which is in a separate branch and will be posted after this one.
This pull request contains:
- A set of bcache stability fixes, which have been acked by Kent.
These have been used and tested for more than a year by the
community, so it's about time that they got in.
- A set of drbd updates from the drbd team (Andreas, Lars, Philipp)
and Markus Elfring, Oleg Drokin.
- A set of fixes for xen blkback/front from the usual suspects, (Bob,
Konrad) as well as community based fixes from Kiri, Julien, and
Peng.
- A 2038 time fix for sx8 from Shraddha, with a fix from me.
- A small mtip32xx cleanup from Zhu Yanjun.
- A null_blk division fix from Arnd"
* 'for-4.5/drivers' of git://git.kernel.dk/linux-block: (71 commits)
null_blk: use sector_div instead of do_div
mtip32xx: restrict variables visible in current code module
xen/blkfront: Fix crash if backend doesn't follow the right states.
xen/blkback: Fix two memory leaks.
xen/blkback: make st_ statistics per ring
xen/blkfront: Handle non-indirect grant with 64KB pages
xen-blkfront: Introduce blkif_ring_get_request
xen-blkback: clear PF_NOFREEZE for xen_blkif_schedule()
xen/blkback: Free resources if connect_ring failed.
xen/blocks: Return -EXX instead of -1
xen/blkback: make pool of persistent grants and free pages per-queue
xen/blkback: get the number of hardware queues/rings from blkfront
xen/blkback: pseudo support for multi hardware queues/rings
xen/blkback: separate ring information out of struct xen_blkif
xen/blkfront: correct setting for xen_blkif_max_ring_order
xen/blkfront: make persistent grants pool per-queue
xen/blkfront: Remove duplicate setting of ->xbdev.
xen/blkfront: Cleanup of comments, fix unaligned variables, and syntax errors.
xen/blkfront: negotiate number of queues/rings to be used with backend
xen/blkfront: split per device io_lock
...
This commit is contained in:
+5
-6
@@ -3665,13 +3665,12 @@ F: drivers/scsi/dpt*
|
||||
F: drivers/scsi/dpt/
|
||||
|
||||
DRBD DRIVER
|
||||
P: Philipp Reisner
|
||||
P: Lars Ellenberg
|
||||
M: drbd-dev@lists.linbit.com
|
||||
L: drbd-user@lists.linbit.com
|
||||
M: Philipp Reisner <philipp.reisner@linbit.com>
|
||||
M: Lars Ellenberg <lars.ellenberg@linbit.com>
|
||||
L: drbd-dev@lists.linbit.com
|
||||
W: http://www.drbd.org
|
||||
T: git git://git.drbd.org/linux-2.6-drbd.git drbd
|
||||
T: git git://git.drbd.org/drbd-8.3.git
|
||||
T: git git://git.linbit.com/linux-drbd.git
|
||||
T: git git://git.linbit.com/drbd-8.4.git
|
||||
S: Supported
|
||||
F: drivers/block/drbd/
|
||||
F: lib/lru_cache.c
|
||||
|
||||
+167
-156
@@ -288,7 +288,162 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *
|
||||
return need_transaction;
|
||||
}
|
||||
|
||||
static int al_write_transaction(struct drbd_device *device);
|
||||
#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
|
||||
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
|
||||
* are still coupled, or assume too much about their relation.
|
||||
* Code below will not work if this is violated.
|
||||
* Will be cleaned up with some followup patch.
|
||||
*/
|
||||
# error FIXME
|
||||
#endif
|
||||
|
||||
static unsigned int al_extent_to_bm_page(unsigned int al_enr)
|
||||
{
|
||||
return al_enr >>
|
||||
/* bit to page */
|
||||
((PAGE_SHIFT + 3) -
|
||||
/* al extent number to bit */
|
||||
(AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
|
||||
}
|
||||
|
||||
static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
|
||||
{
|
||||
const unsigned int stripes = device->ldev->md.al_stripes;
|
||||
const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;
|
||||
|
||||
/* transaction number, modulo on-disk ring buffer wrap around */
|
||||
unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);
|
||||
|
||||
/* ... to aligned 4k on disk block */
|
||||
t = ((t % stripes) * stripe_size_4kB) + t/stripes;
|
||||
|
||||
/* ... to 512 byte sector in activity log */
|
||||
t *= 8;
|
||||
|
||||
/* ... plus offset to the on disk position */
|
||||
return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
|
||||
}
|
||||
|
||||
static int __al_write_transaction(struct drbd_device *device, struct al_transaction_on_disk *buffer)
|
||||
{
|
||||
struct lc_element *e;
|
||||
sector_t sector;
|
||||
int i, mx;
|
||||
unsigned extent_nr;
|
||||
unsigned crc = 0;
|
||||
int err = 0;
|
||||
|
||||
memset(buffer, 0, sizeof(*buffer));
|
||||
buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
|
||||
buffer->tr_number = cpu_to_be32(device->al_tr_number);
|
||||
|
||||
i = 0;
|
||||
|
||||
/* Even though no one can start to change this list
|
||||
* once we set the LC_LOCKED -- from drbd_al_begin_io(),
|
||||
* lc_try_lock_for_transaction() --, someone may still
|
||||
* be in the process of changing it. */
|
||||
spin_lock_irq(&device->al_lock);
|
||||
list_for_each_entry(e, &device->act_log->to_be_changed, list) {
|
||||
if (i == AL_UPDATES_PER_TRANSACTION) {
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
|
||||
buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
|
||||
if (e->lc_number != LC_FREE)
|
||||
drbd_bm_mark_for_writeout(device,
|
||||
al_extent_to_bm_page(e->lc_number));
|
||||
i++;
|
||||
}
|
||||
spin_unlock_irq(&device->al_lock);
|
||||
BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
|
||||
|
||||
buffer->n_updates = cpu_to_be16(i);
|
||||
for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
|
||||
buffer->update_slot_nr[i] = cpu_to_be16(-1);
|
||||
buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
|
||||
}
|
||||
|
||||
buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
|
||||
buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);
|
||||
|
||||
mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
|
||||
device->act_log->nr_elements - device->al_tr_cycle);
|
||||
for (i = 0; i < mx; i++) {
|
||||
unsigned idx = device->al_tr_cycle + i;
|
||||
extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
|
||||
buffer->context[i] = cpu_to_be32(extent_nr);
|
||||
}
|
||||
for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
|
||||
buffer->context[i] = cpu_to_be32(LC_FREE);
|
||||
|
||||
device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
|
||||
if (device->al_tr_cycle >= device->act_log->nr_elements)
|
||||
device->al_tr_cycle = 0;
|
||||
|
||||
sector = al_tr_number_to_on_disk_sector(device);
|
||||
|
||||
crc = crc32c(0, buffer, 4096);
|
||||
buffer->crc32c = cpu_to_be32(crc);
|
||||
|
||||
if (drbd_bm_write_hinted(device))
|
||||
err = -EIO;
|
||||
else {
|
||||
bool write_al_updates;
|
||||
rcu_read_lock();
|
||||
write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
|
||||
rcu_read_unlock();
|
||||
if (write_al_updates) {
|
||||
if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
|
||||
err = -EIO;
|
||||
drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
|
||||
} else {
|
||||
device->al_tr_number++;
|
||||
device->al_writ_cnt++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int al_write_transaction(struct drbd_device *device)
|
||||
{
|
||||
struct al_transaction_on_disk *buffer;
|
||||
int err;
|
||||
|
||||
if (!get_ldev(device)) {
|
||||
drbd_err(device, "disk is %s, cannot start al transaction\n",
|
||||
drbd_disk_str(device->state.disk));
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* The bitmap write may have failed, causing a state change. */
|
||||
if (device->state.disk < D_INCONSISTENT) {
|
||||
drbd_err(device,
|
||||
"disk is %s, cannot write al transaction\n",
|
||||
drbd_disk_str(device->state.disk));
|
||||
put_ldev(device);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* protects md_io_buffer, al_tr_cycle, ... */
|
||||
buffer = drbd_md_get_buffer(device, __func__);
|
||||
if (!buffer) {
|
||||
drbd_err(device, "disk failed while waiting for md_io buffer\n");
|
||||
put_ldev(device);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
err = __al_write_transaction(device, buffer);
|
||||
|
||||
drbd_md_put_buffer(device);
|
||||
put_ldev(device);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
void drbd_al_begin_io_commit(struct drbd_device *device)
|
||||
{
|
||||
@@ -420,153 +575,6 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
|
||||
wake_up(&device->al_wait);
|
||||
}
|
||||
|
||||
#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
|
||||
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
|
||||
* are still coupled, or assume too much about their relation.
|
||||
* Code below will not work if this is violated.
|
||||
* Will be cleaned up with some followup patch.
|
||||
*/
|
||||
# error FIXME
|
||||
#endif
|
||||
|
||||
static unsigned int al_extent_to_bm_page(unsigned int al_enr)
|
||||
{
|
||||
return al_enr >>
|
||||
/* bit to page */
|
||||
((PAGE_SHIFT + 3) -
|
||||
/* al extent number to bit */
|
||||
(AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
|
||||
}
|
||||
|
||||
static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
|
||||
{
|
||||
const unsigned int stripes = device->ldev->md.al_stripes;
|
||||
const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;
|
||||
|
||||
/* transaction number, modulo on-disk ring buffer wrap around */
|
||||
unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);
|
||||
|
||||
/* ... to aligned 4k on disk block */
|
||||
t = ((t % stripes) * stripe_size_4kB) + t/stripes;
|
||||
|
||||
/* ... to 512 byte sector in activity log */
|
||||
t *= 8;
|
||||
|
||||
/* ... plus offset to the on disk position */
|
||||
return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
|
||||
}
|
||||
|
||||
int al_write_transaction(struct drbd_device *device)
|
||||
{
|
||||
struct al_transaction_on_disk *buffer;
|
||||
struct lc_element *e;
|
||||
sector_t sector;
|
||||
int i, mx;
|
||||
unsigned extent_nr;
|
||||
unsigned crc = 0;
|
||||
int err = 0;
|
||||
|
||||
if (!get_ldev(device)) {
|
||||
drbd_err(device, "disk is %s, cannot start al transaction\n",
|
||||
drbd_disk_str(device->state.disk));
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* The bitmap write may have failed, causing a state change. */
|
||||
if (device->state.disk < D_INCONSISTENT) {
|
||||
drbd_err(device,
|
||||
"disk is %s, cannot write al transaction\n",
|
||||
drbd_disk_str(device->state.disk));
|
||||
put_ldev(device);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* protects md_io_buffer, al_tr_cycle, ... */
|
||||
buffer = drbd_md_get_buffer(device, __func__);
|
||||
if (!buffer) {
|
||||
drbd_err(device, "disk failed while waiting for md_io buffer\n");
|
||||
put_ldev(device);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
memset(buffer, 0, sizeof(*buffer));
|
||||
buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
|
||||
buffer->tr_number = cpu_to_be32(device->al_tr_number);
|
||||
|
||||
i = 0;
|
||||
|
||||
/* Even though no one can start to change this list
|
||||
* once we set the LC_LOCKED -- from drbd_al_begin_io(),
|
||||
* lc_try_lock_for_transaction() --, someone may still
|
||||
* be in the process of changing it. */
|
||||
spin_lock_irq(&device->al_lock);
|
||||
list_for_each_entry(e, &device->act_log->to_be_changed, list) {
|
||||
if (i == AL_UPDATES_PER_TRANSACTION) {
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
|
||||
buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
|
||||
if (e->lc_number != LC_FREE)
|
||||
drbd_bm_mark_for_writeout(device,
|
||||
al_extent_to_bm_page(e->lc_number));
|
||||
i++;
|
||||
}
|
||||
spin_unlock_irq(&device->al_lock);
|
||||
BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
|
||||
|
||||
buffer->n_updates = cpu_to_be16(i);
|
||||
for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
|
||||
buffer->update_slot_nr[i] = cpu_to_be16(-1);
|
||||
buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
|
||||
}
|
||||
|
||||
buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
|
||||
buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);
|
||||
|
||||
mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
|
||||
device->act_log->nr_elements - device->al_tr_cycle);
|
||||
for (i = 0; i < mx; i++) {
|
||||
unsigned idx = device->al_tr_cycle + i;
|
||||
extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
|
||||
buffer->context[i] = cpu_to_be32(extent_nr);
|
||||
}
|
||||
for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
|
||||
buffer->context[i] = cpu_to_be32(LC_FREE);
|
||||
|
||||
device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
|
||||
if (device->al_tr_cycle >= device->act_log->nr_elements)
|
||||
device->al_tr_cycle = 0;
|
||||
|
||||
sector = al_tr_number_to_on_disk_sector(device);
|
||||
|
||||
crc = crc32c(0, buffer, 4096);
|
||||
buffer->crc32c = cpu_to_be32(crc);
|
||||
|
||||
if (drbd_bm_write_hinted(device))
|
||||
err = -EIO;
|
||||
else {
|
||||
bool write_al_updates;
|
||||
rcu_read_lock();
|
||||
write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
|
||||
rcu_read_unlock();
|
||||
if (write_al_updates) {
|
||||
if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
|
||||
err = -EIO;
|
||||
drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
|
||||
} else {
|
||||
device->al_tr_number++;
|
||||
device->al_writ_cnt++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
drbd_md_put_buffer(device);
|
||||
put_ldev(device);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
|
||||
{
|
||||
int rv;
|
||||
@@ -606,21 +614,24 @@ void drbd_al_shrink(struct drbd_device *device)
|
||||
wake_up(&device->al_wait);
|
||||
}
|
||||
|
||||
int drbd_initialize_al(struct drbd_device *device, void *buffer)
|
||||
int drbd_al_initialize(struct drbd_device *device, void *buffer)
|
||||
{
|
||||
struct al_transaction_on_disk *al = buffer;
|
||||
struct drbd_md *md = &device->ldev->md;
|
||||
sector_t al_base = md->md_offset + md->al_offset;
|
||||
int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
|
||||
int i;
|
||||
|
||||
memset(al, 0, 4096);
|
||||
al->magic = cpu_to_be32(DRBD_AL_MAGIC);
|
||||
al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
|
||||
al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
|
||||
__al_write_transaction(device, al);
|
||||
/* There may or may not have been a pending transaction. */
|
||||
spin_lock_irq(&device->al_lock);
|
||||
lc_committed(device->act_log);
|
||||
spin_unlock_irq(&device->al_lock);
|
||||
|
||||
for (i = 0; i < al_size_4k; i++) {
|
||||
int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE);
|
||||
/* The rest of the transactions will have an empty "updates" list, and
|
||||
* are written out only to provide the context, and to initialize the
|
||||
* on-disk ring buffer. */
|
||||
for (i = 1; i < al_size_4k; i++) {
|
||||
int err = __al_write_transaction(device, al);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/drbd.h>
|
||||
@@ -479,8 +479,14 @@ void drbd_bm_cleanup(struct drbd_device *device)
|
||||
* this masks out the remaining bits.
|
||||
* Returns the number of bits cleared.
|
||||
*/
|
||||
#ifndef BITS_PER_PAGE
|
||||
#define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3))
|
||||
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1)
|
||||
#else
|
||||
# if BITS_PER_PAGE != (1UL << (PAGE_SHIFT + 3))
|
||||
# error "ambiguous BITS_PER_PAGE"
|
||||
# endif
|
||||
#endif
|
||||
#define BITS_PER_LONG_MASK (BITS_PER_LONG - 1)
|
||||
static int bm_clear_surplus(struct drbd_bitmap *b)
|
||||
{
|
||||
@@ -559,21 +565,19 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
|
||||
unsigned long *p_addr;
|
||||
unsigned long bits = 0;
|
||||
unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1;
|
||||
int idx, i, last_word;
|
||||
int idx, last_word;
|
||||
|
||||
/* all but last page */
|
||||
for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
|
||||
p_addr = __bm_map_pidx(b, idx);
|
||||
for (i = 0; i < LWPP; i++)
|
||||
bits += hweight_long(p_addr[i]);
|
||||
bits += bitmap_weight(p_addr, BITS_PER_PAGE);
|
||||
__bm_unmap(p_addr);
|
||||
cond_resched();
|
||||
}
|
||||
/* last (or only) page */
|
||||
last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
|
||||
p_addr = __bm_map_pidx(b, idx);
|
||||
for (i = 0; i < last_word; i++)
|
||||
bits += hweight_long(p_addr[i]);
|
||||
bits += bitmap_weight(p_addr, last_word * BITS_PER_LONG);
|
||||
p_addr[last_word] &= cpu_to_lel(mask);
|
||||
bits += hweight_long(p_addr[last_word]);
|
||||
/* 32bit arch, may have an unused padding long */
|
||||
@@ -1419,6 +1423,9 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
|
||||
int bits;
|
||||
int changed = 0;
|
||||
unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
|
||||
|
||||
/* I think it is more cache line friendly to hweight_long then set to ~0UL,
|
||||
* than to first bitmap_weight() all words, then bitmap_fill() all words */
|
||||
for (i = first_word; i < last_word; i++) {
|
||||
bits = hweight_long(paddr[i]);
|
||||
paddr[i] = ~0UL;
|
||||
@@ -1628,8 +1635,7 @@ int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr)
|
||||
int n = e-s;
|
||||
p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
|
||||
bm = p_addr + MLPP(s);
|
||||
while (n--)
|
||||
count += hweight_long(*bm++);
|
||||
count += bitmap_weight(bm, n * BITS_PER_LONG);
|
||||
bm_unmap(p_addr);
|
||||
} else {
|
||||
drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s);
|
||||
|
||||
@@ -771,6 +771,13 @@ static int device_data_gen_id_show(struct seq_file *m, void *ignored)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int device_ed_gen_id_show(struct seq_file *m, void *ignored)
|
||||
{
|
||||
struct drbd_device *device = m->private;
|
||||
seq_printf(m, "0x%016llX\n", (unsigned long long)device->ed_uuid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define drbd_debugfs_device_attr(name) \
|
||||
static int device_ ## name ## _open(struct inode *inode, struct file *file) \
|
||||
{ \
|
||||
@@ -796,6 +803,7 @@ drbd_debugfs_device_attr(oldest_requests)
|
||||
drbd_debugfs_device_attr(act_log_extents)
|
||||
drbd_debugfs_device_attr(resync_extents)
|
||||
drbd_debugfs_device_attr(data_gen_id)
|
||||
drbd_debugfs_device_attr(ed_gen_id)
|
||||
|
||||
void drbd_debugfs_device_add(struct drbd_device *device)
|
||||
{
|
||||
@@ -839,6 +847,7 @@ void drbd_debugfs_device_add(struct drbd_device *device)
|
||||
DCF(act_log_extents);
|
||||
DCF(resync_extents);
|
||||
DCF(data_gen_id);
|
||||
DCF(ed_gen_id);
|
||||
#undef DCF
|
||||
return;
|
||||
|
||||
@@ -854,6 +863,7 @@ void drbd_debugfs_device_cleanup(struct drbd_device *device)
|
||||
drbd_debugfs_remove(&device->debugfs_vol_act_log_extents);
|
||||
drbd_debugfs_remove(&device->debugfs_vol_resync_extents);
|
||||
drbd_debugfs_remove(&device->debugfs_vol_data_gen_id);
|
||||
drbd_debugfs_remove(&device->debugfs_vol_ed_gen_id);
|
||||
drbd_debugfs_remove(&device->debugfs_vol);
|
||||
}
|
||||
|
||||
|
||||
@@ -77,13 +77,6 @@ extern int fault_devs;
|
||||
extern char usermode_helper[];
|
||||
|
||||
|
||||
/* I don't remember why XCPU ...
|
||||
* This is used to wake the asender,
|
||||
* and to interrupt sending the sending task
|
||||
* on disconnect.
|
||||
*/
|
||||
#define DRBD_SIG SIGXCPU
|
||||
|
||||
/* This is used to stop/restart our threads.
|
||||
* Cannot use SIGTERM nor SIGKILL, since these
|
||||
* are sent out by init on runlevel changes
|
||||
@@ -292,6 +285,9 @@ struct drbd_device_work {
|
||||
|
||||
extern int drbd_wait_misc(struct drbd_device *, struct drbd_interval *);
|
||||
|
||||
extern void lock_all_resources(void);
|
||||
extern void unlock_all_resources(void);
|
||||
|
||||
struct drbd_request {
|
||||
struct drbd_work w;
|
||||
struct drbd_device *device;
|
||||
@@ -504,7 +500,6 @@ enum {
|
||||
|
||||
MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
|
||||
|
||||
SUSPEND_IO, /* suspend application io */
|
||||
BITMAP_IO, /* suspend application io;
|
||||
once no more io in flight, start bitmap io */
|
||||
BITMAP_IO_QUEUED, /* Started bitmap IO */
|
||||
@@ -632,12 +627,6 @@ struct bm_io_work {
|
||||
void (*done)(struct drbd_device *device, int rv);
|
||||
};
|
||||
|
||||
enum write_ordering_e {
|
||||
WO_none,
|
||||
WO_drain_io,
|
||||
WO_bdev_flush,
|
||||
};
|
||||
|
||||
struct fifo_buffer {
|
||||
unsigned int head_index;
|
||||
unsigned int size;
|
||||
@@ -650,8 +639,7 @@ extern struct fifo_buffer *fifo_alloc(int fifo_size);
|
||||
enum {
|
||||
NET_CONGESTED, /* The data socket is congested */
|
||||
RESOLVE_CONFLICTS, /* Set on one node, cleared on the peer! */
|
||||
SEND_PING, /* whether asender should send a ping asap */
|
||||
SIGNAL_ASENDER, /* whether asender wants to be interrupted */
|
||||
SEND_PING,
|
||||
GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */
|
||||
CONN_WD_ST_CHG_REQ, /* A cluster wide state change on the connection is active */
|
||||
CONN_WD_ST_CHG_OKAY,
|
||||
@@ -670,6 +658,8 @@ enum {
|
||||
DEVICE_WORK_PENDING, /* tell worker that some device has pending work */
|
||||
};
|
||||
|
||||
enum which_state { NOW, OLD = NOW, NEW };
|
||||
|
||||
struct drbd_resource {
|
||||
char *name;
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
@@ -755,7 +745,8 @@ struct drbd_connection {
|
||||
unsigned long last_reconnect_jif;
|
||||
struct drbd_thread receiver;
|
||||
struct drbd_thread worker;
|
||||
struct drbd_thread asender;
|
||||
struct drbd_thread ack_receiver;
|
||||
struct workqueue_struct *ack_sender;
|
||||
|
||||
/* cached pointers,
|
||||
* so we can look up the oldest pending requests more quickly.
|
||||
@@ -774,6 +765,8 @@ struct drbd_connection {
|
||||
struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST];
|
||||
|
||||
struct {
|
||||
unsigned long last_sent_barrier_jif;
|
||||
|
||||
/* whether this sender thread
|
||||
* has processed a single write yet. */
|
||||
bool seen_any_write_yet;
|
||||
@@ -788,6 +781,17 @@ struct drbd_connection {
|
||||
} send;
|
||||
};
|
||||
|
||||
static inline bool has_net_conf(struct drbd_connection *connection)
|
||||
{
|
||||
bool has_net_conf;
|
||||
|
||||
rcu_read_lock();
|
||||
has_net_conf = rcu_dereference(connection->net_conf);
|
||||
rcu_read_unlock();
|
||||
|
||||
return has_net_conf;
|
||||
}
|
||||
|
||||
void __update_timing_details(
|
||||
struct drbd_thread_timing_details *tdp,
|
||||
unsigned int *cb_nr,
|
||||
@@ -811,6 +815,7 @@ struct drbd_peer_device {
|
||||
struct list_head peer_devices;
|
||||
struct drbd_device *device;
|
||||
struct drbd_connection *connection;
|
||||
struct work_struct send_acks_work;
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
struct dentry *debugfs_peer_dev;
|
||||
#endif
|
||||
@@ -829,6 +834,7 @@ struct drbd_device {
|
||||
struct dentry *debugfs_vol_act_log_extents;
|
||||
struct dentry *debugfs_vol_resync_extents;
|
||||
struct dentry *debugfs_vol_data_gen_id;
|
||||
struct dentry *debugfs_vol_ed_gen_id;
|
||||
#endif
|
||||
|
||||
unsigned int vnr; /* volume number within the connection */
|
||||
@@ -873,6 +879,7 @@ struct drbd_device {
|
||||
atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
|
||||
atomic_t unacked_cnt; /* Need to send replies for */
|
||||
atomic_t local_cnt; /* Waiting for local completion */
|
||||
atomic_t suspend_cnt;
|
||||
|
||||
/* Interval tree of pending local requests */
|
||||
struct rb_root read_requests;
|
||||
@@ -1020,6 +1027,12 @@ static inline struct drbd_peer_device *first_peer_device(struct drbd_device *dev
|
||||
return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices);
|
||||
}
|
||||
|
||||
static inline struct drbd_peer_device *
|
||||
conn_peer_device(struct drbd_connection *connection, int volume_number)
|
||||
{
|
||||
return idr_find(&connection->peer_devices, volume_number);
|
||||
}
|
||||
|
||||
#define for_each_resource(resource, _resources) \
|
||||
list_for_each_entry(resource, _resources, resources)
|
||||
|
||||
@@ -1113,7 +1126,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int
|
||||
extern int drbd_send_bitmap(struct drbd_device *device);
|
||||
extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
|
||||
extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
|
||||
extern void drbd_free_ldev(struct drbd_backing_dev *ldev);
|
||||
extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev);
|
||||
extern void drbd_device_cleanup(struct drbd_device *device);
|
||||
void drbd_print_uuids(struct drbd_device *device, const char *text);
|
||||
|
||||
@@ -1424,7 +1437,7 @@ extern struct bio_set *drbd_md_io_bio_set;
|
||||
/* to allocate from that set */
|
||||
extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
|
||||
|
||||
extern rwlock_t global_state_lock;
|
||||
extern struct mutex resources_mutex;
|
||||
|
||||
extern int conn_lowest_minor(struct drbd_connection *connection);
|
||||
extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
|
||||
@@ -1454,6 +1467,9 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
|
||||
|
||||
|
||||
/* drbd_nl.c */
|
||||
|
||||
extern struct mutex notification_mutex;
|
||||
|
||||
extern void drbd_suspend_io(struct drbd_device *device);
|
||||
extern void drbd_resume_io(struct drbd_device *device);
|
||||
extern char *ppsize(char *buf, unsigned long long size);
|
||||
@@ -1536,7 +1552,9 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
|
||||
|
||||
/* drbd_receiver.c */
|
||||
extern int drbd_receiver(struct drbd_thread *thi);
|
||||
extern int drbd_asender(struct drbd_thread *thi);
|
||||
extern int drbd_ack_receiver(struct drbd_thread *thi);
|
||||
extern void drbd_send_ping_wf(struct work_struct *ws);
|
||||
extern void drbd_send_acks_wf(struct work_struct *ws);
|
||||
extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
|
||||
extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
|
||||
bool throttle_if_app_is_waiting);
|
||||
@@ -1649,7 +1667,7 @@ extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int s
|
||||
#define drbd_rs_failed_io(device, sector, size) \
|
||||
__drbd_change_sync(device, sector, size, RECORD_RS_FAILED)
|
||||
extern void drbd_al_shrink(struct drbd_device *device);
|
||||
extern int drbd_initialize_al(struct drbd_device *, void *);
|
||||
extern int drbd_al_initialize(struct drbd_device *, void *);
|
||||
|
||||
/* drbd_nl.c */
|
||||
/* state info broadcast */
|
||||
@@ -1668,6 +1686,29 @@ struct sib_info {
|
||||
};
|
||||
void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib);
|
||||
|
||||
extern void notify_resource_state(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_resource *,
|
||||
struct resource_info *,
|
||||
enum drbd_notification_type);
|
||||
extern void notify_device_state(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_device *,
|
||||
struct device_info *,
|
||||
enum drbd_notification_type);
|
||||
extern void notify_connection_state(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_connection *,
|
||||
struct connection_info *,
|
||||
enum drbd_notification_type);
|
||||
extern void notify_peer_device_state(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_peer_device *,
|
||||
struct peer_device_info *,
|
||||
enum drbd_notification_type);
|
||||
extern void notify_helper(enum drbd_notification_type, struct drbd_device *,
|
||||
struct drbd_connection *, const char *, int);
|
||||
|
||||
/*
|
||||
* inline helper functions
|
||||
*************************/
|
||||
@@ -1694,19 +1735,6 @@ static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_r
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline enum drbd_state_rv
|
||||
_drbd_set_state(struct drbd_device *device, union drbd_state ns,
|
||||
enum chg_state_flags flags, struct completion *done)
|
||||
{
|
||||
enum drbd_state_rv rv;
|
||||
|
||||
read_lock(&global_state_lock);
|
||||
rv = __drbd_set_state(device, ns, flags, done);
|
||||
read_unlock(&global_state_lock);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static inline union drbd_state drbd_read_state(struct drbd_device *device)
|
||||
{
|
||||
struct drbd_resource *resource = device->resource;
|
||||
@@ -1937,16 +1965,21 @@ drbd_device_post_work(struct drbd_device *device, int work_bit)
|
||||
|
||||
extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue);
|
||||
|
||||
static inline void wake_asender(struct drbd_connection *connection)
|
||||
/* To get the ack_receiver out of the blocking network stack,
|
||||
* so it can change its sk_rcvtimeo from idle- to ping-timeout,
|
||||
* and send a ping, we need to send a signal.
|
||||
* Which signal we send is irrelevant. */
|
||||
static inline void wake_ack_receiver(struct drbd_connection *connection)
|
||||
{
|
||||
if (test_bit(SIGNAL_ASENDER, &connection->flags))
|
||||
force_sig(DRBD_SIG, connection->asender.task);
|
||||
struct task_struct *task = connection->ack_receiver.task;
|
||||
if (task && get_t_state(&connection->ack_receiver) == RUNNING)
|
||||
force_sig(SIGXCPU, task);
|
||||
}
|
||||
|
||||
static inline void request_ping(struct drbd_connection *connection)
|
||||
{
|
||||
set_bit(SEND_PING, &connection->flags);
|
||||
wake_asender(connection);
|
||||
wake_ack_receiver(connection);
|
||||
}
|
||||
|
||||
extern void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *);
|
||||
@@ -2230,7 +2263,7 @@ static inline bool may_inc_ap_bio(struct drbd_device *device)
|
||||
|
||||
if (drbd_suspended(device))
|
||||
return false;
|
||||
if (test_bit(SUSPEND_IO, &device->flags))
|
||||
if (atomic_read(&device->suspend_cnt))
|
||||
return false;
|
||||
|
||||
/* to avoid potential deadlock or bitmap corruption,
|
||||
|
||||
@@ -117,6 +117,7 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0
|
||||
*/
|
||||
struct idr drbd_devices;
|
||||
struct list_head drbd_resources;
|
||||
struct mutex resources_mutex;
|
||||
|
||||
struct kmem_cache *drbd_request_cache;
|
||||
struct kmem_cache *drbd_ee_cache; /* peer requests */
|
||||
@@ -1435,8 +1436,8 @@ static int we_should_drop_the_connection(struct drbd_connection *connection, str
|
||||
/* long elapsed = (long)(jiffies - device->last_received); */
|
||||
|
||||
drop_it = connection->meta.socket == sock
|
||||
|| !connection->asender.task
|
||||
|| get_t_state(&connection->asender) != RUNNING
|
||||
|| !connection->ack_receiver.task
|
||||
|| get_t_state(&connection->ack_receiver) != RUNNING
|
||||
|| connection->cstate < C_WF_REPORT_PARAMS;
|
||||
|
||||
if (drop_it)
|
||||
@@ -1793,15 +1794,6 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
|
||||
drbd_update_congested(connection);
|
||||
}
|
||||
do {
|
||||
/* STRANGE
|
||||
* tcp_sendmsg does _not_ use its size parameter at all ?
|
||||
*
|
||||
* -EAGAIN on timeout, -EINTR on signal.
|
||||
*/
|
||||
/* THINK
|
||||
* do we need to block DRBD_SIG if sock == &meta.socket ??
|
||||
* otherwise wake_asender() might interrupt some send_*Ack !
|
||||
*/
|
||||
rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
|
||||
if (rv == -EAGAIN) {
|
||||
if (we_should_drop_the_connection(connection, sock))
|
||||
@@ -2000,7 +1992,7 @@ void drbd_device_cleanup(struct drbd_device *device)
|
||||
drbd_bm_cleanup(device);
|
||||
}
|
||||
|
||||
drbd_free_ldev(device->ldev);
|
||||
drbd_backing_dev_free(device, device->ldev);
|
||||
device->ldev = NULL;
|
||||
|
||||
clear_bit(AL_SUSPENDED, &device->flags);
|
||||
@@ -2179,7 +2171,7 @@ void drbd_destroy_device(struct kref *kref)
|
||||
if (device->this_bdev)
|
||||
bdput(device->this_bdev);
|
||||
|
||||
drbd_free_ldev(device->ldev);
|
||||
drbd_backing_dev_free(device, device->ldev);
|
||||
device->ldev = NULL;
|
||||
|
||||
drbd_release_all_peer_reqs(device);
|
||||
@@ -2563,7 +2555,7 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op
|
||||
cpumask_copy(resource->cpu_mask, new_cpu_mask);
|
||||
for_each_connection_rcu(connection, resource) {
|
||||
connection->receiver.reset_cpu_mask = 1;
|
||||
connection->asender.reset_cpu_mask = 1;
|
||||
connection->ack_receiver.reset_cpu_mask = 1;
|
||||
connection->worker.reset_cpu_mask = 1;
|
||||
}
|
||||
}
|
||||
@@ -2590,7 +2582,7 @@ struct drbd_resource *drbd_create_resource(const char *name)
|
||||
kref_init(&resource->kref);
|
||||
idr_init(&resource->devices);
|
||||
INIT_LIST_HEAD(&resource->connections);
|
||||
resource->write_ordering = WO_bdev_flush;
|
||||
resource->write_ordering = WO_BDEV_FLUSH;
|
||||
list_add_tail_rcu(&resource->resources, &drbd_resources);
|
||||
mutex_init(&resource->conf_update);
|
||||
mutex_init(&resource->adm_mutex);
|
||||
@@ -2652,8 +2644,8 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
|
||||
connection->receiver.connection = connection;
|
||||
drbd_thread_init(resource, &connection->worker, drbd_worker, "worker");
|
||||
connection->worker.connection = connection;
|
||||
drbd_thread_init(resource, &connection->asender, drbd_asender, "asender");
|
||||
connection->asender.connection = connection;
|
||||
drbd_thread_init(resource, &connection->ack_receiver, drbd_ack_receiver, "ack_recv");
|
||||
connection->ack_receiver.connection = connection;
|
||||
|
||||
kref_init(&connection->kref);
|
||||
|
||||
@@ -2702,8 +2694,8 @@ static int init_submitter(struct drbd_device *device)
|
||||
{
|
||||
/* opencoded create_singlethread_workqueue(),
|
||||
* to be able to say "drbd%d", ..., minor */
|
||||
device->submit.wq = alloc_workqueue("drbd%u_submit",
|
||||
WQ_UNBOUND | WQ_MEM_RECLAIM, 1, device->minor);
|
||||
device->submit.wq =
|
||||
alloc_ordered_workqueue("drbd%u_submit", WQ_MEM_RECLAIM, device->minor);
|
||||
if (!device->submit.wq)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -2820,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
|
||||
goto out_idr_remove_from_resource;
|
||||
}
|
||||
kref_get(&connection->kref);
|
||||
INIT_WORK(&peer_device->send_acks_work, drbd_send_acks_wf);
|
||||
}
|
||||
|
||||
if (init_submitter(device)) {
|
||||
@@ -2923,7 +2916,7 @@ static int __init drbd_init(void)
|
||||
drbd_proc = NULL; /* play safe for drbd_cleanup */
|
||||
idr_init(&drbd_devices);
|
||||
|
||||
rwlock_init(&global_state_lock);
|
||||
mutex_init(&resources_mutex);
|
||||
INIT_LIST_HEAD(&drbd_resources);
|
||||
|
||||
err = drbd_genl_register();
|
||||
@@ -2971,18 +2964,6 @@ fail:
|
||||
return err;
|
||||
}
|
||||
|
||||
void drbd_free_ldev(struct drbd_backing_dev *ldev)
|
||||
{
|
||||
if (ldev == NULL)
|
||||
return;
|
||||
|
||||
blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
|
||||
blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
|
||||
|
||||
kfree(ldev->disk_conf);
|
||||
kfree(ldev);
|
||||
}
|
||||
|
||||
static void drbd_free_one_sock(struct drbd_socket *ds)
|
||||
{
|
||||
struct socket *s;
|
||||
@@ -3277,6 +3258,10 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
|
||||
* and read it. */
|
||||
bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx;
|
||||
bdev->md.md_offset = drbd_md_ss(bdev);
|
||||
/* Even for (flexible or indexed) external meta data,
|
||||
* initially restrict us to the 4k superblock for now.
|
||||
* Affects the paranoia out-of-range access check in drbd_md_sync_page_io(). */
|
||||
bdev->md.md_size_sect = 8;
|
||||
|
||||
if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, READ)) {
|
||||
/* NOTE: can't do normal error processing here as this is
|
||||
@@ -3578,7 +3563,9 @@ void drbd_queue_bitmap_io(struct drbd_device *device,
|
||||
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
set_bit(BITMAP_IO, &device->flags);
|
||||
if (atomic_read(&device->ap_bio_cnt) == 0) {
|
||||
/* don't wait for pending application IO if the caller indicates that
|
||||
* application IO does not conflict anyways. */
|
||||
if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) {
|
||||
if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags))
|
||||
drbd_queue_work(&first_peer_device(device)->connection->sender_work,
|
||||
&device->bm_io_work.w);
|
||||
@@ -3746,6 +3733,27 @@ int drbd_wait_misc(struct drbd_device *device, struct drbd_interval *i)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void lock_all_resources(void)
|
||||
{
|
||||
struct drbd_resource *resource;
|
||||
int __maybe_unused i = 0;
|
||||
|
||||
mutex_lock(&resources_mutex);
|
||||
local_irq_disable();
|
||||
for_each_resource(resource, &drbd_resources)
|
||||
spin_lock_nested(&resource->req_lock, i++);
|
||||
}
|
||||
|
||||
void unlock_all_resources(void)
|
||||
{
|
||||
struct drbd_resource *resource;
|
||||
|
||||
for_each_resource(resource, &drbd_resources)
|
||||
spin_unlock(&resource->req_lock);
|
||||
local_irq_enable();
|
||||
mutex_unlock(&resources_mutex);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DRBD_FAULT_INJECTION
|
||||
/* Fault insertion support including random number generator shamelessly
|
||||
* stolen from kernel/rcutorture.c */
|
||||
|
||||
+1247
-114
File diff suppressed because it is too large
Load Diff
@@ -245,9 +245,9 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
|
||||
char wp;
|
||||
|
||||
static char write_ordering_chars[] = {
|
||||
[WO_none] = 'n',
|
||||
[WO_drain_io] = 'd',
|
||||
[WO_bdev_flush] = 'f',
|
||||
[WO_NONE] = 'n',
|
||||
[WO_DRAIN_IO] = 'd',
|
||||
[WO_BDEV_FLUSH] = 'f',
|
||||
};
|
||||
|
||||
seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
|
||||
|
||||
@@ -23,7 +23,7 @@ enum drbd_packet {
|
||||
P_AUTH_RESPONSE = 0x11,
|
||||
P_STATE_CHG_REQ = 0x12,
|
||||
|
||||
/* asender (meta socket */
|
||||
/* (meta socket) */
|
||||
P_PING = 0x13,
|
||||
P_PING_ACK = 0x14,
|
||||
P_RECV_ACK = 0x15, /* Used in protocol B */
|
||||
|
||||
+143
-119
@@ -215,7 +215,7 @@ static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
|
||||
}
|
||||
}
|
||||
|
||||
static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
|
||||
static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
|
||||
{
|
||||
LIST_HEAD(reclaimed);
|
||||
struct drbd_peer_request *peer_req, *t;
|
||||
@@ -223,11 +223,30 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
reclaim_finished_net_peer_reqs(device, &reclaimed);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
|
||||
list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
|
||||
drbd_free_net_peer_req(device, peer_req);
|
||||
}
|
||||
|
||||
static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
|
||||
{
|
||||
struct drbd_peer_device *peer_device;
|
||||
int vnr;
|
||||
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
|
||||
struct drbd_device *device = peer_device->device;
|
||||
if (!atomic_read(&device->pp_in_use_by_net))
|
||||
continue;
|
||||
|
||||
kref_get(&device->kref);
|
||||
rcu_read_unlock();
|
||||
drbd_reclaim_net_peer_reqs(device);
|
||||
kref_put(&device->kref, drbd_destroy_device);
|
||||
rcu_read_lock();
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
* drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
|
||||
* @device: DRBD device.
|
||||
@@ -265,10 +284,15 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int
|
||||
if (atomic_read(&device->pp_in_use) < mxb)
|
||||
page = __drbd_alloc_pages(device, number);
|
||||
|
||||
/* Try to keep the fast path fast, but occasionally we need
|
||||
* to reclaim the pages we lent to the network stack. */
|
||||
if (page && atomic_read(&device->pp_in_use_by_net) > 512)
|
||||
drbd_reclaim_net_peer_reqs(device);
|
||||
|
||||
while (page == NULL) {
|
||||
prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
|
||||
|
||||
drbd_kick_lo_and_reclaim_net(device);
|
||||
drbd_reclaim_net_peer_reqs(device);
|
||||
|
||||
if (atomic_read(&device->pp_in_use) < mxb) {
|
||||
page = __drbd_alloc_pages(device, number);
|
||||
@@ -1099,7 +1123,15 @@ randomize:
|
||||
return 0;
|
||||
}
|
||||
|
||||
drbd_thread_start(&connection->asender);
|
||||
drbd_thread_start(&connection->ack_receiver);
|
||||
/* opencoded create_singlethread_workqueue(),
|
||||
* to be able to use format string arguments */
|
||||
connection->ack_sender =
|
||||
alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
|
||||
if (!connection->ack_sender) {
|
||||
drbd_err(connection, "Failed to create workqueue ack_sender\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
mutex_lock(&connection->resource->conf_update);
|
||||
/* The discard_my_data flag is a single-shot modifier to the next
|
||||
@@ -1178,7 +1210,7 @@ static void drbd_flush(struct drbd_connection *connection)
|
||||
struct drbd_peer_device *peer_device;
|
||||
int vnr;
|
||||
|
||||
if (connection->resource->write_ordering >= WO_bdev_flush) {
|
||||
if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
|
||||
struct drbd_device *device = peer_device->device;
|
||||
@@ -1203,7 +1235,7 @@ static void drbd_flush(struct drbd_connection *connection)
|
||||
/* would rather check on EOPNOTSUPP, but that is not reliable.
|
||||
* don't try again for ANY return value != 0
|
||||
* if (rv == -EOPNOTSUPP) */
|
||||
drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
|
||||
drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
|
||||
}
|
||||
put_ldev(device);
|
||||
kref_put(&device->kref, drbd_destroy_device);
|
||||
@@ -1299,10 +1331,10 @@ max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
|
||||
|
||||
dc = rcu_dereference(bdev->disk_conf);
|
||||
|
||||
if (wo == WO_bdev_flush && !dc->disk_flushes)
|
||||
wo = WO_drain_io;
|
||||
if (wo == WO_drain_io && !dc->disk_drain)
|
||||
wo = WO_none;
|
||||
if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
|
||||
wo = WO_DRAIN_IO;
|
||||
if (wo == WO_DRAIN_IO && !dc->disk_drain)
|
||||
wo = WO_NONE;
|
||||
|
||||
return wo;
|
||||
}
|
||||
@@ -1319,13 +1351,13 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
|
||||
enum write_ordering_e pwo;
|
||||
int vnr;
|
||||
static char *write_ordering_str[] = {
|
||||
[WO_none] = "none",
|
||||
[WO_drain_io] = "drain",
|
||||
[WO_bdev_flush] = "flush",
|
||||
[WO_NONE] = "none",
|
||||
[WO_DRAIN_IO] = "drain",
|
||||
[WO_BDEV_FLUSH] = "flush",
|
||||
};
|
||||
|
||||
pwo = resource->write_ordering;
|
||||
if (wo != WO_bdev_flush)
|
||||
if (wo != WO_BDEV_FLUSH)
|
||||
wo = min(pwo, wo);
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&resource->devices, device, vnr) {
|
||||
@@ -1343,7 +1375,7 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
|
||||
rcu_read_unlock();
|
||||
|
||||
resource->write_ordering = wo;
|
||||
if (pwo != resource->write_ordering || wo == WO_bdev_flush)
|
||||
if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
|
||||
drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
|
||||
}
|
||||
|
||||
@@ -1380,7 +1412,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
|
||||
if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
|
||||
/* wait for all pending IO completions, before we start
|
||||
* zeroing things out. */
|
||||
conn_wait_active_ee_empty(first_peer_device(device)->connection);
|
||||
conn_wait_active_ee_empty(peer_req->peer_device->connection);
|
||||
/* add it to the active list now,
|
||||
* so we can find it to present it in debugfs */
|
||||
peer_req->submit_jif = jiffies;
|
||||
@@ -1508,12 +1540,6 @@ static void conn_wait_active_ee_empty(struct drbd_connection *connection)
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static struct drbd_peer_device *
|
||||
conn_peer_device(struct drbd_connection *connection, int volume_number)
|
||||
{
|
||||
return idr_find(&connection->peer_devices, volume_number);
|
||||
}
|
||||
|
||||
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
|
||||
{
|
||||
int rv;
|
||||
@@ -1533,7 +1559,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
|
||||
* Therefore we must send the barrier_ack after the barrier request was
|
||||
* completed. */
|
||||
switch (connection->resource->write_ordering) {
|
||||
case WO_none:
|
||||
case WO_NONE:
|
||||
if (rv == FE_RECYCLED)
|
||||
return 0;
|
||||
|
||||
@@ -1546,8 +1572,8 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
|
||||
drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
|
||||
/* Fall through */
|
||||
|
||||
case WO_bdev_flush:
|
||||
case WO_drain_io:
|
||||
case WO_BDEV_FLUSH:
|
||||
case WO_DRAIN_IO:
|
||||
conn_wait_active_ee_empty(connection);
|
||||
drbd_flush(connection);
|
||||
|
||||
@@ -1752,7 +1778,7 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req
|
||||
}
|
||||
|
||||
/*
|
||||
* e_end_resync_block() is called in asender context via
|
||||
* e_end_resync_block() is called in ack_sender context via
|
||||
* drbd_finish_peer_reqs().
|
||||
*/
|
||||
static int e_end_resync_block(struct drbd_work *w, int unused)
|
||||
@@ -1926,7 +1952,7 @@ static void restart_conflicting_writes(struct drbd_device *device,
|
||||
}
|
||||
|
||||
/*
|
||||
* e_end_block() is called in asender context via drbd_finish_peer_reqs().
|
||||
* e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
|
||||
*/
|
||||
static int e_end_block(struct drbd_work *w, int cancel)
|
||||
{
|
||||
@@ -1966,7 +1992,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
|
||||
} else
|
||||
D_ASSERT(device, drbd_interval_empty(&peer_req->i));
|
||||
|
||||
drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
|
||||
drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
|
||||
|
||||
return err;
|
||||
}
|
||||
@@ -2098,7 +2124,7 @@ static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, co
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
|
||||
tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
|
||||
rcu_read_unlock();
|
||||
|
||||
if (!tp)
|
||||
@@ -2217,7 +2243,7 @@ static int handle_write_conflicts(struct drbd_device *device,
|
||||
peer_req->w.cb = superseded ? e_send_superseded :
|
||||
e_send_retry_write;
|
||||
list_add_tail(&peer_req->w.list, &device->done_ee);
|
||||
wake_asender(connection);
|
||||
queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
|
||||
|
||||
err = -ENOENT;
|
||||
goto out;
|
||||
@@ -2364,7 +2390,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
|
||||
if (dp_flags & DP_SEND_RECEIVE_ACK) {
|
||||
/* I really don't like it that the receiver thread
|
||||
* sends on the msock, but anyways */
|
||||
drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
|
||||
drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
|
||||
}
|
||||
|
||||
if (tp) {
|
||||
@@ -4056,7 +4082,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
|
||||
os = ns = drbd_read_state(device);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
|
||||
/* If some other part of the code (asender thread, timeout)
|
||||
/* If some other part of the code (ack_receiver thread, timeout)
|
||||
* already decided to close the connection again,
|
||||
* we must not "re-establish" it here. */
|
||||
if (os.conn <= C_TEAR_DOWN)
|
||||
@@ -4661,8 +4687,12 @@ static void conn_disconnect(struct drbd_connection *connection)
|
||||
*/
|
||||
conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
|
||||
|
||||
/* asender does not clean up anything. it must not interfere, either */
|
||||
drbd_thread_stop(&connection->asender);
|
||||
/* ack_receiver does not clean up anything. it must not interfere, either */
|
||||
drbd_thread_stop(&connection->ack_receiver);
|
||||
if (connection->ack_sender) {
|
||||
destroy_workqueue(connection->ack_sender);
|
||||
connection->ack_sender = NULL;
|
||||
}
|
||||
drbd_free_sock(connection);
|
||||
|
||||
rcu_read_lock();
|
||||
@@ -5431,49 +5461,39 @@ static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int connection_finish_peer_reqs(struct drbd_connection *connection)
|
||||
{
|
||||
struct drbd_peer_device *peer_device;
|
||||
int vnr, not_empty = 0;
|
||||
|
||||
do {
|
||||
clear_bit(SIGNAL_ASENDER, &connection->flags);
|
||||
flush_signals(current);
|
||||
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
|
||||
struct drbd_device *device = peer_device->device;
|
||||
kref_get(&device->kref);
|
||||
rcu_read_unlock();
|
||||
if (drbd_finish_peer_reqs(device)) {
|
||||
kref_put(&device->kref, drbd_destroy_device);
|
||||
return 1;
|
||||
}
|
||||
kref_put(&device->kref, drbd_destroy_device);
|
||||
rcu_read_lock();
|
||||
}
|
||||
set_bit(SIGNAL_ASENDER, &connection->flags);
|
||||
|
||||
spin_lock_irq(&connection->resource->req_lock);
|
||||
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
|
||||
struct drbd_device *device = peer_device->device;
|
||||
not_empty = !list_empty(&device->done_ee);
|
||||
if (not_empty)
|
||||
break;
|
||||
}
|
||||
spin_unlock_irq(&connection->resource->req_lock);
|
||||
rcu_read_unlock();
|
||||
} while (not_empty);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct asender_cmd {
|
||||
struct meta_sock_cmd {
|
||||
size_t pkt_size;
|
||||
int (*fn)(struct drbd_connection *connection, struct packet_info *);
|
||||
};
|
||||
|
||||
static struct asender_cmd asender_tbl[] = {
|
||||
/*
 * Set the receive timeout of the meta socket from the net_conf.
 *
 * @ping_timeout: true selects nc->ping_timeo, scaled by HZ / 10;
 *                false selects nc->ping_int, scaled by HZ.
 *
 * The net_conf is only dereferenced inside the RCU read side; the scaling
 * and the store to sk_rcvtimeo happen after it has been left.
 */
static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
{
	struct net_conf *nc;
	long timeo;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	timeo = ping_timeout ? nc->ping_timeo : nc->ping_int;
	rcu_read_unlock();

	if (ping_timeout)
		timeo = timeo * HZ / 10;
	else
		timeo = timeo * HZ;

	connection->meta.socket->sk->sk_rcvtimeo = timeo;
}
|
||||
|
||||
static void set_ping_timeout(struct drbd_connection *connection)
|
||||
{
|
||||
set_rcvtimeo(connection, 1);
|
||||
}
|
||||
|
||||
static void set_idle_timeout(struct drbd_connection *connection)
|
||||
{
|
||||
set_rcvtimeo(connection, 0);
|
||||
}
|
||||
|
||||
static struct meta_sock_cmd ack_receiver_tbl[] = {
|
||||
[P_PING] = { 0, got_Ping },
|
||||
[P_PING_ACK] = { 0, got_PingAck },
|
||||
[P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
|
||||
@@ -5493,64 +5513,40 @@ static struct asender_cmd asender_tbl[] = {
|
||||
[P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
|
||||
};
|
||||
|
||||
int drbd_asender(struct drbd_thread *thi)
|
||||
int drbd_ack_receiver(struct drbd_thread *thi)
|
||||
{
|
||||
struct drbd_connection *connection = thi->connection;
|
||||
struct asender_cmd *cmd = NULL;
|
||||
struct meta_sock_cmd *cmd = NULL;
|
||||
struct packet_info pi;
|
||||
unsigned long pre_recv_jif;
|
||||
int rv;
|
||||
void *buf = connection->meta.rbuf;
|
||||
int received = 0;
|
||||
unsigned int header_size = drbd_header_size(connection);
|
||||
int expect = header_size;
|
||||
bool ping_timeout_active = false;
|
||||
struct net_conf *nc;
|
||||
int ping_timeo, tcp_cork, ping_int;
|
||||
struct sched_param param = { .sched_priority = 2 };
|
||||
|
||||
rv = sched_setscheduler(current, SCHED_RR, ¶m);
|
||||
if (rv < 0)
|
||||
drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
|
||||
drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv);
|
||||
|
||||
while (get_t_state(thi) == RUNNING) {
|
||||
drbd_thread_current_set_cpu(thi);
|
||||
|
||||
rcu_read_lock();
|
||||
nc = rcu_dereference(connection->net_conf);
|
||||
ping_timeo = nc->ping_timeo;
|
||||
tcp_cork = nc->tcp_cork;
|
||||
ping_int = nc->ping_int;
|
||||
rcu_read_unlock();
|
||||
conn_reclaim_net_peer_reqs(connection);
|
||||
|
||||
if (test_and_clear_bit(SEND_PING, &connection->flags)) {
|
||||
if (drbd_send_ping(connection)) {
|
||||
drbd_err(connection, "drbd_send_ping has failed\n");
|
||||
goto reconnect;
|
||||
}
|
||||
connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
|
||||
set_ping_timeout(connection);
|
||||
ping_timeout_active = true;
|
||||
}
|
||||
|
||||
/* TODO: conditionally cork; it may hurt latency if we cork without
|
||||
much to send */
|
||||
if (tcp_cork)
|
||||
drbd_tcp_cork(connection->meta.socket);
|
||||
if (connection_finish_peer_reqs(connection)) {
|
||||
drbd_err(connection, "connection_finish_peer_reqs() failed\n");
|
||||
goto reconnect;
|
||||
}
|
||||
/* but unconditionally uncork unless disabled */
|
||||
if (tcp_cork)
|
||||
drbd_tcp_uncork(connection->meta.socket);
|
||||
|
||||
/* short circuit, recv_msg would return EINTR anyways. */
|
||||
if (signal_pending(current))
|
||||
continue;
|
||||
|
||||
pre_recv_jif = jiffies;
|
||||
rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
|
||||
clear_bit(SIGNAL_ASENDER, &connection->flags);
|
||||
|
||||
flush_signals(current);
|
||||
|
||||
/* Note:
|
||||
* -EINTR (on meta) we got a signal
|
||||
@@ -5562,7 +5558,6 @@ int drbd_asender(struct drbd_thread *thi)
|
||||
* rv < expected: "woken" by signal during receive
|
||||
* rv == 0 : "connection shut down by peer"
|
||||
*/
|
||||
received_more:
|
||||
if (likely(rv > 0)) {
|
||||
received += rv;
|
||||
buf += rv;
|
||||
@@ -5584,8 +5579,7 @@ received_more:
|
||||
} else if (rv == -EAGAIN) {
|
||||
/* If the data socket received something meanwhile,
|
||||
* that is good enough: peer is still alive. */
|
||||
if (time_after(connection->last_received,
|
||||
jiffies - connection->meta.socket->sk->sk_rcvtimeo))
|
||||
if (time_after(connection->last_received, pre_recv_jif))
|
||||
continue;
|
||||
if (ping_timeout_active) {
|
||||
drbd_err(connection, "PingAck did not arrive in time.\n");
|
||||
@@ -5594,6 +5588,10 @@ received_more:
|
||||
set_bit(SEND_PING, &connection->flags);
|
||||
continue;
|
||||
} else if (rv == -EINTR) {
|
||||
/* maybe drbd_thread_stop(): the while condition will notice.
|
||||
* maybe woken for send_ping: we'll send a ping above,
|
||||
* and change the rcvtimeo */
|
||||
flush_signals(current);
|
||||
continue;
|
||||
} else {
|
||||
drbd_err(connection, "sock_recvmsg returned %d\n", rv);
|
||||
@@ -5603,8 +5601,8 @@ received_more:
|
||||
if (received == expect && cmd == NULL) {
|
||||
if (decode_header(connection, connection->meta.rbuf, &pi))
|
||||
goto reconnect;
|
||||
cmd = &asender_tbl[pi.cmd];
|
||||
if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
|
||||
cmd = &ack_receiver_tbl[pi.cmd];
|
||||
if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
|
||||
drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
|
||||
cmdname(pi.cmd), pi.cmd);
|
||||
goto disconnect;
|
||||
@@ -5627,9 +5625,8 @@ received_more:
|
||||
|
||||
connection->last_received = jiffies;
|
||||
|
||||
if (cmd == &asender_tbl[P_PING_ACK]) {
|
||||
/* restore idle timeout */
|
||||
connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
|
||||
if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
|
||||
set_idle_timeout(connection);
|
||||
ping_timeout_active = false;
|
||||
}
|
||||
|
||||
@@ -5638,11 +5635,6 @@ received_more:
|
||||
expect = header_size;
|
||||
cmd = NULL;
|
||||
}
|
||||
if (test_bit(SEND_PING, &connection->flags))
|
||||
continue;
|
||||
rv = drbd_recv_short(connection->meta.socket, buf, expect-received, MSG_DONTWAIT);
|
||||
if (rv > 0)
|
||||
goto received_more;
|
||||
}
|
||||
|
||||
if (0) {
|
||||
@@ -5654,9 +5646,41 @@ reconnect:
|
||||
disconnect:
|
||||
conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
|
||||
}
|
||||
clear_bit(SIGNAL_ASENDER, &connection->flags);
|
||||
|
||||
drbd_info(connection, "asender terminated\n");
|
||||
drbd_info(connection, "ack_receiver terminated\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void drbd_send_acks_wf(struct work_struct *ws)
|
||||
{
|
||||
struct drbd_peer_device *peer_device =
|
||||
container_of(ws, struct drbd_peer_device, send_acks_work);
|
||||
struct drbd_connection *connection = peer_device->connection;
|
||||
struct drbd_device *device = peer_device->device;
|
||||
struct net_conf *nc;
|
||||
int tcp_cork, err;
|
||||
|
||||
rcu_read_lock();
|
||||
nc = rcu_dereference(connection->net_conf);
|
||||
tcp_cork = nc->tcp_cork;
|
||||
rcu_read_unlock();
|
||||
|
||||
if (tcp_cork)
|
||||
drbd_tcp_cork(connection->meta.socket);
|
||||
|
||||
err = drbd_finish_peer_reqs(device);
|
||||
kref_put(&device->kref, drbd_destroy_device);
|
||||
/* get is in drbd_endio_write_sec_final(). That is necessary to keep the
|
||||
struct work_struct send_acks_work alive, which is in the peer_device object */
|
||||
|
||||
if (err) {
|
||||
conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
|
||||
return;
|
||||
}
|
||||
|
||||
if (tcp_cork)
|
||||
drbd_tcp_uncork(connection->meta.socket);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
+117
-30
@@ -453,12 +453,12 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
|
||||
kref_get(&req->kref); /* wait for the DONE */
|
||||
|
||||
if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
|
||||
/* potentially already completed in the asender thread */
|
||||
/* potentially already completed in the ack_receiver thread */
|
||||
if (!(s & RQ_NET_DONE)) {
|
||||
atomic_add(req->i.size >> 9, &device->ap_in_flight);
|
||||
set_if_null_req_not_net_done(peer_device, req);
|
||||
}
|
||||
if (s & RQ_NET_PENDING)
|
||||
if (req->rq_state & RQ_NET_PENDING)
|
||||
set_if_null_req_ack_pending(peer_device, req);
|
||||
}
|
||||
|
||||
@@ -1095,6 +1095,24 @@ static bool do_remote_read(struct drbd_request *req)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool drbd_should_do_remote(union drbd_dev_state s)
|
||||
{
|
||||
return s.pdsk == D_UP_TO_DATE ||
|
||||
(s.pdsk >= D_INCONSISTENT &&
|
||||
s.conn >= C_WF_BITMAP_T &&
|
||||
s.conn < C_AHEAD);
|
||||
/* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
|
||||
That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
|
||||
states. */
|
||||
}
|
||||
|
||||
static bool drbd_should_send_out_of_sync(union drbd_dev_state s)
|
||||
{
|
||||
return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
|
||||
/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
|
||||
since we enter state C_AHEAD only if proto >= 96 */
|
||||
}
|
||||
|
||||
/* returns number of connections (== 1, for drbd 8.4)
|
||||
* expected to actually write this data,
|
||||
* which does NOT include those that we are L_AHEAD for. */
|
||||
@@ -1149,7 +1167,6 @@ drbd_submit_req_private_bio(struct drbd_request *req)
|
||||
* stable storage, and this is a WRITE, we may not even submit
|
||||
* this bio. */
|
||||
if (get_ldev(device)) {
|
||||
req->pre_submit_jif = jiffies;
|
||||
if (drbd_insert_fault(device,
|
||||
rw == WRITE ? DRBD_FAULT_DT_WR
|
||||
: rw == READ ? DRBD_FAULT_DT_RD
|
||||
@@ -1293,6 +1310,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
|
||||
&device->pending_master_completion[rw == WRITE]);
|
||||
if (req->private_bio) {
|
||||
/* needs to be marked within the same spinlock */
|
||||
req->pre_submit_jif = jiffies;
|
||||
list_add_tail(&req->req_pending_local,
|
||||
&device->pending_completion[rw == WRITE]);
|
||||
_req_mod(req, TO_BE_SUBMITTED);
|
||||
@@ -1513,6 +1531,78 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
static bool net_timeout_reached(struct drbd_request *net_req,
|
||||
struct drbd_connection *connection,
|
||||
unsigned long now, unsigned long ent,
|
||||
unsigned int ko_count, unsigned int timeout)
|
||||
{
|
||||
struct drbd_device *device = net_req->device;
|
||||
|
||||
if (!time_after(now, net_req->pre_send_jif + ent))
|
||||
return false;
|
||||
|
||||
if (time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent))
|
||||
return false;
|
||||
|
||||
if (net_req->rq_state & RQ_NET_PENDING) {
|
||||
drbd_warn(device, "Remote failed to finish a request within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
|
||||
jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* We received an ACK already (or are using protocol A),
|
||||
* but are waiting for the epoch closing barrier ack.
|
||||
* Check if we sent the barrier already. We should not blame the peer
|
||||
* for being unresponsive, if we did not even ask it yet. */
|
||||
if (net_req->epoch == connection->send.current_epoch_nr) {
|
||||
drbd_warn(device,
|
||||
"We did not send a P_BARRIER for %ums > ko-count (%u) * timeout (%u * 0.1s); drbd kernel thread blocked?\n",
|
||||
jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Worst case: we may have been blocked for whatever reason, then
|
||||
* suddenly are able to send a lot of requests (and epoch separating
|
||||
* barriers) in quick succession.
|
||||
* The timestamp of the net_req may be much too old and not correspond
|
||||
* to the sending time of the relevant unack'ed barrier packet, so
|
||||
* would trigger a spurious timeout. The latest barrier packet may
|
||||
* have a too recent timestamp to trigger the timeout, potentially miss
|
||||
* a timeout. Right now we don't have a place to conveniently store
|
||||
* these timestamps.
|
||||
* But in this particular situation, the application requests are still
|
||||
* completed to upper layers, DRBD should still "feel" responsive.
|
||||
* No need yet to kill this connection, it may still recover.
|
||||
* If not, eventually we will have queued enough into the network for
|
||||
* us to block. From that point of view, the timestamp of the last sent
|
||||
* barrier packet is relevant enough.
|
||||
*/
|
||||
if (time_after(now, connection->send.last_sent_barrier_jif + ent)) {
|
||||
drbd_warn(device, "Remote failed to answer a P_BARRIER (sent at %lu jif; now=%lu jif) within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
|
||||
connection->send.last_sent_barrier_jif, now,
|
||||
jiffies_to_msecs(now - connection->send.last_sent_barrier_jif), ko_count, timeout);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* A request is considered timed out, if
|
||||
* - we have some effective timeout from the configuration,
|
||||
* with some state restrictions applied,
|
||||
* - the oldest request is waiting for a response from the network
|
||||
* resp. the local disk,
|
||||
* - the oldest request is in fact older than the effective timeout,
|
||||
* - the connection was established (resp. disk was attached)
|
||||
* for longer than the timeout already.
|
||||
* Note that for 32bit jiffies and very stable connections/disks,
|
||||
* we may have a wrap around, which is caught by
|
||||
* !time_in_range(now, last_..._jif, last_..._jif + timeout).
|
||||
*
|
||||
* Side effect: once per 32bit wrap-around interval, which means every
|
||||
* ~198 days with 250 HZ, we have a window where the timeout would need
|
||||
* to expire twice (worst case) to become effective. Good enough.
|
||||
*/
|
||||
|
||||
void request_timer_fn(unsigned long data)
|
||||
{
|
||||
struct drbd_device *device = (struct drbd_device *) data;
|
||||
@@ -1522,11 +1612,14 @@ void request_timer_fn(unsigned long data)
|
||||
unsigned long oldest_submit_jif;
|
||||
unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
|
||||
unsigned long now;
|
||||
unsigned int ko_count = 0, timeout = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
nc = rcu_dereference(connection->net_conf);
|
||||
if (nc && device->state.conn >= C_WF_REPORT_PARAMS)
|
||||
ent = nc->timeout * HZ/10 * nc->ko_count;
|
||||
if (nc && device->state.conn >= C_WF_REPORT_PARAMS) {
|
||||
ko_count = nc->ko_count;
|
||||
timeout = nc->timeout;
|
||||
}
|
||||
|
||||
if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */
|
||||
dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10;
|
||||
@@ -1534,6 +1627,8 @@ void request_timer_fn(unsigned long data)
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
|
||||
ent = timeout * HZ/10 * ko_count;
|
||||
et = min_not_zero(dt, ent);
|
||||
|
||||
if (!et)
|
||||
@@ -1545,11 +1640,22 @@ void request_timer_fn(unsigned long data)
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
|
||||
req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
|
||||
req_peer = connection->req_not_net_done;
|
||||
|
||||
/* maybe the oldest request waiting for the peer is in fact still
|
||||
* blocking in tcp sendmsg */
|
||||
if (!req_peer && connection->req_next && connection->req_next->pre_send_jif)
|
||||
req_peer = connection->req_next;
|
||||
* blocking in tcp sendmsg. That's ok, though, that's handled via the
|
||||
* socket send timeout, requesting a ping, and bumping ko-count in
|
||||
* we_should_drop_the_connection().
|
||||
*/
|
||||
|
||||
/* check the oldest request we successfully sent,
|
||||
* but which is still waiting for an ACK. */
|
||||
req_peer = connection->req_ack_pending;
|
||||
|
||||
/* if we don't have such a request (e.g. protocol A)
|
||||
* check the oldest request which is still waiting on its epoch
|
||||
* closing barrier ack. */
|
||||
if (!req_peer)
|
||||
req_peer = connection->req_not_net_done;
|
||||
|
||||
/* evaluate the oldest peer request only in one timer! */
|
||||
if (req_peer && req_peer->device != device)
|
||||
@@ -1566,28 +1672,9 @@ void request_timer_fn(unsigned long data)
|
||||
: req_write ? req_write->pre_submit_jif
|
||||
: req_read ? req_read->pre_submit_jif : now;
|
||||
|
||||
/* The request is considered timed out, if
|
||||
* - we have some effective timeout from the configuration,
|
||||
* with above state restrictions applied,
|
||||
* - the oldest request is waiting for a response from the network
|
||||
* resp. the local disk,
|
||||
* - the oldest request is in fact older than the effective timeout,
|
||||
* - the connection was established (resp. disk was attached)
|
||||
* for longer than the timeout already.
|
||||
* Note that for 32bit jiffies and very stable connections/disks,
|
||||
* we may have a wrap around, which is caught by
|
||||
* !time_in_range(now, last_..._jif, last_..._jif + timeout).
|
||||
*
|
||||
* Side effect: once per 32bit wrap-around interval, which means every
|
||||
* ~198 days with 250 HZ, we have a window where the timeout would need
|
||||
* to expire twice (worst case) to become effective. Good enough.
|
||||
*/
|
||||
if (ent && req_peer &&
|
||||
time_after(now, req_peer->pre_send_jif + ent) &&
|
||||
!time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
|
||||
drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
|
||||
if (ent && req_peer && net_timeout_reached(req_peer, connection, now, ent, ko_count, timeout))
|
||||
_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD);
|
||||
}
|
||||
|
||||
if (dt && oldest_submit_jif != now &&
|
||||
time_after(now, oldest_submit_jif + dt) &&
|
||||
!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
|
||||
|
||||
@@ -331,21 +331,6 @@ static inline int req_mod(struct drbd_request *req,
|
||||
return rv;
|
||||
}
|
||||
|
||||
static inline bool drbd_should_do_remote(union drbd_dev_state s)
|
||||
{
|
||||
return s.pdsk == D_UP_TO_DATE ||
|
||||
(s.pdsk >= D_INCONSISTENT &&
|
||||
s.conn >= C_WF_BITMAP_T &&
|
||||
s.conn < C_AHEAD);
|
||||
/* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
|
||||
That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
|
||||
states. */
|
||||
}
|
||||
static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s)
|
||||
{
|
||||
return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
|
||||
/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
|
||||
since we enter state C_AHEAD only if proto >= 96 */
|
||||
}
|
||||
extern bool drbd_should_do_remote(union drbd_dev_state);
|
||||
|
||||
#endif
|
||||
|
||||
+416
-12
File diff suppressed because it is too large
Load Diff
@@ -122,9 +122,9 @@ extern enum drbd_state_rv
|
||||
_drbd_request_state_holding_state_mutex(struct drbd_device *, union drbd_state,
|
||||
union drbd_state, enum chg_state_flags);
|
||||
|
||||
extern enum drbd_state_rv __drbd_set_state(struct drbd_device *, union drbd_state,
|
||||
enum chg_state_flags,
|
||||
struct completion *done);
|
||||
extern enum drbd_state_rv _drbd_set_state(struct drbd_device *, union drbd_state,
|
||||
enum chg_state_flags,
|
||||
struct completion *done);
|
||||
extern void print_st_err(struct drbd_device *, union drbd_state,
|
||||
union drbd_state, int);
|
||||
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
#ifndef DRBD_STATE_CHANGE_H
|
||||
#define DRBD_STATE_CHANGE_H
|
||||
|
||||
struct drbd_resource_state_change {
|
||||
struct drbd_resource *resource;
|
||||
enum drbd_role role[2];
|
||||
bool susp[2];
|
||||
bool susp_nod[2];
|
||||
bool susp_fen[2];
|
||||
};
|
||||
|
||||
struct drbd_device_state_change {
|
||||
struct drbd_device *device;
|
||||
enum drbd_disk_state disk_state[2];
|
||||
};
|
||||
|
||||
struct drbd_connection_state_change {
|
||||
struct drbd_connection *connection;
|
||||
enum drbd_conns cstate[2]; /* drbd9: enum drbd_conn_state */
|
||||
enum drbd_role peer_role[2];
|
||||
};
|
||||
|
||||
struct drbd_peer_device_state_change {
|
||||
struct drbd_peer_device *peer_device;
|
||||
enum drbd_disk_state disk_state[2];
|
||||
enum drbd_conns repl_state[2]; /* drbd9: enum drbd_repl_state */
|
||||
bool resync_susp_user[2];
|
||||
bool resync_susp_peer[2];
|
||||
bool resync_susp_dependency[2];
|
||||
};
|
||||
|
||||
struct drbd_state_change {
|
||||
struct list_head list;
|
||||
unsigned int n_devices;
|
||||
unsigned int n_connections;
|
||||
struct drbd_resource_state_change resource[1];
|
||||
struct drbd_device_state_change *devices;
|
||||
struct drbd_connection_state_change *connections;
|
||||
struct drbd_peer_device_state_change *peer_devices;
|
||||
};
|
||||
|
||||
extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_t);
|
||||
extern void copy_old_to_new_state_change(struct drbd_state_change *);
|
||||
extern void forget_state_change(struct drbd_state_change *);
|
||||
|
||||
extern void notify_resource_state_change(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_resource_state_change *,
|
||||
enum drbd_notification_type type);
|
||||
extern void notify_connection_state_change(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_connection_state_change *,
|
||||
enum drbd_notification_type type);
|
||||
extern void notify_device_state_change(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_device_state_change *,
|
||||
enum drbd_notification_type type);
|
||||
extern void notify_peer_device_state_change(struct sk_buff *,
|
||||
unsigned int,
|
||||
struct drbd_peer_device_state_change *,
|
||||
enum drbd_notification_type type);
|
||||
|
||||
#endif /* DRBD_STATE_CHANGE_H */
|
||||
@@ -55,13 +55,6 @@ static int make_resync_request(struct drbd_device *, int);
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* About the global_state_lock
|
||||
Each state transition on a device holds a read lock. In case we have
|
||||
to evaluate the resync after dependencies, we grab a write lock, because
|
||||
we need stable states on all devices for that. */
|
||||
rwlock_t global_state_lock;
|
||||
|
||||
/* used for synchronous meta data and bitmap IO
|
||||
* submitted by drbd_md_sync_page_io()
|
||||
*/
|
||||
@@ -120,6 +113,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
|
||||
unsigned long flags = 0;
|
||||
struct drbd_peer_device *peer_device = peer_req->peer_device;
|
||||
struct drbd_device *device = peer_device->device;
|
||||
struct drbd_connection *connection = peer_device->connection;
|
||||
struct drbd_interval i;
|
||||
int do_wake;
|
||||
u64 block_id;
|
||||
@@ -152,6 +146,12 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
|
||||
* ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
|
||||
if (peer_req->flags & EE_WAS_ERROR)
|
||||
__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
|
||||
|
||||
if (connection->cstate >= C_WF_REPORT_PARAMS) {
|
||||
kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
|
||||
if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
|
||||
kref_put(&device->kref, drbd_destroy_device);
|
||||
}
|
||||
spin_unlock_irqrestore(&device->resource->req_lock, flags);
|
||||
|
||||
if (block_id == ID_SYNCER)
|
||||
@@ -163,7 +163,6 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
|
||||
if (do_al_complete_io)
|
||||
drbd_al_complete_io(device, &i);
|
||||
|
||||
wake_asender(peer_device->connection);
|
||||
put_ldev(device);
|
||||
}
|
||||
|
||||
@@ -195,6 +194,12 @@ void drbd_peer_request_endio(struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
|
||||
{
|
||||
panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
|
||||
device->minor, device->resource->name, device->vnr);
|
||||
}
|
||||
|
||||
/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
|
||||
*/
|
||||
void drbd_request_endio(struct bio *bio)
|
||||
@@ -238,7 +243,7 @@ void drbd_request_endio(struct bio *bio)
|
||||
drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
|
||||
|
||||
if (!bio->bi_error)
|
||||
panic("possible random memory corruption caused by delayed completion of aborted local request\n");
|
||||
drbd_panic_after_delayed_completion_of_aborted_request(device);
|
||||
}
|
||||
|
||||
/* to avoid recursion in __req_mod */
|
||||
@@ -1291,6 +1296,7 @@ static int drbd_send_barrier(struct drbd_connection *connection)
|
||||
p->barrier = connection->send.current_epoch_nr;
|
||||
p->pad = 0;
|
||||
connection->send.current_epoch_writes = 0;
|
||||
connection->send.last_sent_barrier_jif = jiffies;
|
||||
|
||||
return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
|
||||
}
|
||||
@@ -1315,6 +1321,7 @@ static void re_init_if_first_write(struct drbd_connection *connection, unsigned
|
||||
connection->send.seen_any_write_yet = true;
|
||||
connection->send.current_epoch_nr = epoch;
|
||||
connection->send.current_epoch_writes = 0;
|
||||
connection->send.last_sent_barrier_jif = jiffies;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1456,70 +1463,73 @@ static int _drbd_may_sync_now(struct drbd_device *device)
|
||||
}
|
||||
|
||||
/**
|
||||
* _drbd_pause_after() - Pause resync on all devices that may not resync now
|
||||
* drbd_pause_after() - Pause resync on all devices that may not resync now
|
||||
* @device: DRBD device.
|
||||
*
|
||||
* Called from process context only (admin command and after_state_ch).
|
||||
*/
|
||||
static int _drbd_pause_after(struct drbd_device *device)
|
||||
static bool drbd_pause_after(struct drbd_device *device)
|
||||
{
|
||||
bool changed = false;
|
||||
struct drbd_device *odev;
|
||||
int i, rv = 0;
|
||||
int i;
|
||||
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&drbd_devices, odev, i) {
|
||||
if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
|
||||
continue;
|
||||
if (!_drbd_may_sync_now(odev))
|
||||
rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
|
||||
!= SS_NOTHING_TO_DO);
|
||||
if (!_drbd_may_sync_now(odev) &&
|
||||
_drbd_set_state(_NS(odev, aftr_isp, 1),
|
||||
CS_HARD, NULL) != SS_NOTHING_TO_DO)
|
||||
changed = true;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return rv;
|
||||
return changed;
|
||||
}
|
||||
|
||||
/**
|
||||
* _drbd_resume_next() - Resume resync on all devices that may resync now
|
||||
* drbd_resume_next() - Resume resync on all devices that may resync now
|
||||
* @device: DRBD device.
|
||||
*
|
||||
* Called from process context only (admin command and worker).
|
||||
*/
|
||||
static int _drbd_resume_next(struct drbd_device *device)
|
||||
static bool drbd_resume_next(struct drbd_device *device)
|
||||
{
|
||||
bool changed = false;
|
||||
struct drbd_device *odev;
|
||||
int i, rv = 0;
|
||||
int i;
|
||||
|
||||
rcu_read_lock();
|
||||
idr_for_each_entry(&drbd_devices, odev, i) {
|
||||
if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
|
||||
continue;
|
||||
if (odev->state.aftr_isp) {
|
||||
if (_drbd_may_sync_now(odev))
|
||||
rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
|
||||
CS_HARD, NULL)
|
||||
!= SS_NOTHING_TO_DO) ;
|
||||
if (_drbd_may_sync_now(odev) &&
|
||||
_drbd_set_state(_NS(odev, aftr_isp, 0),
|
||||
CS_HARD, NULL) != SS_NOTHING_TO_DO)
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return rv;
|
||||
return changed;
|
||||
}
|
||||
|
||||
void resume_next_sg(struct drbd_device *device)
|
||||
{
|
||||
write_lock_irq(&global_state_lock);
|
||||
_drbd_resume_next(device);
|
||||
write_unlock_irq(&global_state_lock);
|
||||
lock_all_resources();
|
||||
drbd_resume_next(device);
|
||||
unlock_all_resources();
|
||||
}
|
||||
|
||||
void suspend_other_sg(struct drbd_device *device)
|
||||
{
|
||||
write_lock_irq(&global_state_lock);
|
||||
_drbd_pause_after(device);
|
||||
write_unlock_irq(&global_state_lock);
|
||||
lock_all_resources();
|
||||
drbd_pause_after(device);
|
||||
unlock_all_resources();
|
||||
}
|
||||
|
||||
/* caller must hold global_state_lock */
|
||||
/* caller must lock_all_resources() */
|
||||
enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
|
||||
{
|
||||
struct drbd_device *odev;
|
||||
@@ -1557,15 +1567,15 @@ enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_min
|
||||
}
|
||||
}
|
||||
|
||||
/* caller must hold global_state_lock */
|
||||
/* caller must lock_all_resources() */
|
||||
void drbd_resync_after_changed(struct drbd_device *device)
|
||||
{
|
||||
int changes;
|
||||
int changed;
|
||||
|
||||
do {
|
||||
changes = _drbd_pause_after(device);
|
||||
changes |= _drbd_resume_next(device);
|
||||
} while (changes);
|
||||
changed = drbd_pause_after(device);
|
||||
changed |= drbd_resume_next(device);
|
||||
} while (changed);
|
||||
}
|
||||
|
||||
void drbd_rs_controller_reset(struct drbd_device *device)
|
||||
@@ -1685,19 +1695,14 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
|
||||
} else {
|
||||
mutex_lock(device->state_mutex);
|
||||
}
|
||||
clear_bit(B_RS_H_DONE, &device->flags);
|
||||
|
||||
/* req_lock: serialize with drbd_send_and_submit() and others
|
||||
* global_state_lock: for stable sync-after dependencies */
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
write_lock(&global_state_lock);
|
||||
lock_all_resources();
|
||||
clear_bit(B_RS_H_DONE, &device->flags);
|
||||
/* Did some connection breakage or IO error race with us? */
|
||||
if (device->state.conn < C_CONNECTED
|
||||
|| !get_ldev_if_state(device, D_NEGOTIATING)) {
|
||||
write_unlock(&global_state_lock);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
mutex_unlock(device->state_mutex);
|
||||
return;
|
||||
unlock_all_resources();
|
||||
goto out;
|
||||
}
|
||||
|
||||
ns = drbd_read_state(device);
|
||||
@@ -1711,7 +1716,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
|
||||
else /* side == C_SYNC_SOURCE */
|
||||
ns.pdsk = D_INCONSISTENT;
|
||||
|
||||
r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
|
||||
r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
|
||||
ns = drbd_read_state(device);
|
||||
|
||||
if (ns.conn < C_CONNECTED)
|
||||
@@ -1732,7 +1737,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
|
||||
device->rs_mark_left[i] = tw;
|
||||
device->rs_mark_time[i] = now;
|
||||
}
|
||||
_drbd_pause_after(device);
|
||||
drbd_pause_after(device);
|
||||
/* Forget potentially stale cached per resync extent bit-counts.
|
||||
* Open coded drbd_rs_cancel_all(device), we already have IRQs
|
||||
* disabled, and know the disk state is ok. */
|
||||
@@ -1742,8 +1747,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
|
||||
device->resync_wenr = LC_FREE;
|
||||
spin_unlock(&device->al_lock);
|
||||
}
|
||||
write_unlock(&global_state_lock);
|
||||
spin_unlock_irq(&device->resource->req_lock);
|
||||
unlock_all_resources();
|
||||
|
||||
if (r == SS_SUCCESS) {
|
||||
wake_up(&device->al_wait); /* for lc_reset() above */
|
||||
@@ -1807,6 +1811,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
|
||||
drbd_md_sync(device);
|
||||
}
|
||||
put_ldev(device);
|
||||
out:
|
||||
mutex_unlock(device->state_mutex);
|
||||
}
|
||||
|
||||
@@ -1836,7 +1841,7 @@ static void drbd_ldev_destroy(struct drbd_device *device)
|
||||
device->act_log = NULL;
|
||||
|
||||
__acquire(local);
|
||||
drbd_free_ldev(device->ldev);
|
||||
drbd_backing_dev_free(device, device->ldev);
|
||||
device->ldev = NULL;
|
||||
__release(local);
|
||||
|
||||
|
||||
@@ -104,9 +104,9 @@
|
||||
/* Device instance number, incremented each time a device is probed. */
|
||||
static int instance;
|
||||
|
||||
struct list_head online_list;
|
||||
struct list_head removing_list;
|
||||
spinlock_t dev_lock;
|
||||
static struct list_head online_list;
|
||||
static struct list_head removing_list;
|
||||
static spinlock_t dev_lock;
|
||||
|
||||
/*
|
||||
* Global variable used to hold the major block device number
|
||||
|
||||
@@ -495,17 +495,17 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
|
||||
id->ppaf.ch_offset = 56;
|
||||
id->ppaf.ch_len = 8;
|
||||
|
||||
do_div(size, bs); /* convert size to pages */
|
||||
do_div(size, 256); /* concert size to pgs pr blk */
|
||||
sector_div(size, bs); /* convert size to pages */
|
||||
size >>= 8; /* concert size to pgs pr blk */
|
||||
grp = &id->groups[0];
|
||||
grp->mtype = 0;
|
||||
grp->fmtype = 0;
|
||||
grp->num_ch = 1;
|
||||
grp->num_pg = 256;
|
||||
blksize = size;
|
||||
do_div(size, (1 << 16));
|
||||
size >>= 16;
|
||||
grp->num_lun = size + 1;
|
||||
do_div(blksize, grp->num_lun);
|
||||
sector_div(blksize, grp->num_lun);
|
||||
grp->num_blk = blksize;
|
||||
grp->num_pln = 1;
|
||||
|
||||
|
||||
+3
-4
@@ -23,7 +23,7 @@
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/hdreg.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/completion.h>
|
||||
@@ -671,16 +671,15 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
|
||||
static unsigned int carm_fill_sync_time(struct carm_host *host,
|
||||
unsigned int idx, void *mem)
|
||||
{
|
||||
struct timeval tv;
|
||||
struct carm_msg_sync_time *st = mem;
|
||||
|
||||
do_gettimeofday(&tv);
|
||||
time64_t tv = ktime_get_real_seconds();
|
||||
|
||||
memset(st, 0, sizeof(*st));
|
||||
st->type = CARM_MSG_MISC;
|
||||
st->subtype = MISC_SET_TIME;
|
||||
st->handle = cpu_to_le32(TAG_ENCODE(idx));
|
||||
st->timestamp = cpu_to_le32(tv.tv_sec);
|
||||
st->timestamp = cpu_to_le32(tv);
|
||||
|
||||
return sizeof(struct carm_msg_sync_time);
|
||||
}
|
||||
|
||||
+208
-183
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user