Merge branch 'for-2.6.39/drivers' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.39/drivers' of git://git.kernel.dk/linux-2.6-block: (122 commits)
  cciss: fix lost command issue
  drbd: need include for bitops functions declarations
  Revert "cciss: Add missing allocation in scsi_cmd_stack_setup and  corresponding deallocation"
  cciss: fix missed command status value CMD_UNABORTABLE
  cciss: remove unnecessary casts
  cciss: Mask off error bits of c->busaddr in cmd_special_free when calling pci_free_consistent
  cciss: Inform controller we are using 32-bit tags.
  cciss: hoist tag masking out of loop
  cciss: Add missing allocation in scsi_cmd_stack_setup and  corresponding deallocation
  cciss: export resettable host attribute
  drbd: drop code present under #ifdef which is relevant to 2.6.28 and below
  drbd: Fixed handling of read errors on a 'VerifyS' node
  drbd: Fixed handling of read errors on a 'VerifyT' node
  drbd: Implemented real timeout checking for request processing time
  drbd: Remove unused function atodb_endio()
  drbd: improve log message if received sector offset exceeds local capacity
  drbd: kill dead code
  drbd: don't BUG_ON, if bio_add_page of a single page to an empty bio fails
  drbd: Removed left over, now wrong comments
  drbd: serialize admin requests for new verify run with pending bitmap io
  ...
This commit is contained in:
Linus Torvalds
2011-03-27 20:02:07 -07:00
21 changed files with 2273 additions and 1409 deletions

View File

@@ -59,3 +59,15 @@ Kernel Version: 2.6.31
Contact: iss_storagedev@hp.com
Description: Displays the usage count (number of opens) of logical drive Y
of controller X.
Where: /sys/bus/pci/devices/<dev>/ccissX/resettable
Date: February 2011
Kernel Version: 2.6.38
Contact: iss_storagedev@hp.com
Description: Value of 1 indicates the controller can honor the reset_devices
kernel parameter. Value of 0 indicates reset_devices cannot be
honored. This is to allow, for example, kexec tools to be able
to warn the user if they designate an unresettable device as
a dump device, as kdump requires resetting the device in order
to work reliably.

View File

@@ -193,7 +193,7 @@ static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev,
u64 *cfg_offset);
static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
unsigned long *memory_bar);
static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag);
/* performant mode helper functions */
static void calc_bucket_map(int *bucket, int num_buckets, int nsgs,
@@ -231,7 +231,7 @@ static const struct block_device_operations cciss_fops = {
*/
static void set_performant_mode(ctlr_info_t *h, CommandList_struct *c)
{
if (likely(h->transMethod == CFGTBL_Trans_Performant))
if (likely(h->transMethod & CFGTBL_Trans_Performant))
c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
}
@@ -556,6 +556,44 @@ static void __devinit cciss_procinit(ctlr_info_t *h)
#define to_hba(n) container_of(n, struct ctlr_info, dev)
#define to_drv(n) container_of(n, drive_info_struct, dev)
/* List of controllers which cannot be reset on kexec with reset_devices */
static u32 unresettable_controller[] = {
0x324a103C, /* Smart Array P712m */
0x324b103C, /* SmartArray P711m */
0x3223103C, /* Smart Array P800 */
0x3234103C, /* Smart Array P400 */
0x3235103C, /* Smart Array P400i */
0x3211103C, /* Smart Array E200i */
0x3212103C, /* Smart Array E200 */
0x3213103C, /* Smart Array E200i */
0x3214103C, /* Smart Array E200i */
0x3215103C, /* Smart Array E200i */
0x3237103C, /* Smart Array E500 */
0x323D103C, /* Smart Array P700m */
0x409C0E11, /* Smart Array 6400 */
0x409D0E11, /* Smart Array 6400 EM */
};
static int ctlr_is_resettable(struct ctlr_info *h)
{
int i;
for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++)
if (unresettable_controller[i] == h->board_id)
return 0;
return 1;
}
static ssize_t host_show_resettable(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ctlr_info *h = to_hba(dev);
return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h));
}
static DEVICE_ATTR(resettable, S_IRUGO, host_show_resettable, NULL);
static ssize_t host_store_rescan(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
@@ -741,6 +779,7 @@ static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
static struct attribute *cciss_host_attrs[] = {
&dev_attr_rescan.attr,
&dev_attr_resettable.attr,
NULL
};
@@ -973,8 +1012,8 @@ static void cmd_special_free(ctlr_info_t *h, CommandList_struct *c)
temp64.val32.upper = c->ErrDesc.Addr.upper;
pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
c->err_info, (dma_addr_t) temp64.val);
pci_free_consistent(h->pdev, sizeof(CommandList_struct),
c, (dma_addr_t) c->busaddr);
pci_free_consistent(h->pdev, sizeof(CommandList_struct), c,
(dma_addr_t) cciss_tag_discard_error_bits(h, (u32) c->busaddr));
}
static inline ctlr_info_t *get_host(struct gendisk *disk)
@@ -1490,8 +1529,7 @@ static int cciss_bigpassthru(ctlr_info_t *h, void __user *argp)
return -EINVAL;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
ioc = (BIG_IOCTL_Command_struct *)
kmalloc(sizeof(*ioc), GFP_KERNEL);
ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
if (!ioc) {
status = -ENOMEM;
goto cleanup1;
@@ -2653,6 +2691,10 @@ static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c)
c->Request.CDB[0]);
return_status = IO_NEEDS_RETRY;
break;
case CMD_UNABORTABLE:
dev_warn(&h->pdev->dev, "cmd unabortable\n");
return_status = IO_ERROR;
break;
default:
dev_warn(&h->pdev->dev, "cmd 0x%02x returned "
"unknown status %x\n", c->Request.CDB[0],
@@ -3103,6 +3145,13 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
DID_PASSTHROUGH : DID_ERROR);
break;
case CMD_UNABORTABLE:
dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd);
rq->errors = make_status_bytes(SAM_STAT_GOOD,
cmd->err_info->CommandStatus, DRIVER_OK,
cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC ?
DID_PASSTHROUGH : DID_ERROR);
break;
default:
dev_warn(&h->pdev->dev, "cmd %p returned "
"unknown status %x\n", cmd,
@@ -3136,10 +3185,13 @@ static inline u32 cciss_tag_to_index(u32 tag)
return tag >> DIRECT_LOOKUP_SHIFT;
}
static inline u32 cciss_tag_discard_error_bits(u32 tag)
static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag)
{
#define CCISS_ERROR_BITS 0x03
return tag & ~CCISS_ERROR_BITS;
#define CCISS_PERF_ERROR_BITS ((1 << DIRECT_LOOKUP_SHIFT) - 1)
#define CCISS_SIMPLE_ERROR_BITS 0x03
if (likely(h->transMethod & CFGTBL_Trans_Performant))
return tag & ~CCISS_PERF_ERROR_BITS;
return tag & ~CCISS_SIMPLE_ERROR_BITS;
}
static inline void cciss_mark_tag_indexed(u32 *tag)
@@ -3359,7 +3411,7 @@ static inline u32 next_command(ctlr_info_t *h)
{
u32 a;
if (unlikely(h->transMethod != CFGTBL_Trans_Performant))
if (unlikely(!(h->transMethod & CFGTBL_Trans_Performant)))
return h->access.command_completed(h);
if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
@@ -3394,14 +3446,12 @@ static inline u32 process_indexed_cmd(ctlr_info_t *h, u32 raw_tag)
/* process completion of a non-indexed command */
static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag)
{
u32 tag;
CommandList_struct *c = NULL;
__u32 busaddr_masked, tag_masked;
tag = cciss_tag_discard_error_bits(raw_tag);
tag_masked = cciss_tag_discard_error_bits(h, raw_tag);
list_for_each_entry(c, &h->cmpQ, list) {
busaddr_masked = cciss_tag_discard_error_bits(c->busaddr);
tag_masked = cciss_tag_discard_error_bits(tag);
busaddr_masked = cciss_tag_discard_error_bits(h, c->busaddr);
if (busaddr_masked == tag_masked) {
finish_cmd(h, c, raw_tag);
return next_command(h);
@@ -3753,7 +3803,8 @@ static void __devinit cciss_wait_for_mode_change_ack(ctlr_info_t *h)
}
}
static __devinit void cciss_enter_performant_mode(ctlr_info_t *h)
static __devinit void cciss_enter_performant_mode(ctlr_info_t *h,
u32 use_short_tags)
{
/* This is a bit complicated. There are 8 registers on
* the controller which we write to to tell it 8 different
@@ -3808,7 +3859,7 @@ static __devinit void cciss_enter_performant_mode(ctlr_info_t *h)
writel(0, &h->transtable->RepQCtrAddrHigh32);
writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32);
writel(0, &h->transtable->RepQAddr0High32);
writel(CFGTBL_Trans_Performant,
writel(CFGTBL_Trans_Performant | use_short_tags,
&(h->cfgtable->HostWrite.TransportRequest));
writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
@@ -3855,7 +3906,8 @@ static void __devinit cciss_put_controller_into_performant_mode(ctlr_info_t *h)
if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL))
goto clean_up;
cciss_enter_performant_mode(h);
cciss_enter_performant_mode(h,
trans_support & CFGTBL_Trans_use_short_tags);
/* Change the access methods to the performant access methods */
h->access = SA5_performant_access;

View File

@@ -222,6 +222,7 @@ static void SA5_submit_command( ctlr_info_t *h, CommandList_struct *c)
h->ctlr, c->busaddr);
#endif /* CCISS_DEBUG */
writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
readl(h->vaddr + SA5_REQUEST_PORT_OFFSET);
h->commands_outstanding++;
if ( h->commands_outstanding > h->max_outstanding)
h->max_outstanding = h->commands_outstanding;

View File

@@ -56,6 +56,7 @@
#define CFGTBL_Trans_Simple 0x00000002l
#define CFGTBL_Trans_Performant 0x00000004l
#define CFGTBL_Trans_use_short_tags 0x20000000l
#define CFGTBL_BusType_Ultra2 0x00000001l
#define CFGTBL_BusType_Ultra3 0x00000002l

View File

@@ -824,13 +824,18 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
break;
case CMD_UNSOLICITED_ABORT:
cmd->result = DID_ABORT << 16;
dev_warn(&h->pdev->dev, "%p aborted do to an "
dev_warn(&h->pdev->dev, "%p aborted due to an "
"unsolicited abort\n", c);
break;
case CMD_TIMEOUT:
cmd->result = DID_TIME_OUT << 16;
dev_warn(&h->pdev->dev, "%p timedout\n", c);
break;
case CMD_UNABORTABLE:
cmd->result = DID_ERROR << 16;
dev_warn(&h->pdev->dev, "c %p command "
"unabortable\n", c);
break;
default:
cmd->result = DID_ERROR << 16;
dev_warn(&h->pdev->dev,
@@ -1007,11 +1012,15 @@ cciss_scsi_interpret_error(ctlr_info_t *h, CommandList_struct *c)
break;
case CMD_UNSOLICITED_ABORT:
dev_warn(&h->pdev->dev,
"%p aborted do to an unsolicited abort\n", c);
"%p aborted due to an unsolicited abort\n", c);
break;
case CMD_TIMEOUT:
dev_warn(&h->pdev->dev, "%p timedout\n", c);
break;
case CMD_UNABORTABLE:
dev_warn(&h->pdev->dev,
"%p unabortable\n", c);
break;
default:
dev_warn(&h->pdev->dev,
"%p returned unknown status %x\n",

View File

@@ -92,7 +92,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
bio->bi_end_io = drbd_md_io_complete;
bio->bi_rw = rw;
if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
bio_endio(bio, -EIO);
else
submit_bio(rw, bio);
@@ -176,13 +176,17 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
struct lc_element *al_ext;
struct lc_element *tmp;
unsigned long al_flags = 0;
int wake;
spin_lock_irq(&mdev->al_lock);
tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
if (unlikely(tmp != NULL)) {
struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
spin_unlock_irq(&mdev->al_lock);
if (wake)
wake_up(&mdev->al_wait);
return NULL;
}
}
@@ -258,6 +262,33 @@ void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector)
spin_unlock_irqrestore(&mdev->al_lock, flags);
}
#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
* are still coupled, or assume too much about their relation.
* Code below will not work if this is violated.
* Will be cleaned up with some followup patch.
*/
# error FIXME
#endif
static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
return al_enr >>
/* bit to page */
((PAGE_SHIFT + 3) -
/* al extent number to bit */
(AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}
static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
{
return rs_enr >>
/* bit to page */
((PAGE_SHIFT + 3) -
/* al extent number to bit */
(BM_EXT_SHIFT - BM_BLOCK_SHIFT));
}
int
w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
@@ -285,7 +316,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
* For now, we must not write the transaction,
* if we cannot write out the bitmap of the evicted extent. */
if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT);
drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted));
/* The bitmap write may have failed, causing a state change. */
if (mdev->state.disk < D_INCONSISTENT) {
@@ -334,7 +365,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+ mdev->ldev->md.al_offset + mdev->al_tr_pos;
if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
drbd_chk_io_error(mdev, 1, TRUE);
drbd_chk_io_error(mdev, 1, true);
if (++mdev->al_tr_pos >
div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
@@ -511,225 +542,6 @@ cancel:
return 1;
}
static void atodb_endio(struct bio *bio, int error)
{
struct drbd_atodb_wait *wc = bio->bi_private;
struct drbd_conf *mdev = wc->mdev;
struct page *page;
int uptodate = bio_flagged(bio, BIO_UPTODATE);
/* strange behavior of some lower level drivers...
* fail the request by clearing the uptodate flag,
* but do not return any error?! */
if (!error && !uptodate)
error = -EIO;
drbd_chk_io_error(mdev, error, TRUE);
if (error && wc->error == 0)
wc->error = error;
if (atomic_dec_and_test(&wc->count))
complete(&wc->io_done);
page = bio->bi_io_vec[0].bv_page;
put_page(page);
bio_put(bio);
mdev->bm_writ_cnt++;
put_ldev(mdev);
}
/* sector to word */
#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
/* activity log to on disk bitmap -- prepare bio unless that sector
* is already covered by previously prepared bios */
static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
struct bio **bios,
unsigned int enr,
struct drbd_atodb_wait *wc) __must_hold(local)
{
struct bio *bio;
struct page *page;
sector_t on_disk_sector;
unsigned int page_offset = PAGE_SIZE;
int offset;
int i = 0;
int err = -ENOMEM;
/* We always write aligned, full 4k blocks,
* so we can ignore the logical_block_size (for now) */
enr &= ~7U;
on_disk_sector = enr + mdev->ldev->md.md_offset
+ mdev->ldev->md.bm_offset;
D_ASSERT(!(on_disk_sector & 7U));
/* Check if that enr is already covered by an already created bio.
* Caution, bios[] is not NULL terminated,
* but only initialized to all NULL.
* For completely scattered activity log,
* the last invocation iterates over all bios,
* and finds the last NULL entry.
*/
while ((bio = bios[i])) {
if (bio->bi_sector == on_disk_sector)
return 0;
i++;
}
/* bios[i] == NULL, the next not yet used slot */
/* GFP_KERNEL, we are not in the write-out path */
bio = bio_alloc(GFP_KERNEL, 1);
if (bio == NULL)
return -ENOMEM;
if (i > 0) {
const struct bio_vec *prev_bv = bios[i-1]->bi_io_vec;
page_offset = prev_bv->bv_offset + prev_bv->bv_len;
page = prev_bv->bv_page;
}
if (page_offset == PAGE_SIZE) {
page = alloc_page(__GFP_HIGHMEM);
if (page == NULL)
goto out_bio_put;
page_offset = 0;
} else {
get_page(page);
}
offset = S2W(enr);
drbd_bm_get_lel(mdev, offset,
min_t(size_t, S2W(8), drbd_bm_words(mdev) - offset),
kmap(page) + page_offset);
kunmap(page);
bio->bi_private = wc;
bio->bi_end_io = atodb_endio;
bio->bi_bdev = mdev->ldev->md_bdev;
bio->bi_sector = on_disk_sector;
if (bio_add_page(bio, page, 4096, page_offset) != 4096)
goto out_put_page;
atomic_inc(&wc->count);
/* we already know that we may do this...
* get_ldev_if_state(mdev,D_ATTACHING);
* just get the extra reference, so that the local_cnt reflects
* the number of pending IO requests DRBD at its backing device.
*/
atomic_inc(&mdev->local_cnt);
bios[i] = bio;
return 0;
out_put_page:
err = -EINVAL;
put_page(page);
out_bio_put:
bio_put(bio);
return err;
}
/**
* drbd_al_to_on_disk_bm() - * Writes bitmap parts covered by active AL extents
* @mdev: DRBD device.
*
* Called when we detach (unconfigure) local storage,
* or when we go from R_PRIMARY to R_SECONDARY role.
*/
void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
{
int i, nr_elements;
unsigned int enr;
struct bio **bios;
struct drbd_atodb_wait wc;
ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING))
return; /* sorry, I don't have any act_log etc... */
wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
nr_elements = mdev->act_log->nr_elements;
/* GFP_KERNEL, we are not in anyone's write-out path */
bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL);
if (!bios)
goto submit_one_by_one;
atomic_set(&wc.count, 0);
init_completion(&wc.io_done);
wc.mdev = mdev;
wc.error = 0;
for (i = 0; i < nr_elements; i++) {
enr = lc_element_by_index(mdev->act_log, i)->lc_number;
if (enr == LC_FREE)
continue;
/* next statement also does atomic_inc wc.count and local_cnt */
if (atodb_prepare_unless_covered(mdev, bios,
enr/AL_EXT_PER_BM_SECT,
&wc))
goto free_bios_submit_one_by_one;
}
/* unnecessary optimization? */
lc_unlock(mdev->act_log);
wake_up(&mdev->al_wait);
/* all prepared, submit them */
for (i = 0; i < nr_elements; i++) {
if (bios[i] == NULL)
break;
if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) {
bios[i]->bi_rw = WRITE;
bio_endio(bios[i], -EIO);
} else {
submit_bio(WRITE, bios[i]);
}
}
/* always (try to) flush bitmap to stable storage */
drbd_md_flush(mdev);
/* In case we did not submit a single IO do not wait for
* them to complete. ( Because we would wait forever here. )
*
* In case we had IOs and they are already complete, there
* is not point in waiting anyways.
* Therefore this if () ... */
if (atomic_read(&wc.count))
wait_for_completion(&wc.io_done);
put_ldev(mdev);
kfree(bios);
return;
free_bios_submit_one_by_one:
/* free everything by calling the endio callback directly. */
for (i = 0; i < nr_elements && bios[i]; i++)
bio_endio(bios[i], 0);
kfree(bios);
submit_one_by_one:
dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n");
for (i = 0; i < mdev->act_log->nr_elements; i++) {
enr = lc_element_by_index(mdev->act_log, i)->lc_number;
if (enr == LC_FREE)
continue;
/* Really slow: if we have al-extents 16..19 active,
* sector 4 will be written four times! Synchronous! */
drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT);
}
lc_unlock(mdev->act_log);
wake_up(&mdev->al_wait);
put_ldev(mdev);
}
/**
* drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents
* @mdev: DRBD device.
@@ -809,7 +621,7 @@ static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused
return 1;
}
drbd_bm_write_sect(mdev, udw->enr);
drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr));
put_ldev(mdev);
kfree(udw);
@@ -889,7 +701,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
dev_warn(DEV, "Kicking resync_lru element enr=%u "
"out with rs_failed=%d\n",
ext->lce.lc_number, ext->rs_failed);
set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
}
ext->rs_left = rs_left;
ext->rs_failed = success ? 0 : count;
@@ -908,7 +719,6 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
drbd_queue_work_front(&mdev->data.work, &udw->w);
} else {
dev_warn(DEV, "Could not kmalloc an udw\n");
set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
}
}
} else {
@@ -919,6 +729,22 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
}
}
void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go)
{
unsigned long now = jiffies;
unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
if (mdev->rs_mark_left[mdev->rs_last_mark] != still_to_go &&
mdev->state.conn != C_PAUSED_SYNC_T &&
mdev->state.conn != C_PAUSED_SYNC_S) {
mdev->rs_mark_time[next] = now;
mdev->rs_mark_left[next] = still_to_go;
mdev->rs_last_mark = next;
}
}
}
/* clear the bit corresponding to the piece of storage in question:
* size byte of data starting from sector. Only clear a bits of the affected
* one ore more _aligned_ BM_BLOCK_SIZE blocks.
@@ -936,7 +762,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
int wake_up = 0;
unsigned long flags;
if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
(unsigned long long)sector, size);
return;
@@ -969,21 +795,9 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
*/
count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
if (count && get_ldev(mdev)) {
unsigned long now = jiffies;
unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
unsigned long tw = drbd_bm_total_weight(mdev);
if (mdev->rs_mark_left[mdev->rs_last_mark] != tw &&
mdev->state.conn != C_PAUSED_SYNC_T &&
mdev->state.conn != C_PAUSED_SYNC_S) {
mdev->rs_mark_time[next] = now;
mdev->rs_mark_left[next] = tw;
mdev->rs_last_mark = next;
}
}
drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev));
spin_lock_irqsave(&mdev->al_lock, flags);
drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
drbd_try_clear_on_disk_bm(mdev, sector, count, true);
spin_unlock_irqrestore(&mdev->al_lock, flags);
/* just wake_up unconditional now, various lc_chaged(),
@@ -998,27 +812,27 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
/*
* this is intended to set one request worth of data out of sync.
* affects at least 1 bit,
* and at most 1+DRBD_MAX_SEGMENT_SIZE/BM_BLOCK_SIZE bits.
* and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
*
* called by tl_clear and drbd_send_dblock (==drbd_make_request).
* so this can be _any_ process.
*/
void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
const char *file, const unsigned int line)
{
unsigned long sbnr, ebnr, lbnr, flags;
sector_t esector, nr_sectors;
unsigned int enr, count;
unsigned int enr, count = 0;
struct lc_element *e;
if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
dev_err(DEV, "sector: %llus, size: %d\n",
(unsigned long long)sector, size);
return;
return 0;
}
if (!get_ldev(mdev))
return; /* no disk, no metadata, no bitmap to set bits in */
return 0; /* no disk, no metadata, no bitmap to set bits in */
nr_sectors = drbd_get_capacity(mdev->this_bdev);
esector = sector + (size >> 9) - 1;
@@ -1048,6 +862,8 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
out:
put_ldev(mdev);
return count;
}
static
@@ -1128,7 +944,10 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
unsigned int enr = BM_SECT_TO_EXT(sector);
struct bm_extent *bm_ext;
int i, sig;
int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
200 times -> 20 seconds. */
retry:
sig = wait_event_interruptible(mdev->al_wait,
(bm_ext = _bme_get(mdev, enr)));
if (sig)
@@ -1139,16 +958,25 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
sig = wait_event_interruptible(mdev->al_wait,
!_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i));
if (sig) {
!_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i) ||
test_bit(BME_PRIORITY, &bm_ext->flags));
if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
spin_lock_irq(&mdev->al_lock);
if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
clear_bit(BME_NO_WRITES, &bm_ext->flags);
bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
mdev->resync_locked--;
wake_up(&mdev->al_wait);
}
spin_unlock_irq(&mdev->al_lock);
return -EINTR;
if (sig)
return -EINTR;
if (schedule_timeout_interruptible(HZ/10))
return -EINTR;
if (sa && --sa == 0)
dev_warn(DEV,"drbd_rs_begin_io() stepped aside for 20sec."
"Resync stalled?\n");
goto retry;
}
}
set_bit(BME_LOCKED, &bm_ext->flags);
@@ -1291,8 +1119,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
}
if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
clear_bit(BME_LOCKED, &bm_ext->flags);
clear_bit(BME_NO_WRITES, &bm_ext->flags);
bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
mdev->resync_locked--;
wake_up(&mdev->al_wait);
}
@@ -1383,7 +1210,7 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
sector_t esector, nr_sectors;
int wake_up = 0;
if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
(unsigned long long)sector, size);
return;
@@ -1420,7 +1247,7 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
mdev->rs_failed += count;
if (get_ldev(mdev)) {
drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE);
drbd_try_clear_on_disk_bm(mdev, sector, count, false);
put_ldev(mdev);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -288,10 +288,11 @@ void drbd_try_outdate_peer_async(struct drbd_conf *mdev)
dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
}
int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
enum drbd_state_rv
drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
{
const int max_tries = 4;
int r = 0;
enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
int try = 0;
int forced = 0;
union drbd_state mask, val;
@@ -306,17 +307,17 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
val.i = 0; val.role = new_role;
while (try++ < max_tries) {
r = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
/* in case we first succeeded to outdate,
* but now suddenly could establish a connection */
if (r == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
val.pdsk = 0;
mask.pdsk = 0;
continue;
}
if (r == SS_NO_UP_TO_DATE_DISK && force &&
if (rv == SS_NO_UP_TO_DATE_DISK && force &&
(mdev->state.disk < D_UP_TO_DATE &&
mdev->state.disk >= D_INCONSISTENT)) {
mask.disk = D_MASK;
@@ -325,7 +326,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
continue;
}
if (r == SS_NO_UP_TO_DATE_DISK &&
if (rv == SS_NO_UP_TO_DATE_DISK &&
mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
nps = drbd_try_outdate_peer(mdev);
@@ -341,9 +342,9 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
continue;
}
if (r == SS_NOTHING_TO_DO)
if (rv == SS_NOTHING_TO_DO)
goto fail;
if (r == SS_PRIMARY_NOP && mask.pdsk == 0) {
if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
nps = drbd_try_outdate_peer(mdev);
if (force && nps > D_OUTDATED) {
@@ -356,25 +357,24 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
continue;
}
if (r == SS_TWO_PRIMARIES) {
if (rv == SS_TWO_PRIMARIES) {
/* Maybe the peer is detected as dead very soon...
retry at most once more in this case. */
__set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout((mdev->net_conf->ping_timeo+1)*HZ/10);
schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10);
if (try < max_tries)
try = max_tries - 1;
continue;
}
if (r < SS_SUCCESS) {
r = _drbd_request_state(mdev, mask, val,
if (rv < SS_SUCCESS) {
rv = _drbd_request_state(mdev, mask, val,
CS_VERBOSE + CS_WAIT_COMPLETE);
if (r < SS_SUCCESS)
if (rv < SS_SUCCESS)
goto fail;
}
break;
}
if (r < SS_SUCCESS)
if (rv < SS_SUCCESS)
goto fail;
if (forced)
@@ -384,7 +384,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
if (new_role == R_SECONDARY) {
set_disk_ro(mdev->vdisk, TRUE);
set_disk_ro(mdev->vdisk, true);
if (get_ldev(mdev)) {
mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
put_ldev(mdev);
@@ -394,7 +394,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
mdev->net_conf->want_lose = 0;
put_net_conf(mdev);
}
set_disk_ro(mdev->vdisk, FALSE);
set_disk_ro(mdev->vdisk, false);
if (get_ldev(mdev)) {
if (((mdev->state.conn < C_CONNECTED ||
mdev->state.pdsk <= D_FAILED)
@@ -406,10 +406,8 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
}
}
if ((new_role == R_SECONDARY) && get_ldev(mdev)) {
drbd_al_to_on_disk_bm(mdev);
put_ldev(mdev);
}
/* writeout of activity log covered areas of the bitmap
* to stable storage done in after state change already */
if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
/* if this was forced, we should consider sync */
@@ -423,7 +421,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
fail:
mutex_unlock(&mdev->state_mutex);
return r;
return rv;
}
static struct drbd_conf *ensure_mdev(int minor, int create)
@@ -528,17 +526,19 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
}
}
/* input size is expected to be in KB */
char *ppsize(char *buf, unsigned long long size)
{
/* Needs 9 bytes at max. */
/* Needs 9 bytes at max including trailing NUL:
* -1ULL ==> "16384 EB" */
static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
int base = 0;
while (size >= 10000) {
while (size >= 10000 && base < sizeof(units)-1) {
/* shift + round */
size = (size >> 10) + !!(size & (1<<9));
base++;
}
sprintf(buf, "%lu %cB", (long)size, units[base]);
sprintf(buf, "%u %cB", (unsigned)size, units[base]);
return buf;
}
@@ -642,11 +642,19 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_
|| prev_size != mdev->ldev->md.md_size_sect;
if (la_size_changed || md_moved) {
int err;
drbd_al_shrink(mdev); /* All extents inactive. */
dev_info(DEV, "Writing the whole bitmap, %s\n",
la_size_changed && md_moved ? "size changed and md moved" :
la_size_changed ? "size changed" : "md moved");
rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */
/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
err = drbd_bitmap_io(mdev, &drbd_bm_write,
"size changed", BM_LOCKED_MASK);
if (err) {
rv = dev_size_error;
goto out;
}
drbd_md_mark_dirty(mdev);
}
@@ -765,22 +773,21 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
return 0;
}
void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local)
void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local)
{
struct request_queue * const q = mdev->rq_queue;
struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
int max_segments = mdev->ldev->dc.max_bio_bvecs;
int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s);
blk_queue_max_hw_sectors(q, max_seg_s >> 9);
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
blk_queue_max_segment_size(q, max_seg_s);
blk_queue_logical_block_size(q, 512);
blk_queue_segment_boundary(q, PAGE_SIZE-1);
blk_stack_limits(&q->limits, &b->limits, 0);
blk_queue_max_hw_sectors(q, max_hw_sectors);
/* This is the workaround for "bio would need to, but cannot, be split" */
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
blk_queue_stack_limits(q, b);
dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q));
dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9);
if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
@@ -850,7 +857,7 @@ static void drbd_suspend_al(struct drbd_conf *mdev)
static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
struct drbd_nl_cfg_reply *reply)
{
enum drbd_ret_codes retcode;
enum drbd_ret_code retcode;
enum determine_dev_size dd;
sector_t max_possible_sectors;
sector_t min_md_device_sectors;
@@ -858,8 +865,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
struct block_device *bdev;
struct lru_cache *resync_lru = NULL;
union drbd_state ns, os;
unsigned int max_seg_s;
int rv;
unsigned int max_bio_size;
enum drbd_state_rv rv;
int cp_discovered = 0;
int logical_block_size;
@@ -1005,9 +1012,10 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
/* and for any other previously queued work */
drbd_flush_workqueue(mdev);
retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
retcode = rv; /* FIXME: Type mismatch. */
drbd_resume_io(mdev);
if (retcode < SS_SUCCESS)
if (rv < SS_SUCCESS)
goto fail;
if (!get_ldev_if_state(mdev, D_ATTACHING))
@@ -1109,20 +1117,20 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
mdev->read_cnt = 0;
mdev->writ_cnt = 0;
max_seg_s = DRBD_MAX_SEGMENT_SIZE;
max_bio_size = DRBD_MAX_BIO_SIZE;
if (mdev->state.conn == C_CONNECTED) {
/* We are Primary, Connected, and now attach a new local
* backing store. We must not increase the user visible maximum
* bio size on this device to something the peer may not be
* able to handle. */
if (mdev->agreed_pro_version < 94)
max_seg_s = queue_max_segment_size(mdev->rq_queue);
max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
else if (mdev->agreed_pro_version == 94)
max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
/* else: drbd 8.3.9 and later, stay with default */
}
drbd_setup_queue_param(mdev, max_seg_s);
drbd_setup_queue_param(mdev, max_bio_size);
/* If I am currently not R_PRIMARY,
* but meta data primary indicator is set,
@@ -1154,12 +1162,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
dev_info(DEV, "Assuming that all blocks are out of sync "
"(aka FullSync)\n");
if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) {
if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
"set_n_write from attaching", BM_LOCKED_MASK)) {
retcode = ERR_IO_MD_DISK;
goto force_diskless_dec;
}
} else {
if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) {
if (drbd_bitmap_io(mdev, &drbd_bm_read,
"read from attaching", BM_LOCKED_MASK) < 0) {
retcode = ERR_IO_MD_DISK;
goto force_diskless_dec;
}
@@ -1167,7 +1177,11 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
if (cp_discovered) {
drbd_al_apply_to_bm(mdev);
drbd_al_to_on_disk_bm(mdev);
if (drbd_bitmap_io(mdev, &drbd_bm_write,
"crashed primary apply AL", BM_LOCKED_MASK)) {
retcode = ERR_IO_MD_DISK;
goto force_diskless_dec;
}
}
if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
@@ -1279,7 +1293,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
struct drbd_nl_cfg_reply *reply)
{
int i, ns;
enum drbd_ret_codes retcode;
enum drbd_ret_code retcode;
struct net_conf *new_conf = NULL;
struct crypto_hash *tfm = NULL;
struct crypto_hash *integrity_w_tfm = NULL;
@@ -1324,6 +1338,8 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
new_conf->wire_protocol = DRBD_PROT_C;
new_conf->ping_timeo = DRBD_PING_TIMEO_DEF;
new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF;
new_conf->on_congestion = DRBD_ON_CONGESTION_DEF;
new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF;
if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) {
retcode = ERR_MANDATORY_TAG;
@@ -1345,6 +1361,11 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
}
}
if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) {
retcode = ERR_CONG_NOT_PROTO_A;
goto fail;
}
if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
retcode = ERR_DISCARD;
goto fail;
@@ -1525,6 +1546,21 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
struct drbd_nl_cfg_reply *reply)
{
int retcode;
struct disconnect dc;
memset(&dc, 0, sizeof(struct disconnect));
if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) {
retcode = ERR_MANDATORY_TAG;
goto fail;
}
if (dc.force) {
spin_lock_irq(&mdev->req_lock);
if (mdev->state.conn >= C_WF_CONNECTION)
_drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL);
spin_unlock_irq(&mdev->req_lock);
goto done;
}
retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED);
@@ -1842,6 +1878,10 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
{
int retcode;
/* If there is still bitmap IO pending, probably because of a previous
* resync just being finished, wait for it before requesting a new resync. */
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION)
@@ -1877,6 +1917,10 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
{
int retcode;
/* If there is still bitmap IO pending, probably because of a previous
* resync just being finished, wait for it before requesting a new resync. */
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
if (retcode < SS_SUCCESS) {
@@ -1885,9 +1929,9 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
into a full resync. */
retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
if (retcode >= SS_SUCCESS) {
/* open coded drbd_bitmap_io() */
if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
"set_n_write from invalidate_peer"))
"set_n_write from invalidate_peer",
BM_LOCKED_SET_ALLOWED))
retcode = ERR_IO_MD_DISK;
}
} else
@@ -1914,9 +1958,17 @@ static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
struct drbd_nl_cfg_reply *reply)
{
int retcode = NO_ERROR;
union drbd_state s;
if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO)
retcode = ERR_PAUSE_IS_CLEAR;
if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
s = mdev->state;
if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
} else {
retcode = ERR_PAUSE_IS_CLEAR;
}
}
reply->ret_code = retcode;
return 0;
@@ -2054,6 +2106,11 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
reply->ret_code = ERR_MANDATORY_TAG;
return 0;
}
/* If there is still bitmap IO pending, e.g. previous resync or verify
* just being finished, wait for it before requesting a new resync. */
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
/* w_make_ov_request expects position to be aligned */
mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
@@ -2097,7 +2154,8 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */
if (args.clear_bm) {
err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid");
err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
"clear_n_write from new_c_uuid", BM_LOCKED_MASK);
if (err) {
dev_err(DEV, "Writing bitmap failed with %d\n",err);
retcode = ERR_IO_MD_DISK;
@@ -2105,6 +2163,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
if (skip_initial_sync) {
drbd_send_uuids_skip_initial_sync(mdev);
_drbd_uuid_set(mdev, UI_BITMAP, 0);
drbd_print_uuids(mdev, "cleared bitmap UUID");
spin_lock_irq(&mdev->req_lock);
_drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
CS_VERBOSE, NULL);
@@ -2189,7 +2248,8 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
goto fail;
}
if (nlp->packet_type >= P_nl_after_last_packet) {
if (nlp->packet_type >= P_nl_after_last_packet ||
nlp->packet_type == P_return_code_only) {
retcode = ERR_PACKET_NR;
goto fail;
}
@@ -2205,7 +2265,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
reply_size += cm->reply_body_size;
/* allocation not in the IO path, cqueue thread context */
cn_reply = kmalloc(reply_size, GFP_KERNEL);
cn_reply = kzalloc(reply_size, GFP_KERNEL);
if (!cn_reply) {
retcode = ERR_NOMEM;
goto fail;
@@ -2213,7 +2273,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
reply->packet_type =
cm->reply_body_size ? nlp->packet_type : P_nl_after_last_packet;
cm->reply_body_size ? nlp->packet_type : P_return_code_only;
reply->minor = nlp->drbd_minor;
reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */
/* reply->tag_list; might be modified by cm->function. */
@@ -2376,7 +2436,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
/* receiver thread context, which is not in the writeout path (of this node),
* but may be in the writeout path of the _other_ node.
* GFP_NOIO to avoid potential "distributed deadlock". */
cn_reply = kmalloc(
cn_reply = kzalloc(
sizeof(struct cn_msg)+
sizeof(struct drbd_nl_cfg_reply)+
sizeof(struct dump_ee_tag_len_struct)+
@@ -2398,10 +2458,11 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
tl = tl_add_int(tl, T_ee_sector, &e->sector);
tl = tl_add_int(tl, T_ee_block_id, &e->block_id);
/* dump the first 32k */
len = min_t(unsigned, e->size, 32 << 10);
put_unaligned(T_ee_data, tl++);
put_unaligned(e->size, tl++);
put_unaligned(len, tl++);
len = e->size;
page = e->pages;
page_chain_for_each(page) {
void *d = kmap_atomic(page, KM_USER0);
@@ -2410,6 +2471,8 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
kunmap_atomic(d, KM_USER0);
tl = (unsigned short*)((char*)tl + l);
len -= l;
if (len == 0)
break;
}
put_unaligned(TT_END, tl++); /* Close the tag list */
@@ -2508,6 +2571,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
(struct drbd_nl_cfg_reply *)cn_reply->data;
int rr;
memset(buffer, 0, sizeof(buffer));
cn_reply->id = req->id;
cn_reply->seq = req->seq;
@@ -2515,6 +2579,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
cn_reply->len = sizeof(struct drbd_nl_cfg_reply);
cn_reply->flags = 0;
reply->packet_type = P_return_code_only;
reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
reply->ret_code = ret_code;

View File

@@ -34,6 +34,7 @@
#include "drbd_int.h"
static int drbd_proc_open(struct inode *inode, struct file *file);
static int drbd_proc_release(struct inode *inode, struct file *file);
struct proc_dir_entry *drbd_proc;
@@ -42,9 +43,22 @@ const struct file_operations drbd_proc_fops = {
.open = drbd_proc_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.release = drbd_proc_release,
};
void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
{
/* v is in kB/sec. We don't expect TiByte/sec yet. */
if (unlikely(v >= 1000000)) {
/* cool: > GiByte/s */
seq_printf(seq, "%ld,", v / 1000000);
v /= 1000000;
seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000);
} else if (likely(v >= 1000))
seq_printf(seq, "%ld,%03ld", v/1000, v % 1000);
else
seq_printf(seq, "%ld", v);
}
/*lge
* progress bars shamelessly adapted from driver/md/md.c
@@ -71,10 +85,15 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
seq_printf(seq, ".");
seq_printf(seq, "] ");
seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10);
/* if more than 1 GB display in MB */
if (mdev->rs_total > 0x100000L)
seq_printf(seq, "(%lu/%lu)M\n\t",
if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
seq_printf(seq, "verified:");
else
seq_printf(seq, "sync'ed:");
seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);
/* if more than a few GB, display in MB */
if (mdev->rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
seq_printf(seq, "(%lu/%lu)M",
(unsigned long) Bit2KB(rs_left >> 10),
(unsigned long) Bit2KB(mdev->rs_total >> 10));
else
@@ -94,6 +113,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
/* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is
* at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at
* least DRBD_SYNC_MARK_STEP time before it will be modified. */
/* ------------------------ ~18s average ------------------------ */
i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS;
dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
@@ -107,14 +127,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
seq_printf(seq, "finish: %lu:%02lu:%02lu",
rt / 3600, (rt % 3600) / 60, rt % 60);
/* current speed average over (SYNC_MARKS * SYNC_MARK_STEP) jiffies */
dbdt = Bit2KB(db/dt);
if (dbdt > 1000)
seq_printf(seq, " speed: %ld,%03ld",
dbdt/1000, dbdt % 1000);
else
seq_printf(seq, " speed: %ld", dbdt);
seq_printf(seq, " speed: ");
seq_printf_with_thousands_grouping(seq, dbdt);
seq_printf(seq, " (");
/* ------------------------- ~3s average ------------------------ */
if (proc_details >= 1) {
/* this is what drbd_rs_should_slow_down() uses */
i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
if (!dt)
dt++;
db = mdev->rs_mark_left[i] - rs_left;
dbdt = Bit2KB(db/dt);
seq_printf_with_thousands_grouping(seq, dbdt);
seq_printf(seq, " -- ");
}
/* --------------------- long term average ---------------------- */
/* mean speed since syncer started
* we do account for PausedSync periods */
dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
@@ -122,20 +152,34 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
dt = 1;
db = mdev->rs_total - rs_left;
dbdt = Bit2KB(db/dt);
if (dbdt > 1000)
seq_printf(seq, " (%ld,%03ld)",
dbdt/1000, dbdt % 1000);
else
seq_printf(seq, " (%ld)", dbdt);
seq_printf_with_thousands_grouping(seq, dbdt);
seq_printf(seq, ")");
if (mdev->state.conn == C_SYNC_TARGET) {
if (mdev->c_sync_rate > 1000)
seq_printf(seq, " want: %d,%03d",
mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000);
else
seq_printf(seq, " want: %d", mdev->c_sync_rate);
if (mdev->state.conn == C_SYNC_TARGET ||
mdev->state.conn == C_VERIFY_S) {
seq_printf(seq, " want: ");
seq_printf_with_thousands_grouping(seq, mdev->c_sync_rate);
}
seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
if (proc_details >= 1) {
/* 64 bit:
* we convert to sectors in the display below. */
unsigned long bm_bits = drbd_bm_bits(mdev);
unsigned long bit_pos;
if (mdev->state.conn == C_VERIFY_S ||
mdev->state.conn == C_VERIFY_T)
bit_pos = bm_bits - mdev->ov_left;
else
bit_pos = mdev->bm_resync_fo;
/* Total sectors may be slightly off for oddly
* sized devices. So what. */
seq_printf(seq,
"\t%3d%% sector pos: %llu/%llu\n",
(int)(bit_pos / (bm_bits/100+1)),
(unsigned long long)bit_pos * BM_SECT_PER_BIT,
(unsigned long long)bm_bits * BM_SECT_PER_BIT);
}
}
static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
@@ -232,20 +276,16 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
mdev->epochs,
write_ordering_chars[mdev->write_ordering]
);
seq_printf(seq, " oos:%lu\n",
Bit2KB(drbd_bm_total_weight(mdev)));
seq_printf(seq, " oos:%llu\n",
Bit2KB((unsigned long long)
drbd_bm_total_weight(mdev)));
}
if (mdev->state.conn == C_SYNC_SOURCE ||
mdev->state.conn == C_SYNC_TARGET)
mdev->state.conn == C_SYNC_TARGET ||
mdev->state.conn == C_VERIFY_S ||
mdev->state.conn == C_VERIFY_T)
drbd_syncer_progress(mdev, seq);
if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
seq_printf(seq, "\t%3d%% %lu/%lu\n",
(int)((mdev->rs_total-mdev->ov_left) /
(mdev->rs_total/100+1)),
mdev->rs_total - mdev->ov_left,
mdev->rs_total);
if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) {
lc_seq_printf_stats(seq, mdev->resync);
lc_seq_printf_stats(seq, mdev->act_log);
@@ -265,7 +305,15 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
static int drbd_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, drbd_seq_show, PDE(inode)->data);
if (try_module_get(THIS_MODULE))
return single_open(file, drbd_seq_show, PDE(inode)->data);
return -ENODEV;
}
static int drbd_proc_release(struct inode *inode, struct file *file)
{
module_put(THIS_MODULE);
return single_release(inode, file);
}
/* PROC FS stuff end */

File diff suppressed because it is too large Load Diff

View File

@@ -140,9 +140,14 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
struct hlist_node *n;
struct hlist_head *slot;
/* before we can signal completion to the upper layers,
* we may need to close the current epoch */
/* Before we can signal completion to the upper layers,
* we may need to close the current epoch.
* We can skip this, if this request has not even been sent, because we
* did not have a fully established connection yet/anymore, during
* bitmap exchange, or while we are C_AHEAD due to congestion policy.
*/
if (mdev->state.conn >= C_CONNECTED &&
(s & RQ_NET_SENT) != 0 &&
req->epoch == mdev->newest_tle->br_number)
queue_barrier(mdev);
@@ -440,7 +445,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING;
__drbd_chk_io_error(mdev, FALSE);
__drbd_chk_io_error(mdev, false);
_req_may_be_done_not_susp(req, m);
put_ldev(mdev);
break;
@@ -461,7 +466,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
D_ASSERT(!(req->rq_state & RQ_NET_MASK));
__drbd_chk_io_error(mdev, FALSE);
__drbd_chk_io_error(mdev, false);
put_ldev(mdev);
/* no point in retrying if there is no good remote data,
@@ -545,6 +550,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
break;
case queue_for_send_oos:
req->rq_state |= RQ_NET_QUEUED;
req->w.cb = w_send_oos;
drbd_queue_work(&mdev->data.work, &req->w);
break;
case oos_handed_to_network:
/* actually the same */
case send_canceled:
/* treat it the same */
case send_failed:
@@ -558,6 +571,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
case handed_over_to_network:
/* assert something? */
if (bio_data_dir(req->master_bio) == WRITE)
atomic_add(req->size>>9, &mdev->ap_in_flight);
if (bio_data_dir(req->master_bio) == WRITE &&
mdev->net_conf->wire_protocol == DRBD_PROT_A) {
/* this is what is dangerous about protocol A:
@@ -591,6 +607,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
dec_ap_pending(mdev);
req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
req->rq_state |= RQ_NET_DONE;
if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE)
atomic_sub(req->size>>9, &mdev->ap_in_flight);
/* if it is still queued, we may not complete it here.
* it will be canceled soon. */
if (!(req->rq_state & RQ_NET_QUEUED))
@@ -628,14 +647,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state |= RQ_NET_OK;
D_ASSERT(req->rq_state & RQ_NET_PENDING);
dec_ap_pending(mdev);
atomic_sub(req->size>>9, &mdev->ap_in_flight);
req->rq_state &= ~RQ_NET_PENDING;
_req_may_be_done_not_susp(req, m);
break;
case neg_acked:
/* assert something? */
if (req->rq_state & RQ_NET_PENDING)
if (req->rq_state & RQ_NET_PENDING) {
dec_ap_pending(mdev);
atomic_sub(req->size>>9, &mdev->ap_in_flight);
}
req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
req->rq_state |= RQ_NET_DONE;
@@ -690,8 +712,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
dev_err(DEV, "FIXME (barrier_acked but pending)\n");
list_move(&req->tl_requests, &mdev->out_of_sequence_requests);
}
D_ASSERT(req->rq_state & RQ_NET_SENT);
req->rq_state |= RQ_NET_DONE;
if ((req->rq_state & RQ_NET_MASK) != 0) {
req->rq_state |= RQ_NET_DONE;
if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
atomic_sub(req->size>>9, &mdev->ap_in_flight);
}
_req_may_be_done(req, m); /* Allowed while state.susp */
break;
@@ -738,14 +763,14 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s
return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
}
static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
{
const int rw = bio_rw(bio);
const int size = bio->bi_size;
const sector_t sector = bio->bi_sector;
struct drbd_tl_epoch *b = NULL;
struct drbd_request *req;
int local, remote;
int local, remote, send_oos = 0;
int err = -EIO;
int ret = 0;
@@ -759,6 +784,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
bio_endio(bio, -ENOMEM);
return 0;
}
req->start_time = start_time;
local = get_ldev(mdev);
if (!local) {
@@ -808,9 +834,9 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
drbd_al_begin_io(mdev, sector);
}
remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
(mdev->state.pdsk == D_INCONSISTENT &&
mdev->state.conn >= C_CONNECTED));
remote = remote && drbd_should_do_remote(mdev->state);
send_oos = rw == WRITE && drbd_should_send_oos(mdev->state);
D_ASSERT(!(remote && send_oos));
if (!(local || remote) && !is_susp(mdev->state)) {
if (__ratelimit(&drbd_ratelimit_state))
@@ -824,7 +850,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
* but there is a race between testing the bit and pointer outside the
* spinlock, and grabbing the spinlock.
* if we lost that race, we retry. */
if (rw == WRITE && remote &&
if (rw == WRITE && (remote || send_oos) &&
mdev->unused_spare_tle == NULL &&
test_bit(CREATE_BARRIER, &mdev->flags)) {
allocate_barrier:
@@ -842,18 +868,19 @@ allocate_barrier:
if (is_susp(mdev->state)) {
/* If we got suspended, use the retry mechanism of
generic_make_request() to restart processing of this
bio. In the next call to drbd_make_request_26
bio. In the next call to drbd_make_request
we sleep in inc_ap_bio() */
ret = 1;
spin_unlock_irq(&mdev->req_lock);
goto fail_free_complete;
}
if (remote) {
remote = (mdev->state.pdsk == D_UP_TO_DATE ||
(mdev->state.pdsk == D_INCONSISTENT &&
mdev->state.conn >= C_CONNECTED));
if (!remote)
if (remote || send_oos) {
remote = drbd_should_do_remote(mdev->state);
send_oos = rw == WRITE && drbd_should_send_oos(mdev->state);
D_ASSERT(!(remote && send_oos));
if (!(remote || send_oos))
dev_warn(DEV, "lost connection while grabbing the req_lock!\n");
if (!(local || remote)) {
dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
@@ -866,7 +893,7 @@ allocate_barrier:
mdev->unused_spare_tle = b;
b = NULL;
}
if (rw == WRITE && remote &&
if (rw == WRITE && (remote || send_oos) &&
mdev->unused_spare_tle == NULL &&
test_bit(CREATE_BARRIER, &mdev->flags)) {
/* someone closed the current epoch
@@ -889,7 +916,7 @@ allocate_barrier:
* barrier packet. To get the write ordering right, we only have to
* make sure that, if this is a write request and it triggered a
* barrier packet, this request is queued within the same spinlock. */
if (remote && mdev->unused_spare_tle &&
if ((remote || send_oos) && mdev->unused_spare_tle &&
test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
_tl_add_barrier(mdev, mdev->unused_spare_tle);
mdev->unused_spare_tle = NULL;
@@ -937,6 +964,34 @@ allocate_barrier:
? queue_for_net_write
: queue_for_net_read);
}
if (send_oos && drbd_set_out_of_sync(mdev, sector, size))
_req_mod(req, queue_for_send_oos);
if (remote &&
mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) {
int congested = 0;
if (mdev->net_conf->cong_fill &&
atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) {
dev_info(DEV, "Congestion-fill threshold reached\n");
congested = 1;
}
if (mdev->act_log->used >= mdev->net_conf->cong_extents) {
dev_info(DEV, "Congestion-extents threshold reached\n");
congested = 1;
}
if (congested) {
queue_barrier(mdev); /* last barrier, after mirrored writes */
if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
_drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
else /*mdev->net_conf->on_congestion == OC_DISCONNECT */
_drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
}
}
spin_unlock_irq(&mdev->req_lock);
kfree(b); /* if someone else has beaten us to it... */
@@ -949,9 +1004,9 @@ allocate_barrier:
* stable storage, and this is a WRITE, we may not even submit
* this bio. */
if (get_ldev(mdev)) {
if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR
: rw == READ ? DRBD_FAULT_DT_RD
: DRBD_FAULT_DT_RA))
if (drbd_insert_fault(mdev, rw == WRITE ? DRBD_FAULT_DT_WR
: rw == READ ? DRBD_FAULT_DT_RD
: DRBD_FAULT_DT_RA))
bio_endio(req->private_bio, -EIO);
else
generic_make_request(req->private_bio);
@@ -1018,16 +1073,19 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
return 0;
}
int drbd_make_request_26(struct request_queue *q, struct bio *bio)
int drbd_make_request(struct request_queue *q, struct bio *bio)
{
unsigned int s_enr, e_enr;
struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
unsigned long start_time;
if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) {
bio_endio(bio, -EPERM);
return 0;
}
start_time = jiffies;
/*
* what we "blindly" assume:
*/
@@ -1042,12 +1100,12 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
if (likely(s_enr == e_enr)) {
inc_ap_bio(mdev, 1);
return drbd_make_request_common(mdev, bio);
return drbd_make_request_common(mdev, bio, start_time);
}
/* can this bio be split generically?
* Maybe add our own split-arbitrary-bios function. */
if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_SEGMENT_SIZE) {
if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
/* rather error out here than BUG in bio_split */
dev_err(DEV, "bio would need to, but cannot, be split: "
"(vcnt=%u,idx=%u,size=%u,sector=%llu)\n",
@@ -1069,11 +1127,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
const int sps = 1 << HT_SHIFT; /* sectors per slot */
const int mask = sps - 1;
const sector_t first_sectors = sps - (sect & mask);
bp = bio_split(bio,
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
bio_split_pool,
#endif
first_sectors);
bp = bio_split(bio, first_sectors);
/* we need to get a "reference count" (ap_bio_cnt)
* to avoid races with the disconnect/reconnect/suspend code.
@@ -1084,10 +1138,10 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
D_ASSERT(e_enr == s_enr + 1);
while (drbd_make_request_common(mdev, &bp->bio1))
while (drbd_make_request_common(mdev, &bp->bio1, start_time))
inc_ap_bio(mdev, 1);
while (drbd_make_request_common(mdev, &bp->bio2))
while (drbd_make_request_common(mdev, &bp->bio2, start_time))
inc_ap_bio(mdev, 1);
dec_ap_bio(mdev);
@@ -1098,7 +1152,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
}
/* This is called by bio_add_page(). With this function we reduce
* the number of BIOs that span over multiple DRBD_MAX_SEGMENT_SIZEs
* the number of BIOs that span over multiple DRBD_MAX_BIO_SIZEs
* units (was AL_EXTENTs).
*
* we do the calculation within the lower 32bit of the byte offsets,
@@ -1108,7 +1162,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
* As long as the BIO is empty we have to allow at least one bvec,
* regardless of size and offset. so the resulting bio may still
* cross extent boundaries. those are dealt with (bio_split) in
* drbd_make_request_26.
* drbd_make_request.
*/
int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
{
@@ -1118,8 +1172,8 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
unsigned int bio_size = bvm->bi_size;
int limit, backing_limit;
limit = DRBD_MAX_SEGMENT_SIZE
- ((bio_offset & (DRBD_MAX_SEGMENT_SIZE-1)) + bio_size);
limit = DRBD_MAX_BIO_SIZE
- ((bio_offset & (DRBD_MAX_BIO_SIZE-1)) + bio_size);
if (limit < 0)
limit = 0;
if (bio_size == 0) {
@@ -1136,3 +1190,42 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
}
return limit;
}
void request_timer_fn(unsigned long data)
{
struct drbd_conf *mdev = (struct drbd_conf *) data;
struct drbd_request *req; /* oldest request */
struct list_head *le;
unsigned long et = 0; /* effective timeout = ko_count * timeout */
if (get_net_conf(mdev)) {
et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count;
put_net_conf(mdev);
}
if (!et || mdev->state.conn < C_WF_REPORT_PARAMS)
return; /* Recurring timer stopped */
spin_lock_irq(&mdev->req_lock);
le = &mdev->oldest_tle->requests;
if (list_empty(le)) {
spin_unlock_irq(&mdev->req_lock);
mod_timer(&mdev->request_timer, jiffies + et);
return;
}
le = le->prev;
req = list_entry(le, struct drbd_request, tl_requests);
if (time_is_before_eq_jiffies(req->start_time + et)) {
if (req->rq_state & RQ_NET_PENDING) {
dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
} else {
dev_warn(DEV, "Local backing block device frozen?\n");
mod_timer(&mdev->request_timer, jiffies + et);
}
} else {
mod_timer(&mdev->request_timer, req->start_time + et);
}
spin_unlock_irq(&mdev->req_lock);
}

View File

@@ -82,14 +82,16 @@ enum drbd_req_event {
to_be_submitted,
/* XXX yes, now I am inconsistent...
* these two are not "events" but "actions"
* these are not "events" but "actions"
* oh, well... */
queue_for_net_write,
queue_for_net_read,
queue_for_send_oos,
send_canceled,
send_failed,
handed_over_to_network,
oos_handed_to_network,
connection_lost_while_pending,
read_retry_remote_canceled,
recv_acked_by_peer,
@@ -289,7 +291,6 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
req->epoch = 0;
req->sector = bio_src->bi_sector;
req->size = bio_src->bi_size;
req->start_time = jiffies;
INIT_HLIST_NODE(&req->colision);
INIT_LIST_HEAD(&req->tl_requests);
INIT_LIST_HEAD(&req->w.list);
@@ -321,6 +322,7 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
struct bio_and_error *m);
extern void complete_master_bio(struct drbd_conf *mdev,
struct bio_and_error *m);
extern void request_timer_fn(unsigned long data);
/* use this if you don't want to deal with calling complete_master_bio()
* outside the spinlock, e.g. when walking some list on cleanup. */
@@ -338,23 +340,43 @@ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
return rv;
}
/* completion of master bio is outside of spinlock.
* If you need it irqsave, do it your self!
* Which means: don't use from bio endio callback. */
/* completion of master bio is outside of our spinlock.
* We still may or may not be inside some irqs disabled section
* of the lower level driver completion callback, so we need to
* spin_lock_irqsave here. */
static inline int req_mod(struct drbd_request *req,
enum drbd_req_event what)
{
unsigned long flags;
struct drbd_conf *mdev = req->mdev;
struct bio_and_error m;
int rv;
spin_lock_irq(&mdev->req_lock);
spin_lock_irqsave(&mdev->req_lock, flags);
rv = __req_mod(req, what, &m);
spin_unlock_irq(&mdev->req_lock);
spin_unlock_irqrestore(&mdev->req_lock, flags);
if (m.bio)
complete_master_bio(mdev, &m);
return rv;
}
static inline bool drbd_should_do_remote(union drbd_state s)
{
return s.pdsk == D_UP_TO_DATE ||
(s.pdsk >= D_INCONSISTENT &&
s.conn >= C_WF_BITMAP_T &&
s.conn < C_AHEAD);
/* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
states. */
}
static inline bool drbd_should_send_oos(union drbd_state s)
{
return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
since we enter state C_AHEAD only if proto >= 96 */
}
#endif

View File

@@ -48,6 +48,8 @@ static const char *drbd_conn_s_names[] = {
[C_PAUSED_SYNC_T] = "PausedSyncT",
[C_VERIFY_S] = "VerifyS",
[C_VERIFY_T] = "VerifyT",
[C_AHEAD] = "Ahead",
[C_BEHIND] = "Behind",
};
static const char *drbd_role_s_names[] = {
@@ -92,7 +94,7 @@ static const char *drbd_state_sw_errors[] = {
const char *drbd_conn_str(enum drbd_conns s)
{
/* enums are unsigned... */
return s > C_PAUSED_SYNC_T ? "TOO_LARGE" : drbd_conn_s_names[s];
return s > C_BEHIND ? "TOO_LARGE" : drbd_conn_s_names[s];
}
const char *drbd_role_str(enum drbd_role s)
@@ -105,7 +107,7 @@ const char *drbd_disk_str(enum drbd_disk_state s)
return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s];
}
const char *drbd_set_st_err_str(enum drbd_state_ret_codes err)
const char *drbd_set_st_err_str(enum drbd_state_rv err)
{
return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" :
err > SS_TWO_PRIMARIES ? "TOO_LARGE"

File diff suppressed because it is too large Load Diff

View File

@@ -39,7 +39,7 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev,
return;
}
if (FAULT_ACTIVE(mdev, fault_type))
if (drbd_insert_fault(mdev, fault_type))
bio_endio(bio, -EIO);
else
generic_make_request(bio);

View File

@@ -53,10 +53,10 @@
extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.9"
#define REL_VERSION "8.3.10"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 95
#define PRO_VERSION_MAX 96
enum drbd_io_error_p {
@@ -96,8 +96,14 @@ enum drbd_on_no_data {
OND_SUSPEND_IO
};
enum drbd_on_congestion {
OC_BLOCK,
OC_PULL_AHEAD,
OC_DISCONNECT,
};
/* KEEP the order, do not delete or insert. Only append. */
enum drbd_ret_codes {
enum drbd_ret_code {
ERR_CODE_BASE = 100,
NO_ERROR = 101,
ERR_LOCAL_ADDR = 102,
@@ -146,6 +152,9 @@ enum drbd_ret_codes {
ERR_PERM = 152,
ERR_NEED_APV_93 = 153,
ERR_STONITH_AND_PROT_A = 154,
ERR_CONG_NOT_PROTO_A = 155,
ERR_PIC_AFTER_DEP = 156,
ERR_PIC_PEER_DEP = 157,
/* insert new ones above this line */
AFTER_LAST_ERR_CODE
@@ -199,6 +208,10 @@ enum drbd_conns {
C_VERIFY_T,
C_PAUSED_SYNC_S,
C_PAUSED_SYNC_T,
C_AHEAD,
C_BEHIND,
C_MASK = 31
};
@@ -259,7 +272,7 @@ union drbd_state {
unsigned int i;
};
enum drbd_state_ret_codes {
enum drbd_state_rv {
SS_CW_NO_NEED = 4,
SS_CW_SUCCESS = 3,
SS_NOTHING_TO_DO = 2,
@@ -290,7 +303,7 @@ enum drbd_state_ret_codes {
extern const char *drbd_conn_str(enum drbd_conns);
extern const char *drbd_role_str(enum drbd_role);
extern const char *drbd_disk_str(enum drbd_disk_state);
extern const char *drbd_set_st_err_str(enum drbd_state_ret_codes);
extern const char *drbd_set_st_err_str(enum drbd_state_rv);
#define SHARED_SECRET_MAX 64

View File

@@ -16,7 +16,8 @@
#define DEBUG_RANGE_CHECK 0
#define DRBD_MINOR_COUNT_MIN 1
#define DRBD_MINOR_COUNT_MAX 255
#define DRBD_MINOR_COUNT_MAX 256
#define DRBD_MINOR_COUNT_DEF 32
#define DRBD_DIALOG_REFRESH_MIN 0
#define DRBD_DIALOG_REFRESH_MAX 600
@@ -129,6 +130,7 @@
#define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT
#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
#define DRBD_ON_CONGESTION_DEF OC_BLOCK
#define DRBD_MAX_BIO_BVECS_MIN 0
#define DRBD_MAX_BIO_BVECS_MAX 128
@@ -154,5 +156,13 @@
#define DRBD_C_MIN_RATE_MAX (4 << 20)
#define DRBD_C_MIN_RATE_DEF 4096
#define DRBD_CONG_FILL_MIN 0
#define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */
#define DRBD_CONG_FILL_DEF 0
#define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN
#define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX
#define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF
#undef RANGE
#endif

View File

@@ -56,6 +56,9 @@ NL_PACKET(net_conf, 5,
NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict)
NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo)
NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size)
NL_INTEGER( 81, T_MAY_IGNORE, on_congestion)
NL_INTEGER( 82, T_MAY_IGNORE, cong_fill)
NL_INTEGER( 83, T_MAY_IGNORE, cong_extents)
/* 59 addr_family was available in GIT, never released */
NL_BIT( 60, T_MANDATORY, mind_af)
NL_BIT( 27, T_MAY_IGNORE, want_lose)
@@ -66,7 +69,9 @@ NL_PACKET(net_conf, 5,
NL_BIT( 70, T_MANDATORY, dry_run)
)
NL_PACKET(disconnect, 6, )
NL_PACKET(disconnect, 6,
NL_BIT( 84, T_MAY_IGNORE, force)
)
NL_PACKET(resize, 7,
NL_INT64( 29, T_MAY_IGNORE, resize_size)
@@ -143,9 +148,13 @@ NL_PACKET(new_c_uuid, 26,
NL_BIT( 63, T_MANDATORY, clear_bm)
)
#ifdef NL_RESPONSE
NL_RESPONSE(return_code_only, 27)
#endif
#undef NL_PACKET
#undef NL_INTEGER
#undef NL_INT64
#undef NL_BIT
#undef NL_STRING
#undef NL_RESPONSE

Some files were not shown because too many files have changed in this diff Show More