Merge branch 'for-2.6.37/drivers' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.37/drivers' of git://git.kernel.dk/linux-2.6-block: (95 commits)
  cciss: fix PCI IDs for new Smart Array controllers
  drbd: add race-breaker to drbd_go_diskless
  drbd: use dynamic_dev_dbg to optionally log uuid changes
  dynamic_debug.h: Fix dynamic_dev_dbg() macro if CONFIG_DYNAMIC_DEBUG not set
  drbd: cleanup: change "<= 0" to "== 0"
  drbd: relax the grace period of the md_sync timer again
  drbd: add some more explicit drbd_md_sync
  drbd: drop wrong debug asserts, fix recently introduced race
  drbd: cleanup useless leftover warn/error printk's
  drbd: add explicit drbd_md_sync to drbd_resync_finished
  drbd: Do not log an ASSERT for P_OV_REQUEST packets while C_CONNECTED
  drbd: fix for possible deadlock on IO error during resync
  drbd: fix unlikely access after free and list corruption
  drbd: fix for spurious fullsync (uuids rotated too fast)
  drbd: allow for explicit resync-finished notifications
  drbd: preparation commit, using full state in receive_state()
  drbd: drbd_send_ack_dp must not rely on header information
  drbd: Fix regression in recv_bm_rle_bits (compressed bitmap)
  drbd: Fixed a stupid copy and paste error
  drbd: Allow larger values for c-fill-target.
  ...

Fix up trivial conflict in drivers/block/ataflop.c due to BKL removal
This commit is contained in:
Linus Torvalds 2010-10-22 17:03:12 -07:00
commit 8abfc6e7a4
20 changed files with 2447 additions and 1405 deletions

View File

@ -115,8 +115,6 @@ static unsigned long int fd_def_df0 = FD_DD_3; /* default for df0 if it does
module_param(fd_def_df0, ulong, 0);
MODULE_LICENSE("GPL");
static struct request_queue *floppy_queue;
/*
* Macros
*/
@ -165,6 +163,7 @@ static volatile int selected = -1; /* currently selected drive */
static int writepending;
static int writefromint;
static char *raw_buf;
static int fdc_queue;
static DEFINE_SPINLOCK(amiflop_lock);
@ -1335,6 +1334,42 @@ static int get_track(int drive, int track)
return -1;
}
/*
* Round-robin between our available drives, doing one request from each
*/
static struct request *set_next_request(void)
{
struct request_queue *q;
int cnt = FD_MAX_UNITS;
struct request *rq;
/* Find next queue we can dispatch from */
fdc_queue = fdc_queue + 1;
if (fdc_queue == FD_MAX_UNITS)
fdc_queue = 0;
for(cnt = FD_MAX_UNITS; cnt > 0; cnt--) {
if (unit[fdc_queue].type->code == FD_NODRIVE) {
if (++fdc_queue == FD_MAX_UNITS)
fdc_queue = 0;
continue;
}
q = unit[fdc_queue].gendisk->queue;
if (q) {
rq = blk_fetch_request(q);
if (rq)
break;
}
if (++fdc_queue == FD_MAX_UNITS)
fdc_queue = 0;
}
return rq;
}
static void redo_fd_request(void)
{
struct request *rq;
@ -1346,7 +1381,7 @@ static void redo_fd_request(void)
int err;
next_req:
rq = blk_fetch_request(floppy_queue);
rq = set_next_request();
if (!rq) {
/* Nothing left to do */
return;
@ -1683,6 +1718,13 @@ static int __init fd_probe_drives(void)
continue;
}
unit[drive].gendisk = disk;
disk->queue = blk_init_queue(do_fd_request, &amiflop_lock);
if (!disk->queue) {
unit[drive].type->code = FD_NODRIVE;
continue;
}
drives++;
if ((unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL)) == NULL) {
printk("no mem for ");
@ -1696,7 +1738,6 @@ static int __init fd_probe_drives(void)
disk->fops = &floppy_fops;
sprintf(disk->disk_name, "fd%d", drive);
disk->private_data = &unit[drive];
disk->queue = floppy_queue;
set_capacity(disk, 880*2);
add_disk(disk);
}
@ -1744,11 +1785,6 @@ static int __init amiga_floppy_probe(struct platform_device *pdev)
goto out_irq2;
}
ret = -ENOMEM;
floppy_queue = blk_init_queue(do_fd_request, &amiflop_lock);
if (!floppy_queue)
goto out_queue;
ret = -ENODEV;
if (fd_probe_drives() < 1) /* No usable drives */
goto out_probe;
@ -1792,8 +1828,6 @@ static int __init amiga_floppy_probe(struct platform_device *pdev)
return 0;
out_probe:
blk_cleanup_queue(floppy_queue);
out_queue:
free_irq(IRQ_AMIGA_CIAA_TB, NULL);
out_irq2:
free_irq(IRQ_AMIGA_DSKBLK, NULL);
@ -1811,9 +1845,12 @@ static int __exit amiga_floppy_remove(struct platform_device *pdev)
for( i = 0; i < FD_MAX_UNITS; i++) {
if (unit[i].type->code != FD_NODRIVE) {
struct request_queue *q = unit[i].gendisk->queue;
del_gendisk(unit[i].gendisk);
put_disk(unit[i].gendisk);
kfree(unit[i].trackbuf);
if (q)
blk_cleanup_queue(q);
}
}
blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
@ -1821,7 +1858,6 @@ static int __exit amiga_floppy_remove(struct platform_device *pdev)
free_irq(IRQ_AMIGA_DSKBLK, NULL);
custom.dmacon = DMAF_DISK; /* disable DMA */
amiga_chip_free(raw_buf);
blk_cleanup_queue(floppy_queue);
unregister_blkdev(FLOPPY_MAJOR, "fd");
}
#endif

View File

@ -80,8 +80,8 @@
#undef DEBUG
static DEFINE_MUTEX(ataflop_mutex);
static struct request_queue *floppy_queue;
static struct request *fd_request;
static int fdc_queue;
/* Disk types: DD, HD, ED */
static struct atari_disk_type {
@ -1392,6 +1392,29 @@ static void setup_req_params( int drive )
ReqTrack, ReqSector, (unsigned long)ReqData ));
}
/*
* Round-robin between our available drives, doing one request from each
*/
static struct request *set_next_request(void)
{
struct request_queue *q;
int old_pos = fdc_queue;
struct request *rq;
do {
q = unit[fdc_queue].disk->queue;
if (++fdc_queue == FD_MAX_UNITS)
fdc_queue = 0;
if (q) {
rq = blk_fetch_request(q);
if (rq)
break;
}
} while (fdc_queue != old_pos);
return rq;
}
static void redo_fd_request(void)
{
@ -1406,7 +1429,7 @@ static void redo_fd_request(void)
repeat:
if (!fd_request) {
fd_request = blk_fetch_request(floppy_queue);
fd_request = set_next_request();
if (!fd_request)
goto the_end;
}
@ -1933,10 +1956,6 @@ static int __init atari_floppy_init (void)
PhysTrackBuffer = virt_to_phys(TrackBuffer);
BufferDrive = BufferSide = BufferTrack = -1;
floppy_queue = blk_init_queue(do_fd_request, &ataflop_lock);
if (!floppy_queue)
goto Enomem;
for (i = 0; i < FD_MAX_UNITS; i++) {
unit[i].track = -1;
unit[i].flags = 0;
@ -1945,7 +1964,10 @@ static int __init atari_floppy_init (void)
sprintf(unit[i].disk->disk_name, "fd%d", i);
unit[i].disk->fops = &floppy_fops;
unit[i].disk->private_data = &unit[i];
unit[i].disk->queue = floppy_queue;
unit[i].disk->queue = blk_init_queue(do_fd_request,
&ataflop_lock);
if (!unit[i].disk->queue)
goto Enomem;
set_capacity(unit[i].disk, MAX_DISK_SIZE * 2);
add_disk(unit[i].disk);
}
@ -1960,10 +1982,14 @@ static int __init atari_floppy_init (void)
return 0;
Enomem:
while (i--)
while (i--) {
struct request_queue *q = unit[i].disk->queue;
put_disk(unit[i].disk);
if (floppy_queue)
blk_cleanup_queue(floppy_queue);
if (q)
blk_cleanup_queue(q);
}
unregister_blkdev(FLOPPY_MAJOR, "fd");
return -ENOMEM;
}
@ -2012,12 +2038,14 @@ static void __exit atari_floppy_exit(void)
int i;
blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
for (i = 0; i < FD_MAX_UNITS; i++) {
struct request_queue *q = unit[i].disk->queue;
del_gendisk(unit[i].disk);
put_disk(unit[i].disk);
blk_cleanup_queue(q);
}
unregister_blkdev(FLOPPY_MAJOR, "fd");
blk_cleanup_queue(floppy_queue);
del_timer_sync(&fd_timer);
atari_stram_free( DMABuffer );
}

View File

@ -105,11 +105,12 @@ static const struct pci_device_id cciss_pci_device_id[] = {
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3249},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324A},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324B},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3250},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3251},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3252},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3253},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3254},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3350},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3351},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3352},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3353},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3354},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3355},
{0,}
};
@ -149,11 +150,12 @@ static struct board_type products[] = {
{0x3249103C, "Smart Array P812", &SA5_access},
{0x324A103C, "Smart Array P712m", &SA5_access},
{0x324B103C, "Smart Array P711m", &SA5_access},
{0x3250103C, "Smart Array", &SA5_access},
{0x3251103C, "Smart Array", &SA5_access},
{0x3252103C, "Smart Array", &SA5_access},
{0x3253103C, "Smart Array", &SA5_access},
{0x3254103C, "Smart Array", &SA5_access},
{0x3350103C, "Smart Array", &SA5_access},
{0x3351103C, "Smart Array", &SA5_access},
{0x3352103C, "Smart Array", &SA5_access},
{0x3353103C, "Smart Array", &SA5_access},
{0x3354103C, "Smart Array", &SA5_access},
{0x3355103C, "Smart Array", &SA5_access},
};
/* How long to wait (in milliseconds) for board to go into simple mode */
@ -1232,470 +1234,452 @@ static void check_ioctl_unit_attention(ctlr_info_t *h, CommandList_struct *c)
c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
(void)check_for_unit_attention(h, c);
}
/*
* ioctl
*/
static int cciss_getpciinfo(ctlr_info_t *h, void __user *argp)
{
cciss_pci_info_struct pciinfo;
if (!argp)
return -EINVAL;
pciinfo.domain = pci_domain_nr(h->pdev->bus);
pciinfo.bus = h->pdev->bus->number;
pciinfo.dev_fn = h->pdev->devfn;
pciinfo.board_id = h->board_id;
if (copy_to_user(argp, &pciinfo, sizeof(cciss_pci_info_struct)))
return -EFAULT;
return 0;
}
static int cciss_getintinfo(ctlr_info_t *h, void __user *argp)
{
cciss_coalint_struct intinfo;
if (!argp)
return -EINVAL;
intinfo.delay = readl(&h->cfgtable->HostWrite.CoalIntDelay);
intinfo.count = readl(&h->cfgtable->HostWrite.CoalIntCount);
if (copy_to_user
(argp, &intinfo, sizeof(cciss_coalint_struct)))
return -EFAULT;
return 0;
}
static int cciss_setintinfo(ctlr_info_t *h, void __user *argp)
{
cciss_coalint_struct intinfo;
unsigned long flags;
int i;
if (!argp)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user(&intinfo, argp, sizeof(intinfo)))
return -EFAULT;
if ((intinfo.delay == 0) && (intinfo.count == 0))
return -EINVAL;
spin_lock_irqsave(&h->lock, flags);
/* Update the field, and then ring the doorbell */
writel(intinfo.delay, &(h->cfgtable->HostWrite.CoalIntDelay));
writel(intinfo.count, &(h->cfgtable->HostWrite.CoalIntCount));
writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
break;
udelay(1000); /* delay and try again */
}
spin_unlock_irqrestore(&h->lock, flags);
if (i >= MAX_IOCTL_CONFIG_WAIT)
return -EAGAIN;
return 0;
}
static int cciss_getnodename(ctlr_info_t *h, void __user *argp)
{
NodeName_type NodeName;
int i;
if (!argp)
return -EINVAL;
for (i = 0; i < 16; i++)
NodeName[i] = readb(&h->cfgtable->ServerName[i]);
if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
return -EFAULT;
return 0;
}
static int cciss_setnodename(ctlr_info_t *h, void __user *argp)
{
NodeName_type NodeName;
unsigned long flags;
int i;
if (!argp)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user(NodeName, argp, sizeof(NodeName_type)))
return -EFAULT;
spin_lock_irqsave(&h->lock, flags);
/* Update the field, and then ring the doorbell */
for (i = 0; i < 16; i++)
writeb(NodeName[i], &h->cfgtable->ServerName[i]);
writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
break;
udelay(1000); /* delay and try again */
}
spin_unlock_irqrestore(&h->lock, flags);
if (i >= MAX_IOCTL_CONFIG_WAIT)
return -EAGAIN;
return 0;
}
static int cciss_getheartbeat(ctlr_info_t *h, void __user *argp)
{
Heartbeat_type heartbeat;
if (!argp)
return -EINVAL;
heartbeat = readl(&h->cfgtable->HeartBeat);
if (copy_to_user(argp, &heartbeat, sizeof(Heartbeat_type)))
return -EFAULT;
return 0;
}
static int cciss_getbustypes(ctlr_info_t *h, void __user *argp)
{
BusTypes_type BusTypes;
if (!argp)
return -EINVAL;
BusTypes = readl(&h->cfgtable->BusTypes);
if (copy_to_user(argp, &BusTypes, sizeof(BusTypes_type)))
return -EFAULT;
return 0;
}
static int cciss_getfirmver(ctlr_info_t *h, void __user *argp)
{
FirmwareVer_type firmware;
if (!argp)
return -EINVAL;
memcpy(firmware, h->firm_ver, 4);
if (copy_to_user
(argp, firmware, sizeof(FirmwareVer_type)))
return -EFAULT;
return 0;
}
static int cciss_getdrivver(ctlr_info_t *h, void __user *argp)
{
DriverVer_type DriverVer = DRIVER_VERSION;
if (!argp)
return -EINVAL;
if (copy_to_user(argp, &DriverVer, sizeof(DriverVer_type)))
return -EFAULT;
return 0;
}
static int cciss_getluninfo(ctlr_info_t *h,
struct gendisk *disk, void __user *argp)
{
LogvolInfo_struct luninfo;
drive_info_struct *drv = get_drv(disk);
if (!argp)
return -EINVAL;
memcpy(&luninfo.LunID, drv->LunID, sizeof(luninfo.LunID));
luninfo.num_opens = drv->usage_count;
luninfo.num_parts = 0;
if (copy_to_user(argp, &luninfo, sizeof(LogvolInfo_struct)))
return -EFAULT;
return 0;
}
static int cciss_passthru(ctlr_info_t *h, void __user *argp)
{
IOCTL_Command_struct iocommand;
CommandList_struct *c;
char *buff = NULL;
u64bit temp64;
DECLARE_COMPLETION_ONSTACK(wait);
if (!argp)
return -EINVAL;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
if (copy_from_user
(&iocommand, argp, sizeof(IOCTL_Command_struct)))
return -EFAULT;
if ((iocommand.buf_size < 1) &&
(iocommand.Request.Type.Direction != XFER_NONE)) {
return -EINVAL;
}
if (iocommand.buf_size > 0) {
buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
if (buff == NULL)
return -EFAULT;
}
if (iocommand.Request.Type.Direction == XFER_WRITE) {
/* Copy the data into the buffer we created */
if (copy_from_user(buff, iocommand.buf, iocommand.buf_size)) {
kfree(buff);
return -EFAULT;
}
} else {
memset(buff, 0, iocommand.buf_size);
}
c = cmd_special_alloc(h);
if (!c) {
kfree(buff);
return -ENOMEM;
}
/* Fill in the command type */
c->cmd_type = CMD_IOCTL_PEND;
/* Fill in Command Header */
c->Header.ReplyQueue = 0; /* unused in simple mode */
if (iocommand.buf_size > 0) { /* buffer to fill */
c->Header.SGList = 1;
c->Header.SGTotal = 1;
} else { /* no buffers to fill */
c->Header.SGList = 0;
c->Header.SGTotal = 0;
}
c->Header.LUN = iocommand.LUN_info;
/* use the kernel address the cmd block for tag */
c->Header.Tag.lower = c->busaddr;
/* Fill in Request block */
c->Request = iocommand.Request;
/* Fill in the scatter gather information */
if (iocommand.buf_size > 0) {
temp64.val = pci_map_single(h->pdev, buff,
iocommand.buf_size, PCI_DMA_BIDIRECTIONAL);
c->SG[0].Addr.lower = temp64.val32.lower;
c->SG[0].Addr.upper = temp64.val32.upper;
c->SG[0].Len = iocommand.buf_size;
c->SG[0].Ext = 0; /* we are not chaining */
}
c->waiting = &wait;
enqueue_cmd_and_start_io(h, c);
wait_for_completion(&wait);
/* unlock the buffers from DMA */
temp64.val32.lower = c->SG[0].Addr.lower;
temp64.val32.upper = c->SG[0].Addr.upper;
pci_unmap_single(h->pdev, (dma_addr_t) temp64.val, iocommand.buf_size,
PCI_DMA_BIDIRECTIONAL);
check_ioctl_unit_attention(h, c);
/* Copy the error information out */
iocommand.error_info = *(c->err_info);
if (copy_to_user(argp, &iocommand, sizeof(IOCTL_Command_struct))) {
kfree(buff);
cmd_special_free(h, c);
return -EFAULT;
}
if (iocommand.Request.Type.Direction == XFER_READ) {
/* Copy the data out of the buffer we created */
if (copy_to_user(iocommand.buf, buff, iocommand.buf_size)) {
kfree(buff);
cmd_special_free(h, c);
return -EFAULT;
}
}
kfree(buff);
cmd_special_free(h, c);
return 0;
}
static int cciss_bigpassthru(ctlr_info_t *h, void __user *argp)
{
BIG_IOCTL_Command_struct *ioc;
CommandList_struct *c;
unsigned char **buff = NULL;
int *buff_size = NULL;
u64bit temp64;
BYTE sg_used = 0;
int status = 0;
int i;
DECLARE_COMPLETION_ONSTACK(wait);
__u32 left;
__u32 sz;
BYTE __user *data_ptr;
if (!argp)
return -EINVAL;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
ioc = (BIG_IOCTL_Command_struct *)
kmalloc(sizeof(*ioc), GFP_KERNEL);
if (!ioc) {
status = -ENOMEM;
goto cleanup1;
}
if (copy_from_user(ioc, argp, sizeof(*ioc))) {
status = -EFAULT;
goto cleanup1;
}
if ((ioc->buf_size < 1) &&
(ioc->Request.Type.Direction != XFER_NONE)) {
status = -EINVAL;
goto cleanup1;
}
/* Check kmalloc limits using all SGs */
if (ioc->malloc_size > MAX_KMALLOC_SIZE) {
status = -EINVAL;
goto cleanup1;
}
if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) {
status = -EINVAL;
goto cleanup1;
}
buff = kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL);
if (!buff) {
status = -ENOMEM;
goto cleanup1;
}
buff_size = kmalloc(MAXSGENTRIES * sizeof(int), GFP_KERNEL);
if (!buff_size) {
status = -ENOMEM;
goto cleanup1;
}
left = ioc->buf_size;
data_ptr = ioc->buf;
while (left) {
sz = (left > ioc->malloc_size) ? ioc->malloc_size : left;
buff_size[sg_used] = sz;
buff[sg_used] = kmalloc(sz, GFP_KERNEL);
if (buff[sg_used] == NULL) {
status = -ENOMEM;
goto cleanup1;
}
if (ioc->Request.Type.Direction == XFER_WRITE) {
if (copy_from_user(buff[sg_used], data_ptr, sz)) {
status = -EFAULT;
goto cleanup1;
}
} else {
memset(buff[sg_used], 0, sz);
}
left -= sz;
data_ptr += sz;
sg_used++;
}
c = cmd_special_alloc(h);
if (!c) {
status = -ENOMEM;
goto cleanup1;
}
c->cmd_type = CMD_IOCTL_PEND;
c->Header.ReplyQueue = 0;
c->Header.SGList = sg_used;
c->Header.SGTotal = sg_used;
c->Header.LUN = ioc->LUN_info;
c->Header.Tag.lower = c->busaddr;
c->Request = ioc->Request;
for (i = 0; i < sg_used; i++) {
temp64.val = pci_map_single(h->pdev, buff[i], buff_size[i],
PCI_DMA_BIDIRECTIONAL);
c->SG[i].Addr.lower = temp64.val32.lower;
c->SG[i].Addr.upper = temp64.val32.upper;
c->SG[i].Len = buff_size[i];
c->SG[i].Ext = 0; /* we are not chaining */
}
c->waiting = &wait;
enqueue_cmd_and_start_io(h, c);
wait_for_completion(&wait);
/* unlock the buffers from DMA */
for (i = 0; i < sg_used; i++) {
temp64.val32.lower = c->SG[i].Addr.lower;
temp64.val32.upper = c->SG[i].Addr.upper;
pci_unmap_single(h->pdev,
(dma_addr_t) temp64.val, buff_size[i],
PCI_DMA_BIDIRECTIONAL);
}
check_ioctl_unit_attention(h, c);
/* Copy the error information out */
ioc->error_info = *(c->err_info);
if (copy_to_user(argp, ioc, sizeof(*ioc))) {
cmd_special_free(h, c);
status = -EFAULT;
goto cleanup1;
}
if (ioc->Request.Type.Direction == XFER_READ) {
/* Copy the data out of the buffer we created */
BYTE __user *ptr = ioc->buf;
for (i = 0; i < sg_used; i++) {
if (copy_to_user(ptr, buff[i], buff_size[i])) {
cmd_special_free(h, c);
status = -EFAULT;
goto cleanup1;
}
ptr += buff_size[i];
}
}
cmd_special_free(h, c);
status = 0;
cleanup1:
if (buff) {
for (i = 0; i < sg_used; i++)
kfree(buff[i]);
kfree(buff);
}
kfree(buff_size);
kfree(ioc);
return status;
}
static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
unsigned int cmd, unsigned long arg)
{
struct gendisk *disk = bdev->bd_disk;
ctlr_info_t *h = get_host(disk);
drive_info_struct *drv = get_drv(disk);
void __user *argp = (void __user *)arg;
dev_dbg(&h->pdev->dev, "cciss_ioctl: Called with cmd=%x %lx\n",
cmd, arg);
switch (cmd) {
case CCISS_GETPCIINFO:
{
cciss_pci_info_struct pciinfo;
if (!arg)
return -EINVAL;
pciinfo.domain = pci_domain_nr(h->pdev->bus);
pciinfo.bus = h->pdev->bus->number;
pciinfo.dev_fn = h->pdev->devfn;
pciinfo.board_id = h->board_id;
if (copy_to_user
(argp, &pciinfo, sizeof(cciss_pci_info_struct)))
return -EFAULT;
return 0;
}
return cciss_getpciinfo(h, argp);
case CCISS_GETINTINFO:
{
cciss_coalint_struct intinfo;
if (!arg)
return -EINVAL;
intinfo.delay =
readl(&h->cfgtable->HostWrite.CoalIntDelay);
intinfo.count =
readl(&h->cfgtable->HostWrite.CoalIntCount);
if (copy_to_user
(argp, &intinfo, sizeof(cciss_coalint_struct)))
return -EFAULT;
return 0;
}
return cciss_getintinfo(h, argp);
case CCISS_SETINTINFO:
{
cciss_coalint_struct intinfo;
unsigned long flags;
int i;
if (!arg)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user
(&intinfo, argp, sizeof(cciss_coalint_struct)))
return -EFAULT;
if ((intinfo.delay == 0) && (intinfo.count == 0))
return -EINVAL;
spin_lock_irqsave(&h->lock, flags);
/* Update the field, and then ring the doorbell */
writel(intinfo.delay,
&(h->cfgtable->HostWrite.CoalIntDelay));
writel(intinfo.count,
&(h->cfgtable->HostWrite.CoalIntCount));
writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
if (!(readl(h->vaddr + SA5_DOORBELL)
& CFGTBL_ChangeReq))
break;
/* delay and try again */
udelay(1000);
}
spin_unlock_irqrestore(&h->lock, flags);
if (i >= MAX_IOCTL_CONFIG_WAIT)
return -EAGAIN;
return 0;
}
return cciss_setintinfo(h, argp);
case CCISS_GETNODENAME:
{
NodeName_type NodeName;
int i;
if (!arg)
return -EINVAL;
for (i = 0; i < 16; i++)
NodeName[i] =
readb(&h->cfgtable->ServerName[i]);
if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
return -EFAULT;
return 0;
}
return cciss_getnodename(h, argp);
case CCISS_SETNODENAME:
{
NodeName_type NodeName;
unsigned long flags;
int i;
if (!arg)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user
(NodeName, argp, sizeof(NodeName_type)))
return -EFAULT;
spin_lock_irqsave(&h->lock, flags);
/* Update the field, and then ring the doorbell */
for (i = 0; i < 16; i++)
writeb(NodeName[i],
&h->cfgtable->ServerName[i]);
writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
if (!(readl(h->vaddr + SA5_DOORBELL)
& CFGTBL_ChangeReq))
break;
/* delay and try again */
udelay(1000);
}
spin_unlock_irqrestore(&h->lock, flags);
if (i >= MAX_IOCTL_CONFIG_WAIT)
return -EAGAIN;
return 0;
}
return cciss_setnodename(h, argp);
case CCISS_GETHEARTBEAT:
{
Heartbeat_type heartbeat;
if (!arg)
return -EINVAL;
heartbeat = readl(&h->cfgtable->HeartBeat);
if (copy_to_user
(argp, &heartbeat, sizeof(Heartbeat_type)))
return -EFAULT;
return 0;
}
return cciss_getheartbeat(h, argp);
case CCISS_GETBUSTYPES:
{
BusTypes_type BusTypes;
if (!arg)
return -EINVAL;
BusTypes = readl(&h->cfgtable->BusTypes);
if (copy_to_user
(argp, &BusTypes, sizeof(BusTypes_type)))
return -EFAULT;
return 0;
}
return cciss_getbustypes(h, argp);
case CCISS_GETFIRMVER:
{
FirmwareVer_type firmware;
if (!arg)
return -EINVAL;
memcpy(firmware, h->firm_ver, 4);
if (copy_to_user
(argp, firmware, sizeof(FirmwareVer_type)))
return -EFAULT;
return 0;
}
return cciss_getfirmver(h, argp);
case CCISS_GETDRIVVER:
{
DriverVer_type DriverVer = DRIVER_VERSION;
if (!arg)
return -EINVAL;
if (copy_to_user
(argp, &DriverVer, sizeof(DriverVer_type)))
return -EFAULT;
return 0;
}
return cciss_getdrivver(h, argp);
case CCISS_DEREGDISK:
case CCISS_REGNEWD:
case CCISS_REVALIDVOLS:
return rebuild_lun_table(h, 0, 1);
case CCISS_GETLUNINFO:{
LogvolInfo_struct luninfo;
memcpy(&luninfo.LunID, drv->LunID,
sizeof(luninfo.LunID));
luninfo.num_opens = drv->usage_count;
luninfo.num_parts = 0;
if (copy_to_user(argp, &luninfo,
sizeof(LogvolInfo_struct)))
return -EFAULT;
return 0;
}
case CCISS_GETLUNINFO:
return cciss_getluninfo(h, disk, argp);
case CCISS_PASSTHRU:
{
IOCTL_Command_struct iocommand;
CommandList_struct *c;
char *buff = NULL;
u64bit temp64;
DECLARE_COMPLETION_ONSTACK(wait);
if (!arg)
return -EINVAL;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
if (copy_from_user
(&iocommand, argp, sizeof(IOCTL_Command_struct)))
return -EFAULT;
if ((iocommand.buf_size < 1) &&
(iocommand.Request.Type.Direction != XFER_NONE)) {
return -EINVAL;
}
#if 0 /* 'buf_size' member is 16-bits, and always smaller than kmalloc limit */
/* Check kmalloc limits */
if (iocommand.buf_size > 128000)
return -EINVAL;
#endif
if (iocommand.buf_size > 0) {
buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
if (buff == NULL)
return -EFAULT;
}
if (iocommand.Request.Type.Direction == XFER_WRITE) {
/* Copy the data into the buffer we created */
if (copy_from_user
(buff, iocommand.buf, iocommand.buf_size)) {
kfree(buff);
return -EFAULT;
}
} else {
memset(buff, 0, iocommand.buf_size);
}
c = cmd_special_alloc(h);
if (!c) {
kfree(buff);
return -ENOMEM;
}
/* Fill in the command type */
c->cmd_type = CMD_IOCTL_PEND;
/* Fill in Command Header */
c->Header.ReplyQueue = 0; /* unused in simple mode */
if (iocommand.buf_size > 0) /* buffer to fill */
{
c->Header.SGList = 1;
c->Header.SGTotal = 1;
} else /* no buffers to fill */
{
c->Header.SGList = 0;
c->Header.SGTotal = 0;
}
c->Header.LUN = iocommand.LUN_info;
/* use the kernel address the cmd block for tag */
c->Header.Tag.lower = c->busaddr;
/* Fill in Request block */
c->Request = iocommand.Request;
/* Fill in the scatter gather information */
if (iocommand.buf_size > 0) {
temp64.val = pci_map_single(h->pdev, buff,
iocommand.buf_size,
PCI_DMA_BIDIRECTIONAL);
c->SG[0].Addr.lower = temp64.val32.lower;
c->SG[0].Addr.upper = temp64.val32.upper;
c->SG[0].Len = iocommand.buf_size;
c->SG[0].Ext = 0; /* we are not chaining */
}
c->waiting = &wait;
enqueue_cmd_and_start_io(h, c);
wait_for_completion(&wait);
/* unlock the buffers from DMA */
temp64.val32.lower = c->SG[0].Addr.lower;
temp64.val32.upper = c->SG[0].Addr.upper;
pci_unmap_single(h->pdev, (dma_addr_t) temp64.val,
iocommand.buf_size,
PCI_DMA_BIDIRECTIONAL);
check_ioctl_unit_attention(h, c);
/* Copy the error information out */
iocommand.error_info = *(c->err_info);
if (copy_to_user
(argp, &iocommand, sizeof(IOCTL_Command_struct))) {
kfree(buff);
cmd_special_free(h, c);
return -EFAULT;
}
if (iocommand.Request.Type.Direction == XFER_READ) {
/* Copy the data out of the buffer we created */
if (copy_to_user
(iocommand.buf, buff, iocommand.buf_size)) {
kfree(buff);
cmd_special_free(h, c);
return -EFAULT;
}
}
kfree(buff);
cmd_special_free(h, c);
return 0;
}
case CCISS_BIG_PASSTHRU:{
BIG_IOCTL_Command_struct *ioc;
CommandList_struct *c;
unsigned char **buff = NULL;
int *buff_size = NULL;
u64bit temp64;
BYTE sg_used = 0;
int status = 0;
int i;
DECLARE_COMPLETION_ONSTACK(wait);
__u32 left;
__u32 sz;
BYTE __user *data_ptr;
if (!arg)
return -EINVAL;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
ioc = (BIG_IOCTL_Command_struct *)
kmalloc(sizeof(*ioc), GFP_KERNEL);
if (!ioc) {
status = -ENOMEM;
goto cleanup1;
}
if (copy_from_user(ioc, argp, sizeof(*ioc))) {
status = -EFAULT;
goto cleanup1;
}
if ((ioc->buf_size < 1) &&
(ioc->Request.Type.Direction != XFER_NONE)) {
status = -EINVAL;
goto cleanup1;
}
/* Check kmalloc limits using all SGs */
if (ioc->malloc_size > MAX_KMALLOC_SIZE) {
status = -EINVAL;
goto cleanup1;
}
if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) {
status = -EINVAL;
goto cleanup1;
}
buff =
kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL);
if (!buff) {
status = -ENOMEM;
goto cleanup1;
}
buff_size = kmalloc(MAXSGENTRIES * sizeof(int),
GFP_KERNEL);
if (!buff_size) {
status = -ENOMEM;
goto cleanup1;
}
left = ioc->buf_size;
data_ptr = ioc->buf;
while (left) {
sz = (left >
ioc->malloc_size) ? ioc->
malloc_size : left;
buff_size[sg_used] = sz;
buff[sg_used] = kmalloc(sz, GFP_KERNEL);
if (buff[sg_used] == NULL) {
status = -ENOMEM;
goto cleanup1;
}
if (ioc->Request.Type.Direction == XFER_WRITE) {
if (copy_from_user
(buff[sg_used], data_ptr, sz)) {
status = -EFAULT;
goto cleanup1;
}
} else {
memset(buff[sg_used], 0, sz);
}
left -= sz;
data_ptr += sz;
sg_used++;
}
c = cmd_special_alloc(h);
if (!c) {
status = -ENOMEM;
goto cleanup1;
}
c->cmd_type = CMD_IOCTL_PEND;
c->Header.ReplyQueue = 0;
if (ioc->buf_size > 0) {
c->Header.SGList = sg_used;
c->Header.SGTotal = sg_used;
} else {
c->Header.SGList = 0;
c->Header.SGTotal = 0;
}
c->Header.LUN = ioc->LUN_info;
c->Header.Tag.lower = c->busaddr;
c->Request = ioc->Request;
if (ioc->buf_size > 0) {
for (i = 0; i < sg_used; i++) {
temp64.val =
pci_map_single(h->pdev, buff[i],
buff_size[i],
PCI_DMA_BIDIRECTIONAL);
c->SG[i].Addr.lower =
temp64.val32.lower;
c->SG[i].Addr.upper =
temp64.val32.upper;
c->SG[i].Len = buff_size[i];
c->SG[i].Ext = 0; /* we are not chaining */
}
}
c->waiting = &wait;
enqueue_cmd_and_start_io(h, c);
wait_for_completion(&wait);
/* unlock the buffers from DMA */
for (i = 0; i < sg_used; i++) {
temp64.val32.lower = c->SG[i].Addr.lower;
temp64.val32.upper = c->SG[i].Addr.upper;
pci_unmap_single(h->pdev,
(dma_addr_t) temp64.val, buff_size[i],
PCI_DMA_BIDIRECTIONAL);
}
check_ioctl_unit_attention(h, c);
/* Copy the error information out */
ioc->error_info = *(c->err_info);
if (copy_to_user(argp, ioc, sizeof(*ioc))) {
cmd_special_free(h, c);
status = -EFAULT;
goto cleanup1;
}
if (ioc->Request.Type.Direction == XFER_READ) {
/* Copy the data out of the buffer we created */
BYTE __user *ptr = ioc->buf;
for (i = 0; i < sg_used; i++) {
if (copy_to_user
(ptr, buff[i], buff_size[i])) {
cmd_special_free(h, c);
status = -EFAULT;
goto cleanup1;
}
ptr += buff_size[i];
}
}
cmd_special_free(h, c);
status = 0;
cleanup1:
if (buff) {
for (i = 0; i < sg_used; i++)
kfree(buff[i]);
kfree(buff);
}
kfree(buff_size);
kfree(ioc);
return status;
}
return cciss_passthru(h, argp);
case CCISS_BIG_PASSTHRU:
return cciss_bigpassthru(h, argp);
/* scsi_cmd_ioctl handles these, below, though some are not */
/* very meaningful for cciss. SG_IO is the main one people want. */

View File

@ -965,29 +965,30 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
* ok, (capacity & 7) != 0 sometimes, but who cares...
* we count rs_{total,left} in bits, not sectors.
*/
spin_lock_irqsave(&mdev->al_lock, flags);
count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
if (count) {
/* we need the lock for drbd_try_clear_on_disk_bm */
if (jiffies - mdev->rs_mark_time > HZ*10) {
/* should be rolling marks,
* but we estimate only anyways. */
if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) &&
if (count && get_ldev(mdev)) {
unsigned long now = jiffies;
unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
unsigned long tw = drbd_bm_total_weight(mdev);
if (mdev->rs_mark_left[mdev->rs_last_mark] != tw &&
mdev->state.conn != C_PAUSED_SYNC_T &&
mdev->state.conn != C_PAUSED_SYNC_S) {
mdev->rs_mark_time = jiffies;
mdev->rs_mark_left = drbd_bm_total_weight(mdev);
mdev->rs_mark_time[next] = now;
mdev->rs_mark_left[next] = tw;
mdev->rs_last_mark = next;
}
}
if (get_ldev(mdev)) {
drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
put_ldev(mdev);
}
spin_lock_irqsave(&mdev->al_lock, flags);
drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
spin_unlock_irqrestore(&mdev->al_lock, flags);
/* just wake_up unconditional now, various lc_chaged(),
* lc_put() in drbd_try_clear_on_disk_bm(). */
wake_up = 1;
put_ldev(mdev);
}
spin_unlock_irqrestore(&mdev->al_lock, flags);
if (wake_up)
wake_up(&mdev->al_wait);
}
@ -1118,7 +1119,7 @@ static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
* @mdev: DRBD device.
* @sector: The sector number.
*
* This functions sleeps on al_wait. Returns 1 on success, 0 if interrupted.
* This functions sleeps on al_wait. Returns 0 on success, -EINTR if interrupted.
*/
int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
@ -1129,10 +1130,10 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
sig = wait_event_interruptible(mdev->al_wait,
(bm_ext = _bme_get(mdev, enr)));
if (sig)
return 0;
return -EINTR;
if (test_bit(BME_LOCKED, &bm_ext->flags))
return 1;
return 0;
for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
sig = wait_event_interruptible(mdev->al_wait,
@ -1145,13 +1146,11 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
wake_up(&mdev->al_wait);
}
spin_unlock_irq(&mdev->al_lock);
return 0;
return -EINTR;
}
}
set_bit(BME_LOCKED, &bm_ext->flags);
return 1;
return 0;
}
/**

View File

@ -569,7 +569,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
*
* maybe bm_set should be atomic_t ?
*/
static unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
{
struct drbd_bitmap *b = mdev->bitmap;
unsigned long s;

View File

@ -337,13 +337,25 @@ static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
* NOTE that the payload starts at a long aligned offset,
* regardless of 32 or 64 bit arch!
*/
struct p_header {
struct p_header80 {
u32 magic;
u16 command;
u16 length; /* bytes of data after this header */
u8 payload[0];
} __packed;
/* 8 bytes. packet FIXED for the next century! */
/* Header for big packets, Used for data packets exceeding 64kB */
struct p_header95 {
u16 magic; /* use DRBD_MAGIC_BIG here */
u16 command;
u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. */
u8 payload[0];
} __packed;
union p_header {
struct p_header80 h80;
struct p_header95 h95;
};
/*
* short commands, packets without payload, plain p_header:
@ -362,12 +374,16 @@ struct p_header {
*/
/* these defines must not be changed without changing the protocol version */
#define DP_HARDBARRIER 1
#define DP_RW_SYNC 2
#define DP_HARDBARRIER 1 /* depricated */
#define DP_RW_SYNC 2 /* equals REQ_SYNC */
#define DP_MAY_SET_IN_SYNC 4
#define DP_UNPLUG 8 /* equals REQ_UNPLUG */
#define DP_FUA 16 /* equals REQ_FUA */
#define DP_FLUSH 32 /* equals REQ_FLUSH */
#define DP_DISCARD 64 /* equals REQ_DISCARD */
struct p_data {
struct p_header head;
union p_header head;
u64 sector; /* 64 bits sector number */
u64 block_id; /* to identify the request in protocol B&C */
u32 seq_num;
@ -383,7 +399,7 @@ struct p_data {
* P_DATA_REQUEST, P_RS_DATA_REQUEST
*/
struct p_block_ack {
struct p_header head;
struct p_header80 head;
u64 sector;
u64 block_id;
u32 blksize;
@ -392,7 +408,7 @@ struct p_block_ack {
struct p_block_req {
struct p_header head;
struct p_header80 head;
u64 sector;
u64 block_id;
u32 blksize;
@ -409,7 +425,7 @@ struct p_block_req {
*/
struct p_handshake {
struct p_header head; /* 8 bytes */
struct p_header80 head; /* 8 bytes */
u32 protocol_min;
u32 feature_flags;
u32 protocol_max;
@ -424,19 +440,19 @@ struct p_handshake {
/* 80 bytes, FIXED for the next century */
struct p_barrier {
struct p_header head;
struct p_header80 head;
u32 barrier; /* barrier number _handle_ only */
u32 pad; /* to multiple of 8 Byte */
} __packed;
struct p_barrier_ack {
struct p_header head;
struct p_header80 head;
u32 barrier;
u32 set_size;
} __packed;
struct p_rs_param {
struct p_header head;
struct p_header80 head;
u32 rate;
/* Since protocol version 88 and higher. */
@ -444,20 +460,31 @@ struct p_rs_param {
} __packed;
struct p_rs_param_89 {
struct p_header head;
struct p_header80 head;
u32 rate;
/* protocol version 89: */
char verify_alg[SHARED_SECRET_MAX];
char csums_alg[SHARED_SECRET_MAX];
} __packed;
struct p_rs_param_95 {
struct p_header80 head;
u32 rate;
char verify_alg[SHARED_SECRET_MAX];
char csums_alg[SHARED_SECRET_MAX];
u32 c_plan_ahead;
u32 c_delay_target;
u32 c_fill_target;
u32 c_max_rate;
} __packed;
enum drbd_conn_flags {
CF_WANT_LOSE = 1,
CF_DRY_RUN = 2,
};
struct p_protocol {
struct p_header head;
struct p_header80 head;
u32 protocol;
u32 after_sb_0p;
u32 after_sb_1p;
@ -471,17 +498,17 @@ struct p_protocol {
} __packed;
struct p_uuids {
struct p_header head;
struct p_header80 head;
u64 uuid[UI_EXTENDED_SIZE];
} __packed;
struct p_rs_uuid {
struct p_header head;
struct p_header80 head;
u64 uuid;
} __packed;
struct p_sizes {
struct p_header head;
struct p_header80 head;
u64 d_size; /* size of disk */
u64 u_size; /* user requested size */
u64 c_size; /* current exported size */
@ -491,18 +518,18 @@ struct p_sizes {
} __packed;
struct p_state {
struct p_header head;
struct p_header80 head;
u32 state;
} __packed;
struct p_req_state {
struct p_header head;
struct p_header80 head;
u32 mask;
u32 val;
} __packed;
struct p_req_state_reply {
struct p_header head;
struct p_header80 head;
u32 retcode;
} __packed;
@ -517,7 +544,7 @@ struct p_drbd06_param {
} __packed;
struct p_discard {
struct p_header head;
struct p_header80 head;
u64 block_id;
u32 seq_num;
u32 pad;
@ -533,7 +560,7 @@ enum drbd_bitmap_code {
};
struct p_compressed_bm {
struct p_header head;
struct p_header80 head;
/* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
* (encoding & 0x80): polarity (set/unset) of first runlength
* ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
@ -544,10 +571,10 @@ struct p_compressed_bm {
u8 code[0];
} __packed;
struct p_delay_probe {
struct p_header head;
u32 seq_num; /* sequence number to match the two probe packets */
u32 offset; /* usecs the probe got sent after the reference time point */
struct p_delay_probe93 {
struct p_header80 head;
u32 seq_num; /* sequence number to match the two probe packets */
u32 offset; /* usecs the probe got sent after the reference time point */
} __packed;
/* DCBP: Drbd Compressed Bitmap Packet ... */
@ -594,7 +621,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
* so we need to use the fixed size 4KiB page size
* most architechtures have used for a long time.
*/
#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header))
#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80))
#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long))
#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm))
#if (PAGE_SIZE < 4096)
@ -603,13 +630,14 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
#endif
union p_polymorph {
struct p_header header;
union p_header header;
struct p_handshake handshake;
struct p_data data;
struct p_block_ack block_ack;
struct p_barrier barrier;
struct p_barrier_ack barrier_ack;
struct p_rs_param_89 rs_param_89;
struct p_rs_param_95 rs_param_95;
struct p_protocol protocol;
struct p_sizes sizes;
struct p_uuids uuids;
@ -617,6 +645,8 @@ union p_polymorph {
struct p_req_state req_state;
struct p_req_state_reply req_state_reply;
struct p_block_req block_req;
struct p_delay_probe93 delay_probe93;
struct p_rs_uuid rs_uuid;
} __packed;
/**********************************************************************/
@ -697,7 +727,7 @@ struct drbd_tl_epoch {
struct list_head requests; /* requests before */
struct drbd_tl_epoch *next; /* pointer to the next barrier */
unsigned int br_number; /* the barriers identifier. */
int n_req; /* number of requests attached before this barrier */
int n_writes; /* number of requests attached before this barrier */
};
struct drbd_request;
@ -747,7 +777,7 @@ struct digest_info {
struct drbd_epoch_entry {
struct drbd_work w;
struct hlist_node colision;
struct drbd_epoch *epoch;
struct drbd_epoch *epoch; /* for writes */
struct drbd_conf *mdev;
struct page *pages;
atomic_t pending_bios;
@ -755,7 +785,10 @@ struct drbd_epoch_entry {
/* see comments on ee flag bits below */
unsigned long flags;
sector_t sector;
u64 block_id;
union {
u64 block_id;
struct digest_info *digest;
};
};
/* ee flag bits.
@ -781,12 +814,16 @@ enum {
* if any of those fail, we set this flag atomically
* from the endio callback */
__EE_WAS_ERROR,
/* This ee has a pointer to a digest instead of a block id */
__EE_HAS_DIGEST,
};
#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
#define EE_IS_BARRIER (1<<__EE_IS_BARRIER)
#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
#define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST)
/* global flag bits */
enum {
@ -794,7 +831,6 @@ enum {
SIGNAL_ASENDER, /* whether asender wants to be interrupted */
SEND_PING, /* whether asender should send a ping asap */
STOP_SYNC_TIMER, /* tell timer to cancel itself */
UNPLUG_QUEUED, /* only relevant with kernel 2.4 */
UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */
MD_DIRTY, /* current uuids and flags not yet on disk */
@ -816,6 +852,7 @@ enum {
BITMAP_IO, /* suspend application io;
once no more io in flight, start bitmap io */
BITMAP_IO_QUEUED, /* Started bitmap IO */
GO_DISKLESS, /* Disk failed, local_cnt reached zero, we are going diskless */
RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
NET_CONGESTED, /* The data socket is congested */
@ -829,6 +866,8 @@ enum {
* the peer, if it changed there as well. */
CONN_DRY_RUN, /* Expect disconnect after resync handshake. */
GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */
NEW_CUR_UUID, /* Create new current UUID when thawing IO */
AL_SUSPENDED, /* Activity logging is currently suspended. */
};
struct drbd_bitmap; /* opaque for drbd_conf */
@ -838,10 +877,6 @@ struct drbd_bitmap; /* opaque for drbd_conf */
/* THINK maybe we actually want to use the default "event/%s" worker threads
* or similar in linux 2.6, which uses per cpu data and threads.
*
* To be general, this might need a spin_lock member.
* For now, please use the mdev->req_lock to protect list_head,
* see drbd_queue_work below.
*/
struct drbd_work_queue {
struct list_head q;
@ -915,6 +950,12 @@ enum write_ordering_e {
WO_bio_barrier
};
struct fifo_buffer {
int *values;
unsigned int head_index;
unsigned int size;
};
struct drbd_conf {
/* things that are stored as / read from meta data on disk */
unsigned long flags;
@ -936,9 +977,16 @@ struct drbd_conf {
unsigned int ko_count;
struct drbd_work resync_work,
unplug_work,
go_diskless,
md_sync_work;
struct timer_list resync_timer;
struct timer_list md_sync_timer;
#ifdef DRBD_DEBUG_MD_SYNC
struct {
unsigned int line;
const char* func;
} last_md_mark_dirty;
#endif
/* Used after attach while negotiating new disk state. */
union drbd_state new_state_tmp;
@ -946,6 +994,7 @@ struct drbd_conf {
union drbd_state state;
wait_queue_head_t misc_wait;
wait_queue_head_t state_wait; /* upon each state change. */
wait_queue_head_t net_cnt_wait;
unsigned int send_cnt;
unsigned int recv_cnt;
unsigned int read_cnt;
@ -974,12 +1023,16 @@ struct drbd_conf {
unsigned long rs_start;
/* cumulated time in PausedSyncX state [unit jiffies] */
unsigned long rs_paused;
/* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
unsigned long rs_mark_left;
/* marks's time [unit jiffies] */
unsigned long rs_mark_time;
/* skipped because csum was equeal [unit BM_BLOCK_SIZE] */
/* skipped because csum was equal [unit BM_BLOCK_SIZE] */
unsigned long rs_same_csum;
#define DRBD_SYNC_MARKS 8
#define DRBD_SYNC_MARK_STEP (3*HZ)
/* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
unsigned long rs_mark_left[DRBD_SYNC_MARKS];
/* marks's time [unit jiffies] */
unsigned long rs_mark_time[DRBD_SYNC_MARKS];
/* current index into rs_mark_{left,time} */
int rs_last_mark;
/* where does the admin want us to start? (sector) */
sector_t ov_start_sector;
@ -1012,10 +1065,10 @@ struct drbd_conf {
spinlock_t epoch_lock;
unsigned int epochs;
enum write_ordering_e write_ordering;
struct list_head active_ee; /* IO in progress */
struct list_head sync_ee; /* IO in progress */
struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */
struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */
struct list_head done_ee; /* send ack */
struct list_head read_ee; /* IO in progress */
struct list_head read_ee; /* IO in progress (any read) */
struct list_head net_ee; /* zero-copy network send in progress */
struct hlist_head *ee_hash; /* is proteced by req_lock! */
unsigned int ee_hash_s;
@ -1026,7 +1079,8 @@ struct drbd_conf {
int next_barrier_nr;
struct hlist_head *app_reads_hash; /* is proteced by req_lock */
struct list_head resync_reads;
atomic_t pp_in_use;
atomic_t pp_in_use; /* allocated from page pool */
atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */
wait_queue_head_t ee_wait;
struct page *md_io_page; /* one page buffer for md_io */
struct page *md_io_tmpp; /* for logical_block_size != 512 */
@ -1054,6 +1108,15 @@ struct drbd_conf {
u64 ed_uuid; /* UUID of the exposed data */
struct mutex state_mutex;
char congestion_reason; /* Why we where congested... */
atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
atomic_t rs_sect_ev; /* for submitted resync data rate, both */
int rs_last_sect_ev; /* counter to compare with */
int rs_last_events; /* counter of read or write "events" (unit sectors)
* on the lower level device when we last looked. */
int c_sync_rate; /* current resync rate after syncer throttle magic */
struct fifo_buffer rs_plan_s; /* correction values of resync planer */
int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
int rs_planed; /* resync sectors already planed */
};
static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@ -1138,6 +1201,8 @@ extern void drbd_free_resources(struct drbd_conf *mdev);
extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
unsigned int set_size);
extern void tl_clear(struct drbd_conf *mdev);
enum drbd_req_event;
extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
extern void drbd_free_sock(struct drbd_conf *mdev);
extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
@ -1150,12 +1215,12 @@ extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_f
extern int _drbd_send_state(struct drbd_conf *mdev);
extern int drbd_send_state(struct drbd_conf *mdev);
extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
enum drbd_packets cmd, struct p_header *h,
enum drbd_packets cmd, struct p_header80 *h,
size_t size, unsigned msg_flags);
#define USE_DATA_SOCKET 1
#define USE_META_SOCKET 0
extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
enum drbd_packets cmd, struct p_header *h,
enum drbd_packets cmd, struct p_header80 *h,
size_t size);
extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd,
char *data, size_t size);
@ -1167,7 +1232,7 @@ extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
struct p_block_req *rp);
extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
struct p_data *dp);
struct p_data *dp, int data_size);
extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
sector_t sector, int blksize, u64 block_id);
extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
@ -1201,7 +1266,13 @@ extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local);
extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local);
extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
#ifndef DRBD_DEBUG_MD_SYNC
extern void drbd_md_mark_dirty(struct drbd_conf *mdev);
#else
#define drbd_md_mark_dirty(m) drbd_md_mark_dirty_(m, __LINE__ , __func__ )
extern void drbd_md_mark_dirty_(struct drbd_conf *mdev,
unsigned int line, const char *func);
#endif
extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
int (*io_fn)(struct drbd_conf *),
void (*done)(struct drbd_conf *, int),
@ -1209,6 +1280,7 @@ extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
extern void drbd_go_diskless(struct drbd_conf *mdev);
/* Meta data layout
@ -1264,6 +1336,8 @@ struct bm_extent {
* Bit 1 ==> local node thinks this block needs to be synced.
*/
#define SLEEP_TIME (HZ/10)
#define BM_BLOCK_SHIFT 12 /* 4k per bit */
#define BM_BLOCK_SIZE (1<<BM_BLOCK_SHIFT)
/* (9+3) : 512 bytes @ 8 bits; representing 16M storage
@ -1335,11 +1409,13 @@ struct bm_extent {
#endif
/* Sector shift value for the "hash" functions of tl_hash and ee_hash tables.
* With a value of 6 all IO in one 32K block make it to the same slot of the
* With a value of 8 all IO in one 128K block make it to the same slot of the
* hash table. */
#define HT_SHIFT 6
#define HT_SHIFT 8
#define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT))
#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */
/* Number of elements in the app_reads_hash */
#define APP_R_HSIZE 15
@ -1369,6 +1445,7 @@ extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_
/* bm_find_next variants for use while you hold drbd_bm_lock() */
extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo);
extern unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev);
extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev);
extern int drbd_bm_rs_done(struct drbd_conf *mdev);
/* for receive_bitmap */
@ -1421,7 +1498,8 @@ extern void resync_after_online_grow(struct drbd_conf *);
extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role,
int force);
enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev);
extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
/* drbd_worker.c */
@ -1467,10 +1545,12 @@ extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int);
extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
extern void resync_timer_fn(unsigned long data);
/* drbd_receiver.c */
extern int drbd_rs_should_slow_down(struct drbd_conf *mdev);
extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
const unsigned rw, const int fault_type);
extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
@ -1479,7 +1559,10 @@ extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
sector_t sector,
unsigned int data_size,
gfp_t gfp_mask) __must_hold(local);
extern void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e);
extern void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
int is_net);
#define drbd_free_ee(m,e) drbd_free_some_ee(m, e, 0)
#define drbd_free_net_ee(m,e) drbd_free_some_ee(m, e, 1)
extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
struct list_head *head);
extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
@ -1487,6 +1570,7 @@ extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled);
extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed);
extern void drbd_flush_workqueue(struct drbd_conf *mdev);
extern void drbd_free_tl_hash(struct drbd_conf *mdev);
/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to
* mess with get_fs/set_fs, we know we are KERNEL_DS always. */
@ -1600,6 +1684,8 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
#define susp_MASK 1
#define user_isp_MASK 1
#define aftr_isp_MASK 1
#define susp_nod_MASK 1
#define susp_fen_MASK 1
#define NS(T, S) \
({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
@ -1855,13 +1941,6 @@ static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
}
}
static inline void
_drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
{
list_add_tail(&w->list, &q->q);
up(&q->s);
}
static inline void
drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
{
@ -1899,19 +1978,19 @@ static inline void request_ping(struct drbd_conf *mdev)
static inline int drbd_send_short_cmd(struct drbd_conf *mdev,
enum drbd_packets cmd)
{
struct p_header h;
struct p_header80 h;
return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h));
}
static inline int drbd_send_ping(struct drbd_conf *mdev)
{
struct p_header h;
struct p_header80 h;
return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h));
}
static inline int drbd_send_ping_ack(struct drbd_conf *mdev)
{
struct p_header h;
struct p_header80 h;
return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h));
}
@ -2013,7 +2092,7 @@ static inline void inc_unacked(struct drbd_conf *mdev)
static inline void put_net_conf(struct drbd_conf *mdev)
{
if (atomic_dec_and_test(&mdev->net_cnt))
wake_up(&mdev->misc_wait);
wake_up(&mdev->net_cnt_wait);
}
/**
@ -2044,10 +2123,14 @@ static inline int get_net_conf(struct drbd_conf *mdev)
static inline void put_ldev(struct drbd_conf *mdev)
{
int i = atomic_dec_return(&mdev->local_cnt);
__release(local);
if (atomic_dec_and_test(&mdev->local_cnt))
D_ASSERT(i >= 0);
if (i == 0) {
if (mdev->state.disk == D_FAILED)
drbd_go_diskless(mdev);
wake_up(&mdev->misc_wait);
D_ASSERT(atomic_read(&mdev->local_cnt) >= 0);
}
}
#ifndef __CHECKER__
@ -2179,11 +2262,16 @@ static inline int drbd_state_is_stable(union drbd_state s)
return 1;
}
static inline int is_susp(union drbd_state s)
{
return s.susp || s.susp_nod || s.susp_fen;
}
static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
{
int mxb = drbd_get_max_buffers(mdev);
if (mdev->state.susp)
if (is_susp(mdev->state))
return 0;
if (test_bit(SUSPEND_IO, &mdev->flags))
return 0;

File diff suppressed because it is too large Load Diff

View File

@ -33,10 +33,13 @@
#include <linux/blkpg.h>
#include <linux/cpumask.h>
#include "drbd_int.h"
#include "drbd_req.h"
#include "drbd_wrappers.h"
#include <asm/unaligned.h>
#include <linux/drbd_tag_magic.h>
#include <linux/drbd_limits.h>
#include <linux/compiler.h>
#include <linux/kthread.h>
static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
@ -169,6 +172,10 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
put_net_conf(mdev);
}
/* The helper may take some time.
* write out any unsynced meta data changes now */
drbd_md_sync(mdev);
dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
drbd_bcast_ev_helper(mdev, cmd);
@ -202,12 +209,10 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
put_ldev(mdev);
} else {
dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
return mdev->state.pdsk;
nps = mdev->state.pdsk;
goto out;
}
if (fp == FP_STONITH)
_drbd_request_state(mdev, NS(susp, 1), CS_WAIT_COMPLETE);
r = drbd_khelper(mdev, "fence-peer");
switch ((r>>8) & 0xff) {
@ -252,9 +257,36 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
dev_info(DEV, "fence-peer helper returned %d (%s)\n",
(r>>8) & 0xff, ex_to_string);
out:
if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
/* The handler was not successful... unfreeze here, the
state engine can not unfreeze... */
_drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
}
return nps;
}
static int _try_outdate_peer_async(void *data)
{
struct drbd_conf *mdev = (struct drbd_conf *)data;
enum drbd_disk_state nps;
nps = drbd_try_outdate_peer(mdev);
drbd_request_state(mdev, NS(pdsk, nps));
return 0;
}
void drbd_try_outdate_peer_async(struct drbd_conf *mdev)
{
struct task_struct *opa;
opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev));
if (IS_ERR(opa))
dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
}
int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
{
@ -394,6 +426,39 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
return r;
}
static struct drbd_conf *ensure_mdev(int minor, int create)
{
struct drbd_conf *mdev;
if (minor >= minor_count)
return NULL;
mdev = minor_to_mdev(minor);
if (!mdev && create) {
struct gendisk *disk = NULL;
mdev = drbd_new_device(minor);
spin_lock_irq(&drbd_pp_lock);
if (minor_table[minor] == NULL) {
minor_table[minor] = mdev;
disk = mdev->vdisk;
mdev = NULL;
} /* else: we lost the race */
spin_unlock_irq(&drbd_pp_lock);
if (disk) /* we won the race above */
/* in case we ever add a drbd_delete_device(),
* don't forget the del_gendisk! */
add_disk(disk);
else /* we lost the race above */
drbd_free_mdev(mdev);
mdev = minor_to_mdev(minor);
}
return mdev;
}
static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
struct drbd_nl_cfg_reply *reply)
@ -494,6 +559,8 @@ char *ppsize(char *buf, unsigned long long size)
void drbd_suspend_io(struct drbd_conf *mdev)
{
set_bit(SUSPEND_IO, &mdev->flags);
if (is_susp(mdev->state))
return;
wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
}
@ -713,9 +780,6 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
blk_queue_segment_boundary(q, PAGE_SIZE-1);
blk_stack_limits(&q->limits, &b->limits, 0);
if (b->merge_bvec_fn)
dev_warn(DEV, "Backing device's merge_bvec_fn() = %p\n",
b->merge_bvec_fn);
dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q));
if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
@ -729,14 +793,16 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
/* serialize deconfig (worker exiting, doing cleanup)
* and reconfig (drbdsetup disk, drbdsetup net)
*
* wait for a potentially exiting worker, then restart it,
* or start a new one.
* Wait for a potentially exiting worker, then restart it,
* or start a new one. Flush any pending work, there may still be an
* after_state_change queued.
*/
static void drbd_reconfig_start(struct drbd_conf *mdev)
{
wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags));
wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags));
drbd_thread_start(&mdev->worker);
drbd_flush_workqueue(mdev);
}
/* if still unconfigured, stops worker again.
@ -756,6 +822,29 @@ static void drbd_reconfig_done(struct drbd_conf *mdev)
wake_up(&mdev->state_wait);
}
/* Make sure IO is suspended before calling this function(). */
static void drbd_suspend_al(struct drbd_conf *mdev)
{
int s = 0;
if (lc_try_lock(mdev->act_log)) {
drbd_al_shrink(mdev);
lc_unlock(mdev->act_log);
} else {
dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
return;
}
spin_lock_irq(&mdev->req_lock);
if (mdev->state.conn < C_CONNECTED)
s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);
spin_unlock_irq(&mdev->req_lock);
if (s)
dev_info(DEV, "Suspended AL updates\n");
}
/* does always return 0;
* interesting return code is in reply->ret_code */
static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
@ -769,6 +858,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
struct inode *inode, *inode2;
struct lru_cache *resync_lru = NULL;
union drbd_state ns, os;
unsigned int max_seg_s;
int rv;
int cp_discovered = 0;
int logical_block_size;
@ -803,6 +893,15 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
goto fail;
}
if (get_net_conf(mdev)) {
int prot = mdev->net_conf->wire_protocol;
put_net_conf(mdev);
if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) {
retcode = ERR_STONITH_AND_PROT_A;
goto fail;
}
}
nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0);
if (IS_ERR(nbc->lo_file)) {
dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
@ -924,7 +1023,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
drbd_suspend_io(mdev);
/* also wait for the last barrier ack. */
wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt));
wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state));
/* and for any other previously queued work */
drbd_flush_workqueue(mdev);
@ -1021,7 +1120,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
else
clear_bit(CRASHED_PRIMARY, &mdev->flags);
if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND)) {
if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
!(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) {
set_bit(CRASHED_PRIMARY, &mdev->flags);
cp_discovered = 1;
}
@ -1031,7 +1131,20 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
mdev->read_cnt = 0;
mdev->writ_cnt = 0;
drbd_setup_queue_param(mdev, DRBD_MAX_SEGMENT_SIZE);
max_seg_s = DRBD_MAX_SEGMENT_SIZE;
if (mdev->state.conn == C_CONNECTED) {
/* We are Primary, Connected, and now attach a new local
* backing store. We must not increase the user visible maximum
* bio size on this device to something the peer may not be
* able to handle. */
if (mdev->agreed_pro_version < 94)
max_seg_s = queue_max_segment_size(mdev->rq_queue);
else if (mdev->agreed_pro_version == 94)
max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
/* else: drbd 8.3.9 and later, stay with default */
}
drbd_setup_queue_param(mdev, max_seg_s);
/* If I am currently not R_PRIMARY,
* but meta data primary indicator is set,
@ -1079,6 +1192,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
drbd_al_to_on_disk_bm(mdev);
}
if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
drbd_suspend_al(mdev); /* IO is still suspended here... */
spin_lock_irq(&mdev->req_lock);
os = mdev->state;
ns.i = os.i;
@ -1235,7 +1351,16 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
&& (new_conf->wire_protocol != DRBD_PROT_C)) {
retcode = ERR_NOT_PROTO_C;
goto fail;
};
}
if (get_ldev(mdev)) {
enum drbd_fencing_p fp = mdev->ldev->dc.fencing;
put_ldev(mdev);
if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) {
retcode = ERR_STONITH_AND_PROT_A;
goto fail;
}
}
if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
retcode = ERR_DISCARD;
@ -1350,6 +1475,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
}
}
drbd_flush_workqueue(mdev);
spin_lock_irq(&mdev->req_lock);
if (mdev->net_conf != NULL) {
retcode = ERR_NET_CONFIGURED;
@ -1388,10 +1514,9 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
mdev->int_dig_out=int_dig_out;
mdev->int_dig_in=int_dig_in;
mdev->int_dig_vv=int_dig_vv;
retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL);
spin_unlock_irq(&mdev->req_lock);
retcode = _drbd_request_state(mdev, NS(conn, C_UNCONNECTED), CS_VERBOSE);
kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
reply->ret_code = retcode;
drbd_reconfig_done(mdev);
@ -1546,6 +1671,8 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
struct crypto_hash *csums_tfm = NULL;
struct syncer_conf sc;
cpumask_var_t new_cpu_mask;
int *rs_plan_s = NULL;
int fifo_size;
if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
retcode = ERR_NOMEM;
@ -1557,6 +1684,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
sc.rate = DRBD_RATE_DEF;
sc.after = DRBD_AFTER_DEF;
sc.al_extents = DRBD_AL_EXTENTS_DEF;
sc.on_no_data = DRBD_ON_NO_DATA_DEF;
sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF;
sc.c_fill_target = DRBD_C_FILL_TARGET_DEF;
sc.c_max_rate = DRBD_C_MAX_RATE_DEF;
sc.c_min_rate = DRBD_C_MIN_RATE_DEF;
} else
memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
@ -1634,6 +1767,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
}
#undef AL_MAX
/* to avoid spurious errors when configuring minors before configuring
* the minors they depend on: if necessary, first create the minor we
* depend on */
if (sc.after >= 0)
ensure_mdev(sc.after, 1);
/* most sanity checks done, try to assign the new sync-after
* dependency. need to hold the global lock in there,
* to avoid a race in the dependency loop check. */
@ -1641,6 +1780,16 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
if (retcode != NO_ERROR)
goto fail;
fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
if (!rs_plan_s) {
dev_err(DEV, "kmalloc of fifo_buffer failed");
retcode = ERR_NOMEM;
goto fail;
}
}
/* ok, assign the rest of it as well.
* lock against receive_SyncParam() */
spin_lock(&mdev->peer_seq_lock);
@ -1657,6 +1806,15 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
mdev->verify_tfm = verify_tfm;
verify_tfm = NULL;
}
if (fifo_size != mdev->rs_plan_s.size) {
kfree(mdev->rs_plan_s.values);
mdev->rs_plan_s.values = rs_plan_s;
mdev->rs_plan_s.size = fifo_size;
mdev->rs_planed = 0;
rs_plan_s = NULL;
}
spin_unlock(&mdev->peer_seq_lock);
if (get_ldev(mdev)) {
@ -1688,6 +1846,7 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
fail:
kfree(rs_plan_s);
free_cpumask_var(new_cpu_mask);
crypto_free_hash(csums_tfm);
crypto_free_hash(verify_tfm);
@ -1721,12 +1880,38 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
return 0;
}
static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
{
int rv;
rv = drbd_bmio_set_n_write(mdev);
drbd_suspend_al(mdev);
return rv;
}
static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
struct drbd_nl_cfg_reply *reply)
{
int retcode;
reply->ret_code = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
if (retcode < SS_SUCCESS) {
if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
/* The peer will get a resync upon connect anyways. Just make that
into a full resync. */
retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
if (retcode >= SS_SUCCESS) {
/* open coded drbd_bitmap_io() */
if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
"set_n_write from invalidate_peer"))
retcode = ERR_IO_MD_DISK;
}
} else
retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
}
reply->ret_code = retcode;
return 0;
}
@ -1765,7 +1950,21 @@ static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
struct drbd_nl_cfg_reply *reply)
{
reply->ret_code = drbd_request_state(mdev, NS(susp, 0));
if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
drbd_uuid_new_current(mdev);
clear_bit(NEW_CUR_UUID, &mdev->flags);
drbd_md_sync(mdev);
}
drbd_suspend_io(mdev);
reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
if (reply->ret_code == SS_SUCCESS) {
if (mdev->state.conn < C_CONNECTED)
tl_clear(mdev);
if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
tl_restart(mdev, fail_frozen_disk_io);
}
drbd_resume_io(mdev);
return 0;
}
@ -1941,40 +2140,6 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
return 0;
}
static struct drbd_conf *ensure_mdev(struct drbd_nl_cfg_req *nlp)
{
struct drbd_conf *mdev;
if (nlp->drbd_minor >= minor_count)
return NULL;
mdev = minor_to_mdev(nlp->drbd_minor);
if (!mdev && (nlp->flags & DRBD_NL_CREATE_DEVICE)) {
struct gendisk *disk = NULL;
mdev = drbd_new_device(nlp->drbd_minor);
spin_lock_irq(&drbd_pp_lock);
if (minor_table[nlp->drbd_minor] == NULL) {
minor_table[nlp->drbd_minor] = mdev;
disk = mdev->vdisk;
mdev = NULL;
} /* else: we lost the race */
spin_unlock_irq(&drbd_pp_lock);
if (disk) /* we won the race above */
/* in case we ever add a drbd_delete_device(),
* don't forget the del_gendisk! */
add_disk(disk);
else /* we lost the race above */
drbd_free_mdev(mdev);
mdev = minor_to_mdev(nlp->drbd_minor);
}
return mdev;
}
struct cn_handler_struct {
int (*function)(struct drbd_conf *,
struct drbd_nl_cfg_req *,
@ -2035,7 +2200,8 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
goto fail;
}
mdev = ensure_mdev(nlp);
mdev = ensure_mdev(nlp->drbd_minor,
(nlp->flags & DRBD_NL_CREATE_DEVICE));
if (!mdev) {
retcode = ERR_MINOR_INVALID;
goto fail;

View File

@ -57,6 +57,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
unsigned long db, dt, dbdt, rt, rs_left;
unsigned int res;
int i, x, y;
int stalled = 0;
drbd_get_syncer_progress(mdev, &rs_left, &res);
@ -90,18 +91,17 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
* db: blocks written from mark until now
* rt: remaining time
*/
dt = (jiffies - mdev->rs_mark_time) / HZ;
if (dt > 20) {
/* if we made no update to rs_mark_time for too long,
* we are stalled. show that. */
seq_printf(seq, "stalled\n");
return;
}
/* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is
* at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at
* least DRBD_SYNC_MARK_STEP time before it will be modified. */
i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS;
dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
stalled = 1;
if (!dt)
dt++;
db = mdev->rs_mark_left - rs_left;
db = mdev->rs_mark_left[i] - rs_left;
rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */
seq_printf(seq, "finish: %lu:%02lu:%02lu",
@ -118,7 +118,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
/* mean speed since syncer started
* we do account for PausedSync periods */
dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
if (dt <= 0)
if (dt == 0)
dt = 1;
db = mdev->rs_total - rs_left;
dbdt = Bit2KB(db/dt);
@ -128,7 +128,14 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
else
seq_printf(seq, " (%ld)", dbdt);
seq_printf(seq, " K/sec\n");
if (mdev->state.conn == C_SYNC_TARGET) {
if (mdev->c_sync_rate > 1000)
seq_printf(seq, " want: %d,%03d",
mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000);
else
seq_printf(seq, " want: %d", mdev->c_sync_rate);
}
seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
}
static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
@ -196,7 +203,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%2d: cs:Unconfigured\n", i);
} else {
seq_printf(seq,
"%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c\n"
"%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
" ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
"lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
i, sn,
@ -206,11 +213,12 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
drbd_disk_str(mdev->state.pdsk),
(mdev->net_conf == NULL ? ' ' :
(mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
mdev->state.susp ? 's' : 'r',
is_susp(mdev->state) ? 's' : 'r',
mdev->state.aftr_isp ? 'a' : '-',
mdev->state.peer_isp ? 'p' : '-',
mdev->state.user_isp ? 'u' : '-',
mdev->congestion_reason ?: '-',
test_bit(AL_SUSPENDED, &mdev->flags) ? 's' : '-',
mdev->send_cnt/2,
mdev->recv_cnt/2,
mdev->writ_cnt/2,

File diff suppressed because it is too large Load Diff

View File

@ -59,17 +59,19 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
{
const unsigned long s = req->rq_state;
/* remove it from the transfer log.
* well, only if it had been there in the first
* place... if it had not (local only or conflicting
* and never sent), it should still be "empty" as
* initialized in drbd_req_new(), so we can list_del() it
* here unconditionally */
list_del(&req->tl_requests);
/* if it was a write, we may have to set the corresponding
* bit(s) out-of-sync first. If it had a local part, we need to
* release the reference to the activity log. */
if (rw == WRITE) {
/* remove it from the transfer log.
* well, only if it had been there in the first
* place... if it had not (local only or conflicting
* and never sent), it should still be "empty" as
* initialized in drbd_req_new(), so we can list_del() it
* here unconditionally */
list_del(&req->tl_requests);
/* Set out-of-sync unless both OK flags are set
* (local only or remote failed).
* Other places where we set out-of-sync:
@ -92,7 +94,8 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
*/
if (s & RQ_LOCAL_MASK) {
if (get_ldev_if_state(mdev, D_FAILED)) {
drbd_al_complete_io(mdev, req->sector);
if (s & RQ_IN_ACT_LOG)
drbd_al_complete_io(mdev, req->sector);
put_ldev(mdev);
} else if (__ratelimit(&drbd_ratelimit_state)) {
dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), "
@ -280,6 +283,14 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
* protocol A or B, barrier ack still pending... */
}
static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m)
{
struct drbd_conf *mdev = req->mdev;
if (!is_susp(mdev->state))
_req_may_be_done(req, m);
}
/*
* checks whether there was an overlapping request
* or ee already registered.
@ -380,10 +391,11 @@ static int _req_conflicts(struct drbd_request *req)
* and it enforces that we have to think in a very structured manner
* about the "events" that may happen to a request during its life time ...
*/
void __req_mod(struct drbd_request *req, enum drbd_req_event what,
int __req_mod(struct drbd_request *req, enum drbd_req_event what,
struct bio_and_error *m)
{
struct drbd_conf *mdev = req->mdev;
int rv = 0;
m->bio = NULL;
switch (what) {
@ -420,7 +432,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
req->rq_state &= ~RQ_LOCAL_PENDING;
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
put_ldev(mdev);
break;
@ -429,7 +441,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state &= ~RQ_LOCAL_PENDING;
__drbd_chk_io_error(mdev, FALSE);
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
put_ldev(mdev);
break;
@ -437,7 +449,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
/* it is legal to fail READA */
req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING;
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
put_ldev(mdev);
break;
@ -455,7 +467,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
/* no point in retrying if there is no good remote data,
* or we have no connection. */
if (mdev->state.pdsk != D_UP_TO_DATE) {
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
break;
}
@ -517,11 +529,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0);
req->epoch = mdev->newest_tle->br_number;
list_add_tail(&req->tl_requests,
&mdev->newest_tle->requests);
/* increment size of current epoch */
mdev->newest_tle->n_req++;
mdev->newest_tle->n_writes++;
/* queue work item to send data */
D_ASSERT(req->rq_state & RQ_NET_PENDING);
@ -530,7 +540,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
drbd_queue_work(&mdev->data.work, &req->w);
/* close the epoch, in case it outgrew the limit */
if (mdev->newest_tle->n_req >= mdev->net_conf->max_epoch_size)
if (mdev->newest_tle->n_writes >= mdev->net_conf->max_epoch_size)
queue_barrier(mdev);
break;
@ -543,7 +553,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state &= ~RQ_NET_QUEUED;
/* if we did it right, tl_clear should be scheduled only after
* this, so this should not be necessary! */
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
break;
case handed_over_to_network:
@ -568,7 +578,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
* "completed_ok" events came in, once we return from
* _drbd_send_zc_bio (drbd_send_dblock), we have to check
* whether it is done already, and end it. */
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
break;
case read_retry_remote_canceled:
@ -584,7 +594,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
/* if it is still queued, we may not complete it here.
* it will be canceled soon. */
if (!(req->rq_state & RQ_NET_QUEUED))
_req_may_be_done(req, m);
_req_may_be_done(req, m); /* Allowed while state.susp */
break;
case write_acked_by_peer_and_sis:
@ -619,7 +629,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
D_ASSERT(req->rq_state & RQ_NET_PENDING);
dec_ap_pending(mdev);
req->rq_state &= ~RQ_NET_PENDING;
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
break;
case neg_acked:
@ -629,11 +639,50 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
req->rq_state |= RQ_NET_DONE;
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
/* else: done by handed_over_to_network */
break;
case fail_frozen_disk_io:
if (!(req->rq_state & RQ_LOCAL_COMPLETED))
break;
_req_may_be_done(req, m); /* Allowed while state.susp */
break;
case restart_frozen_disk_io:
if (!(req->rq_state & RQ_LOCAL_COMPLETED))
break;
req->rq_state &= ~RQ_LOCAL_COMPLETED;
rv = MR_READ;
if (bio_data_dir(req->master_bio) == WRITE)
rv = MR_WRITE;
get_ldev(mdev);
req->w.cb = w_restart_disk_io;
drbd_queue_work(&mdev->data.work, &req->w);
break;
case resend:
/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
before the connection loss (B&C only); only P_BARRIER_ACK was missing.
Trowing them out of the TL here by pretending we got a BARRIER_ACK
We ensure that the peer was not rebooted */
if (!(req->rq_state & RQ_NET_OK)) {
if (req->w.cb) {
drbd_queue_work(&mdev->data.work, &req->w);
rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
}
break;
}
/* else, fall through to barrier_acked */
case barrier_acked:
if (!(req->rq_state & RQ_WRITE))
break;
if (req->rq_state & RQ_NET_PENDING) {
/* barrier came in before all requests have been acked.
* this is bad, because if the connection is lost now,
@ -643,7 +692,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
}
D_ASSERT(req->rq_state & RQ_NET_SENT);
req->rq_state |= RQ_NET_DONE;
_req_may_be_done(req, m);
_req_may_be_done(req, m); /* Allowed while state.susp */
break;
case data_received:
@ -651,9 +700,11 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
dec_ap_pending(mdev);
req->rq_state &= ~RQ_NET_PENDING;
req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
_req_may_be_done(req, m);
_req_may_be_done_not_susp(req, m);
break;
};
return rv;
}
/* we may do a local read if:
@ -752,14 +803,16 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
* resync extent to finish, and, if necessary, pulls in the target
* extent into the activity log, which involves further disk io because
* of transactional on-disk meta data updates. */
if (rw == WRITE && local)
if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) {
req->rq_state |= RQ_IN_ACT_LOG;
drbd_al_begin_io(mdev, sector);
}
remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
(mdev->state.pdsk == D_INCONSISTENT &&
mdev->state.conn >= C_CONNECTED));
if (!(local || remote) && !mdev->state.susp) {
if (!(local || remote) && !is_susp(mdev->state)) {
dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
goto fail_free_complete;
}
@ -785,7 +838,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
/* GOOD, everything prepared, grab the spin_lock */
spin_lock_irq(&mdev->req_lock);
if (mdev->state.susp) {
if (is_susp(mdev->state)) {
/* If we got suspended, use the retry mechanism of
generic_make_request() to restart processing of this
bio. In the next call to drbd_make_request_26
@ -867,30 +920,10 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
/* check this request on the collision detection hash tables.
* if we have a conflict, just complete it here.
* THINK do we want to check reads, too? (I don't think so...) */
if (rw == WRITE && _req_conflicts(req)) {
/* this is a conflicting request.
* even though it may have been only _partially_
* overlapping with one of the currently pending requests,
* without even submitting or sending it, we will
* pretend that it was successfully served right now.
*/
if (local) {
bio_put(req->private_bio);
req->private_bio = NULL;
drbd_al_complete_io(mdev, req->sector);
put_ldev(mdev);
local = 0;
}
if (remote)
dec_ap_pending(mdev);
_drbd_end_io_acct(mdev, req);
/* THINK: do we want to fail it (-EIO), or pretend success? */
bio_endio(req->master_bio, 0);
req->master_bio = NULL;
dec_ap_bio(mdev);
drbd_req_free(req);
remote = 0;
}
if (rw == WRITE && _req_conflicts(req))
goto fail_conflicting;
list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);
/* NOTE remote first: to get the concurrent write detection right,
* we must register the request before start of local IO. */
@ -923,6 +956,21 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
return 0;
fail_conflicting:
/* this is a conflicting request.
* even though it may have been only _partially_
* overlapping with one of the currently pending requests,
* without even submitting or sending it, we will
* pretend that it was successfully served right now.
*/
_drbd_end_io_acct(mdev, req);
spin_unlock_irq(&mdev->req_lock);
if (remote)
dec_ap_pending(mdev);
/* THINK: do we want to fail it (-EIO), or pretend success?
* this pretends success. */
err = 0;
fail_free_complete:
if (rw == WRITE && local)
drbd_al_complete_io(mdev, sector);
@ -961,21 +1009,6 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
return 1;
}
/*
* Paranoia: we might have been primary, but sync target, or
* even diskless, then lost the connection.
* This should have been handled (panic? suspend?) somewhere
* else. But maybe it was not, so check again here.
* Caution: as long as we do not have a read/write lock on mdev,
* to serialize state changes, this is racy, since we may lose
* the connection *after* we test for the cstate.
*/
if (mdev->state.disk < D_UP_TO_DATE && mdev->state.pdsk < D_UP_TO_DATE) {
if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Sorry, I have no access to good data anymore.\n");
return 1;
}
return 0;
}

View File

@ -104,6 +104,9 @@ enum drbd_req_event {
read_ahead_completed_with_error,
write_completed_with_error,
completed_ok,
resend,
fail_frozen_disk_io,
restart_frozen_disk_io,
nothing, /* for tracing only */
};
@ -183,6 +186,12 @@ enum drbd_req_state_bits {
/* keep this last, its for the RQ_NET_MASK */
__RQ_NET_MAX,
/* Set when this is a write, clear for a read */
__RQ_WRITE,
/* Should call drbd_al_complete_io() for this request... */
__RQ_IN_ACT_LOG,
};
#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING)
@ -201,6 +210,16 @@ enum drbd_req_state_bits {
/* 0x1f8 */
#define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)
#define RQ_WRITE (1UL << __RQ_WRITE)
#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG)
/* For waking up the frozen transfer log mod_req() has to return if the request
should be counted in the epoch object*/
#define MR_WRITE_SHIFT 0
#define MR_WRITE (1 << MR_WRITE_SHIFT)
#define MR_READ_SHIFT 1
#define MR_READ (1 << MR_READ_SHIFT)
/* epoch entries */
static inline
struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
@ -244,30 +263,36 @@ static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
return NULL;
}
static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
{
struct bio *bio;
bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
req->private_bio = bio;
bio->bi_private = req;
bio->bi_end_io = drbd_endio_pri;
bio->bi_next = NULL;
}
static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
struct bio *bio_src)
{
struct bio *bio;
struct drbd_request *req =
mempool_alloc(drbd_request_mempool, GFP_NOIO);
if (likely(req)) {
bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
drbd_req_make_private_bio(req, bio_src);
req->rq_state = 0;
req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
req->mdev = mdev;
req->master_bio = bio_src;
req->private_bio = bio;
req->epoch = 0;
req->sector = bio->bi_sector;
req->size = bio->bi_size;
req->sector = bio_src->bi_sector;
req->size = bio_src->bi_size;
req->start_time = jiffies;
INIT_HLIST_NODE(&req->colision);
INIT_LIST_HEAD(&req->tl_requests);
INIT_LIST_HEAD(&req->w.list);
bio->bi_private = req;
bio->bi_end_io = drbd_endio_pri;
bio->bi_next = NULL;
}
return req;
}
@ -292,36 +317,43 @@ struct bio_and_error {
extern void _req_may_be_done(struct drbd_request *req,
struct bio_and_error *m);
extern void __req_mod(struct drbd_request *req, enum drbd_req_event what,
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
struct bio_and_error *m);
extern void complete_master_bio(struct drbd_conf *mdev,
struct bio_and_error *m);
/* use this if you don't want to deal with calling complete_master_bio()
* outside the spinlock, e.g. when walking some list on cleanup. */
static inline void _req_mod(struct drbd_request *req, enum drbd_req_event what)
static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
{
struct drbd_conf *mdev = req->mdev;
struct bio_and_error m;
int rv;
/* __req_mod possibly frees req, do not touch req after that! */
__req_mod(req, what, &m);
rv = __req_mod(req, what, &m);
if (m.bio)
complete_master_bio(mdev, &m);
return rv;
}
/* completion of master bio is outside of spinlock.
* If you need it irqsave, do it your self! */
static inline void req_mod(struct drbd_request *req,
static inline int req_mod(struct drbd_request *req,
enum drbd_req_event what)
{
struct drbd_conf *mdev = req->mdev;
struct bio_and_error m;
int rv;
spin_lock_irq(&mdev->req_lock);
__req_mod(req, what, &m);
rv = __req_mod(req, what, &m);
spin_unlock_irq(&mdev->req_lock);
if (m.bio)
complete_master_bio(mdev, &m);
return rv;
}
#endif

View File

@ -39,8 +39,6 @@
#include "drbd_int.h"
#include "drbd_req.h"
#define SLEEP_TIME (HZ/10)
static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
@ -217,10 +215,8 @@ void drbd_endio_sec(struct bio *bio, int error)
*/
void drbd_endio_pri(struct bio *bio, int error)
{
unsigned long flags;
struct drbd_request *req = bio->bi_private;
struct drbd_conf *mdev = req->mdev;
struct bio_and_error m;
enum drbd_req_event what;
int uptodate = bio_flagged(bio, BIO_UPTODATE);
@ -246,12 +242,7 @@ void drbd_endio_pri(struct bio *bio, int error)
bio_put(req->private_bio);
req->private_bio = ERR_PTR(error);
spin_lock_irqsave(&mdev->req_lock, flags);
__req_mod(req, what, &m);
spin_unlock_irqrestore(&mdev->req_lock, flags);
if (m.bio)
complete_master_bio(mdev, &m);
req_mod(req, what);
}
int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
@ -376,54 +367,145 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
struct drbd_epoch_entry *e;
if (!get_ldev(mdev))
return 0;
return -EIO;
if (drbd_rs_should_slow_down(mdev))
goto defer;
/* GFP_TRY, because if there is no memory available right now, this may
* be rescheduled for later. It is "only" background resync, after all. */
e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY);
if (!e)
goto fail;
goto defer;
e->w.cb = w_e_send_csum;
spin_lock_irq(&mdev->req_lock);
list_add(&e->w.list, &mdev->read_ee);
spin_unlock_irq(&mdev->req_lock);
e->w.cb = w_e_send_csum;
atomic_add(size >> 9, &mdev->rs_sect_ev);
if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
return 1;
return 0;
/* drbd_submit_ee currently fails for one reason only:
* not being able to allocate enough bios.
* Is dropping the connection going to help? */
spin_lock_irq(&mdev->req_lock);
list_del(&e->w.list);
spin_unlock_irq(&mdev->req_lock);
drbd_free_ee(mdev, e);
fail:
defer:
put_ldev(mdev);
return 2;
return -EAGAIN;
}
void resync_timer_fn(unsigned long data)
{
unsigned long flags;
struct drbd_conf *mdev = (struct drbd_conf *) data;
int queue;
spin_lock_irqsave(&mdev->req_lock, flags);
if (likely(!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))) {
queue = 1;
if (mdev->state.conn == C_VERIFY_S)
mdev->resync_work.cb = w_make_ov_request;
else
mdev->resync_work.cb = w_make_resync_request;
} else {
queue = 1;
switch (mdev->state.conn) {
case C_VERIFY_S:
mdev->resync_work.cb = w_make_ov_request;
break;
case C_SYNC_TARGET:
mdev->resync_work.cb = w_make_resync_request;
break;
default:
queue = 0;
mdev->resync_work.cb = w_resync_inactive;
}
spin_unlock_irqrestore(&mdev->req_lock, flags);
/* harmless race: list_empty outside data.work.q_lock */
if (list_empty(&mdev->resync_work.list) && queue)
drbd_queue_work(&mdev->data.work, &mdev->resync_work);
}
static void fifo_set(struct fifo_buffer *fb, int value)
{
int i;
for (i = 0; i < fb->size; i++)
fb->values[i] = value;
}
static int fifo_push(struct fifo_buffer *fb, int value)
{
int ov;
ov = fb->values[fb->head_index];
fb->values[fb->head_index++] = value;
if (fb->head_index >= fb->size)
fb->head_index = 0;
return ov;
}
static void fifo_add_val(struct fifo_buffer *fb, int value)
{
int i;
for (i = 0; i < fb->size; i++)
fb->values[i] += value;
}
int drbd_rs_controller(struct drbd_conf *mdev)
{
unsigned int sect_in; /* Number of sectors that came in since the last turn */
unsigned int want; /* The number of sectors we want in the proxy */
int req_sect; /* Number of sectors to request in this turn */
int correction; /* Number of sectors more we need in the proxy*/
int cps; /* correction per invocation of drbd_rs_controller() */
int steps; /* Number of time steps to plan ahead */
int curr_corr;
int max_sect;
sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
mdev->rs_in_flight -= sect_in;
spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps;
} else { /* normal path */
want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target :
sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10);
}
correction = want - mdev->rs_in_flight - mdev->rs_planed;
/* Plan ahead */
cps = correction / steps;
fifo_add_val(&mdev->rs_plan_s, cps);
mdev->rs_planed += cps * steps;
/* What we do in this step */
curr_corr = fifo_push(&mdev->rs_plan_s, 0);
spin_unlock(&mdev->peer_seq_lock);
mdev->rs_planed -= curr_corr;
req_sect = sect_in + curr_corr;
if (req_sect < 0)
req_sect = 0;
max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ;
if (req_sect > max_sect)
req_sect = max_sect;
/*
dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
sect_in, mdev->rs_in_flight, want, correction,
steps, cps, mdev->rs_planed, curr_corr, req_sect);
*/
return req_sect;
}
int w_make_resync_request(struct drbd_conf *mdev,
struct drbd_work *w, int cancel)
{
@ -431,8 +513,9 @@ int w_make_resync_request(struct drbd_conf *mdev,
sector_t sector;
const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
int max_segment_size;
int number, i, size, pe, mx;
int number, rollback_i, size, pe, mx;
int align, queued, sndbuf;
int i = 0;
if (unlikely(cancel))
return 1;
@ -446,6 +529,12 @@ int w_make_resync_request(struct drbd_conf *mdev,
dev_err(DEV, "%s in w_make_resync_request\n",
drbd_conn_str(mdev->state.conn));
if (mdev->rs_total == 0) {
/* empty resync? */
drbd_resync_finished(mdev);
return 1;
}
if (!get_ldev(mdev)) {
/* Since we only need to access mdev->rsync a
get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
@ -458,11 +547,25 @@ int w_make_resync_request(struct drbd_conf *mdev,
/* starting with drbd 8.3.8, we can handle multi-bio EEs,
* if it should be necessary */
max_segment_size = mdev->agreed_pro_version < 94 ?
queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE;
max_segment_size =
mdev->agreed_pro_version < 94 ? queue_max_segment_size(mdev->rq_queue) :
mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_SEGMENT_SIZE;
number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE / 1024) * HZ);
pe = atomic_read(&mdev->rs_pending_cnt);
if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
} else {
mdev->c_sync_rate = mdev->sync_conf.rate;
number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
}
/* Throttle resync on lower level disk activity, which may also be
* caused by application IO on Primary/SyncTarget.
* Keep this after the call to drbd_rs_controller, as that assumes
* to be called as precisely as possible every SLEEP_TIME,
* and would be confused otherwise. */
if (drbd_rs_should_slow_down(mdev))
goto requeue;
mutex_lock(&mdev->data.mutex);
if (mdev->data.socket)
@ -476,6 +579,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
mx = number;
/* Limit the number of pending RS requests to no more than the peer's receive buffer */
pe = atomic_read(&mdev->rs_pending_cnt);
if ((pe + number) > mx) {
number = mx - pe;
}
@ -526,6 +630,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
* be prepared for all stripe sizes of software RAIDs.
*/
align = 1;
rollback_i = i;
for (;;) {
if (size + BM_BLOCK_SIZE > max_segment_size)
break;
@ -561,14 +666,19 @@ int w_make_resync_request(struct drbd_conf *mdev,
size = (capacity-sector)<<9;
if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) {
switch (read_for_csum(mdev, sector, size)) {
case 0: /* Disk failure*/
case -EIO: /* Disk failure */
put_ldev(mdev);
return 0;
case 2: /* Allocation failed */
case -EAGAIN: /* allocation failed, or ldev busy */
drbd_rs_complete_io(mdev, sector);
mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
i = rollback_i;
goto requeue;
/* case 1: everything ok */
case 0:
/* everything ok */
break;
default:
BUG();
}
} else {
inc_rs_pending(mdev);
@ -595,6 +705,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
}
requeue:
mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
put_ldev(mdev);
return 1;
@ -670,6 +781,14 @@ static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int ca
return 1;
}
static void ping_peer(struct drbd_conf *mdev)
{
clear_bit(GOT_PING_ACK, &mdev->flags);
request_ping(mdev);
wait_event(mdev->misc_wait,
test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
}
int drbd_resync_finished(struct drbd_conf *mdev)
{
unsigned long db, dt, dbdt;
@ -709,6 +828,8 @@ int drbd_resync_finished(struct drbd_conf *mdev)
if (!get_ldev(mdev))
goto out;
ping_peer(mdev);
spin_lock_irq(&mdev->req_lock);
os = mdev->state;
@ -801,6 +922,8 @@ int drbd_resync_finished(struct drbd_conf *mdev)
mdev->rs_paused = 0;
mdev->ov_start_sector = 0;
drbd_md_sync(mdev);
if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n");
drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
@ -817,9 +940,13 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_ent
{
if (drbd_ee_has_active_page(e)) {
/* This might happen if sendpage() has not finished */
int i = (e->size + PAGE_SIZE -1) >> PAGE_SHIFT;
atomic_add(i, &mdev->pp_in_use_by_net);
atomic_sub(i, &mdev->pp_in_use);
spin_lock_irq(&mdev->req_lock);
list_add_tail(&e->w.list, &mdev->net_ee);
spin_unlock_irq(&mdev->req_lock);
wake_up(&drbd_pp_wait);
} else
drbd_free_ee(mdev, e);
}
@ -926,9 +1053,12 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return 1;
}
drbd_rs_complete_io(mdev, e->sector);
if (get_ldev(mdev)) {
drbd_rs_complete_io(mdev, e->sector);
put_ldev(mdev);
}
di = (struct digest_info *)(unsigned long)e->block_id;
di = e->digest;
if (likely((e->flags & EE_WAS_ERROR) == 0)) {
/* quick hack to try to avoid a race against reconfiguration.
@ -952,7 +1082,9 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
} else {
inc_rs_pending(mdev);
e->block_id = ID_SYNCER;
e->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
e->flags &= ~EE_HAS_DIGEST; /* This e no longer has a digest pointer */
kfree(di);
ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
}
} else {
@ -962,9 +1094,6 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
}
dec_unacked(mdev);
kfree(di);
move_to_net_ee_or_free(mdev, e);
if (unlikely(!ok))
@ -1034,9 +1163,12 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
* the resync lru has been cleaned up already */
drbd_rs_complete_io(mdev, e->sector);
if (get_ldev(mdev)) {
drbd_rs_complete_io(mdev, e->sector);
put_ldev(mdev);
}
di = (struct digest_info *)(unsigned long)e->block_id;
di = e->digest;
if (likely((e->flags & EE_WAS_ERROR) == 0)) {
digest_size = crypto_hash_digestsize(mdev->verify_tfm);
@ -1055,9 +1187,6 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
}
dec_unacked(mdev);
kfree(di);
if (!eq)
drbd_ov_oos_found(mdev, e->sector, e->size);
else
@ -1108,7 +1237,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
* dec_ap_pending will be done in got_BarrierAck
* or (on connection loss) in w_clear_epoch. */
ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER,
(struct p_header *)p, sizeof(*p), 0);
(struct p_header80 *)p, sizeof(*p), 0);
drbd_put_data_sock(mdev);
return ok;
@ -1173,6 +1302,24 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
return ok;
}
int w_restart_disk_io(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct drbd_request *req = container_of(w, struct drbd_request, w);
if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
drbd_al_begin_io(mdev, req->sector);
/* Calling drbd_al_begin_io() out of the worker might deadlocks
theoretically. Practically it can not deadlock, since this is
only used when unfreezing IOs. All the extents of the requests
that made it into the TL are already active */
drbd_req_make_private_bio(req, req->master_bio);
req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
generic_make_request(req->private_bio);
return 1;
}
static int _drbd_may_sync_now(struct drbd_conf *mdev)
{
struct drbd_conf *odev = mdev;
@ -1298,14 +1445,6 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na)
return retcode;
}
static void ping_peer(struct drbd_conf *mdev)
{
clear_bit(GOT_PING_ACK, &mdev->flags);
request_ping(mdev);
wait_event(mdev->misc_wait,
test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
}
/**
* drbd_start_resync() - Start the resync process
* @mdev: DRBD device.
@ -1379,13 +1518,21 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
r = SS_UNKNOWN_ERROR;
if (r == SS_SUCCESS) {
mdev->rs_total =
mdev->rs_mark_left = drbd_bm_total_weight(mdev);
unsigned long tw = drbd_bm_total_weight(mdev);
unsigned long now = jiffies;
int i;
mdev->rs_failed = 0;
mdev->rs_paused = 0;
mdev->rs_start =
mdev->rs_mark_time = jiffies;
mdev->rs_same_csum = 0;
mdev->rs_last_events = 0;
mdev->rs_last_sect_ev = 0;
mdev->rs_total = tw;
mdev->rs_start = now;
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
mdev->rs_mark_left[i] = tw;
mdev->rs_mark_time[i] = now;
}
_drbd_pause_after(mdev);
}
write_unlock_irq(&global_state_lock);
@ -1397,12 +1544,31 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
(unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
(unsigned long) mdev->rs_total);
if (mdev->rs_total == 0) {
/* Peer still reachable? Beware of failing before-resync-target handlers! */
ping_peer(mdev);
if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) {
/* This still has a race (about when exactly the peers
* detect connection loss) that can lead to a full sync
* on next handshake. In 8.3.9 we fixed this with explicit
* resync-finished notifications, but the fix
* introduces a protocol change. Sleeping for some
* time longer than the ping interval + timeout on the
* SyncSource, to give the SyncTarget the chance to
* detect connection loss, then waiting for a ping
* response (implicit in drbd_resync_finished) reduces
* the race considerably, but does not solve it. */
if (side == C_SYNC_SOURCE)
schedule_timeout_interruptible(
mdev->net_conf->ping_int * HZ +
mdev->net_conf->ping_timeo*HZ/9);
drbd_resync_finished(mdev);
}
atomic_set(&mdev->rs_sect_in, 0);
atomic_set(&mdev->rs_sect_ev, 0);
mdev->rs_in_flight = 0;
mdev->rs_planed = 0;
spin_lock(&mdev->peer_seq_lock);
fifo_set(&mdev->rs_plan_s, 0);
spin_unlock(&mdev->peer_seq_lock);
/* ns.conn may already be != mdev->state.conn,
* we may have been paused in between, or become paused until
* the timer triggers.

View File

@ -258,8 +258,8 @@ static int irqdma_allocated;
#include <linux/completion.h>
static struct request *current_req;
static struct request_queue *floppy_queue;
static void do_fd_request(struct request_queue *q);
static int set_next_request(void);
#ifndef fd_get_dma_residue
#define fd_get_dma_residue() get_dma_residue(FLOPPY_DMA)
@ -413,6 +413,7 @@ static struct gendisk *disks[N_DRIVE];
static struct block_device *opened_bdev[N_DRIVE];
static DEFINE_MUTEX(open_lock);
static struct floppy_raw_cmd *raw_cmd, default_raw_cmd;
static int fdc_queue;
/*
* This struct defines the different floppy types.
@ -890,8 +891,8 @@ static void unlock_fdc(void)
del_timer(&fd_timeout);
cont = NULL;
clear_bit(0, &fdc_busy);
if (current_req || blk_peek_request(floppy_queue))
do_fd_request(floppy_queue);
if (current_req || set_next_request())
do_fd_request(current_req->q);
spin_unlock_irqrestore(&floppy_lock, flags);
wake_up(&fdc_wait);
}
@ -2243,8 +2244,8 @@ static void floppy_end_request(struct request *req, int error)
* logical buffer */
static void request_done(int uptodate)
{
struct request_queue *q = floppy_queue;
struct request *req = current_req;
struct request_queue *q;
unsigned long flags;
int block;
char msg[sizeof("request done ") + sizeof(int) * 3];
@ -2258,6 +2259,8 @@ static void request_done(int uptodate)
return;
}
q = req->q;
if (uptodate) {
/* maintain values for invalidation on geometry
* change */
@ -2811,6 +2814,28 @@ static int make_raw_rw_request(void)
return 2;
}
/*
* Round-robin between our available drives, doing one request from each
*/
static int set_next_request(void)
{
struct request_queue *q;
int old_pos = fdc_queue;
do {
q = disks[fdc_queue]->queue;
if (++fdc_queue == N_DRIVE)
fdc_queue = 0;
if (q) {
current_req = blk_fetch_request(q);
if (current_req)
break;
}
} while (fdc_queue != old_pos);
return current_req != NULL;
}
static void redo_fd_request(void)
{
int drive;
@ -2822,17 +2847,17 @@ static void redo_fd_request(void)
do_request:
if (!current_req) {
struct request *req;
int pending;
spin_lock_irq(floppy_queue->queue_lock);
req = blk_fetch_request(floppy_queue);
spin_unlock_irq(floppy_queue->queue_lock);
if (!req) {
spin_lock_irq(&floppy_lock);
pending = set_next_request();
spin_unlock_irq(&floppy_lock);
if (!pending) {
do_floppy = NULL;
unlock_fdc();
return;
}
current_req = req;
}
drive = (long)current_req->rq_disk->private_data;
set_fdc(drive);
@ -4165,6 +4190,13 @@ static int __init floppy_init(void)
goto out_put_disk;
}
disks[dr]->queue = blk_init_queue(do_fd_request, &floppy_lock);
if (!disks[dr]->queue) {
err = -ENOMEM;
goto out_put_disk;
}
blk_queue_max_hw_sectors(disks[dr]->queue, 64);
disks[dr]->major = FLOPPY_MAJOR;
disks[dr]->first_minor = TOMINOR(dr);
disks[dr]->fops = &floppy_fops;
@ -4183,13 +4215,6 @@ static int __init floppy_init(void)
if (err)
goto out_unreg_blkdev;
floppy_queue = blk_init_queue(do_fd_request, &floppy_lock);
if (!floppy_queue) {
err = -ENOMEM;
goto out_unreg_driver;
}
blk_queue_max_hw_sectors(floppy_queue, 64);
blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE,
floppy_find, NULL, NULL);
@ -4317,7 +4342,6 @@ static int __init floppy_init(void)
/* to be cleaned up... */
disks[drive]->private_data = (void *)(long)drive;
disks[drive]->queue = floppy_queue;
disks[drive]->flags |= GENHD_FL_REMOVABLE;
disks[drive]->driverfs_dev = &floppy_device[drive].dev;
add_disk(disks[drive]);
@ -4333,8 +4357,6 @@ static int __init floppy_init(void)
floppy_release_irq_and_dma();
out_unreg_region:
blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
blk_cleanup_queue(floppy_queue);
out_unreg_driver:
platform_driver_unregister(&floppy_driver);
out_unreg_blkdev:
unregister_blkdev(FLOPPY_MAJOR, "fd");
@ -4342,6 +4364,8 @@ static int __init floppy_init(void)
while (dr--) {
del_timer(&motor_off_timer[dr]);
put_disk(disks[dr]);
if (disks[dr]->queue)
blk_cleanup_queue(disks[dr]->queue);
}
return err;
}
@ -4550,11 +4574,11 @@ static void __exit floppy_module_exit(void)
platform_device_unregister(&floppy_device[drive]);
}
put_disk(disks[drive]);
blk_cleanup_queue(disks[drive]->queue);
}
del_timer_sync(&fd_timeout);
del_timer_sync(&fd_timer);
blk_cleanup_queue(floppy_queue);
if (atomic_read(&usage_count))
floppy_release_irq_and_dma();

View File

@ -74,6 +74,7 @@
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/splice.h>
#include <linux/sysfs.h>
#include <asm/uaccess.h>
@ -738,6 +739,103 @@ static inline int is_loop_device(struct file *file)
return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
}
/* loop sysfs attributes */
static ssize_t loop_attr_show(struct device *dev, char *page,
ssize_t (*callback)(struct loop_device *, char *))
{
struct loop_device *l, *lo = NULL;
mutex_lock(&loop_devices_mutex);
list_for_each_entry(l, &loop_devices, lo_list)
if (disk_to_dev(l->lo_disk) == dev) {
lo = l;
break;
}
mutex_unlock(&loop_devices_mutex);
return lo ? callback(lo, page) : -EIO;
}
#define LOOP_ATTR_RO(_name) \
static ssize_t loop_attr_##_name##_show(struct loop_device *, char *); \
static ssize_t loop_attr_do_show_##_name(struct device *d, \
struct device_attribute *attr, char *b) \
{ \
return loop_attr_show(d, b, loop_attr_##_name##_show); \
} \
static struct device_attribute loop_attr_##_name = \
__ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);
static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
{
ssize_t ret;
char *p = NULL;
mutex_lock(&lo->lo_ctl_mutex);
if (lo->lo_backing_file)
p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
mutex_unlock(&lo->lo_ctl_mutex);
if (IS_ERR_OR_NULL(p))
ret = PTR_ERR(p);
else {
ret = strlen(p);
memmove(buf, p, ret);
buf[ret++] = '\n';
buf[ret] = 0;
}
return ret;
}
static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
{
return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
}
static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
{
return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
}
static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
{
int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
return sprintf(buf, "%s\n", autoclear ? "1" : "0");
}
LOOP_ATTR_RO(backing_file);
LOOP_ATTR_RO(offset);
LOOP_ATTR_RO(sizelimit);
LOOP_ATTR_RO(autoclear);
static struct attribute *loop_attrs[] = {
&loop_attr_backing_file.attr,
&loop_attr_offset.attr,
&loop_attr_sizelimit.attr,
&loop_attr_autoclear.attr,
NULL,
};
static struct attribute_group loop_attribute_group = {
.name = "loop",
.attrs= loop_attrs,
};
static int loop_sysfs_init(struct loop_device *lo)
{
return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
&loop_attribute_group);
}
static void loop_sysfs_exit(struct loop_device *lo)
{
sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
&loop_attribute_group);
}
static int loop_set_fd(struct loop_device *lo, fmode_t mode,
struct block_device *bdev, unsigned int arg)
{
@ -837,6 +935,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
set_capacity(lo->lo_disk, size);
bd_set_size(bdev, size << 9);
loop_sysfs_init(lo);
/* let user-space know about the new size */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
@ -855,6 +954,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
return 0;
out_clr:
loop_sysfs_exit(lo);
lo->lo_thread = NULL;
lo->lo_device = NULL;
lo->lo_backing_file = NULL;
@ -951,6 +1051,7 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
set_capacity(lo->lo_disk, 0);
if (bdev) {
bd_set_size(bdev, 0);
loop_sysfs_exit(lo);
/* let user-space know about this change */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
}

View File

@ -53,10 +53,10 @@
extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.8.1"
#define REL_VERSION "8.3.9rc2"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 94
#define PRO_VERSION_MAX 95
enum drbd_io_error_p {
@ -91,6 +91,11 @@ enum drbd_after_sb_p {
ASB_VIOLENTLY
};
enum drbd_on_no_data {
OND_IO_ERROR,
OND_SUSPEND_IO
};
/* KEEP the order, do not delete or insert. Only append. */
enum drbd_ret_codes {
ERR_CODE_BASE = 100,
@ -140,6 +145,7 @@ enum drbd_ret_codes {
ERR_CONNECTED = 151, /* DRBD 8.3 only */
ERR_PERM = 152,
ERR_NEED_APV_93 = 153,
ERR_STONITH_AND_PROT_A = 154,
/* insert new ones above this line */
AFTER_LAST_ERR_CODE
@ -226,13 +232,17 @@ union drbd_state {
unsigned conn:5 ; /* 17/32 cstates */
unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */
unsigned susp:1 ; /* 2/2 IO suspended no/yes */
unsigned susp:1 ; /* 2/2 IO suspended no/yes (by user) */
unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
unsigned peer_isp:1 ;
unsigned user_isp:1 ;
unsigned _pad:11; /* 0 unused */
unsigned susp_nod:1 ; /* IO suspended because no data */
unsigned susp_fen:1 ; /* IO suspended because fence peer handler runs*/
unsigned _pad:9; /* 0 unused */
#elif defined(__BIG_ENDIAN_BITFIELD)
unsigned _pad:11; /* 0 unused */
unsigned _pad:9;
unsigned susp_fen:1 ;
unsigned susp_nod:1 ;
unsigned user_isp:1 ;
unsigned peer_isp:1 ;
unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
@ -312,6 +322,8 @@ enum drbd_timeout_flag {
#define DRBD_MAGIC 0x83740267
#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
#define DRBD_MAGIC_BIG 0x835a
#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG)
/* these are of type "int" */
#define DRBD_MD_INDEX_INTERNAL -1

View File

@ -128,26 +128,31 @@
#define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
#define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT
#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
#define DRBD_MAX_BIO_BVECS_MIN 0
#define DRBD_MAX_BIO_BVECS_MAX 128
#define DRBD_MAX_BIO_BVECS_DEF 0
#define DRBD_DP_VOLUME_MIN 4
#define DRBD_DP_VOLUME_MAX 1048576
#define DRBD_DP_VOLUME_DEF 16384
#define DRBD_C_PLAN_AHEAD_MIN 0
#define DRBD_C_PLAN_AHEAD_MAX 300
#define DRBD_C_PLAN_AHEAD_DEF 0 /* RS rate controller disabled by default */
#define DRBD_DP_INTERVAL_MIN 1
#define DRBD_DP_INTERVAL_MAX 600
#define DRBD_DP_INTERVAL_DEF 5
#define DRBD_C_DELAY_TARGET_MIN 1
#define DRBD_C_DELAY_TARGET_MAX 100
#define DRBD_C_DELAY_TARGET_DEF 10
#define DRBD_RS_THROTTLE_TH_MIN 1
#define DRBD_RS_THROTTLE_TH_MAX 600
#define DRBD_RS_THROTTLE_TH_DEF 20
#define DRBD_C_FILL_TARGET_MIN 0
#define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */
#define DRBD_RS_HOLD_OFF_TH_MIN 1
#define DRBD_RS_HOLD_OFF_TH_MAX 6000
#define DRBD_RS_HOLD_OFF_TH_DEF 100
#define DRBD_C_MAX_RATE_MIN 250 /* kByte/sec */
#define DRBD_C_MAX_RATE_MAX (4 << 20)
#define DRBD_C_MAX_RATE_DEF 102400
#define DRBD_C_MIN_RATE_MIN 0 /* kByte/sec */
#define DRBD_C_MIN_RATE_MAX (4 << 20)
#define DRBD_C_MIN_RATE_DEF 4096
#undef RANGE
#endif

View File

@ -87,6 +87,12 @@ NL_PACKET(syncer_conf, 8,
NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32)
NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX)
NL_BIT( 65, T_MAY_IGNORE, use_rle)
NL_INTEGER( 75, T_MAY_IGNORE, on_no_data)
NL_INTEGER( 76, T_MAY_IGNORE, c_plan_ahead)
NL_INTEGER( 77, T_MAY_IGNORE, c_delay_target)
NL_INTEGER( 78, T_MAY_IGNORE, c_fill_target)
NL_INTEGER( 79, T_MAY_IGNORE, c_max_rate)
NL_INTEGER( 80, T_MAY_IGNORE, c_min_rate)
)
NL_PACKET(invalidate, 9, )

View File

@ -83,7 +83,7 @@ static inline int ddebug_remove_module(const char *mod)
#define dynamic_pr_debug(fmt, ...) \
do { if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0)
#define dynamic_dev_dbg(dev, format, ...) \
#define dynamic_dev_dbg(dev, fmt, ...) \
do { if (0) dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); } while (0)
#endif

View File

@ -743,6 +743,7 @@
#define PCI_DEVICE_ID_HP_CISSC 0x3230
#define PCI_DEVICE_ID_HP_CISSD 0x3238
#define PCI_DEVICE_ID_HP_CISSE 0x323a
#define PCI_DEVICE_ID_HP_CISSF 0x323b
#define PCI_DEVICE_ID_HP_ZX2_IOC 0x4031
#define PCI_VENDOR_ID_PCTECH 0x1042