xen-blkfront.c

/*
 * blkfront.c
 *
 * XenLinux virtual block device driver.
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 * Copyright (c) 2004, Christian Limpach
 * Copyright (c) 2004, Andrew Warfield
 * Copyright (c) 2005, Christopher Clark
 * Copyright (c) 2005, XenSource Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/interrupt.h>
#include <linux/blkdev.h>
#include <linux/module.h>

#include <xen/xenbus.h>
#include <xen/grant_table.h>
#include <xen/events.h>
#include <xen/page.h>

#include <xen/interface/grant_table.h>
#include <xen/interface/io/blkif.h>

#include <asm/xen/hypervisor.h>

enum blkif_state {
      BLKIF_STATE_DISCONNECTED,
      BLKIF_STATE_CONNECTED,
      BLKIF_STATE_SUSPENDED,
};

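/*
 * Per-request bookkeeping kept by the frontend: a copy of the ring
 * request, the originating struct request and the granted frames, so
 * that in-flight requests can be reissued after suspend/resume.
 */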
struct blk_shadow {
      struct blkif_request req;
      unsigned long request;
      unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};

static struct block_device_operations xlvbd_block_fops;

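/* Number of request slots in the single-page shared ring. */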
#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)

/*
 * We have one of these per vbd, whether ide, scsi or 'other'.  They
 * hang in private_data off the gendisk structure. We may end up
 * putting all kinds of interesting stuff here :-)
 */
struct blkfront_info
{
      struct xenbus_device *xbdev;
      dev_t dev;
      struct gendisk *gd;
      int vdevice;
      blkif_vdev_t handle;
      enum blkif_state connected;
      int ring_ref;
      struct blkif_front_ring ring;
      unsigned int evtchn, irq;
      struct request_queue *rq;
      struct work_struct work;
      struct gnttab_free_callback callback;
      struct blk_shadow shadow[BLK_RING_SIZE];
      unsigned long shadow_free;
      int feature_barrier;

      /**
       * The number of people holding this device open.  We won't allow a
       * hot-unplug unless this is 0.
       */
      int users;
};

static DEFINE_SPINLOCK(blkif_io_lock);

#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
      (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
#define GRANT_INVALID_REF     0

#define PARTS_PER_DISK        16

#define BLKIF_MAJOR(dev) ((dev)>>8)
#define BLKIF_MINOR(dev) ((dev) & 0xff)

#define DEV_NAME  "xvd" /* name in /dev */

/* Information about our VBDs. */
#define MAX_VBDS 64
static LIST_HEAD(vbds_list);

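/*
 * The shadow entries double as a free list: req.id of each unused entry
 * holds the index of the next free entry (the chain is initialised in
 * blkfront_probe() and rebuilt in blkif_recover()).
 */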
static int get_id_from_freelist(struct blkfront_info *info)
{
      unsigned long free = info->shadow_free;
      BUG_ON(free > BLK_RING_SIZE);
      info->shadow_free = info->shadow[free].req.id;
      info->shadow[free].req.id = 0x0fffffee; /* debug */
      return free;
}

static void add_id_to_freelist(struct blkfront_info *info,
                         unsigned long id)
{
      info->shadow[id].req.id  = info->shadow_free;
      info->shadow[id].request = 0;
      info->shadow_free = id;
}

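/* Grant-table free callback: defer restarting the queue to process context. */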
static void blkif_restart_queue_callback(void *arg)
{
      struct blkfront_info *info = (struct blkfront_info *)arg;
      schedule_work(&info->work);
}

/*
 * blkif_queue_request
 *
 * Place a block-layer request on the shared ring, granting the backend
 * access to each data segment.
 *
 * Returns nonzero if the request cannot be queued yet (front and back
 * ends not connected, or no free grant references); the caller should
 * stop the queue and retry later.
 */
static int blkif_queue_request(struct request *req)
{
      struct blkfront_info *info = req->rq_disk->private_data;
      unsigned long buffer_mfn;
      struct blkif_request *ring_req;
      struct req_iterator iter;
      struct bio_vec *bvec;
      unsigned long id;
      unsigned int fsect, lsect;
      int ref;
      grant_ref_t gref_head;

      if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
            return 1;

      if (gnttab_alloc_grant_references(
            BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
            gnttab_request_free_callback(
                  &info->callback,
                  blkif_restart_queue_callback,
                  info,
                  BLKIF_MAX_SEGMENTS_PER_REQUEST);
            return 1;
      }

      /* Fill out a communications ring structure. */
      ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
      id = get_id_from_freelist(info);
      info->shadow[id].request = (unsigned long)req;

      ring_req->id = id;
      ring_req->sector_number = (blkif_sector_t)req->sector;
      ring_req->handle = info->handle;

      ring_req->operation = rq_data_dir(req) ?
            BLKIF_OP_WRITE : BLKIF_OP_READ;
      if (blk_barrier_rq(req))
            ring_req->operation = BLKIF_OP_WRITE_BARRIER;

      ring_req->nr_segments = 0;
      rq_for_each_segment(bvec, req, iter) {
            BUG_ON(ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST);
            buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page));
            fsect = bvec->bv_offset >> 9;
            lsect = fsect + (bvec->bv_len >> 9) - 1;
            /* install a grant reference. */
            ref = gnttab_claim_grant_reference(&gref_head);
            BUG_ON(ref == -ENOSPC);

            gnttab_grant_foreign_access_ref(
                        ref,
                        info->xbdev->otherend_id,
                        buffer_mfn,
                        rq_data_dir(req) );

            info->shadow[id].frame[ring_req->nr_segments] =
                        mfn_to_pfn(buffer_mfn);

            ring_req->seg[ring_req->nr_segments] =
                        (struct blkif_request_segment) {
                              .gref       = ref,
                              .first_sect = fsect,
                              .last_sect  = lsect };

            ring_req->nr_segments++;
      }

      info->ring.req_prod_pvt++;

      /* Keep a private copy so we can reissue requests when recovering. */
      info->shadow[id].req = *ring_req;

      gnttab_free_grant_references(gref_head);

      return 0;
}


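/* Push privately queued requests to the shared ring and notify the
 * backend if it is waiting for an event. */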
static inline void flush_requests(struct blkfront_info *info)
{
      int notify;

      RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

      if (notify)
            notify_remote_via_irq(info->irq);
}

/*
 * do_blkif_request
 *  read a block; request is in a request queue
 */
static void do_blkif_request(struct request_queue *rq)
{
      struct blkfront_info *info = NULL;
      struct request *req;
      int queued;

      pr_debug("Entered do_blkif_request\n");

      queued = 0;

      while ((req = elv_next_request(rq)) != NULL) {
            info = req->rq_disk->private_data;
            if (!blk_fs_request(req)) {
                  end_request(req, 0);
                  continue;
            }

            if (RING_FULL(&info->ring))
                  goto wait;

            pr_debug("do_blk_req %p: cmd %p, sec %lx, "
                   "(%u/%li) buffer:%p [%s]\n",
                   req, req->cmd, (unsigned long)req->sector,
                   req->current_nr_sectors,
                   req->nr_sectors, req->buffer,
                   rq_data_dir(req) ? "write" : "read");


            blkdev_dequeue_request(req);
            if (blkif_queue_request(req)) {
                  blk_requeue_request(rq, req);
wait:
                  /* Avoid pointless unplugs. */
                  blk_stop_queue(rq);
                  break;
            }

            queued++;
      }

      if (queued != 0)
            flush_requests(info);
}

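/* Configure the block-layer request queue so that any merged request
 * still fits in a single blkif ring slot. */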
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
{
      struct request_queue *rq;

      rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
      if (rq == NULL)
            return -1;

      elevator_init(rq, "noop");

      /* Hard sector size and max sectors impersonate the equiv. hardware. */
      blk_queue_hardsect_size(rq, sector_size);
      blk_queue_max_sectors(rq, 512);

      /* Each segment in a request is up to an aligned page in size. */
      blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
      blk_queue_max_segment_size(rq, PAGE_SIZE);

      /* Ensure a merged request will fit in a single I/O ring slot. */
      blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
      blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);

      /* Make sure buffer addresses are sector-aligned. */
      blk_queue_dma_alignment(rq, 511);

      gd->queue = rq;

      return 0;
}


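/* Enable or disable ordered (barrier) writes on the queue according to
 * the backend's feature-barrier advertisement. */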
static int xlvbd_barrier(struct blkfront_info *info)
{
      int err;

      err = blk_queue_ordered(info->rq,
                        info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
                        NULL);

      if (err)
            return err;

      printk(KERN_INFO "blkfront: %s: barriers %s\n",
             info->gd->disk_name,
             info->feature_barrier ? "enabled" : "disabled");
      return 0;
}


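/* Allocate and initialise the gendisk and request queue for a newly
 * discovered virtual block device. */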
static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity,
                         int vdevice, u16 vdisk_info, u16 sector_size,
                         struct blkfront_info *info)
{
      struct gendisk *gd;
      int nr_minors = 1;
      int err = -ENODEV;

      BUG_ON(info->gd != NULL);
      BUG_ON(info->rq != NULL);

      if ((minor % PARTS_PER_DISK) == 0)
            nr_minors = PARTS_PER_DISK;

      gd = alloc_disk(nr_minors);
      if (gd == NULL)
            goto out;

      if (nr_minors > 1)
            sprintf(gd->disk_name, "%s%c", DEV_NAME,
                  'a' + minor / PARTS_PER_DISK);
      else
            sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
                  'a' + minor / PARTS_PER_DISK,
                  minor % PARTS_PER_DISK);

      gd->major = XENVBD_MAJOR;
      gd->first_minor = minor;
      gd->fops = &xlvbd_block_fops;
      gd->private_data = info;
      gd->driverfs_dev = &(info->xbdev->dev);
      set_capacity(gd, capacity);

      if (xlvbd_init_blk_queue(gd, sector_size)) {
            del_gendisk(gd);
            goto out;
      }

      info->rq = gd->queue;
      info->gd = gd;

      if (info->feature_barrier)
            xlvbd_barrier(info);

      if (vdisk_info & VDISK_READONLY)
            set_disk_ro(gd, 1);

      if (vdisk_info & VDISK_REMOVABLE)
            gd->flags |= GENHD_FL_REMOVABLE;

      if (vdisk_info & VDISK_CDROM)
            gd->flags |= GENHD_FL_CD;

      return 0;

 out:
      return err;
}

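/* Restart the request queue once there is space on the ring again.
 * Called with blkif_io_lock held. */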
static void kick_pending_request_queues(struct blkfront_info *info)
{
      if (!RING_FULL(&info->ring)) {
            /* Re-enable calldowns. */
            blk_start_queue(info->rq);
            /* Kick things off immediately. */
            do_blkif_request(info->rq);
      }
}

static void blkif_restart_queue(struct work_struct *work)
{
      struct blkfront_info *info = container_of(work, struct blkfront_info, work);

      spin_lock_irq(&blkif_io_lock);
      if (info->connected == BLKIF_STATE_CONNECTED)
            kick_pending_request_queues(info);
      spin_unlock_irq(&blkif_io_lock);
}

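/* Tear down the device channel: stop the queue, cancel grant callbacks,
 * end foreign access to the shared ring and unbind the event channel. */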
static void blkif_free(struct blkfront_info *info, int suspend)
{
      /* Prevent new requests being issued until we fix things up. */
      spin_lock_irq(&blkif_io_lock);
      info->connected = suspend ?
            BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
      /* No more blkif_request(). */
      if (info->rq)
            blk_stop_queue(info->rq);
      /* No more gnttab callback work. */
      gnttab_cancel_free_callback(&info->callback);
      spin_unlock_irq(&blkif_io_lock);

      /* Flush gnttab callback work. Must be done with no locks held. */
      flush_scheduled_work();

      /* Free resources associated with old device channel. */
      if (info->ring_ref != GRANT_INVALID_REF) {
            gnttab_end_foreign_access(info->ring_ref, 0,
                                (unsigned long)info->ring.sring);
            info->ring_ref = GRANT_INVALID_REF;
            info->ring.sring = NULL;
      }
      if (info->irq)
            unbind_from_irqhandler(info->irq, info);
      info->evtchn = info->irq = 0;

}

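/* Drop the grant references taken out for a completed request's segments. */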
static void blkif_completion(struct blk_shadow *s)
{
      int i;
      for (i = 0; i < s->req.nr_segments; i++)
            gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
}

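/*
 * Event-channel interrupt handler: drain responses from the shared ring
 * and complete the corresponding block-layer requests.
 */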
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
{
      struct request *req;
      struct blkif_response *bret;
      RING_IDX i, rp;
      unsigned long flags;
      struct blkfront_info *info = (struct blkfront_info *)dev_id;
      int uptodate;

      spin_lock_irqsave(&blkif_io_lock, flags);

      if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
            spin_unlock_irqrestore(&blkif_io_lock, flags);
            return IRQ_HANDLED;
      }

 again:
      rp = info->ring.sring->rsp_prod;
      rmb(); /* Ensure we see queued responses up to 'rp'. */

      for (i = info->ring.rsp_cons; i != rp; i++) {
            unsigned long id;
            int ret;

            bret = RING_GET_RESPONSE(&info->ring, i);
            id   = bret->id;
            req  = (struct request *)info->shadow[id].request;

            blkif_completion(&info->shadow[id]);

            add_id_to_freelist(info, id);

            uptodate = (bret->status == BLKIF_RSP_OKAY);
            switch (bret->operation) {
            case BLKIF_OP_WRITE_BARRIER:
                  if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
                        printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
                               info->gd->disk_name);
                        uptodate = -EOPNOTSUPP;
                        info->feature_barrier = 0;
                        xlvbd_barrier(info);
                  }
                  /* fall through */
            case BLKIF_OP_READ:
            case BLKIF_OP_WRITE:
                  if (unlikely(bret->status != BLKIF_RSP_OKAY))
                        dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
                              "request: %x\n", bret->status);

                  ret = end_that_request_first(req, uptodate,
                        req->hard_nr_sectors);
                  BUG_ON(ret);
                  end_that_request_last(req, uptodate);
                  break;
            default:
                  BUG();
            }
      }

      info->ring.rsp_cons = i;

      if (i != info->ring.req_prod_pvt) {
            int more_to_do;
            RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
            if (more_to_do)
                  goto again;
      } else
            info->ring.sring->rsp_event = i + 1;

      kick_pending_request_queues(info);

      spin_unlock_irqrestore(&blkif_io_lock, flags);

      return IRQ_HANDLED;
}


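/* Allocate the shared ring page, grant it to the backend and bind an
 * event channel/irq for notifications. */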
static int setup_blkring(struct xenbus_device *dev,
                   struct blkfront_info *info)
{
      struct blkif_sring *sring;
      int err;

      info->ring_ref = GRANT_INVALID_REF;

      sring = (struct blkif_sring *)__get_free_page(GFP_KERNEL);
      if (!sring) {
            xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
            return -ENOMEM;
      }
      SHARED_RING_INIT(sring);
      FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

      err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
      if (err < 0) {
            free_page((unsigned long)sring);
            info->ring.sring = NULL;
            goto fail;
      }
      info->ring_ref = err;

      err = xenbus_alloc_evtchn(dev, &info->evtchn);
      if (err)
            goto fail;

      err = bind_evtchn_to_irqhandler(info->evtchn,
                              blkif_interrupt,
                              IRQF_SAMPLE_RANDOM, "blkif", info);
      if (err <= 0) {
            xenbus_dev_fatal(dev, err,
                         "bind_evtchn_to_irqhandler failed");
            goto fail;
      }
      info->irq = err;

      return 0;
fail:
      blkif_free(info, 0);
      return err;
}


/* Common code used when first setting up, and when resuming. */
static int talk_to_backend(struct xenbus_device *dev,
                     struct blkfront_info *info)
{
      const char *message = NULL;
      struct xenbus_transaction xbt;
      int err;

      /* Create shared ring, alloc event channel. */
      err = setup_blkring(dev, info);
      if (err)
            goto out;

again:
      err = xenbus_transaction_start(&xbt);
      if (err) {
            xenbus_dev_fatal(dev, err, "starting transaction");
            goto destroy_blkring;
      }

      err = xenbus_printf(xbt, dev->nodename,
                      "ring-ref", "%u", info->ring_ref);
      if (err) {
            message = "writing ring-ref";
            goto abort_transaction;
      }
      err = xenbus_printf(xbt, dev->nodename,
                      "event-channel", "%u", info->evtchn);
      if (err) {
            message = "writing event-channel";
            goto abort_transaction;
      }

      err = xenbus_transaction_end(xbt, 0);
      if (err) {
            if (err == -EAGAIN)
                  goto again;
            xenbus_dev_fatal(dev, err, "completing transaction");
            goto destroy_blkring;
      }

      xenbus_switch_state(dev, XenbusStateInitialised);

      return 0;

 abort_transaction:
      xenbus_transaction_end(xbt, 1);
      if (message)
            xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
      blkif_free(info, 0);
 out:
      return err;
}


/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and the ring buffer for communication with the backend, and
 * inform the backend of the appropriate details for those.  Switch to
 * Initialised state.
 */
static int blkfront_probe(struct xenbus_device *dev,
                    const struct xenbus_device_id *id)
{
      int err, vdevice, i;
      struct blkfront_info *info;

      /* FIXME: Use dynamic device id if this is not set. */
      err = xenbus_scanf(XBT_NIL, dev->nodename,
                     "virtual-device", "%i", &vdevice);
      if (err != 1) {
            xenbus_dev_fatal(dev, err, "reading virtual-device");
            return err;
      }

      info = kzalloc(sizeof(*info), GFP_KERNEL);
      if (!info) {
            xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
            return -ENOMEM;
      }

      info->xbdev = dev;
      info->vdevice = vdevice;
      info->connected = BLKIF_STATE_DISCONNECTED;
      INIT_WORK(&info->work, blkif_restart_queue);

      for (i = 0; i < BLK_RING_SIZE; i++)
            info->shadow[i].req.id = i+1;
      info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

      /* Front end dir is a number, which is used as the id. */
      info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
      dev->dev.driver_data = info;

      err = talk_to_backend(dev, info);
      if (err) {
            kfree(info);
            dev->dev.driver_data = NULL;
            return err;
      }

      return 0;
}


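/* Rebuild the shadow free list and requeue the requests that were in
 * flight when the domain was suspended. */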
static int blkif_recover(struct blkfront_info *info)
{
      int i;
      struct blkif_request *req;
      struct blk_shadow *copy;
      int j;

      /* Stage 1: Make a safe copy of the shadow state. */
      copy = kmalloc(sizeof(info->shadow), GFP_KERNEL);
      if (!copy)
            return -ENOMEM;
      memcpy(copy, info->shadow, sizeof(info->shadow));

      /* Stage 2: Set up free list. */
      memset(&info->shadow, 0, sizeof(info->shadow));
      for (i = 0; i < BLK_RING_SIZE; i++)
            info->shadow[i].req.id = i+1;
      info->shadow_free = info->ring.req_prod_pvt;
      info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

      /* Stage 3: Find pending requests and requeue them. */
      for (i = 0; i < BLK_RING_SIZE; i++) {
            /* Not in use? */
            if (copy[i].request == 0)
                  continue;

            /* Grab a request slot and copy shadow state into it. */
            req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
            *req = copy[i].req;

            /* We get a new request id, and must reset the shadow state. */
            req->id = get_id_from_freelist(info);
            memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

            /* Rewrite any grant references invalidated by susp/resume. */
            for (j = 0; j < req->nr_segments; j++)
                  gnttab_grant_foreign_access_ref(
                        req->seg[j].gref,
                        info->xbdev->otherend_id,
                        pfn_to_mfn(info->shadow[req->id].frame[j]),
                        rq_data_dir(
                              (struct request *)
                              info->shadow[req->id].request));
            info->shadow[req->id].req = *req;

            info->ring.req_prod_pvt++;
      }

      kfree(copy);

      xenbus_switch_state(info->xbdev, XenbusStateConnected);

      spin_lock_irq(&blkif_io_lock);

      /* Now safe for us to use the shared ring */
      info->connected = BLKIF_STATE_CONNECTED;

      /* Send off requeued requests */
      flush_requests(info);

      /* Kick any other new requests queued since we resumed */
      kick_pending_request_queues(info);

      spin_unlock_irq(&blkif_io_lock);

      return 0;
}

/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our blkif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to the
 * rest of the kernel.
 */
static int blkfront_resume(struct xenbus_device *dev)
{
      struct blkfront_info *info = dev->dev.driver_data;
      int err;

      dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);

      blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);

      err = talk_to_backend(dev, info);
      if (info->connected == BLKIF_STATE_SUSPENDED && !err)
            err = blkif_recover(info);

      return err;
}


/*
 * Invoked when the backend is finally 'ready' (and has told us the
 * details about the physical device - #sectors, size, etc).
 */
static void blkfront_connect(struct blkfront_info *info)
{
      unsigned long long sectors;
      unsigned long sector_size;
      unsigned int binfo;
      int err;

      if ((info->connected == BLKIF_STATE_CONNECTED) ||
          (info->connected == BLKIF_STATE_SUSPENDED) )
            return;

      dev_dbg(&info->xbdev->dev, "%s:%s.\n",
            __func__, info->xbdev->otherend);

      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
                      "sectors", "%llu", &sectors,
                      "info", "%u", &binfo,
                      "sector-size", "%lu", &sector_size,
                      NULL);
      if (err) {
            xenbus_dev_fatal(info->xbdev, err,
                         "reading backend fields at %s",
                         info->xbdev->otherend);
            return;
      }

      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
                      "feature-barrier", "%d", &info->feature_barrier,
                      NULL);
      if (err)
            info->feature_barrier = 0;

      err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice),
                          sectors, info->vdevice,
                          binfo, sector_size, info);
      if (err) {
            xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
                         info->xbdev->otherend);
            return;
      }

      xenbus_switch_state(info->xbdev, XenbusStateConnected);

      /* Kick pending requests. */
      spin_lock_irq(&blkif_io_lock);
      info->connected = BLKIF_STATE_CONNECTED;
      kick_pending_request_queues(info);
      spin_unlock_irq(&blkif_io_lock);

      add_disk(info->gd);
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void blkfront_closing(struct xenbus_device *dev)
{
      struct blkfront_info *info = dev->dev.driver_data;
      unsigned long flags;

      dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename);

      if (info->rq == NULL)
            goto out;

      spin_lock_irqsave(&blkif_io_lock, flags);

      del_gendisk(info->gd);

      /* No more blkif_request(). */
      blk_stop_queue(info->rq);

      /* No more gnttab callback work. */
      gnttab_cancel_free_callback(&info->callback);
      spin_unlock_irqrestore(&blkif_io_lock, flags);

      /* Flush gnttab callback work. Must be done with no locks held. */
      flush_scheduled_work();

      blk_cleanup_queue(info->rq);
      info->rq = NULL;

 out:
      xenbus_frontend_closed(dev);
}

/**
 * Callback received when the backend's state changes.
 */
static void backend_changed(struct xenbus_device *dev,
                      enum xenbus_state backend_state)
{
      struct blkfront_info *info = dev->dev.driver_data;
      struct block_device *bd;

      dev_dbg(&dev->dev, "blkfront:backend_changed.\n");

      switch (backend_state) {
      case XenbusStateInitialising:
      case XenbusStateInitWait:
      case XenbusStateInitialised:
      case XenbusStateUnknown:
      case XenbusStateClosed:
            break;

      case XenbusStateConnected:
            blkfront_connect(info);
            break;

      case XenbusStateClosing:
            bd = bdget(info->dev);
            if (bd == NULL) {
                  xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
                  break;
            }

            mutex_lock(&bd->bd_mutex);
            if (info->users > 0)
                  xenbus_dev_error(dev, -EBUSY,
                               "Device in use; refusing to close");
            else
                  blkfront_closing(dev);
            mutex_unlock(&bd->bd_mutex);
            bdput(bd);
            break;
      }
}

static int blkfront_remove(struct xenbus_device *dev)
{
      struct blkfront_info *info = dev->dev.driver_data;

      dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename);

      blkif_free(info, 0);

      kfree(info);

      return 0;
}

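/* Track opens so that a pending backend close can be deferred until the
 * last user releases the device (see blkif_release and backend_changed). */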
static int blkif_open(struct inode *inode, struct file *filep)
{
      struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
      info->users++;
      return 0;
}

static int blkif_release(struct inode *inode, struct file *filep)
{
      struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
      info->users--;
      if (info->users == 0) {
            /* Check whether we have been instructed to close.  We will
               have ignored this request initially, as the device was
               still mounted. */
            struct xenbus_device *dev = info->xbdev;
            enum xenbus_state state = xenbus_read_driver_state(dev->otherend);

            if (state == XenbusStateClosing)
                  blkfront_closing(dev);
      }
      return 0;
}

static struct block_device_operations xlvbd_block_fops =
{
      .owner = THIS_MODULE,
      .open = blkif_open,
      .release = blkif_release,
};


static struct xenbus_device_id blkfront_ids[] = {
      { "vbd" },
      { "" }
};

static struct xenbus_driver blkfront = {
      .name = "vbd",
      .owner = THIS_MODULE,
      .ids = blkfront_ids,
      .probe = blkfront_probe,
      .remove = blkfront_remove,
      .resume = blkfront_resume,
      .otherend_changed = backend_changed,
};

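/* Register the block major and the xenbus frontend driver when running
 * on Xen. */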
static int __init xlblk_init(void)
{
      if (!is_running_on_xen())
            return -ENODEV;

      if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
            printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
                   XENVBD_MAJOR, DEV_NAME);
            return -ENODEV;
      }

      return xenbus_register_frontend(&blkfront);
}
module_init(xlblk_init);


static void xlblk_exit(void)
{
      return xenbus_unregister_driver(&blkfront);
}
module_exit(xlblk_exit);

MODULE_DESCRIPTION("Xen virtual block device frontend");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
