xen_disk.c patched manually

/*
 *  xen paravirt block device backend
 *
 *  (c) Gerd Hoffmann <[email protected]>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <inttypes.h>
#include <time.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <xs.h>
#include <xenctrl.h>
#include <xen/io/xenbus.h>

#include "hw.h"
#include "block_int.h"
#include "qemu-char.h"
#include "xen_blkif.h"
#include "xen_backend.h"
#include "sysemu.h"

/* ------------------------------------------------------------- */

static int syncwrite    = 0;
static int batch_maps   = 1;

static int max_requests = 32;
static int use_aio      = 1;

/* ------------------------------------------------------------- */

#define BLOCK_SIZE  512
#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)

struct ioreq {
    blkif_request_t     req;
    int16_t             status;

    /* parsed request */
    off_t               start;
    QEMUIOVector        v;
    int                 presync;
    int                 postsync;

    /* grant mapping */
    uint32_t            domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    uint32_t            refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int                 prot;
    void                *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    void                *pages;

    /* aio status */
    int                 aio_inflight;
    int                 aio_errors;

    struct XenBlkDev    *blkdev;
    LIST_ENTRY(ioreq)   list;
};

struct XenBlkDev {
    struct XenDevice    xendev;  /* must be first */
    char                *params;
    char                *mode;
    char                *type;
    char                *dev;
    char                *devtype;
    const char          *fileproto;
    const char          *filename;
    int                 ring_ref;
    void                *sring;
    int64_t             file_blk;
    int64_t             file_size;
    int                 protocol;
    blkif_back_rings_t  rings;
    int                 more_work;
    int                 cnt_map;

    /* request lists */
    LIST_HEAD(inflight_head, ioreq) inflight;
    LIST_HEAD(finished_head, ioreq) finished;
    LIST_HEAD(freelist_head, ioreq) freelist;
    int                 requests_total;
    int                 requests_inflight;
    int                 requests_finished;

    /* qemu block driver */
    int                 index;
    BlockDriverState    *bs;
    QEMUBH              *bh;
};

/* ------------------------------------------------------------- */

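/*
 * ioreq lifecycle: a request is taken from the freelist (or newly
 * allocated, up to max_requests), moved to the inflight list while the
 * I/O is being processed, moved to the finished list once the I/O has
 * completed, and finally recycled back onto the freelist after its
 * response has been pushed to the frontend.
 */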
static void ioreq_reset(struct ioreq *ioreq)
{
    memset(&ioreq->req, 0, sizeof(ioreq->req));
    ioreq->status = 0;
    ioreq->start = 0;
    ioreq->presync = 0;
    ioreq->postsync = 0;

    memset(ioreq->domids, 0, sizeof(ioreq->domids));
    memset(ioreq->refs, 0, sizeof(ioreq->refs));
    ioreq->prot = 0;
    memset(ioreq->page, 0, sizeof(ioreq->page));
    ioreq->pages = NULL;

    ioreq->aio_inflight = 0;
    ioreq->aio_errors = 0;

    ioreq->blkdev = NULL;
    memset(&ioreq->list, 0, sizeof(ioreq->list));

    qemu_iovec_reset(&ioreq->v);
}

static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq = NULL;

    if (LIST_EMPTY(&blkdev->freelist)) {
    if (blkdev->requests_total >= max_requests)
        goto out;
    /* allocate new struct */
    ioreq = qemu_mallocz(sizeof(*ioreq));
    ioreq->blkdev = blkdev;
    blkdev->requests_total++;
        /* qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST); */
    } else {
    /* get one from freelist */
    ioreq = LIST_FIRST(&blkdev->freelist);
    LIST_REMOVE(ioreq, list);
        qemu_iovec_reset(&ioreq->v);
    }
    LIST_INSERT_HEAD(&blkdev->inflight, ioreq, list);
    blkdev->requests_inflight++;

out:
    return ioreq;
}

static void ioreq_finish(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    LIST_REMOVE(ioreq, list);
    LIST_INSERT_HEAD(&blkdev->finished, ioreq, list);
    blkdev->requests_inflight--;
    blkdev->requests_finished++;
}

static void ioreq_release(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    LIST_REMOVE(ioreq, list);
    /*memset(ioreq, 0, sizeof(*ioreq)); */
    ioreq_reset(ioreq);
    ioreq->blkdev = blkdev;
    LIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
    blkdev->requests_finished--;
}

/*
 * translate request into iovec + start offset
 * do sanity checks along the way
 */
static int ioreq_parse(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    uintptr_t mem;
    size_t len;
    int i;

    xen_be_printf(&blkdev->xendev, 3,
          "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n",
          ioreq->req.operation, ioreq->req.nr_segments,
          ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
    ioreq->prot = PROT_WRITE; /* to memory */
    break;
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments) {
            ioreq->presync = 1;
            return 0;
        }
    if (!syncwrite)
        ioreq->presync = ioreq->postsync = 1;
    /* fall through */
    case BLKIF_OP_WRITE:
    ioreq->prot = PROT_READ; /* from memory */
    if (syncwrite)
        ioreq->postsync = 1;
    break;
    default:
    xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
              ioreq->req.operation);
    goto err;
    };

    if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
        xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
        goto err;
    }

    ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
    for (i = 0; i < ioreq->req.nr_segments; i++) {
    if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
        xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
        goto err;
    }
    if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) {
        xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n");
        goto err;
    }
    if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
        xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n");
        goto err;
    }

    ioreq->domids[i] = blkdev->xendev.dom;
    ioreq->refs[i]   = ioreq->req.seg[i].gref;

    mem = ioreq->req.seg[i].first_sect * blkdev->file_blk;
    len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk;
        qemu_iovec_add(&ioreq->v, (void*)mem, len);
    }
    if (ioreq->start + ioreq->v.size > blkdev->file_size) {
    xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
    goto err;
    }
    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

static void ioreq_unmap(struct ioreq *ioreq)
{
    xc_gnttab *gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0)
        return;
    if (batch_maps) {
    if (!ioreq->pages)
        return;
    if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0)
        xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
              strerror(errno));
    ioreq->blkdev->cnt_map -= ioreq->v.niov;
    ioreq->pages = NULL;
    } else {
    for (i = 0; i < ioreq->v.niov; i++) {
        if (!ioreq->page[i])
        continue;
        if (xc_gnttab_munmap(gnt, ioreq->page[i], 1) != 0)
        xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                  strerror(errno));
        ioreq->blkdev->cnt_map--;
        ioreq->page[i] = NULL;
    }
    }
}

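/*
 * Map the frontend's granted pages into our address space via the
 * grant table device.  With batch_maps all segments of a request are
 * mapped in a single xc_gnttab_map_grant_refs() call, otherwise each
 * grant reference is mapped individually.  The iovec entries filled in
 * by ioreq_parse() only carry in-page offsets; here they are turned
 * into real pointers into the mapped region.
 */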
static int ioreq_map(struct ioreq *ioreq)
{
    xc_gnttab *gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0)
        return 0;
    if (batch_maps) {
    ioreq->pages = xc_gnttab_map_grant_refs
        (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot);
    if (ioreq->pages == NULL) {
        xen_be_printf(&ioreq->blkdev->xendev, 0,
              "can't map %d grant refs (%s, %d maps)\n",
              ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map);
        return -1;
    }
    for (i = 0; i < ioreq->v.niov; i++)
        ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE +
        (uintptr_t)ioreq->v.iov[i].iov_base;
    ioreq->blkdev->cnt_map += ioreq->v.niov;
    } else  {
    for (i = 0; i < ioreq->v.niov; i++) {
        ioreq->page[i] = xc_gnttab_map_grant_ref
        (gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot);
        if (ioreq->page[i] == NULL) {
        xen_be_printf(&ioreq->blkdev->xendev, 0,
                  "can't map grant ref %d (%s, %d maps)\n",
                  ioreq->refs[i], strerror(errno), ioreq->blkdev->cnt_map);
        ioreq_unmap(ioreq);
        return -1;
        }
        ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base;
        ioreq->blkdev->cnt_map++;
    }
    }
    return 0;
}

static int ioreq_runio_qemu_sync(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    int i, rc, len = 0;
    off_t pos;

    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1)
    goto err;
    if (ioreq->presync)
    bdrv_flush(blkdev->bs);

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
    pos = ioreq->start;
    for (i = 0; i < ioreq->v.niov; i++) {
        rc = bdrv_read(blkdev->bs, pos / BLOCK_SIZE,
               ioreq->v.iov[i].iov_base,
               ioreq->v.iov[i].iov_len / BLOCK_SIZE);
        if (rc != 0) {
        xen_be_printf(&blkdev->xendev, 0, "rd I/O error (%p, len %zd)\n",
                  ioreq->v.iov[i].iov_base,
                  ioreq->v.iov[i].iov_len);
        goto err;
        }
        len += ioreq->v.iov[i].iov_len;
        pos += ioreq->v.iov[i].iov_len;
    }
    break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments)
            break;
    pos = ioreq->start;
    for (i = 0; i < ioreq->v.niov; i++) {
        rc = bdrv_write(blkdev->bs, pos / BLOCK_SIZE,
                ioreq->v.iov[i].iov_base,
                ioreq->v.iov[i].iov_len / BLOCK_SIZE);
        if (rc != 0) {
        xen_be_printf(&blkdev->xendev, 0, "wr I/O error (%p, len %zd)\n",
                  ioreq->v.iov[i].iov_base,
                  ioreq->v.iov[i].iov_len);
        goto err;
        }
        len += ioreq->v.iov[i].iov_len;
        pos += ioreq->v.iov[i].iov_len;
    }
    break;
    default:
    /* unknown operation (shouldn't happen -- parse catches this) */
    goto err;
    }

    if (ioreq->postsync)
    bdrv_flush(blkdev->bs);
    ioreq->status = BLKIF_RSP_OKAY;

    ioreq_unmap(ioreq);
    ioreq_finish(ioreq);
    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

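/*
 * AIO completion callback.  ioreq->aio_inflight counts the submitted
 * requests plus one extra reference taken in ioreq_runio_qemu_aio();
 * only when the count drops to zero is the ioreq unmapped, marked
 * finished and the bottom half scheduled to push the response.
 */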
static void qemu_aio_complete(void *opaque, int ret)
{
    struct ioreq *ioreq = opaque;

    if (ret != 0) {
        xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
                      ioreq->req.operation == BLKIF_OP_READ ? "read" : "write");
        ioreq->aio_errors++;
    }

    ioreq->aio_inflight--;
    if (ioreq->aio_inflight > 0)
        return;
    if (ioreq->postsync)
    bdrv_flush(ioreq->blkdev->bs);

    ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
    ioreq_unmap(ioreq);
    ioreq_finish(ioreq);
    qemu_bh_schedule(ioreq->blkdev->bh);
}

static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1)
    goto err;

    ioreq->aio_inflight++;
    if (ioreq->presync)
    bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        ioreq->aio_inflight++;
        bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE,
                       &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                       qemu_aio_complete, ioreq);
    break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        if (!ioreq->req.nr_segments)
            break;
        ioreq->aio_inflight++;
        bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE,
                        &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                        qemu_aio_complete, ioreq);
    break;
    default:
    /* unknown operation (shouldn't happen -- parse catches this) */
    goto err;
    }

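    /* Drop the extra aio_inflight reference taken above; if every
     * submitted AIO has already completed (or none was submitted,
     * e.g. an empty barrier), this finishes the request now. */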
    qemu_aio_complete(ioreq, 0);

    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

static int blk_send_response_one(struct ioreq *ioreq)
{
    struct XenBlkDev  *blkdev = ioreq->blkdev;
    int               send_notify   = 0;
    int               have_requests = 0;
    blkif_response_t  resp;
    void              *dst;

    resp.id        = ioreq->req.id;
    resp.operation = ioreq->req.operation;
    resp.status    = ioreq->status;

    /* Place on the response ring for the relevant domain. */
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt);
    break;
    case BLKIF_PROTOCOL_X86_32:
    dst = RING_GET_RESPONSE(&blkdev->rings.x86_32, blkdev->rings.x86_32.rsp_prod_pvt);
    break;
    case BLKIF_PROTOCOL_X86_64:
    dst = RING_GET_RESPONSE(&blkdev->rings.x86_64, blkdev->rings.x86_64.rsp_prod_pvt);
    break;
    default:
    dst = NULL;
    }
    memcpy(dst, &resp, sizeof(resp));
    blkdev->rings.common.rsp_prod_pvt++;

    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify);
    if (blkdev->rings.common.rsp_prod_pvt == blkdev->rings.common.req_cons) {
    /*
     * Tail check for pending requests. Allows frontend to avoid
     * notifications if requests are already in flight (lower
     * overheads and promotes batching).
     */
    RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests);
    } else if (RING_HAS_UNCONSUMED_REQUESTS(&blkdev->rings.common)) {
    have_requests = 1;
    }

    if (have_requests)
    blkdev->more_work++;
    return send_notify;
}

/* walk finished list, send outstanding responses, free requests */
static void blk_send_response_all(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq;
    int send_notify = 0;

    while (!LIST_EMPTY(&blkdev->finished)) {
        ioreq = LIST_FIRST(&blkdev->finished);
    send_notify += blk_send_response_one(ioreq);
    ioreq_release(ioreq);
    }
    if (send_notify)
    xen_be_send_notify(&blkdev->xendev);
}

static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc)
{
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc),
           sizeof(ioreq->req));
    break;
    case BLKIF_PROTOCOL_X86_32:
    blkif_get_x86_32_req(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.x86_32, rc));
    break;
    case BLKIF_PROTOCOL_X86_64:
    blkif_get_x86_64_req(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.x86_64, rc));
    break;
    }
    return 0;
}

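/*
 * Main request pump, run from the bottom half: consume requests from
 * the shared ring, parse and map them, and submit the I/O either
 * synchronously or via AIO.  If the ring still holds work, or no free
 * ioreq slot was available, the bottom half is rescheduled.
 */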
static void blk_handle_requests(struct XenBlkDev *blkdev)
{
    RING_IDX rc, rp;
    struct ioreq *ioreq;

    blkdev->more_work = 0;

    rc = blkdev->rings.common.req_cons;
    rp = blkdev->rings.common.sring->req_prod;
    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

    if (use_aio)
        blk_send_response_all(blkdev);
    while ((rc != rp)) {
        /* pull request from ring */
        if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc))
            break;
        ioreq = ioreq_start(blkdev);
        if (ioreq == NULL) {
            blkdev->more_work++;
            break;
        }
        blk_get_request(blkdev, ioreq, rc);
        blkdev->rings.common.req_cons = ++rc;

        /* parse them */
        if (ioreq_parse(ioreq) != 0) {
            if (blk_send_response_one(ioreq))
                xen_be_send_notify(&blkdev->xendev);
            ioreq_release(ioreq);
            continue;
        }

        if (use_aio) {
            /* run i/o in aio mode */
            ioreq_runio_qemu_aio(ioreq);
        } else {
            /* run i/o in sync mode */
            ioreq_runio_qemu_sync(ioreq);
        }
    }
    if (!use_aio)
        blk_send_response_all(blkdev);

    if (blkdev->more_work && blkdev->requests_inflight < max_requests)
        qemu_bh_schedule(blkdev->bh);
}

/* ------------------------------------------------------------- */

static void blk_bh(void *opaque)
{
    struct XenBlkDev *blkdev = opaque;
    blk_handle_requests(blkdev);
}

static void blk_alloc(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    LIST_INIT(&blkdev->inflight);
    LIST_INIT(&blkdev->finished);
    LIST_INIT(&blkdev->freelist);
    blkdev->bh = qemu_bh_new(blk_bh, blkdev);
    if (xen_mode != XEN_EMULATE)
        batch_maps = 1;
}

static int blk_init(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    int mode, qflags, have_barriers, info = 0;
    char *h = NULL;

    /* read xenstore entries */
    if (blkdev->params == NULL) {
    blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params");
        if (blkdev->params != NULL)
            h = strchr(blkdev->params, ':');
    if (h != NULL) {
        blkdev->fileproto = blkdev->params;
        blkdev->filename  = h+1;
        *h = 0;
    } else {
        blkdev->fileproto = "<unset>";
        blkdev->filename  = blkdev->params;
    }
    }
    if (!strcmp("aio", blkdev->fileproto))
        blkdev->fileproto = "raw";
    if (blkdev->mode == NULL)
    blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode");
    if (blkdev->type == NULL)
    blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type");
    if (blkdev->dev == NULL)
    blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev");
    if (blkdev->devtype == NULL)
    blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type");

    /* do we have all we need? */
    if (blkdev->params == NULL ||
    blkdev->mode == NULL   ||
    blkdev->type == NULL   ||
    blkdev->dev == NULL)
    return -1;

    /* read-only ? */
    qflags = BDRV_O_NOCACHE;
    if (strcmp(blkdev->mode, "w") == 0) {
    mode   = O_RDWR;
    qflags |= BDRV_O_RDWR;
    } else {
    mode   = O_RDONLY;
    qflags |= BDRV_O_RDONLY;
    info  |= VDISK_READONLY;
    }

    /* cdrom ? */
    if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom"))
    info  |= VDISK_CDROM;

    /* init qemu block driver */
    blkdev->index = (blkdev->xendev.dev - 202 * 256) / 16;
    blkdev->index = drive_get_index(IF_XEN, 0, blkdev->index);
    if (blkdev->index == -1) {
        /* setup via xenbus -> create new block driver instance */
        xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
    blkdev->bs = bdrv_new(blkdev->dev);
    if (blkdev->bs) {
        if (bdrv_open2(blkdev->bs, blkdev->filename, qflags,
                           bdrv_find_format(blkdev->fileproto)) != 0) {
        bdrv_delete(blkdev->bs);
        blkdev->bs = NULL;
        }
    }
    if (!blkdev->bs)
        return -1;
    } else {
        /* setup via qemu cmdline -> already setup for us */
        xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
    blkdev->bs = drives_table[blkdev->index].bdrv;
    }
    blkdev->file_blk  = BLOCK_SIZE;
    blkdev->file_size = bdrv_getlength(blkdev->bs);
    if (blkdev->file_size < 0) {
        xen_be_printf(&blkdev->xendev, 1, "bdrv_getlength: %d (%s) | drv %s\n",
                      (int)blkdev->file_size, strerror(-blkdev->file_size),
                      blkdev->bs->drv ? blkdev->bs->drv->format_name : "-");
    blkdev->file_size = 0;
    }
    have_barriers = blkdev->bs->drv && blkdev->bs->drv->bdrv_flush ? 1 : 0;

    xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
          " size %" PRId64 " (%" PRId64 " MB)\n",
          blkdev->type, blkdev->fileproto, blkdev->filename,
          blkdev->file_size, blkdev->file_size >> 20);

    /* fill info */
    xenstore_write_be_int(&blkdev->xendev, "feature-barrier", have_barriers);
    xenstore_write_be_int(&blkdev->xendev, "info",            info);
    xenstore_write_be_int(&blkdev->xendev, "sector-size",     blkdev->file_blk);
    xenstore_write_be_int(&blkdev->xendev, "sectors",
              blkdev->file_size / blkdev->file_blk);
    return 0;
}

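/*
 * Called once the frontend is ready: read ring-ref and event-channel
 * from the frontend's xenstore directory, map the shared ring page via
 * its grant reference, initialise the ring in the negotiated protocol
 * (native / x86_32 / x86_64) and bind the event channel.
 */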
static int blk_connect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1)
    return -1;
    if (xenstore_read_fe_int(&blkdev->xendev, "event-channel",
                             &blkdev->xendev.remote_port) == -1)
    return -1;

    blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
    if (blkdev->xendev.protocol) {
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0)
            blkdev->protocol = BLKIF_PROTOCOL_X86_32;
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0)
            blkdev->protocol = BLKIF_PROTOCOL_X86_64;
    }

    blkdev->sring = xc_gnttab_map_grant_ref(blkdev->xendev.gnttabdev,
                        blkdev->xendev.dom,
                        blkdev->ring_ref,
                        PROT_READ | PROT_WRITE);
    if (!blkdev->sring)
    return -1;
    blkdev->cnt_map++;

    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    {
    blkif_sring_t *sring_native = blkdev->sring;
    BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE);
    break;
    }
    case BLKIF_PROTOCOL_X86_32:
    {
    blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring;
    BACK_RING_INIT(&blkdev->rings.x86_32, sring_x86_32, XC_PAGE_SIZE);
    break;
    }
    case BLKIF_PROTOCOL_X86_64:
    {
    blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring;
    BACK_RING_INIT(&blkdev->rings.x86_64, sring_x86_64, XC_PAGE_SIZE);
    break;
    }
    }

    xen_be_bind_evtchn(&blkdev->xendev);

    xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
          "remote port %d, local port %d\n",
          blkdev->xendev.protocol, blkdev->ring_ref,
          blkdev->xendev.remote_port, blkdev->xendev.local_port);
    return 0;
}

static void blk_disconnect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (blkdev->bs) {
        if (blkdev->index == -1) {
            /* close/delete only if we created it ourself */
            bdrv_close(blkdev->bs);
            bdrv_delete(blkdev->bs);
        }
    blkdev->bs = NULL;
    }
    xen_be_unbind_evtchn(&blkdev->xendev);

    if (blkdev->sring) {
    xc_gnttab_munmap(blkdev->xendev.gnttabdev, blkdev->sring, 1);
    blkdev->cnt_map--;
    blkdev->sring = NULL;
    }
}

static int blk_free(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    struct ioreq *ioreq;

    while (!LIST_EMPTY(&blkdev->freelist)) {
    ioreq = LIST_FIRST(&blkdev->freelist);
        LIST_REMOVE(ioreq, list);
        qemu_iovec_destroy(&ioreq->v);
    qemu_free(ioreq);
    }

    qemu_free(blkdev->params);
    qemu_free(blkdev->mode);
    qemu_free(blkdev->type);
    qemu_free(blkdev->dev);
    qemu_free(blkdev->devtype);
    qemu_bh_delete(blkdev->bh);
    return 0;
}

static void blk_event(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    qemu_bh_schedule(blkdev->bh);
}

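/*
 * Backend callbacks, driven by the generic Xen backend framework
 * (xen_backend.c) as the frontend moves through the xenbus state
 * machine: alloc/init during setup, initialise (blk_connect) once the
 * frontend has published its ring, event while connected, and
 * disconnect/free on teardown.
 */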
struct XenDevOps xen_blkdev_ops = {
    .size       = sizeof(struct XenBlkDev),
    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
    .alloc      = blk_alloc,
    .init       = blk_init,
    .initialise    = blk_connect,
    .disconnect = blk_disconnect,
    .event      = blk_event,
    .free       = blk_free,
};