
stackbd blk-mq doesn't work ((

a guest
Jul 23rd, 2019
#include <linux/version.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>

#include <linux/kernel.h> /* printk() */
#include <linux/fs.h>     /* everything... */
#include <linux/errno.h>  /* error codes */
#include <linux/types.h>  /* size_t */
#include <linux/vmalloc.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/kthread.h>

#include <trace/events/block.h>

#include "../common/stackbd.h"

#define USE_BLKMQ 1

#if USE_BLKMQ
#include <linux/blk-mq.h>
#endif

#define LOGOUT(lvl, fmt, args...) printk(lvl "%s [task=%p] %s: " fmt, THIS_MODULE->name, current, __func__, ## args)
#define PINFO(fmt, args...) LOGOUT(KERN_INFO, fmt, ## args)
#define PWARN(fmt, args...) LOGOUT(KERN_WARNING, fmt, ## args)
#define PERROR(fmt, args...) LOGOUT(KERN_ERR, fmt, ## args)

#define STACKBD_BDEV_MODE (FMODE_READ | FMODE_WRITE | FMODE_EXCL)
#define DEBUGGG printk("stackbd: %d\n", __LINE__);
/*
 * We can tweak our hardware sector size, but the kernel talks to us
 * in terms of small sectors, always.
 */
#define KERNEL_SECTOR_SHIFT 9
#define KERNEL_SECTOR_SIZE (1 << KERNEL_SECTOR_SHIFT)
#define KERNEL_PAGE_SHIFT 12
#define KERNEL_PAGE_SIZE (1 << KERNEL_PAGE_SHIFT)

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
#   define DECLARE_BIO_VEC struct bio_vec
#   define ACCESS_BIO_VEC(x) (x)
#else
#   define DECLARE_BIO_VEC struct bio_vec *
#   define ACCESS_BIO_VEC(x) (*(x))
#endif

MODULE_LICENSE("Dual BSD/GPL");

static int major_num = 0;
module_param(major_num, int, 0);
static int LOGICAL_BLOCK_SIZE = 4096;
module_param(LOGICAL_BLOCK_SIZE, int, 0);

/*
 * The internal representation of our device.
 */
static struct stackbd_t {
    sector_t capacity; /* Sectors */
    struct gendisk *gd;
    spinlock_t lock;
    struct bio_list bio_list;
    struct task_struct *thread;
    int is_active;
    struct block_device *bdev_raw;
    /* Our request queue */
    struct request_queue *queue;
#if USE_BLKMQ
    struct blk_mq_tag_set tag_set;
#endif
} stackbd;

struct bio_private
{
    void *bi_private_old;
    void *data;
    bool is_ready;
};

typedef struct hidden_cmd_s
{
    long ret;
} hidden_cmd_t;


static DECLARE_WAIT_QUEUE_HEAD(req_event);

/*
 * Read nsect 512-byte sectors from the underlying device into a
 * caller-supplied, physically contiguous (kmalloc'd) buffer by building
 * a bio over its pages and waiting for completion.
 */
int ald_buffer_read(
    unsigned long sector,
    unsigned long nsect,
    char *buffer
)
{
    int result = 0;
    unsigned nsize = nsect << KERNEL_SECTOR_SHIFT;
    int npages = ((nsize - 1) >> KERNEL_PAGE_SHIFT) + 1;
    struct bio *bio = bio_alloc(GFP_ATOMIC, npages);
    struct block_device *bdev = stackbd.bdev_raw;

    PINFO("begin; sector=%lu; nsect=%lu; buffer=%p\n", sector, nsect, buffer);

    if (unlikely(!bio))
    {
        PINFO("bio_alloc failed!\n");
        result = -ENOMEM;
        return result;
    }
    bio_set_dev(bio, bdev);
    bio->bi_iter.bi_sector = sector;
    bio_set_op_attrs(bio, REQ_OP_READ, 0);
    {
        char *ptr = buffer;
        do
        {
            struct page *page = virt_to_page(ptr);

            if (unlikely(!page))
            {
                PINFO("virt_to_page failed!\n");
                result = -ENOMEM;
                break;
            }

            {
                unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), nsize);
                bio_add_page(bio, page, this_step, offset_in_page(ptr));
                nsize -= this_step;
                ptr += this_step;
            }
        } while (nsize > 0);

        if (likely(!result))
        {
            result = submit_bio_wait(bio);
        }
        bio_put(bio);
    }
    PINFO("end (%d)\n", result);
    return result;
}

/*
 * Write nsect 512-byte sectors from a caller-supplied, physically
 * contiguous (kmalloc'd) buffer to the underlying device, waiting for
 * the bio to complete.
 */
int ald_buffer_write(
    unsigned long sector,
    unsigned long nsect,
    char *buffer
)
{
    int result = 0;
    unsigned nsize = nsect << KERNEL_SECTOR_SHIFT;
    int npages = ((nsize - 1) >> KERNEL_PAGE_SHIFT) + 1;
    struct bio *bio = bio_alloc(GFP_ATOMIC, npages);
    struct block_device *bdev = stackbd.bdev_raw;

    PINFO("begin; sector=%lu; nsect=%lu; buffer=%p\n", sector, nsect, buffer);

    if (unlikely(!bio))
    {
        PINFO("bio_alloc failed!\n");
        result = -ENOMEM;
        return result;
    }
    bio_set_dev(bio, bdev);
    bio->bi_iter.bi_sector = sector;
    bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
    {
        char *ptr = buffer;
        do
        {
            struct page *page = virt_to_page(ptr);

            if (unlikely(!page))
            {
                PINFO("virt_to_page failed!\n");
                result = -ENOMEM;
                break;
            }

            {
                unsigned op = offset_in_page(ptr);
                unsigned this_step = min((unsigned)(KERNEL_PAGE_SIZE - op), nsize);
                bio_add_page(bio, page, this_step, op);
                nsize -= this_step;
                ptr += this_step;
            }
        } while (nsize > 0);

        if (likely(!result))
        {
            result = submit_bio_wait(bio);
        }
        bio_put(bio);
    }
    PINFO("end (%d)\n", result);
    return result;
}

#if USE_BLKMQ
/*
 * Attach a small private context to the bio so the originating request
 * can be found (and completed) when the bio is done. Note that the
 * GFP_ATOMIC allocation is assumed to succeed here.
 */
static void pb_alloc(struct bio *bio, void *data)
{
    struct bio_private *pb = kmalloc(sizeof(struct bio_private), GFP_ATOMIC);

    pb->bi_private_old = bio->bi_private;
    pb->data = data;
    pb->is_ready = false;
    bio->bi_private = pb;
}

static void pb_free(struct bio *bio)
{
    struct bio_private *pb = bio->bi_private;
    void *t = bio->bi_private;
    bio->bi_private = pb->bi_private_old;
    kfree(t);
}
#endif

static void my_bio_complete(struct bio *bio, int ret)
{
#if USE_BLKMQ
    struct bio_private *pb = bio->bi_private;
    struct request *rq = pb->data;

    pb_free(bio);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
    blk_mq_end_request(rq, ret ? BLK_STS_IOERR : BLK_STS_OK);
#else
    blk_mq_end_io(rq, ret ? BLK_STS_IOERR : BLK_STS_OK);
#endif

#else // #if USE_BLKMQ
    bio_endio(bio);
#endif // #if USE_BLKMQ
}


/*
 * Service one queued bio: bounce the data through a temporary kernel
 * buffer and issue a synchronous read/write against the lower device.
 */
static void stackbd_io_fn(struct bio *bio)
{
    sector_t sector = bio->bi_iter.bi_sector;
    int size = bio->bi_iter.bi_size;
    int nsect = size >> KERNEL_SECTOR_SHIFT;
    DECLARE_BIO_VEC bvec;
    struct bvec_iter iter;
    u8 *buffer = kmalloc(size, GFP_ATOMIC);
    u8 *ptr = buffer;

    if (unlikely(!buffer))
    {
        PERROR("kmalloc of %d bytes failed!\n", size);
        my_bio_complete(bio, -ENOMEM);
        return;
    }

    if (bio_data_dir(bio) == READ)
    {
        ald_buffer_read(sector, nsect, ptr);

        bio_for_each_segment(bvec, bio, iter)
        {
            u8 *dst = page_address(ACCESS_BIO_VEC(bvec).bv_page) + ACCESS_BIO_VEC(bvec).bv_offset;
            int len = ACCESS_BIO_VEC(bvec).bv_len;
            memcpy(dst, ptr, len);
            ptr += len;
        }
    }
    else
    {
        bio_for_each_segment(bvec, bio, iter)
        {
            u8 *src = page_address(ACCESS_BIO_VEC(bvec).bv_page) + ACCESS_BIO_VEC(bvec).bv_offset;
            int len = ACCESS_BIO_VEC(bvec).bv_len;
            memcpy(ptr, src, len);
            ptr += len;
        }
        ald_buffer_write(sector, nsect, buffer);
    }
    kfree(buffer);
    my_bio_complete(bio, 0);
}

static int stackbd_threadfn(void *data)
{
    struct bio *bio;

    set_user_nice(current, -20);

    while (!kthread_should_stop())
    {
        /* wake_up() is after adding bio to list. No need for condition */
        wait_event_interruptible(req_event, kthread_should_stop() ||
                !bio_list_empty(&stackbd.bio_list));

        spin_lock_irq(&stackbd.lock);
        if (bio_list_empty(&stackbd.bio_list))
        {
            spin_unlock_irq(&stackbd.lock);
            continue;
        }

        bio = bio_list_pop(&stackbd.bio_list);
        spin_unlock_irq(&stackbd.lock);

        stackbd_io_fn(bio);
    }

    return 0;
}

#if USE_BLKMQ
//#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 3)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
static blk_status_t hidden_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data* bd)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
static blk_status_t hidden_queue_rq(struct blk_mq_hw_ctx *hctx, struct request* rq, bool last)
#else
static blk_status_t hidden_queue_rq(struct blk_mq_hw_ctx *hctx, struct request* rq)
#endif
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
    struct request *rq = bd->rq;
#endif
    struct bio *bio = rq->bio; /* only rq->bio is queued; merged requests carrying several bios are not handled */
    pb_alloc(bio, rq);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
    /* The request must be started before it can be ended on any path. */
    blk_mq_start_request(rq);
#endif

    spin_lock_irq(&stackbd.lock);
    if (!stackbd.bdev_raw)
    {
        printk("stackbd: Request before bdev_raw is ready, aborting\n");
        goto abort;
    }
    if (!stackbd.is_active)
    {
        printk("stackbd: Device not active yet, aborting\n");
        goto abort;
    }

    bio_list_add(&stackbd.bio_list, bio);
    wake_up(&req_event);
    spin_unlock_irq(&stackbd.lock);

exit:
    return BLK_STS_OK; //always return ok
abort:
    spin_unlock_irq(&stackbd.lock); /* release the lock taken above before failing the request */
    my_bio_complete(bio, -EIO);
    goto exit;
}

static struct blk_mq_ops _mq_ops =
{
    .queue_rq = hidden_queue_rq,
#if LINUX_VERSION_CODE <= KERNEL_VERSION(4, 2, 0)
    .map_queue = blk_mq_map_queue
#endif
};
#else // #if USE_BLKMQ
/*
 * Handle an I/O request.
 */
static blk_qc_t stackbd_make_request(struct request_queue *q, struct bio *bio)
{
    printk("stackbd: make request %-5s block %-12lu #pages %-4hu total-size "
            "%-10u\n", bio_data_dir(bio) == WRITE ? "write" : "read",
            bio->bi_iter.bi_sector, bio->bi_vcnt, bio->bi_iter.bi_size);

//    printk("<%p> Make request %s %s %s\n", bio,
//           bio->bi_rw & REQ_SYNC ? "SYNC" : "",
//           bio->bi_rw & REQ_FLUSH ? "FLUSH" : "",
//           bio->bi_rw & REQ_NOIDLE ? "NOIDLE" : "");
//
    spin_lock_irq(&stackbd.lock);
    if (!stackbd.bdev_raw)
    {
        printk("stackbd: Request before bdev_raw is ready, aborting\n");
        goto abort;
    }
    if (!stackbd.is_active)
    {
        printk("stackbd: Device not active yet, aborting\n");
        goto abort;
    }
    bio_list_add(&stackbd.bio_list, bio);
    wake_up(&req_event);
    spin_unlock_irq(&stackbd.lock);

    goto exit;

abort:
    spin_unlock_irq(&stackbd.lock);
    printk("<%p> Abort request\n\n", bio);
    bio_io_error(bio);
exit:
    return BLK_QC_T_NONE;
}
#endif // #if USE_BLKMQ

static struct block_device *stackbd_bdev_open(char dev_path[])
{
    /* Open underlying device */
    struct block_device *bdev_raw = lookup_bdev(dev_path);
    printk("Opened %s\n", dev_path);

    if (IS_ERR(bdev_raw))
    {
        printk("stackbd: error opening raw device <%ld>\n", PTR_ERR(bdev_raw));
        return NULL;
    }

    if (!bdget(bdev_raw->bd_dev))
    {
        printk("stackbd: error bdget()\n");
        return NULL;
    }

    if (blkdev_get(bdev_raw, STACKBD_BDEV_MODE, &stackbd))
    {
        printk("stackbd: error blkdev_get()\n");
        bdput(bdev_raw);
        return NULL;
    }

    return bdev_raw;
}

static int stackbd_start(char dev_path[])
{
    unsigned max_sectors;

    if (!(stackbd.bdev_raw = stackbd_bdev_open(dev_path)))
        return -EFAULT;

    /* Set up our internal device */
    stackbd.capacity = get_capacity(stackbd.bdev_raw->bd_disk);
    printk("stackbd: Device real capacity: %llu\n",
           (unsigned long long)stackbd.capacity);

    set_capacity(stackbd.gd, stackbd.capacity);

    max_sectors = queue_max_hw_sectors(bdev_get_queue(stackbd.bdev_raw));
    blk_queue_max_hw_sectors(stackbd.queue, max_sectors);
    printk("stackbd: Max sectors: %u\n", max_sectors);

    stackbd.thread = kthread_create(stackbd_threadfn, NULL,
           stackbd.gd->disk_name);
    if (IS_ERR(stackbd.thread))
    {
        printk("stackbd: error kthread_create <%ld>\n",
               PTR_ERR(stackbd.thread));
        goto error_after_bdev;
    }

    printk("stackbd: done initializing successfully\n");
    stackbd.is_active = 1;
    wake_up_process(stackbd.thread);

    return 0;

error_after_bdev:
    blkdev_put(stackbd.bdev_raw, STACKBD_BDEV_MODE);
    bdput(stackbd.bdev_raw);

    return -EFAULT;
}

static int stackbd_ioctl(struct block_device *bdev, fmode_t mode,
             unsigned int cmd, unsigned long arg)
{
    char dev_path[80];
    void __user *argp = (void __user *)arg;

    switch (cmd)
    {
    case STACKBD_DO_IT:
        printk("\n*** DO IT!!!!!!! ***\n\n");

        if (copy_from_user(dev_path, argp, sizeof(dev_path)))
            return -EFAULT;
        dev_path[sizeof(dev_path) - 1] = '\0'; /* ensure the path is NUL-terminated */

        return stackbd_start(dev_path);
    default:
        return -ENOTTY;
    }
}

/*
 * The HDIO_GETGEO ioctl is handled in blkdev_ioctl(), which
 * calls this. We need to implement getgeo, since we can't
 * use tools such as fdisk to partition the drive otherwise.
 */
int stackbd_getgeo(struct block_device * block_device, struct hd_geometry * geo)
{
    long size;

    /* We have no real geometry, of course, so make something up. */
    size = stackbd.capacity * (LOGICAL_BLOCK_SIZE / KERNEL_SECTOR_SIZE);
    geo->cylinders = (size & ~0x3f) >> 6;
    geo->heads = 4;
    geo->sectors = 16;
    geo->start = 0;
    return 0;
}

/*
 * The device operations structure.
 */
static struct block_device_operations stackbd_ops = {
        .owner  = THIS_MODULE,
        .getgeo = stackbd_getgeo,
        .ioctl  = stackbd_ioctl,
};

static int __init stackbd_init(void)
{
    /* Set up our internal device */
    spin_lock_init(&stackbd.lock);

    /* blk_alloc_queue() instead of blk_init_queue() so it won't set up the
     * queue for requests.
     */
#if USE_BLKMQ
    stackbd.tag_set.ops = &_mq_ops;
    stackbd.tag_set.nr_hw_queues = 1;
    stackbd.tag_set.queue_depth = 128;
    stackbd.tag_set.numa_node = NUMA_NO_NODE;
    stackbd.tag_set.cmd_size = sizeof(hidden_cmd_t);
    stackbd.tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
    stackbd.tag_set.driver_data = &stackbd;

    {
        int res = blk_mq_alloc_tag_set(&stackbd.tag_set);
        if (res)
        {
            PWARN("unable to allocate tag set (%d)\n", res);
            return -EFAULT;
        }
    }
    stackbd.queue = blk_mq_init_queue(&stackbd.tag_set);
    if (IS_ERR(stackbd.queue))
    {
        int res = PTR_ERR(stackbd.queue);
        PWARN("Failed to allocate queue (%d)\n", res);
        blk_mq_free_tag_set(&stackbd.tag_set);
        return -EFAULT;
    }
#else
    if (!(stackbd.queue = blk_alloc_queue(GFP_KERNEL)))
    {
        printk("stackbd: alloc_queue failed\n");
        return -EFAULT;
    }

    blk_queue_make_request(stackbd.queue, stackbd_make_request);
#endif
    blk_queue_logical_block_size(stackbd.queue, LOGICAL_BLOCK_SIZE);

    /* Get registered */
    if ((major_num = register_blkdev(major_num, STACKBD_NAME)) < 0)
    {
        printk("stackbd: unable to get major number\n");
        goto error_after_alloc_queue;
    }

    /* Gendisk structure */
    if (!(stackbd.gd = alloc_disk(16)))
        goto error_after_register_blkdev;
    stackbd.gd->major = major_num;
    stackbd.gd->first_minor = 0;
    stackbd.gd->fops = &stackbd_ops;
    stackbd.gd->private_data = &stackbd;
    strcpy(stackbd.gd->disk_name, STACKBD_NAME_0);
    stackbd.gd->queue = stackbd.queue;
    add_disk(stackbd.gd);

    printk("stackbd: init done\n");

    return 0;

error_after_register_blkdev:
    unregister_blkdev(major_num, STACKBD_NAME);
error_after_alloc_queue:
    blk_cleanup_queue(stackbd.queue);
#if USE_BLKMQ
    blk_mq_free_tag_set(&stackbd.tag_set);
#endif

    return -EFAULT;
}

static void __exit stackbd_exit(void)
{
    printk("stackbd: exit\n");

    if (stackbd.is_active)
    {
        kthread_stop(stackbd.thread);
        blkdev_put(stackbd.bdev_raw, STACKBD_BDEV_MODE);
        bdput(stackbd.bdev_raw);
    }

    del_gendisk(stackbd.gd);
    put_disk(stackbd.gd);
    unregister_blkdev(major_num, STACKBD_NAME);
    blk_cleanup_queue(stackbd.queue);
#if USE_BLKMQ
    if (stackbd.tag_set.tags)
        blk_mq_free_tag_set(&stackbd.tag_set);
#endif
}

module_init(stackbd_init);
module_exit(stackbd_exit);
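
For reference, a minimal userspace attach sketch (not part of the original paste): it assumes STACKBD_DO_IT comes from the same ../common/stackbd.h header the module includes, and that the disk node created from STACKBD_NAME_0 shows up as /dev/stackbd0. The buffer is 80 bytes because stackbd_ioctl() copies a full 80-byte path from userspace.

/* stackbd_attach.c - hypothetical helper; build and paths are assumptions */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "../common/stackbd.h"

int main(int argc, char *argv[])
{
    char dev_path[80] = {0}; /* matches the 80-byte buffer in stackbd_ioctl() */
    int fd;

    if (argc != 2) {
        fprintf(stderr, "usage: %s /dev/<underlying-device>\n", argv[0]);
        return 1;
    }
    strncpy(dev_path, argv[1], sizeof(dev_path) - 1);

    fd = open("/dev/stackbd0", O_RDWR); /* assumes STACKBD_NAME_0 == "stackbd0" */
    if (fd < 0) {
        perror("open /dev/stackbd0");
        return 1;
    }

    /* The kernel copies sizeof(dev_path) bytes from this pointer and then
     * opens the named device as the lower device (stackbd_start()). */
    if (ioctl(fd, STACKBD_DO_IT, dev_path) < 0) {
        perror("ioctl STACKBD_DO_IT");
        close(fd);
        return 1;
    }

    close(fd);
    return 0;
}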