Advertisement
Guest User

eliminate calls to btr_cur_optimistic_insert

a guest
Aug 28th, 2012
127
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 11.20 KB | None | 0 0
  1.  
  2.     Reduce the number of calls to btr_cur_optimistic_insert()
  3.    
  4.     Summary:
  5.     During record insertion, InnoDB finds the page into which the record
  6.     should be inserted, acquires the latch for that page and attempts
  7.     to insert the record into the page. If there is not enough room in
  8.     the page, the b-tree structure must be changed, so InnoDB releases
  9.     the page latch and then acquires the b-tree latch. It then re-computes
  10.     the page into which the record should be inserted. Most of the time
  11.     this second page is the same page as the previously found page and
  12.     it still doesn't have room for the record. Rarely,
  13.     another thread (e.g. the purge thread) gets hold of the previously
  14.     found page and deletes some records on that page so that there is
  15.     room for the insertion in InnoDB's second attempt.
  16.    
  17.     This diff eliminates the unnecessary attempts to insert the record
  18.     into the page by checking whether the page has been modified since
  19.     the insertion was last attempted (same page number and modify_clock).
  20.    
  21.  
  22.  
  23. diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc
  24. index df2900c..1381b40 100644
  25. --- a/storage/innodb_plugin/handler/ha_innodb.cc
  26. +++ b/storage/innodb_plugin/handler/ha_innodb.cc
  27. @@ -1109,6 +1109,10 @@ static SHOW_VAR innodb_status_variables[]= {
  28.    (char*) &export_vars.innodb_malloc_cache_block_size_decompress, SHOW_LONG},
  29.    {"no_undo_slot_free",
  30.    (char*) &export_vars.no_undo_slot_free, SHOW_LONG},
  31. +#ifdef UNIV_DEBUG
  32. +  {"num_optimistic_insert_calls_in_pessimistic_descent",
  33. +  (char*) &export_vars.num_optimistic_insert_calls_in_pessimistic_descent, SHOW_LONGLONG},
  34. +#endif
  35.    {NullS, NullS, SHOW_LONG}
  36.  };
  37.  
  38. diff --git a/storage/innodb_plugin/ibuf/ibuf0ibuf.c b/storage/innodb_plugin/ibuf/ibuf0ibuf.c
  39. index a227a73..b770afd 100644
  40. --- a/storage/innodb_plugin/ibuf/ibuf0ibuf.c
  41. +++ b/storage/innodb_plugin/ibuf/ibuf0ibuf.c
  42. @@ -2581,7 +2581,23 @@ ibuf_insert_low(
  43.     ulint       space,  /*!< in: space id where to insert */
  44.     ulint       zip_size,/*!< in: compressed page size in bytes, or 0 */
  45.     ulint       page_no,/*!< in: page number where to insert */
  46. -   que_thr_t*  thr)    /*!< in: query thread */
  47. +   que_thr_t*  thr,    /*!< in: query thread */
  48. +   ulint*  ibuf_page_no,   /*!< *ibuf_page_no and *modify_clock are used to decide
  49. +                       whether to call btr_cur_optimistic_insert() during
  50. +                       pessimistic descent down the index tree.
  51. +                       in: If this is optimistic descent, then *ibuf_page_no
  52. +                       must be ULINT_UNDEFINED. If it is pessimistic
  53. +                       descent, *ibuf_page_no must be the page_no to which an
  54. +                       optimistic insert was attempted last time
  55. +                       ibuf_insert_low() was called.
  56. +                       out: If this is the optimistic descent, *ibuf_page_no is
  57. +                       set to the page_no to which an optimistic insert was
  58. +                       attempted. If it is pessimistic descent, this value is
  59. +                       not changed. */
  60. +   ib_uint64_t*    modify_clock) /*!< in/out: *modify_clock == ULLINT_UNDEFINED
  61. +                                during optimistic descent, and the modify_clock
  62. +                                value for the page that was used for optimistic
  63. +                                insert during pessimistic descent */
  64.  {
  65.     big_rec_t*  dummy_big_rec;
  66.     btr_pcur_t  pcur;
  67. @@ -2739,6 +2755,8 @@ ibuf_insert_low(
  68.     cursor = btr_pcur_get_btr_cur(&pcur);
  69.  
  70.     if (mode == BTR_MODIFY_PREV) {
  71. +       ut_a(*ibuf_page_no == ULINT_UNDEFINED);
  72. +       ut_a(*modify_clock == ULLINT_UNDEFINED);
  73.         err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
  74.                         ibuf_entry, &ins_rec,
  75.                         &dummy_big_rec, 0, thr, &mtr);
  76. @@ -2746,6 +2764,10 @@ ibuf_insert_low(
  77.             /* Update the page max trx id field */
  78.             page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
  79.                            thr_get_trx(thr)->id, &mtr);
  80. +       } else {
  81. +           *ibuf_page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
  82. +           *modify_clock = buf_block_get_modify_clock(
  83. +                   btr_cur_get_block(cursor));
  84.         }
  85.     } else {
  86.         ut_ad(mode == BTR_MODIFY_TREE);
  87. @@ -2757,10 +2779,16 @@ ibuf_insert_low(
  88.  
  89.         root = ibuf_tree_root_get(&mtr);
  90.  
  91. -       err = btr_cur_optimistic_insert(
  92. -               BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
  93. -               cursor, ibuf_entry, &ins_rec,
  94. -               &dummy_big_rec, 0, thr, &mtr);
  95. +       if ((*ibuf_page_no != buf_block_get_page_no(btr_cur_get_block(cursor)))
  96. +           || (*modify_clock != buf_block_get_modify_clock(
  97. +                   btr_cur_get_block(cursor)))) {
  98. +           err = btr_cur_optimistic_insert(
  99. +                   BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
  100. +                   cursor, ibuf_entry, &ins_rec,
  101. +                   &dummy_big_rec, 0, thr, &mtr);
  102. +       } else {
  103. +           err = DB_FAIL;
  104. +       }
  105.  
  106.         if (err == DB_FAIL) {
  107.             err = btr_cur_pessimistic_insert(
  108. @@ -2845,6 +2873,8 @@ ibuf_insert(
  109.  {
  110.     ulint   err;
  111.     ulint   entry_size;
  112. +   ulint   ibuf_page_no = ULINT_UNDEFINED;
  113. +   ullint  modify_clock = ULLINT_UNDEFINED;
  114.  
  115.     ut_a(trx_sys_multiple_tablespace_format);
  116.     ut_ad(dtuple_check_typed(entry));
  117. @@ -2873,10 +2903,12 @@ do_insert:
  118.     }
  119.  
  120.     err = ibuf_insert_low(BTR_MODIFY_PREV, entry, entry_size,
  121. -                 index, space, zip_size, page_no, thr);
  122. +                 index, space, zip_size, page_no, thr,
  123. +                 &ibuf_page_no, &modify_clock);
  124.     if (err == DB_FAIL) {
  125.         err = ibuf_insert_low(BTR_MODIFY_TREE, entry, entry_size,
  126. -                     index, space, zip_size, page_no, thr);
  127. +                     index, space, zip_size, page_no, thr,
  128. +                     &ibuf_page_no, &modify_clock);
  129.     }
  130.  
  131.     if (err == DB_SUCCESS) {
  132. diff --git a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h
  133. index 14ea98c..c0177a4 100644
  134. --- a/storage/innodb_plugin/include/srv0srv.h
  135. +++ b/storage/innodb_plugin/include/srv0srv.h
  136. @@ -1141,6 +1141,9 @@ struct export_var_struct{
  137.     ulint       innodb_drop_purge_skip_row;
  138.     ulint       innodb_drop_ibuf_skip_row;
  139.     ulint       no_undo_slot_free;
  140. +#ifdef UNIV_DEBUG
  141. +   ullint  num_optimistic_insert_calls_in_pessimistic_descent;
  142. +#endif /* UNIV_DEBUG */
  143.  };
  144.  
  145.  /** The server system struct */
  146. diff --git a/storage/innodb_plugin/row/row0ins.c b/storage/innodb_plugin/row/row0ins.c
  147. index 43db5c2..95c3953 100644
  148. --- a/storage/innodb_plugin/row/row0ins.c
  149. +++ b/storage/innodb_plugin/row/row0ins.c
  150. @@ -70,6 +70,12 @@ check.
  151.  If you make a change in this module make sure that no codepath is
  152.  introduced where a call to log_free_check() is bypassed. */
  153.  
  154. +#ifdef UNIV_DEBUG
  155. +/** Number of calls to btr_cur_optimistic_insert() made during the
  156. +pessimistic descent (BTR_MODIFY_TREE) in row_ins_index_entry_low() */
  157. +ullint row_ins_optimistic_insert_calls_in_pessimistic_descent = 0;
  158. +#endif /* UNIV_DEBUG */
  159. +
  160.  /*********************************************************************//**
  161.  Creates an insert node struct.
  162.  @return    own: insert node struct */
  163. @@ -1999,7 +2005,23 @@ row_ins_index_entry_low(
  164.     dict_index_t*   index,  /*!< in: index */
  165.     dtuple_t*   entry,  /*!< in/out: index entry to insert */
  166.     ulint       n_ext,  /*!< in: number of externally stored columns */
  167. -   que_thr_t*  thr)    /*!< in: query thread */
  168. +   que_thr_t*  thr,    /*!< in: query thread */
  169. +   ulint*  page_no,    /*!< *page_no and *modify_clock are used to decide
  170. +                       whether to call btr_cur_optimistic_insert() during
  171. +                       pessimistic descent down the index tree.
  172. +                       in: If this is optimistic descent, then *page_no
  173. +                       must be ULINT_UNDEFINED. If it is pessimistic
  174. +                       descent, *page_no must be the page_no to which an
  175. +                       optimistic insert was attempted last time
  176. +                       row_ins_index_entry_low() was called.
  177. +                       out: If this is the optimistic descent, *page_no is set
  178. +                       to the page_no to which an optimistic insert was
  179. +                       attempted. If it is pessimistic descent, this value is
  180. +                       not changed. */
  181. +   ib_uint64_t*    modify_clock) /*!< in/out: *modify_clock == ULLINT_UNDEFINED
  182. +                                during optimistic descent, and the modify_clock
  183. +                                value for the page that was used for optimistic
  184. +                                insert during pessimistic descent */
  185.  {
  186.     btr_cur_t   cursor;
  187.     ulint       ignore_sec_unique   = 0;
  188. @@ -2185,9 +2207,16 @@ row_ins_index_entry_low(
  189.         }
  190.     } else {
  191.         if (mode == BTR_MODIFY_LEAF) {
  192. +           ut_a(*page_no == ULINT_UNDEFINED);
  193. +           ut_a(*modify_clock == ULLINT_UNDEFINED);
  194.             err = btr_cur_optimistic_insert(
  195.                 0, &cursor, entry, &insert_rec, &big_rec,
  196.                 n_ext, thr, &mtr);
  197. +           if (err != DB_SUCCESS) {
  198. +               *page_no = buf_block_get_page_no(btr_cur_get_block(&cursor));
  199. +               *modify_clock = buf_block_get_modify_clock(
  200. +                       btr_cur_get_block(&cursor));
  201. +           }
  202.         } else {
  203.             ut_a(mode == BTR_MODIFY_TREE);
  204.             if (buf_LRU_buf_pool_running_out()) {
  205. @@ -2197,9 +2226,16 @@ row_ins_index_entry_low(
  206.                 goto function_exit;
  207.             }
  208.  
  209. -           err = btr_cur_optimistic_insert(
  210. -               0, &cursor, entry, &insert_rec, &big_rec,
  211. -               n_ext, thr, &mtr);
  212. +           if ((*page_no != buf_block_get_page_no(btr_cur_get_block(&cursor)))
  213. +               || (*modify_clock != buf_block_get_modify_clock(
  214. +                       btr_cur_get_block(&cursor)))) {
  215. +               ut_d(++row_ins_optimistic_insert_calls_in_pessimistic_descent);
  216. +               err = btr_cur_optimistic_insert(
  217. +                       0, &cursor, entry, &insert_rec, &big_rec,
  218. +                       n_ext, thr, &mtr);
  219. +           } else {
  220. +               err = DB_FAIL;
  221. +           }
  222.  
  223.             if (err == DB_FAIL) {
  224.                 err = btr_cur_pessimistic_insert(
  225. @@ -2290,6 +2326,8 @@ row_ins_index_entry(
  226.     que_thr_t*  thr)    /*!< in: query thread */
  227.  {
  228.     ulint   err;
  229. +   ulint   page_no = ULINT_UNDEFINED;
  230. +   ullint  modify_clock = ULLINT_UNDEFINED;
  231.  
  232.     if (foreign && UT_LIST_GET_FIRST(index->table->foreign_list)) {
  233.         err = row_ins_check_foreign_constraints(index->table, index,
  234. @@ -2303,7 +2341,7 @@ row_ins_index_entry(
  235.     /* Try first optimistic descent to the B-tree */
  236.  
  237.     err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
  238. -                     n_ext, thr);
  239. +                     n_ext, thr, &page_no, &modify_clock);
  240.     if (err != DB_FAIL) {
  241.  
  242.         return(err);
  243. @@ -2312,7 +2350,7 @@ row_ins_index_entry(
  244.     /* Try then pessimistic descent to the B-tree */
  245.  
  246.     err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry,
  247. -                     n_ext, thr);
  248. +                     n_ext, thr, &page_no, &modify_clock);
  249.     return(err);
  250.  }
  251.  
  252. diff --git a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c
  253. index 15c18a8..35199f7 100644
  254. --- a/storage/innodb_plugin/srv/srv0srv.c
  255. +++ b/storage/innodb_plugin/srv/srv0srv.c
  256. @@ -2304,6 +2304,10 @@ export_zip(
  257.     *decompressed_secondary_usec = zip_stat->decompressed_secondary_usec;
  258.  }
  259.  
  260. +#ifdef UNIV_DEBUG
  261. +extern ullint row_ins_optimistic_insert_calls_in_pessimistic_descent;
  262. +#endif /* UNIV_DEBUG */
  263. +
  264.  /******************************************************************//**
  265.  Function to pass InnoDB status variables to MySQL */
  266.  UNIV_INTERN
  267. @@ -2757,6 +2761,10 @@ srv_export_innodb_status(void)
  268.  
  269.     export_vars.innodb_drop_purge_skip_row = srv_drop_purge_skip_row;
  270.     export_vars.innodb_drop_ibuf_skip_row = srv_drop_ibuf_skip_row;
  271. +#ifdef UNIV_DEBUG
  272. +   export_vars.num_optimistic_insert_calls_in_pessimistic_descent =
  273. +       row_ins_optimistic_insert_calls_in_pessimistic_descent;
  274. +#endif /* UNIV_DEBUG */
  275.  
  276.     mutex_exit(&srv_innodb_monitor_mutex);
  277.  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement