Advertisement
Guest User

Attempt at restoring XNACK support in ROCR-Runtime and AMDGPU

a guest
Jul 20th, 2024
20
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.83 KB | Source Code | 0 0
  1. --- a/src/core/runtime/isa.cpp 2024-07-19 23:55:42.477579901 -0000
  2. +++ b/src/core/runtime/isa.cpp 2024-07-19 23:55:42.477579903 -0000
  3. @@ -142,3 +142,4 @@ bool Isa::GetInfo(const hsa_isa_info_t &
  4. if (this->GetVersion() == Version(7, 0, 0) ||
  5. - this->GetVersion() == Version(8, 0, 1)) {
  6. + this->GetVersion() == Version(8, 0, 1) ||
  7. + this->GetVersion() == Version(8, 0, 3)) {
  8. profiles[1] = true;
  9. @@ -252,3 +253,5 @@ constexpr size_t hsa_name_size = 63;
  10. ISAREG_ENTRY_GEN("gfx802", 8, 0, 2, unsupported, unsupported, 64)
  11. - ISAREG_ENTRY_GEN("gfx803", 8, 0, 3, unsupported, unsupported, 64)
  12. + ISAREG_ENTRY_GEN("gfx803", 8, 0, 3, unsupported, any, 64)
  13. + ISAREG_ENTRY_GEN("gfx803:xnack-", 8, 0, 3, unsupported, disabled, 64)
  14. + ISAREG_ENTRY_GEN("gfx803:xnack+", 8, 0, 3, unsupported, enabled, 64)
  15. ISAREG_ENTRY_GEN("gfx805", 8, 0, 5, unsupported, unsupported, 64)
  16. @@ -348,3 +351,5 @@ constexpr size_t hsa_name_size = 63;
  17. ISAREG_ENTRY_GEN("gfx1102", 11, 0, 2, unsupported, unsupported, 32)
  18. - ISAREG_ENTRY_GEN("gfx1103", 11, 0, 3, unsupported, unsupported, 32)
  19. + ISAREG_ENTRY_GEN("gfx1103", 11, 0, 3, unsupported, any, 32)
  20. + ISAREG_ENTRY_GEN("gfx1103:xnack-", 11, 0, 3, unsupported, disabled, 32)
  21. + ISAREG_ENTRY_GEN("gfx1103:xnack+", 11, 0, 3, unsupported, enabled, 32)
  22. ISAREG_ENTRY_GEN("gfx1150", 11, 5, 0, unsupported, unsupported, 32)
  23. --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 2024-07-19 23:28:10.520865455 -0000
  24. +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c 2024-07-19 23:28:10.520865456 -0000
  25. @@ -58,8 +58,8 @@ static struct workqueue_struct *kfd_proc
  26.  
  27. -/* Ordered, single-threaded workqueue for restoring evicted
  28. - * processes. Restoring multiple processes concurrently under memory
  29. - * pressure can lead to processes blocking each other from validating
  30. - * their BOs and result in a live-lock situation where processes
  31. - * remain evicted indefinitely.
  32. - */
  33. + /* Ordered, single-threaded workqueue for restoring evicted
  34. + * processes. Restoring multiple processes concurrently under memory
  35. + * pressure can lead to processes blocking each other from validating
  36. + * their BOs and result in a live-lock situation where processes
  37. + * remain evicted indefinitely.
  38. + */
  39. static struct workqueue_struct *kfd_restore_wq;
  40. @@ -1420,8 +1420,2 @@ bool kfd_process_xnack_mode(struct kfd_p
  41.  
  42. - /* Only consider GFXv9 and higher GPUs. Older GPUs don't
  43. - * support the SVM APIs and don't need to be considered
  44. - * for the XNACK mode selection.
  45. - */
  46. - if (!KFD_IS_SOC15(dev))
  47. - continue;
  48. /* Aldebaran can always support XNACK because it can support
  49. @@ -1438,12 +1432,17 @@ bool kfd_process_xnack_mode(struct kfd_p
  50.  
  51. - /* GFXv10 and later GPUs do not support shader preemption
  52. - * during page faults. This can lead to poor QoS for queue
  53. - * management and memory-manager-related preemptions or
  54. - * even deadlocks.
  55. - */
  56. - if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
  57. - return false;
  58. -
  59. - if (dev->kfd->noretry)
  60. + if (KFD_GC_VERSION(dev) >= IP_VERSION(8, 0, 0)) {
  61. + pr_warn("AMD GPUs do not properly support shader preemption");
  62. + pr_warn(" during page faults.\n");
  63. + pr_warn("This may lead to poor QoS for queue management or");
  64. + pr_warn(" memory-manager-related preemptions and deadlocks.\n");
  65. + }
  66. +
  67. + if (dev->kfd->noretry) {
  68. + pr_err("Xnack requires noretry be disabled!\n");
  69. + pr_err("Via kernel cmdline: amdgpu.noretry=0\n");
  70. + pr_err("During module loading: printf 'options amdgpu noretry=0'");
  71. + pr_err(" >>/etc/modprobe.d/amdgpu.conf\n");
  72. + pr_err("Manually: sudo printf 0 >/sys/module/amdgpu/parameters/noretry\n");
  73. return false;
  74. + }
  75. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement