Advertisement
kopyl

Untitled

Dec 9th, 2023 (edited)
781
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.14 KB | None | 0 0
  # Seconds to sleep between pod-creation attempts when no capacity is available.
  WAIT_TIME = 1
  # Pod name, sent as `name` in the pod request.
  NAME = 'icons-train'

  # Container image, sent as `imageName` (plain string: nothing to interpolate).
  IMAGE_NAME = 'runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04'
  # Sent as `gpuTypeId`.
  GPU_TYPE_ID = 'NVIDIA A100 80GB PCIe'
  # Sent as `containerDiskInGb`.
  OS_DISK_SIZE_GB = 100
  # Sent as `volumeInGb`.
  PERSISTENT_DISK_SIZE_GB = 0
  # Sent unquoted as `cloudType`.
  CLOUD_TYPE = 'SECURE'
  # Sent as `minDownload`; presumably Mbps -- TODO confirm the unit against the API docs.
  MIN_DOWNLOAD_SPEED = 700
  # Sent as `dataCenterId`.
  DATA_CENTER_ID = 'EU-RO-1'
  # Sent as `networkVolumeId`.
  NETWORK_VOLUME_ID = '54epb6rtc4'
  # Sent as `templateId`.
  TEMPLATE_ID = 'ue50iblx66'
  # Sent as `cudaVersion`.
  CUDA_VERSION = '12.2'
  # Sent as `gpuCount`.
  GPU_COUNT = 8

  # Error messages that create_pod() treats as "no capacity right now, retry".
  # They are compared with `==` against the `message` field of each returned
  # error, so the text must match exactly.
  ERRORS = {
      "specs": (
          "There are no longer any instances available with "
          "the requested specifications. Please refresh and try again."
      ),
      "disk": (
          "There are no longer any instances available with "
          "enough disk space."
      ),
  }
  26.  
  27.  
  def create_pod(api, bar):
      """Request an on-demand pod, retrying while no capacity is available.

      The request is built once from the module-level configuration constants
      and re-sent every ``WAIT_TIME`` seconds for as long as the API answers
      with one of the known "no instances available" errors in ``ERRORS``.

      Args:
          api: Client object exposing ``create_on_demand_pod(config)``, which
              returns a response with ``status_code`` and ``json()``.
          bar: Progress indicator exposing ``update(n)``; advanced by one on
              every attempt.

      Returns:
          The parsed JSON response when the pod request succeeds (HTTP 200 and
          no ``errors`` key). ``None`` when the request fails with a non-200
          status or with errors that are not retryable.
      """
      # The config does not change between attempts, so build it once.
      pod_config = f"""
         cudaVersion: "{CUDA_VERSION}",
         templateId: "{TEMPLATE_ID}",
         networkVolumeId: "{NETWORK_VOLUME_ID}",
         dataCenterId: "{DATA_CENTER_ID}",
         minDownload: {MIN_DOWNLOAD_SPEED},
         gpuCount: {GPU_COUNT},
         volumeInGb: {PERSISTENT_DISK_SIZE_GB},
         containerDiskInGb: {OS_DISK_SIZE_GB},
         gpuTypeId: "{GPU_TYPE_ID}",
         cloudType: {CLOUD_TYPE},
         supportPublicIp: true,
         name: "{NAME}",
         dockerArgs: "",
         volumeMountPath: "/workspace",
         imageName: "{IMAGE_NAME}",
         startJupyter: true,
         startSsh: true,
     """

      # Iterate instead of recursing: waiting for capacity can take thousands
      # of attempts, which would overflow the call stack, and a recursive call
      # made from inside the error loop could request more than one pod.
      while True:
          bar.update(1)
          response = api.create_on_demand_pod(pod_config)

          if response.status_code != 200:
              # Check the status before parsing: an error body may not be JSON.
              print(f'ERROR: pod request failed with HTTP status {response.status_code}')
              return None

          resp_json = response.json()
          if 'errors' not in resp_json:
              return resp_json

          should_retry = False
          for error in resp_json['errors']:
              message = error['message']
              if message == ERRORS['specs']:
                  should_retry = True
              elif message == ERRORS['disk']:
                  print(error)
                  print(
                      'No instances with enough disk space available, '
                      f'sleeping for {WAIT_TIME} seconds'
                  )
                  should_retry = True
              else:
                  print('ERROR: ' + message)

          if not should_retry:
              # Only unknown errors were reported; retrying will not help.
              return None

          # At most one retry per response, however many errors it listed.
          time.sleep(WAIT_TIME)
  69.  
  70.  
  # Manually driven progress bar: create_pod() advances it once per attempt.
  # The `with` block closes the bar even if create_pod() raises.
  with tqdm() as bar:
      # NOTE(review): "XXX" looks like a redacted placeholder for the API key --
      # supply a real key before running.
      api = API("XXX")
      res = create_pod(api, bar)
  # Bare expression: displays the result when this is the last line of a
  # notebook cell or REPL input; it has no effect in a plain script.
  res
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement