Advertisement
marcoverl

os-c7-torque.radl

Oct 6th, 2016
228
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.69 KB | None | 0 0
  # Private network: attached to every worker node and to the second
  # interface of the front-end (see the `system` definitions).
  network private ( provider_id = 'GRID-CERTIFICATION-wan' )

  # Public network: attached to the first interface of the front-end.
  # `outports` lists the ports opened towards the outside (presumably the
  # Torque daemon ports -- TODO confirm). The value must stay on ONE line:
  # a line break inside the quoted string ends up inside the last port spec.
  network public (
    outports = '15002/udp,15001/tcp,15004/tcp,15003/udp,15001/udp,1023/tcp,1023/udp,8899/tcp,15004/udp,15002/tcp,15003/tcp' and
    outbound = 'yes' and
    provider_id = 'GRID-CERTIFICATION-wan' and
    pool_name = 'Ext'
  )

  # Contextualization plan: the front-end recipe runs in step 1 and the
  # worker-node recipe in step 2.
  contextualize (
    system front configure front step 1
    system wn    configure wn    step 2
  )

  # One front-end plus @input.NumNodes@ worker nodes, all deployed on the
  # provider identified as `ost`.
  deploy front 1 ost
  deploy wn @input.NumNodes@ ost

  # Worker node: a `cldareapd.medium` Linux instance connected only to the
  # private network; logins use the `centos` account.
  system wn (
    instance_type = 'cldareapd.medium' and
    net_interface.0.connection = 'private' and
    disk.0.os.name = 'linux' and
    disk.0.os.credentials.username = 'centos' and
    # Image for CentOS 7
    disk.0.image.url = 'ost://cloud-areapd.pd.infn.it/7544a870-90a6-4409-9a7d-5b376c278688'
  )

  # Front-end: a `cldareapd.small` Linux instance running the Torque queue
  # system. Interface 0 is on the public network; interface 1 is on the
  # private network and is named torqueserver.localdomain.
  system front (
    queue_system = 'torque' and
    instance_type = 'cldareapd.small' and
    net_interface.0.connection = 'public' and
    net_interface.1.connection = 'private' and
    net_interface.1.dns_name = 'torqueserver.localdomain' and
    disk.0.os.name = 'linux' and
    disk.0.os.credentials.username = 'centos' and
    # Image for CentOS 7
    disk.0.image.url = 'ost://cloud-areapd.pd.infn.it/7544a870-90a6-4409-9a7d-5b376c278688'
  )

  # Ansible recipe for the worker nodes: creates the cluster users, installs
  # their SSH keys, installs the Torque MOM/client packages, writes the
  # mom_priv configuration files and (re)starts trqauthd and pbs_mom.
  configure wn (
  @begin

  - tasks:
    # Best effort: the firewalld service may not exist on the image.
    - ignore_errors: true
      command: service firewalld stop

    # Load the per-OS-family variables (service names, TORQUE_PATH),
    # falling back to the RedHat set.
    - include_vars: '{{item}}'
      with_first_found:
      - '{{ ansible_os_family }}.yml'
      - RedHat.yml

    # Loops use the full '{{ USERS }}' syntax: a bare `USERS` is deprecated
    # since Ansible 2.0 and is later treated as the literal string "USERS".
    - name: Create User {{item.name}}
      user: name={{item.name}} password={{item.password}} shell=/bin/bash
      with_items: '{{ USERS }}'

    # The public keys are read from /tmp/<user>_id_rsa.pub on the control
    # machine (the front recipe copies them there).
    - authorized_key: user={{item.name}} key="{{ lookup('file', '/tmp/' + item.name + '_id_rsa.pub') }}"
      name: Add the authorized_key to the user {{item.name}}
      with_items: '{{ USERS }}'

    - include: sudo_copy.yml src=/home/{{item.name}}/.ssh/id_rsa.pub dest=/home/{{item.name}}/.ssh/id_rsa.pub
        owner={{item.name}} group={{item.name}} mode=0644 loop={{USERS}}
      name: Copy the id_rsa.pub file to the user
    - include: sudo_copy.yml src=/home/{{item.name}}/.ssh/id_rsa dest=/home/{{item.name}}/.ssh/id_rsa
        owner={{item.name}} group={{item.name}} mode=0600 loop={{USERS}}
      name: Copy the id_rsa file to the user

    - template: src=utils/templates/ssh_known_hosts.conf dest=/etc/ssh/ssh_known_hosts

    # Package installation, per OS family.
    - apt: name=torque-mom,torque-client update_cache=yes cache_valid_time=3600
      name: Apt install torque mom
      when: ansible_os_family == "Debian"
    - name: create epel.repo
      template: src=utils/templates/epel-es.repo dest=/etc/yum.repos.d/epel.repo
      when: ansible_os_family == "RedHat"
    - name: Yum install Torque in REL system
      when: ansible_os_family == "RedHat"
      yum: name=torque-mom,torque-client,openssh-clients
    - include: munge_repo_wn.yml
      when: ansible_os_family == "RedHat"

    # Torque client/MOM configuration pointing at the front-end.
    - copy: content=torqueserver.localdomain dest=/etc/torque/server_name
      name: Set the Torque server name
    - copy:
        content: |
          $pbsserver torqueserver.localdomain
          $logevent 255
        dest: '{{TORQUE_PATH}}/mom_priv/torque.cfg'
        group: root
        mode: 0644
        owner: root
      name: Create the mom_priv/torque.cfg file
    - copy:
        content: |
          $clienthost torqueserver.localdomain
          $max_conn_timeout_micro_sec 10000
          $mom_host {{IM_NODE_HOSTNAME}}
        dest: '{{TORQUE_PATH}}/mom_priv/config'
        group: root
        mode: 0644
        owner: root
      name: Create the mom_priv/config file
    - copy:
        content: |
          nodes=0
        dest: '{{TORQUE_PATH}}/mom_priv/mom.layout'
        group: root
        mode: 0644
        owner: root
      name: Create the mom_priv/mom.layout file

    # Start trqauthd by hand, bring up pbs_mom, then hand trqauthd over to
    # the service manager and restart pbs_mom.
    - command: /usr/sbin/trqauthd creates=/tmp/trqauthd-unix
    - shell: sleep 5
    - service: name={{MOM_SERVICE}} state=started pattern=/usr/sbin/pbs_mom
    - shell: sleep 5
    # `|| true`: do not fail the play when no trqauthd process is running
    # (the original `kill -9 $(pgrep trqauthd)` errors out in that case).
    - shell: pkill -9 trqauthd || true
    - service: name={{AUTH_SERVICE}} state=started pattern=/usr/sbin/trqauthd
    - service: name={{MOM_SERVICE}} state=restarted pattern=/usr/sbin/pbs_mom

    vars:
      # NOTE(review): password hashes are hard-coded and identical for both
      # accounts; consider supplying them as inputs instead.
      USERS:
      - name: user1
        password: $6$Ehg4GHQT5y$6ZCTLffp.epiNEhS1M3ZB.P6Kii1wELySe/DCwUInGt8r7zgdAHfHw66DuPwpS6pfOiZ9PS/KaTiBKjoCn23t0
      - name: dteam001
        password: $6$Ehg4GHQT5y$6ZCTLffp.epiNEhS1M3ZB.P6Kii1wELySe/DCwUInGt8r7zgdAHfHw66DuPwpS6pfOiZ9PS/KaTiBKjoCn23t0
  @end
  )

  # Ansible recipe for the front-end. Two plays:
  #   1. users/SSH keys, Torque server installation and qmgr configuration;
  #   2. build and start the Maui scheduler, then drop a sample job script.
  configure front (
  @begin

  - tasks:
    # Best effort: the firewalld service may not exist on the image.
    - ignore_errors: true
      command: service firewalld stop

    # Load the per-OS-family variables (service names, TORQUE_PATH),
    # falling back to the RedHat set.
    - include_vars: '{{item}}'
      with_first_found:
      - '{{ ansible_os_family }}.yml'
      - RedHat.yml

    - command: hostname torqueserver.localdomain
    - name: create epel.repo
      template: src=utils/templates/epel-es.repo dest=/etc/yum.repos.d/epel.repo
      when: ansible_os_family == "RedHat"

    # Loops use the full '{{ USERS }}' syntax: a bare `USERS` is deprecated
    # since Ansible 2.0 and is later treated as the literal string "USERS".
    - user: name={{item.name}} password={{item.password}} generate_ssh_key=yes shell=/bin/bash
      with_items: '{{ USERS }}'
    # Stage each generated public key under /tmp, where the authorized_key
    # lookups (here and in the worker-node recipe) read it.
    - local_action: command cp /home/{{item.name}}/.ssh/id_rsa.pub /tmp/{{item.name}}_id_rsa.pub
        creates=/tmp/{{item.name}}_id_rsa.pub
      with_items: '{{ USERS }}'
    - authorized_key: user={{item.name}} key="{{ lookup('file', '/tmp/' + item.name + '_id_rsa.pub') }}"
      name: Add the authorized_key to the user {{item.name}}
      with_items: '{{ USERS }}'
    - template: src=utils/templates/ssh_known_hosts.conf dest=/etc/ssh/ssh_known_hosts

    # Package installation, per OS family.
    - apt: name=torque-server,torque-client,g++,libtorque2-dev,make update_cache=yes
        cache_valid_time=3600
      name: Apt install Torque in Deb system
      when: ansible_os_family == "Debian"
    - name: Yum install Torque in REL system
      when: ansible_os_family == "RedHat"
      yum: name=torque-server,torque-scheduler,torque-client,openssh-clients,gcc-c++,torque-devel,make state=present
    - include: munge_repo_front.yml
      when: ansible_os_family == "RedHat"

    - copy: dest=/etc/torque/server_name content=torqueserver.localdomain group=root mode=0644 owner=root
    # One `vnode-<i>` entry for i = 0 .. NNODES (inclusive).
    - copy:
        content: |
          {% for number in range(0, NNODES|int + 1) %}
          vnode-{{number}} np=2 num_node_boards=1
          {% endfor %}
        dest: /etc/torque/nodes
        group: root
        mode: 0644
        owner: root
      name: create /etc/torque/nodes file
    - file: src=/etc/torque/nodes dest='{{TORQUE_PATH}}/server_priv/nodes' owner=root group=root state=link
      name: create symlink

    # First start: create the server database once (guarded by `creates`).
    - command: /usr/sbin/trqauthd creates=/tmp/trqauthd-unix
      name: starting trqauthd
    - shell: sleep 5
    - command: pbs_server -t create -f creates='{{TORQUE_PATH}}/server_priv/server.lock'
      name: first start of pbs_server
    - shell: sleep 5

    # Server and queue configuration through qmgr.
    - shell: echo "set server operators += root@torqueserver.localdomain" | qmgr
    - shell: echo "set server managers += root@torqueserver.localdomain" | qmgr
    - command: qmgr -c 'create queue batch'
    - command: qmgr -c 'set queue batch queue_type = Execution'
    - command: qmgr -c 'set queue batch resources_default.nodes = 1'
    - command: qmgr -c 'set queue batch enabled = True'
    - command: qmgr -c 'set queue batch started = True'
    - command: qmgr -c 'set server default_queue = batch'
    - command: qmgr -c 'set server scheduling = True'
    - command: qmgr -c 'set server scheduler_iteration = 20'
    - command: qmgr -c 'set server node_check_rate = 40'
    - command: qmgr -c 'set server resources_default.neednodes = 1'
    - command: qmgr -c 'set server resources_default.nodect = 1'
    - command: qmgr -c 'set server resources_default.nodes = 1'
    - command: qmgr -c 'set server query_other_jobs = True'
    - command: qmgr -c 'set server node_pack = False'
    - command: qmgr -c 'set server job_stat_rate = 30'
    - command: qmgr -c 'set server mom_job_sync = True'
    - command: qmgr -c 'set server authorized_users = *@torqueserver.localdomain'
    - command: qmgr -c 'set server poll_jobs = True'
      name: qmgr configuration executed

    # Stop the hand-started server and let the service manager own it.
    - shell: pgrep pbs_server && qterm && sleep 5
      name: pbs_server killed
    - service: name={{TORQUE_SERVICE}} state=started pattern=/usr/sbin/pbs_server enabled=yes

    vars:
      # NOTE(review): password hashes are hard-coded and identical for both
      # accounts; consider supplying them as inputs instead.
      USERS:
      - name: user1
        password: $6$Ehg4GHQT5y$6ZCTLffp.epiNEhS1M3ZB.P6Kii1wELySe/DCwUInGt8r7zgdAHfHw66DuPwpS6pfOiZ9PS/KaTiBKjoCn23t0
      - name: dteam001
        password: $6$Ehg4GHQT5y$6ZCTLffp.epiNEhS1M3ZB.P6Kii1wELySe/DCwUInGt8r7zgdAHfHw66DuPwpS6pfOiZ9PS/KaTiBKjoCn23t0
      NNODES: '@input.NumNodes@'

  - tasks:
    # Build dependencies for Maui.
    - apt: name=libtorque2-dev,subversion
      when: ansible_os_family == "Debian"
    - when: ansible_os_family == "RedHat"
      yum: name=torque-devel,subversion

    # Check out, configure, build and install Maui under /usr/local/maui.
    - subversion: repo=svn://opensvn.adaptivecomputing.com/maui/branches/3.3.1 dest=/tmp/maui
    - command: ./configure --prefix=/usr/local/maui chdir=/tmp/maui/ creates=/tmp/maui/Makefile
    # Fixed: the original read `creates=chdir=/tmp/maui/bin/maui`, a path
    # that can never exist, so the build was repeated on every run.
    - command: make chdir=/tmp/maui creates=/tmp/maui/bin/maui
    - command: make install chdir=/tmp/maui creates=/usr/local/maui/maui.cfg
    - lineinfile: dest=/usr/local/maui/maui.cfg regexp=DEFERTIME line='DEFERTIME 0'
    - lineinfile: dest=/usr/local/maui/maui.cfg regexp=DEFERCOUNT line='DEFERCOUNT 99999'
    - lineinfile: dest=/usr/local/maui/maui.cfg regexp='LOGFILE ' line='LOGFILE /var/log/torque/sched_logs/maui.log'
    - copy:
        content: export PATH="$PATH:/usr/local/maui/bin"
        dest: /etc/profile.d/maui.sh
      name: add maui commands to the bash

    # systemd unit for the Maui scheduler.
    - copy:
        content: |
          [Unit]
          Description=maui-sched
          After=syslog.target network.target trqauthd.service
          [Service]
          EnvironmentFile=/usr/local/maui/maui.cfg
          Type=forking
          ExecStart=/usr/local/maui/sbin/maui --configfile=/usr/local/maui/maui.cfg
          [Install]
          WantedBy=multi-user.target
        dest: /usr/lib/systemd/system/maui.service
        group: root
        mode: 0644
        owner: root
      name: create maui.service file
    - shell: sleep 2
    - service: name={{MAUI_SERVICE}} state=started pattern=/usr/local/maui/sbin/maui
    - file: src=/etc/torque/nodes dest='{{TORQUE_PATH}}/server_priv/nodes' owner=root group=root state=link
      name: create symlink
    - service: name={{TORQUE_SERVICE}} state=restarted pattern=/usr/sbin/pbs_server

    # Sample job script for user dteam001 (its messages are in Italian and
    # are reproduced verbatim).
    - copy:
        content: |
          #!/bin/sh
          # es. qsub -l nodes=2 test.job
          #PBS -S /bin/sh
          #PBS -N ExampleJob
          #PBS -l walltime=00:01:00
          #PBS -q batch
          #PBS -o risultato
          #PBS -e errori
          DATE=`date`
          echo "$DATE"
          sleep 5
          echo "Ci sono una serie di cose interessanti che ti interessera sapere"
          echo "Questo job stato identificato come $PBS_JOBID e si chiama $PBS_JOBNAME"
          echo "e stato inserito inizialmente nella coda $PBS_O_QUEUE"
          echo "ed e stato eseguito sulla coda $PBS_QUEUE"
          echo "E stato sottoposto dalla macchina: $PBS_O_HOST"
          echo "E stato eseguito sulla macchina: `hostname`"
          date
          echo ""
        dest: '/home/dteam001/test.job'
        mode: 0755
        owner: dteam001
      name: Create the test script
  @end
  )

  # Included task list (sudo_copy.yml): copies a file that needs sudo to be
  # read on the control machine. It stages the file under /tmp with sudo,
  # makes it readable, then copies it to `dest` on the target.
  # Parameters: src, dest, and optionally owner, group, mode, loop.
  configure sudo_copy (
  @begin

  # Stage the source file as /tmp/._sudo_cp_<basename>.
  - local_action: command sudo cp {{src}} /tmp/._sudo_cp_{{ src | basename }}
    with_items: loop|default([0])

  # Make the staged copy readable.
  - local_action: command sudo chmod +r /tmp/._sudo_cp_{{ src | basename }}
    with_items: loop|default([0])

  # Push the staged copy to its destination with the requested ownership.
  - copy:
      src: /tmp/._sudo_cp_{{ src | basename }}
      dest: '{{ dest }}'
      owner: '{{ owner | default(None) }}'
      group: '{{ group | default(None) }}'
      mode: '{{ mode | default(None) }}'
    with_items: loop|default([0])

  @end
  )

  # Included task list (munge_repo_front.yml): installs munge, writes its
  # key, fixes key and log-directory permissions and restarts the service.
  configure munge_repo_front (
  @begin

  - apt: name=munge update_cache=yes cache_valid_time=3600
    when: ansible_os_family == "Debian"
  - when: ansible_os_family == "RedHat"
    yum: name=munge

  # NOTE(security): the key is derived from the fixed string "foo", so it is
  # the same, publicly known value on every deployment. It is left unchanged
  # here because munge_repo_wn must produce the identical key; replace both
  # together with a per-deployment secret.
  - shell: echo -n "foo" | sha512sum | cut -d' ' -f1 >/etc/munge/munge.key

  # Ownership and mode set with the `file` module instead of shelling out to
  # `chown munge.munge` / `chmod 400` (same end state, idempotent).
  - file: path=/etc/munge/munge.key owner=munge group=munge mode=0400
  - file: path=/var/log/munge state=directory mode=0700
  - file: path=/var/log state=directory mode=0755
  - service: name=munge state=restarted

  @end
  )

  # Included task list (munge_repo_wn.yml): installs munge, writes its key,
  # fixes key and log-directory permissions and restarts the service.
  configure munge_repo_wn (
  @begin

  - apt: name=munge update_cache=yes cache_valid_time=3600
    when: ansible_os_family == "Debian"
  - when: ansible_os_family == "RedHat"
    yum: name=munge

  # NOTE(security): the key is derived from the fixed string "foo", so it is
  # the same, publicly known value on every deployment. It is left unchanged
  # here because munge_repo_front must produce the identical key; replace
  # both together with a per-deployment secret.
  - shell: echo -n "foo" | sha512sum | cut -d' ' -f1 >/etc/munge/munge.key

  # Ownership and mode set with the `file` module instead of shelling out to
  # `chown munge.munge` / `chmod 400` (same end state, idempotent).
  - file: path=/etc/munge/munge.key owner=munge group=munge mode=0400
  - file: path=/var/log/munge state=directory mode=0700
  - file: path=/var/log state=directory mode=0755
  - service: name=munge state=restarted

  @end
  )

  # Variables loaded through include_vars when ansible_os_family is Debian.
  configure Debian (
  @begin

  MOM_SERVICE: torque-mom
  SCHED_SERVICE: torque-scheduler
  TORQUE_PATH: /var/spool/torque
  TORQUE_SERVICE: torque-server
  AUTH_SERVICE: trqauthd
  # Added for parity with the RedHat set: the front recipe references
  # {{MAUI_SERVICE}} on every OS family and installs a unit named
  # maui.service, so the variable must be defined here as well.
  MAUI_SERVICE: maui
  FIREWALL_SERVICE: iptables

  @end
  )

  # Variables loaded through include_vars when ansible_os_family is RedHat;
  # also the fallback set named in the recipes' with_first_found lists.
  configure RedHat (
  @begin

  # Base directory used by the recipes for mom_priv/ and server_priv/.
  TORQUE_PATH: /var/lib/torque

  # Service names.
  TORQUE_SERVICE: pbs_server
  MOM_SERVICE: pbs_mom
  SCHED_SERVICE: pbs_sched
  AUTH_SERVICE: trqauthd
  MAUI_SERVICE: maui
  FIREWALL_SERVICE: firewalld

  @end
  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement