# provision_headnode.yml
---
# Play 1: runs on the control node only, without fact gathering. It imports
# tasks/add_headnode_inventory.yml, which presumably registers the head node
# in the in-memory inventory so the next play can target the `headnode`
# group (task file not shown here -- confirm).
- name: Add the head node to the inventory
  hosts: localhost
  gather_facts: false
  vars_files:
    - ./vars/main.yml
  tasks:
    - import_tasks: tasks/add_headnode_inventory.yml
# Play 2: configure the head node. Applies the NFS and NTP roles, then
# installs packages and sets up firewalld, MariaDB, the Slurm controller and
# accounting daemons, munge, and rsyslog. Runs with privilege escalation.
# Variables such as cluster_network_cidr, install_intel_oneapi, mysql_user
# and mysql_password are expected to come from vars/main.yml (not shown).
- name: Provision the head node
  hosts: headnode
  become: true
  vars_files:
    - ./vars/main.yml

  roles:
    # The two geerlingguy.nfs entries are mutually exclusive, selected by
    # install_intel_oneapi. The second one additionally exports /opt/intel
    # read-only.
    - role: geerlingguy.nfs
      vars:
        nfs_exports:
          - "/home {{ cluster_network_cidr }}(rw,sync,no_root_squash)"
          - "/opt/ohpc/pub {{ cluster_network_cidr }}(rw,sync,no_root_squash)"
      when: not install_intel_oneapi

    - role: geerlingguy.nfs
      vars:
        nfs_exports:
          - "/home {{ cluster_network_cidr }}(rw,sync,no_root_squash)"
          - "/opt/ohpc/pub {{ cluster_network_cidr }}(rw,sync,no_root_squash)"
          - "/opt/intel {{ cluster_network_cidr }}(ro,no_root_squash)"
      when: install_intel_oneapi

    # Time synchronisation through chronyd against the listed NTP servers.
    - role: geerlingguy.ntp
      vars:
        ntp_daemon: chronyd
        ntp_timezone: "America/New_York"
        ntp_enabled: true
        ntp_config_file: /etc/chrony.conf
        ntp_manage_config: true
        ntp_servers:
          - "ntp0.cac.cornell.edu iburst"
          - "ntp1.cac.cornell.edu iburst"
          - "ntp2.cac.cornell.edu iburst"
        ntp_cron_handler_enabled: true

  # Handlers run once at the end of the play, in the order listed here
  # (not in notification order). munge is listed first so it is restarted
  # before the Slurm daemons.
  handlers:
    - name: restart munge
      service:
        name: munge
        state: restarted
        enabled: true

    - name: restart slurmdbd
      service:
        name: slurmdbd
        state: restarted
        enabled: true

    - name: restart slurmctld
      service:
        name: slurmctld
        state: restarted
        enabled: true

    - name: restart rsyslog
      service:
        name: rsyslog
        state: restarted
        enabled: true

    - name: restart mariadb
      service:
        name: mariadb
        state: restarted
        enabled: true

  tasks:
    - import_tasks: tasks/install_headnode_packages.yml

    - import_tasks: tasks/install_openstack_client.yml

    - import_tasks: tasks/install_oneapi.yml
      when: install_intel_oneapi

    # Adds soft and hard "memlock unlimited" limits for all users, inserted
    # ahead of the "# End of file" marker line in limits.conf.
    - name: Increase memlock
      blockinfile:
        path: /etc/security/limits.conf
        insertbefore: '# End of file'
        block: |
          * soft memlock unlimited
          * hard memlock unlimited

    - name: Make sure firewall is running
      service:
        name: firewalld
        state: started
        enabled: true

    # Puts the whole cluster network into the "trusted" zone, both in the
    # running firewall (immediate) and in the saved configuration (permanent).
    - name: Allow incoming traffic from cluster network
      ansible.posix.firewalld:
        source: "{{ cluster_network_cidr }}"
        zone: trusted
        state: enabled
        permanent: true
        immediate: true

    - name: Upload cluster-env script and profiles
      copy:
        src: "{{ item.src }}"
        dest: "{{ item.dest }}"
        owner: root
        group: root
        mode: '0755'
      loop:
        - src: cluster-env.sh
          dest: /etc/profile.d/cluster-env.sh
        - src: cluster-env.csh
          dest: /etc/profile.d/cluster-env.csh
        - src: cluster-env
          dest: /usr/bin/cluster-env

    # Owned by the slurm account, which must already exist at this point
    # (presumably created by the package installation above -- confirm).
    - name: Create /var/log/slurm
      file:
        path: /var/log/slurm
        owner: slurm
        group: slurm
        mode: '0700'
        state: directory

    # Readable by the slurm user only.
    - name: Copy openrc file
      template:
        src: openrc.sh.j2
        dest: /etc/slurm/openrc.sh
        owner: slurm
        group: slurm
        mode: '0600'

    - name: Install MariaDB
      dnf:
        name:
          - mariadb-server
        state: present

    - name: Start MariaDB
      service:
        name: mariadb
        state: started
        enabled: true

    - name: Install PyMySQL
      package:
        name: python3-PyMySQL
        state: present

    # Mode 0600 rather than world-readable: this file presumably carries the
    # MySQL client credentials used by the community.mysql tasks below
    # (contents of files/my.cnf are not shown -- confirm).
    - name: Copy .my.cnf
      copy:
        src: files/my.cnf
        dest: /root/.my.cnf
        owner: root
        group: root
        mode: '0600'

    - name: Create slurm_acct_db MySQL database
      community.mysql.mysql_db:
        name: slurm_acct_db
        state: present

    - name: Create slurm user in MySQL database
      community.mysql.mysql_user:
        name: "{{ mysql_user }}"
        host: localhost
        password: "{{ mysql_password }}"
        priv:
          'slurm_acct_db.*': 'ALL,GRANT'
        state: present

    - name: Configure slurmdbd
      template:
        src: slurmdbd.conf.j2
        dest: /etc/slurm/slurmdbd.conf
        owner: slurm
        group: slurm
        mode: '0600'
      notify: restart slurmdbd

    - name: Copy slurm.conf
      template:
        src: slurm.conf.j2
        dest: /etc/slurm/slurm.conf
        owner: root
        group: root
        mode: '0644'
      notify: restart slurmctld

    - name: Copy slurm_resume.sh
      template:
        src: slurm_resume.sh.j2
        dest: /usr/local/sbin/slurm_resume.sh
        owner: root
        group: root
        mode: '0755'

    - name: Copy slurm_suspend.sh
      template:
        src: slurm_suspend.sh.j2
        dest: /usr/local/sbin/slurm_suspend.sh
        owner: root
        group: root
        mode: '0755'

    # Notifies a restart so that a changed thread count is also applied when
    # munge is already running (the start task below only acts on a stopped
    # service).
    - name: Increase the number of munge daemons to 10
      copy:
        dest: /etc/sysconfig/munge
        owner: root
        group: root
        mode: '0644'
        content: |
          DAEMON_ARGS="--key-file /etc/munge/munge.key --num-threads 10"
      notify: restart munge

    # Started in list order: munge, then slurmdbd, then slurmctld.
    - name: Start slurmctld, slurmdbd, and munge
      service:
        name: "{{ item }}"
        state: started
        enabled: true
      loop:
        - munge
        - slurmdbd
        - slurmctld

    # Copies the key to the control node at ./files/munge.key; flat keeps the
    # destination path exactly as given instead of nesting it under the
    # host name.
    - name: Gather munge.key
      fetch:
        src: /etc/munge/munge.key
        dest: ./files/munge.key
        flat: true

    # Loads the UDP syslog input on port 514. EOF is a special value only for
    # insertafter; with insertbefore it would be treated as a regex.
    - name: Configure rsyslog to accept syslog from compute nodes
      blockinfile:
        path: /etc/rsyslog.d/ohpc.conf
        insertafter: EOF
        create: true
        state: present
        block: |
          module(load="imudp")
          input(type="imudp" port="514")
      notify: restart rsyslog