Script to automate VM deactivation
[invirt/scripts/outage.git] / invirt-deactivate
1 #!/usr/bin/env python3
2
# Sample invocation: PYTHONPATH=build/lib/ kdo quentin/root python3.8 ~/Documents/MIT/SIPB/XVM/invirt-deactivate --uri postgresql://postgres:@localhost:1235/invirt --remote-host xvm-remote.mit.edu --force 2>&1 | tee shutdown-20201020.log
4
5 import argparse
6
7 from invirt import database
8 from invirt.database import record, models
9 import hesiod
10 import time
11 from sqlalchemy import func
12 from sqlalchemy import *
13 from sqlalchemy import orm
14 from sqlalchemy.orm import create_session, relation
15 from subprocess import check_call, check_output
16 import yaml
17
# Table holding the result of the Hesiod filsys lookup for each VM owner,
# one row per locker name. main() creates it on demand (checkfirst=True)
# and refreshes the rows on every run.
lockers_table = Table(
    "lockers",
    models.meta,
    # Locker name; main() fills it from Machine.owner.
    Column("name", String, primary_key=True, nullable=False),
    # Filsys type reported by Hesiod; main() stores None (NULL) when the
    # lookup raises FileNotFoundError.
    Column("type", Enum("AFS", "ERR", name="locker_type")),
    # Optional message from the filsys entry (reported for ERR lockers).
    Column("message", String),
)
24
class Locker(record.Record):
    """ORM record for one row of ``lockers_table``."""

    # NOTE(review): presumably the attribute record.Record uses to identify
    # an instance -- confirm against invirt.database.record.
    _identity_field = "name"


# Map the Locker class onto the lockers table through invirt's mapper helper.
models.mapper(Locker, lockers_table)
29
def print_list(l):
    """Print every item of *l* on its own line, ordered by ``str(item)``.

    An item that is iterable (and is not ``str``/``bytes``) is printed with
    its elements separated by tabs; any other item is printed as one field.
    """
    for entry in sorted(l, key=str):
        single_field = (
            not hasattr(entry, '__iter__') or isinstance(entry, (str, bytes))
        )
        fields = (entry,) if single_field else entry
        print("\t".join(map(str, fields)))
35
def _remctl_control_all(remote, machines, command):
    """Run ``remctl <remote> control <name> <command>`` for each machine.

    Each command line is printed before it is executed. A failing command
    is reported with "Failed." and the loop moves on to the next machine.
    """
    for m in machines:
        c = ['remctl', remote, 'control', m.name, command]
        print(' '.join(c))
        try:
            check_call(c)
        except Exception:
            # Best effort per VM, but deliberately not a bare ``except:``:
            # KeyboardInterrupt / SystemExit must propagate so the operator
            # can still abort the run with Ctrl-C.
            print("Failed.")


def main():
    """Report VMs that look abandoned and, with ``--force``, shut them down.

    Steps, in order:

    1. Connect to the database given by ``--uri`` and create the lockers
       table if it does not exist yet.
    2. For every distinct ``Machine.owner``, look the name up in Hesiod and
       store the filsys type/message in its ``Locker`` row (both ``None``
       when the lookup raises ``FileNotFoundError``).
    3. Print the machines with no ACL entries, the machines whose locker is
       missing, and the machines whose locker has type ``ERR``.
    4. Ask the remote host (``remctl <remote> web listvms``) which VMs are
       running and print the machines that have no admin users AND a
       broken locker, plus the running subset that would be shut down.
    5. Ask for confirmation; anything other than ``yes`` aborts.
    6. With ``--force``: mark those machines ``adminable``, commit, send
       ``shutdown`` to each, wait 30 seconds, then send ``destroy``.

    Raises:
        Any exception from the database, Hesiod (other than
        ``FileNotFoundError``) or the ``listvms`` call propagates.
    """
    parser = argparse.ArgumentParser(
        description='Report VMs with no admin users and a broken locker, '
                    'and optionally shut them down')

    parser.add_argument('-u', '--uri', type=str, dest='uri',
                        help='Database URI (e.g. postgresql://postgres:@localhost:1234/invirt)')
    parser.add_argument('-r', '--remote-host', type=str, dest='remote',
                        default='xvm-remote-dev.mit.edu',
                        help='Remote host')
    parser.add_argument('-f', '--force', action='store_true', dest='force',
                        help='Shut VMs down')

    args = parser.parse_args()

    database.connect(args.uri)

    # --- Refresh the locker table from Hesiod ------------------------------
    database.session.begin()

    lockers_table.create(checkfirst=True)

    owners = database.session.query(database.Machine.owner).distinct()
    for o, in owners:
        locker = Locker.query.get(o)
        if not locker:
            locker = Locker(name=o)
        try:
            fs = hesiod.FilsysLookup(o).filsys
            locker.type = fs[0]['type']
            locker.message = fs[0].get('message')
        except FileNotFoundError:
            # No Hesiod entry for this owner: record the locker as missing.
            locker.type = None
            locker.message = None
        except Exception:
            # Name the owner that broke the lookup, then let the error abort
            # the run (the transaction is never committed).
            print("Error looking up", o)
            raise
    database.session.commit()

    # --- Report ------------------------------------------------------------
    database.session.begin()
    machines_no_access = database.session.query(database.Machine).filter(
        ~database.Machine.acl.any()).all()

    # ``== None`` is intentional: SQLAlchemy turns it into ``IS NULL``.
    machines_no_locker = database.session.query(database.Machine).join(
        Locker, database.Machine.owner == Locker.name
    ).filter(Locker.type == None).all()
    machines_err_locker = database.session.query(
        database.Machine, Locker.message
    ).join(
        Locker, database.Machine.owner == Locker.name
    ).filter(Locker.type == 'ERR').all()

    print("Machines with no admin users:\n")
    print_list(machines_no_access)
    print("\nMachines with missing locker:\n")
    print_list(machines_no_locker)
    print("\nMachines with ERR locker:\n")
    print_list(machines_err_locker)

    listvms = yaml.safe_load(check_output(
        ['remctl', args.remote, 'web', 'listvms'],
    ))

    # safe_load() returns None for an empty document (no VMs reported);
    # treat that as "nothing running" instead of crashing in set(None).
    # NOTE(review): assumes the listvms YAML is keyed by VM name -- confirm.
    machines_running = set(listvms or ())
    machines_broken_locker = (
        set(machines_no_locker) | {m for m, _msg in machines_err_locker}
    )
    machines_deactivate = set(machines_no_access) & machines_broken_locker
    print("\nMachines with no admin users AND broken locker:\n")
    print_list(machines_deactivate)
    names_to_shut_down = machines_running & {m.name for m in machines_deactivate}
    machines_to_shut_down = database.Machine.query.filter(
        database.Machine.name.in_(names_to_shut_down)).all()
    print("\nMachines to shutdown%s:\n" % (" (WILL SHUTDOWN)" if args.force else ""))
    print_list(machines_to_shut_down)

    if input("Are you sure (yes/NO)?") != "yes":
        # Close the transaction opened above instead of abandoning it.
        database.session.rollback()
        return

    if args.force:
        for m in machines_to_shut_down:
            m.adminable = True

    database.session.commit()

    if args.force:
        _remctl_control_all(args.remote, machines_to_shut_down, 'shutdown')
        print("Waiting 30 seconds for VMs to exit")
        time.sleep(30)
        # Destroy whatever did not exit on its own after the grace period.
        _remctl_control_all(args.remote, machines_to_shut_down, 'destroy')
123
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()