Ignore VMs that have administrator set to a deactivated user, even though that causes...
[invirt/scripts/outage.git] / invirt-deactivate
1 #!/usr/bin/env python3
2
3 # Sample invocation: PYTHONPATH=build/lib/ kdo quentin/root python3.8 ~/Documents/MIT/SIPB/XVM/invirt-deactivate --uri postgresql://postgres:@localhost:1235/invirt --remote xvm-remote.mit.edu --force 2>&1 | tee shutdown-20201020.log
4
5 import argparse
6
7 from invirt import database
8 from invirt.database import record, models
9 import hesiod
10 import time
11 from sqlalchemy import func
12 from sqlalchemy import *
13 from sqlalchemy import orm
14 from sqlalchemy.orm import create_session, relation
15 from subprocess import check_call, check_output
16 import yaml
17
# Table holding, per locker name, the filsys 'type' and 'message' that main()
# reads from Hesiod. main() creates it on demand (create(checkfirst=True)).
lockers_table = Table(
    "lockers",
    models.meta,
    Column("name", String, primary_key=True, nullable=False),
    Column("type", Enum("AFS", "ERR", name="locker_type")),
    Column("message", String),
)
24
class Locker(record.Record):
    """Record class mapped onto ``lockers_table`` (one row per locker name)."""

    # NOTE(review): presumably tells record.Record which column identifies a
    # row; 'name' is also the table's primary key. Confirm against
    # invirt.database.record.
    _identity_field = "name"


# Register the ORM mapping between the Locker class and the lockers table.
models.mapper(Locker, lockers_table)
29
def print_list(l):
    """Print every item of *l* on its own line, ordered by string form.

    An item that is iterable (and is not a ``str``/``bytes``) is printed as
    tab-separated fields, one per element; any other item is printed as a
    single field. Every field is converted with ``str()``.
    """
    for entry in sorted(l, key=str):
        # Strings and bytes are iterable but must be treated as one field.
        is_single = isinstance(entry, (str, bytes)) or not hasattr(entry, '__iter__')
        fields = (entry,) if is_single else entry
        print("\t".join(map(str, fields)))
35
def _remctl_control_all(remote, machines, action):
    """Run ``remctl <remote> control <name> <action>`` for each machine.

    Best effort: a command that fails is reported and the loop continues
    with the next machine.
    """
    for m in machines:
        c = ['remctl', remote, 'control', m.name, action]
        print(' '.join(c))
        try:
            check_call(c)
        except Exception as e:
            # Still best effort, but unlike a bare ``except:`` this does not
            # swallow KeyboardInterrupt/SystemExit, so the operator can abort
            # a mass shutdown with Ctrl-C.
            print("Failed: %s" % e)


def main():
    """Report VMs with no usable admins and a broken owner locker; optionally shut them down.

    Steps:
      1. Refresh the ``lockers`` table from Hesiod for every distinct
         machine owner (type/message, or NULLs when the lookup is not found).
      2. Print machines with no admin users, machines whose owner locker is
         missing, and machines whose owner locker has type ``ERR``.
      3. Print the intersection (no admins AND broken locker) and the subset
         of those that ``remctl <remote> web listvms`` reports.
      4. Ask for confirmation (asked whether or not ``--force`` is given).
      5. With ``--force``: set ``adminable`` on each machine to shut down,
         commit, then issue ``shutdown``, wait 30 seconds, and issue
         ``destroy`` through remctl.
    """
    parser = argparse.ArgumentParser(
        description='Find VMs with no admin users and a missing or ERR '
                    'owner locker, and optionally shut them down')

    parser.add_argument('-u', '--uri', type=str, dest='uri',
                        help='Database URI (e.g. postgresql://postgres:@localhost:1234/invirt)')
    parser.add_argument('-r', '--remote-host', type=str, dest='remote',
                        default='xvm-remote-dev.mit.edu',
                        help='Remote host')
    parser.add_argument('-f', '--force', action='store_true', dest='force',
                        help='Shut VMs down')

    args = parser.parse_args()

    database.connect(args.uri)
    database.session.begin()

    lockers_table.create(checkfirst=True)

    # Step 1: record the current Hesiod filsys state of every owner locker.
    owners = database.session.query(database.Machine.owner).distinct()
    for o, in owners:
        l = Locker.query.get(o)
        if not l:
            l = Locker(name=o)
        try:
            fs = hesiod.FilsysLookup(o).filsys
            l.type = fs[0]['type']
            l.message = fs[0].get('message')
        except FileNotFoundError:
            # No Hesiod entry: keep the row but mark the locker as missing.
            l.type = None
            l.message = None
        except Exception:
            print("Error looking up", o)
            raise
    database.session.commit()

    database.session.begin()
    # Machine ids that have at least one access entry whose user is NOT a
    # locker of type ERR (no Locker row, NULL type, or any other type).
    # ``== None`` is intentional: SQLAlchemy turns it into ``IS NULL``.
    acl_locker_no_err = (
        database.session.query(database.MachineAccess.machine_id)
        .outerjoin(Locker, database.MachineAccess.user == Locker.name)
        .filter((Locker.type == None) | (Locker.type != "ERR"))
        .group_by(database.MachineAccess.machine_id)
        .subquery()
    )
    # Machines absent from that subquery: every access entry (if any) points
    # at an ERR locker.
    machines_no_access = (
        database.session.query(database.Machine)
        .outerjoin(acl_locker_no_err)
        .filter(acl_locker_no_err.c.machine_id == None)
        .all()
    )

    machines_no_locker = (
        database.session.query(database.Machine)
        .join(Locker, database.Machine.owner == Locker.name)
        .filter(Locker.type == None)
        .all()
    )
    machines_err_locker = (
        database.session.query(database.Machine, Locker.message)
        .join(Locker, database.Machine.owner == Locker.name)
        .filter(Locker.type == 'ERR')
        .all()
    )

    print("Machines with no admin users:\n")
    print_list(machines_no_access)
    print("\nMachines with missing locker:\n")
    print_list(machines_no_locker)
    print("\nMachines with ERR locker:\n")
    print_list(machines_err_locker)

    listvms = yaml.safe_load(check_output(
        ['remctl', args.remote, 'web', 'listvms'],
    ))

    # safe_load returns None for empty output; treat that as "nothing running"
    # instead of crashing in set(None).
    machines_running = set(listvms or ())
    machines_broken_locker = set(machines_no_locker) | set(m for (m, msg) in machines_err_locker)
    machines_deactivate = set(machines_no_access) & machines_broken_locker
    print("\nMachines with no admin users AND broken locker:\n")
    print_list(machines_deactivate)
    names_to_shut_down = sorted(
        machines_running & set(m.name for m in machines_deactivate))
    machines_to_shut_down = database.Machine.query.filter(
        database.Machine.name.in_(names_to_shut_down)).all()
    print("\nMachines to shutdown%s:\n" % (" (WILL SHUTDOWN)" if args.force else ""))
    print_list(machines_to_shut_down)

    if input("Are you sure (yes/NO)?") != "yes":
        return

    if args.force:
        for m in machines_to_shut_down:
            # NOTE(review): presumably flags the VM so administrators may
            # control it before the remctl calls below -- confirm.
            m.adminable = True

    database.session.commit()

    if args.force:
        _remctl_control_all(args.remote, machines_to_shut_down, 'shutdown')
        print("Waiting 30 seconds for VMs to exit")
        time.sleep(30)
        # Destroy whatever did not exit in response to the shutdown request.
        _remctl_control_all(args.remote, machines_to_shut_down, 'destroy')
124
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()