"""Hardware discovery service for Ironic.

Exposes two HTTP endpoints through Flask:

* ``POST /start`` - takes a JSON list of node UUIDs, validates them and
  powers the nodes on so that they boot the discovery ramdisk.
* ``POST /continue`` - takes the JSON data posted back by the ramdisk and
  stores it on the matching Ironic node.

A background thread periodically rebuilds an iptables chain that filters
DHCP requests (UDP port 67) on ``Firewall.INTERFACE`` by source MAC.
"""

import logging
import os
import re
from subprocess import call, check_call
import threading
import time

from flask import Flask, request

from ironicclient import client, exceptions


app = Flask(__name__)
LOG = logging.getLogger("discoverd")
# Keyword arguments for the Ironic client, taken from OS_* environment
# variables with the names lower-cased (e.g. OS_USERNAME -> os_username).
OS_ARGS = dict((k.lower(), v)
               for (k, v) in os.environ.items()
               if k.startswith('OS_'))
ALLOW_SEARCH_BY_MAC = True

try:
    _STRING_TYPES = (str, unicode)
except NameError:  # Python 3: there is no separate unicode type
    _STRING_TYPES = (str,)

# \Z rather than $ so that a trailing newline is not silently accepted.
_MAC_RE = re.compile(r"[0-9a-f]{2}(:[0-9a-f]{2}){5}\Z")


def is_valid_mac(address):
    """Check whether ``address`` is a colon-separated MAC address string.

    The check is case-insensitive.

    :param address: value to check, may be of any type
    :returns: a truthy value (the regex match object) if ``address`` is a
              string of six colon-separated hex octets, a falsy value
              otherwise
    """
    return (isinstance(address, _STRING_TYPES)
            and _MAC_RE.match(address.lower()))


def _find_node_by_bmc(ironic, address):
    """Return the node in maintenance whose IPMI address is ``address``.

    Logs an error and returns None when no such node is found.
    """
    # TODO(dtantsur): bulk loading
    nodes = ironic.node.list(maintenance=True, limit=0,
                             sort_key='created_at',
                             sort_dir='desc', detail=True)
    for node in nodes:
        if node.driver_info.get('ipmi_address') == address:
            return node

    LOG.error('Unable to find node with ipmi_address %s', address)
    return None


def _find_node_by_macs(ironic, macs):
    """Return the node owning the first of ``macs`` known to Ironic.

    Logs an error and returns None when none of the MACs has a port or the
    node of the port found does not exist.
    """
    # In case of testing with vms and pxe_ssh driver
    LOG.warning('No BMC address provided, trying to use MAC '
                'addresses for finding node')
    port = None
    for mac in macs:
        try:
            port = ironic.port.get_by_address(mac)
        except exceptions.NotFound:
            continue
        else:
            break

    node = None
    if port is not None:
        try:
            node = ironic.node.get(port.node_uuid)
        except exceptions.NotFound:
            node = None

    if node is None:
        LOG.error('Unable to find node with macs %s', macs)
    return node


def process(node_info):
    """Process data from discovery ramdisk.

    Validates the data, finds the matching Ironic node (by BMC address or,
    if allowed, by MAC), fills in the node properties that are not set yet,
    creates ports for the reported MACs, refreshes the firewall and powers
    the node off. Every failure is logged and aborts processing; nothing is
    raised for invalid data.

    :param node_info: dict with keys ``cpus``, ``cpu_arch``, ``memory_mb``,
                      ``local_gb``, ``macs`` and optionally
                      ``ipmi_address`` and ``error``
    """
    if node_info.get('error'):
        LOG.error('Error happened during discovery: %s',
                  node_info['error'])
        return

    keys = ('cpus', 'cpu_arch', 'memory_mb', 'local_gb', 'macs')
    missing = [key for key in keys if not node_info.get(key)]
    if missing:
        LOG.error('The following required parameters are missing: %s',
                  missing)
        return

    # Valid MACs are normalized to lower case; invalid entries are collected
    # as-is, so that differing only in case is not reported as invalid and
    # unhashable junk in the input cannot break the report.
    valid_macs = []
    invalid_macs = []
    for mac in node_info['macs']:
        if is_valid_mac(mac):
            valid_macs.append(mac.lower())
        else:
            invalid_macs.append(mac)

    if invalid_macs:
        LOG.warning('The following MACs were invalid in discovery data '
                    'for node with BMC %(ipmi_address)s and were '
                    'excluded: %(invalid)s',
                    {'invalid': invalid_macs,
                     'ipmi_address': node_info.get('ipmi_address')})

    LOG.info('Discovery data received from node with BMC '
             '%(ipmi_address)s: CPUs: %(cpus)s %(cpu_arch)s, '
             'memory %(memory_mb)s MiB, disk %(local_gb)s GiB, '
             'macs %(macs)s',
             dict((key, node_info.get(key))
                  for key in keys + ('ipmi_address',)))

    ironic = client.get_client(1, **OS_ARGS)
    if node_info.get('ipmi_address'):
        node = _find_node_by_bmc(ironic, node_info['ipmi_address'])
    elif ALLOW_SEARCH_BY_MAC:
        node = _find_node_by_macs(ironic, valid_macs)
    else:
        LOG.error('No ipmi_address provided and searching by MAC is not '
                  'allowed')
        return

    if node is None:
        # The lookup helper has already logged the reason.
        return

    if not node.maintenance:
        LOG.error('Refusing to apply discovered data to node %s '
                  'which is not in maintenance state', node.uuid)
        return

    patch = [{'op': 'add', 'path': '/extra/newly_discovered',
              'value': 'true'}]
    existing = node.properties
    # Only fill in properties that are not set on the node yet.
    for key in ('cpus', 'cpu_arch', 'memory_mb', 'local_gb'):
        if not existing.get(key):
            patch.append({'op': 'add', 'path': '/properties/%s' % key,
                          'value': str(node_info[key])})
    ironic.node.update(node.uuid, patch)

    for mac in valid_macs:
        try:
            ironic.port.create(node_uuid=node.uuid, address=mac)
        except exceptions.Conflict:
            LOG.warning('MAC %(mac)s appeared in discovery data for '
                        'node %(node)s, but already exists in '
                        'database for another node - skipping',
                        {'mac': mac, 'node': node.uuid})

    LOG.info('Node %s was updated with data from discovery process, forcing '
             'power off', node.uuid)

    Firewall.unwhitelist_macs(valid_macs)
    Firewall.update_filters(ironic)

    ironic.node.set_power_state(node.uuid, 'off')


class Firewall(object):
    """iptables-based filter of DHCP requests by source MAC.

    MACs of ports known to Ironic are dropped unless they were explicitly
    white-listed for discovery; everything else is accepted. All state is
    kept at class level and guarded by ``LOCK``.
    """

    # Guards MACS_DISCOVERY and serializes chain rebuilds.
    LOCK = threading.RLock()
    # MACs known to Ironic that must nevertheless not be dropped.
    MACS_DISCOVERY = set()
    # Scratch chain that is built first and then renamed to CHAIN.
    NEW_CHAIN = 'discovery_temp'
    CHAIN = 'discovery'
    INTERFACE = 'br-ctlplane'

    @staticmethod
    def _iptables(*args, **kwargs):
        """Run ``iptables`` with the given arguments.

        :param args: command line arguments for iptables
        :param kwargs: passed to ``subprocess.call``/``check_call``, except
                       for ``ignore``: when true, a failure is only logged
        :returns: True/False for success/failure when ``ignore`` is true,
                  otherwise the result of ``check_call``
        :raises: whatever ``check_call`` raises when ``ignore`` is false
        """
        cmd = ('iptables',) + args
        LOG.debug('Running iptables %s', args)
        if kwargs.pop('ignore', False):
            if call(cmd, **kwargs):
                LOG.warning('iptables failed: %s', args)
                return False
            else:
                return True
        else:
            try:
                return check_call(cmd, **kwargs)
            except Exception:
                LOG.error('iptables failed: %s', args)
                raise

    @classmethod
    def init(cls):
        """Remove leftovers of a previous run and create an empty CHAIN."""
        cls._iptables('-F', cls.NEW_CHAIN, ignore=True)
        cls._iptables('-X', cls.NEW_CHAIN, ignore=True)
        cls._iptables('-D', 'INPUT', '-i', cls.INTERFACE, '-p', 'udp',
                      '--dport', '67', '-j', cls.CHAIN,
                      ignore=True)  # may be missing on first run
        cls._iptables('-F', cls.CHAIN, ignore=True)
        cls._iptables('-X', cls.CHAIN, ignore=True)
        # Code expects it to exist
        cls._iptables('-N', cls.CHAIN)

    @classmethod
    def whitelist_macs(cls, macs):
        """Exempt ``macs`` from black-listing on following filter updates."""
        with cls.LOCK:
            cls.MACS_DISCOVERY.update(macs)

    @classmethod
    def unwhitelist_macs(cls, macs):
        """Stop exempting ``macs`` from black-listing."""
        with cls.LOCK:
            cls.MACS_DISCOVERY.difference_update(macs)

    @classmethod
    def update_filters(cls, ironic):
        """Rebuild the DHCP filtering chain from the current Ironic ports.

        The new rules are assembled in NEW_CHAIN, which is hooked into INPUT
        before the old CHAIN is removed and NEW_CHAIN renamed to CHAIN.

        :param ironic: Ironic client used to list ports
        :raises: whatever ``_iptables`` raises if a mandatory command fails
        """
        # NOTE(review): if a command fails after NEW_CHAIN was created, the
        # chain is left behind and the '-N' below looks like it will fail
        # on every later call until init() is run again - confirm.
        with cls.LOCK:
            macs_active = set(p.address for p in ironic.port.list(limit=0))
            to_blacklist = macs_active - cls.MACS_DISCOVERY

            # Operate on temporary chain
            cls._iptables('-N', cls.NEW_CHAIN)
            # - Blacklist active macs, so that nova can boot them
            for mac in to_blacklist:
                cls._iptables('-A', cls.NEW_CHAIN, '-m', 'mac',
                              '--mac-source', mac, '-j', 'DROP')
            # - Whitelist everything else
            cls._iptables('-A', cls.NEW_CHAIN, '-j', 'ACCEPT')

            # Swap chains
            cls._iptables('-I', 'INPUT', '-i', cls.INTERFACE, '-p', 'udp',
                          '--dport', '67', '-j', cls.NEW_CHAIN)
            cls._iptables('-D', 'INPUT', '-i', cls.INTERFACE, '-p', 'udp',
                          '--dport', '67', '-j', cls.CHAIN,
                          ignore=True)  # may be missing on first run
            cls._iptables('-F', cls.CHAIN)
            cls._iptables('-X', cls.CHAIN)
            cls._iptables('-E', cls.NEW_CHAIN, cls.CHAIN)


def start(uuids):
    """Initiate discovery for given node uuids.

    Nodes that cannot be fetched or are not in maintenance are skipped with
    a log message. For nodes whose driver name ends with ``ssh`` the MACs of
    their ports are white-listed in the firewall first. All remaining nodes
    are then powered on.

    :param uuids: iterable of node UUIDs
    """
    ironic = client.get_client(1, **OS_ARGS)
    LOG.debug('Validating nodes %s', uuids)
    nodes = []
    for uuid in uuids:
        try:
            node = ironic.node.get(uuid)
        except exceptions.HTTPClientError:
            LOG.exception('Failed validation of node %s', uuid)
            continue

        if not node.maintenance:
            LOG.error('Node %s not in maintenance - skipping', uuid)
            continue

        nodes.append(node)

    LOG.info('Proceeding with discovery on nodes %s',
             [n.uuid for n in nodes])

    to_exclude = set()
    for node in nodes:
        if not node.driver.endswith('ssh'):
            continue

        LOG.warning('Driver for %s is %s, requires white-listing MAC',
                    node.uuid, node.driver)
        # TODO(dtantsur): pagination
        ports = ironic.node.list_ports(node.uuid, limit=0)
        to_exclude.update(p.address for p in ports)

    if to_exclude:
        Firewall.whitelist_macs(to_exclude)
        Firewall.update_filters(ironic)

    for node in nodes:
        ironic.node.set_power_state(node.uuid, 'on')


@app.route('/continue', methods=['POST'])
def post_continue():
    """Accept discovery data and process it in a background thread."""
    data = request.get_json(force=True)
    LOG.debug("Got JSON %s, going into processing thread", data)
    threading.Thread(target=process, args=(data,)).start()
    return "{}", 202, {"content-type": "application/json"}


@app.route('/start', methods=['POST'])
def post_start():
    """Accept a list of node UUIDs and start discovery in a background thread."""
    data = request.get_json(force=True)
    LOG.debug("Got JSON %s, going into processing thread", data)
    threading.Thread(target=start, args=(data,)).start()
    return "{}", 202, {"content-type": "application/json"}


def periodic_update(event, ironic):
    """Refresh firewall filters roughly every 15 seconds until stopped.

    A failed refresh is logged and retried on the next period instead of
    terminating the thread.

    :param event: ``threading.Event``; the loop exits once it is set
    :param ironic: Ironic client passed to ``Firewall.update_filters``
    """
    while not event.is_set():
        LOG.debug('Running periodic update of filters')
        try:
            Firewall.update_filters(ironic)
        except Exception:
            # Thread boundary: a transient Ironic or iptables failure must
            # not stop all future updates.
            LOG.exception('Periodic update of filters failed, will retry')

        # Sleep in one second steps to react to shutdown promptly.
        for _ in range(15):
            if event.is_set():
                return
            time.sleep(1)


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    ironic = client.get_client(1, **OS_ARGS)
    Firewall.init()
    event = threading.Event()
    threading.Thread(target=periodic_update, args=(event, ironic)).start()
    try:
        # NOTE(review): debug=True together with host='0.0.0.0' exposes the
        # Werkzeug interactive debugger on all interfaces, and the debug
        # reloader presumably re-executes this module in a child process -
        # confirm this is intended outside of development.
        app.run(debug=True, host='0.0.0.0', port=5050)
    finally:
        LOG.info('Waiting for background thread to shutdown')
        event.set()