Source code for mosaic.runtime.monitor


import os
import time
import psutil
import asyncio
import datetime
import subprocess as cmd_subprocess

import mosaic
from .runtime import Runtime, RuntimeProxy
from .utils import MonitoredResource, MonitoredObject
from .strategies import RoundRobin
from ..file_manipulation import h5
from ..utils import subprocess, at_exit
from ..utils.utils import memory_limit, cpu_count
from ..utils.logger import LoggerManager, _stdout, _stderr
from ..profile import profiler, global_profiler


__all__ = ['Monitor', 'monitor_strategies']


monitor_strategies = {
    'round-robin': RoundRobin,
}
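
# For illustration: 'round-robin' is the default, so constructing a Monitor
# without an explicit 'monitor_strategy' keyword resolves, via this mapping,
# to a RoundRobin strategy bound to the monitor.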


def _cpu_mask(num_workers, worker_index, num_threads):
    # Work out the first core ID for this subjob
    startid = (worker_index - 1) * num_threads

    # Accumulate the contribution of each thread, one entry per hex digit
    valsum = {}
    for j in range(0, num_threads):
        # Thread CPU ID
        threadid = startid + j
        # Convert to bitmask components
        pos = int(threadid / 4)
        offset = threadid - pos * 4
        val = 2 ** offset
        # The mask may span several hex digits, so add this thread's value into the right digit
        valsum[pos] = valsum.get(pos, 0) + val

    valmask = ''
    # Generate the hex representation of the full bitmask
    for j in range(max(valsum.keys()), -1, -1):
        valmask = f'{valmask}{valsum.get(j, 0):X}'

    # Prefix with '0x' to form the final mask string for this subjob
    mask = '0x' + valmask

    return mask
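
# Worked example with values chosen purely for illustration: with
# num_threads=6, worker 1 covers thread IDs 0-5, so hex digit 0 holds bits 0-3
# (0xF) and digit 1 holds bits 4-5 (0x3), giving _cpu_mask(1, 1, 6) == '0x3F';
# with num_threads=4, worker 2 covers thread IDs 4-7, which all land in digit 1,
# giving _cpu_mask(1, 2, 4) == '0xF0'.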


class Monitor(Runtime):
    """
    The monitor keeps track of the state of the network and collects
    statistics about it.

    It also handles the allocation of tesserae to specific workers.

    """

    is_monitor = True

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self._memory_limit = memory_limit()
        self.strategy_name = kwargs.get('monitor_strategy', 'round-robin')
        self._monitor_strategy = monitor_strategies[self.strategy_name](self)

        self._monitored_nodes = dict()
        self._monitored_tessera = dict()
        self._monitored_tasks = dict()

        self._dirty_tessera = set()
        self._dirty_tasks = set()

        now = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        self._profile_filename = '%s.profile.h5' % now
        self._init_filename = None

        self._start_t = time.time()
        self._end_t = None

    async def init(self, **kwargs):
        """
        Asynchronous counterpart of ``__init__``.

        Parameters
        ----------
        kwargs

        Returns
        -------

        """
        await super().init(**kwargs)

        # Maybe dump the init file
        if kwargs.get('dump_init', False):
            self.init_file({})

        # Start the warehouse and the local or cluster nodes
        await self.init_warehouse(**kwargs)

        if self.mode in ['local', 'interactive']:
            await self.init_local(**kwargs)
        else:
            await self.init_cluster(**kwargs)

    async def init_local(self, **kwargs):
        """
        Init nodes in local mode.

        Parameters
        ----------
        kwargs

        Returns
        -------

        """
        num_workers = kwargs.get('num_workers', 1)

        def start_node(*args, **extra_kwargs):
            kwargs.update(extra_kwargs)
            kwargs['runtime_indices'] = 0

            mosaic.init('node', *args, **kwargs, wait=True)

        node_proxy = RuntimeProxy(name='node', indices=0)
        node_subprocess = subprocess(start_node)(name=node_proxy.uid, daemon=False)
        node_subprocess.start_process()
        node_proxy.subprocess = node_subprocess

        self._nodes[node_proxy.uid] = node_proxy
        await self._comms.wait_for(node_proxy.uid)

        while node_proxy.uid not in self._monitored_nodes:
            await asyncio.sleep(0.1)

        self._comms.start_heartbeat(node_proxy.uid)

        self.logger.info('Listening at <NODE:0 | WORKER:0:0-0:%d>' % num_workers)

    async def init_cluster(self, **kwargs):
        """
        Init nodes in cluster mode.

        Parameters
        ----------
        kwargs

        Returns
        -------

        """
        node_list = kwargs.get('node_list', None)
        if node_list is None:
            raise ValueError('No node_list was provided to initialise mosaic in cluster mode')

        num_cpus = cpu_count()
        num_nodes = len(node_list)
        num_workers = kwargs.get('num_workers', 1)
        num_threads = kwargs.get('num_threads', None) or num_cpus // num_workers
        log_level = kwargs.get('log_level', 'info')
        runtime_address = self.address
        runtime_port = self.port
        pubsub_port = self.pubsub_port

        ssh_flags = os.environ.get('SSH_FLAGS', '')
        ssh_commands = os.environ.get('SSH_COMMANDS', None)
        ssh_commands = ssh_commands + ';' if ssh_commands else ''

        reuse_head = '--reuse-head' if self.reuse_head else '--free-head'
        in_slurm = os.environ.get('SLURM_NODELIST', None) is not None

        tasks = []
        for node_index, node_address in zip(range(num_nodes), node_list):
            node_proxy = RuntimeProxy(name='node', indices=node_index)

            remote_cmd = (f'{ssh_commands} '
                          f'mrun --node -i {node_index} '
                          f'--monitor-address {runtime_address} --monitor-port {runtime_port} '
                          f'--pubsub-port {pubsub_port} '
                          f'-n {num_nodes} -nw {num_workers} -nth {num_threads} '
                          f'--cluster --{log_level} {reuse_head}')

            if in_slurm:
                cpu_mask = _cpu_mask(1, 1, num_cpus)

                cmd = (f'srun {ssh_flags} --nodes=1 --ntasks=1 --tasks-per-node={num_cpus} '
                       f'--cpu-bind=mask_cpu:{cpu_mask} --mem-bind=local '
                       f'--oversubscribe '
                       f'--distribution=block:block '
                       f'--hint=nomultithread --no-kill '
                       f'--nodelist={node_address} '
                       f'{remote_cmd}')

            else:
                cmd = (f'ssh {ssh_flags} {node_address} '
                       f'"{remote_cmd}"')

            node_subprocess = cmd_subprocess.Popen(cmd,
                                                   shell=True,
                                                   stdout=_stdout,
                                                   stderr=_stderr)
            node_proxy.subprocess = node_subprocess

            self._nodes[node_proxy.uid] = node_proxy

            async def wait_for(proxy):
                await self._comms.wait_for(proxy.uid)
                while proxy.uid not in self._monitored_nodes:
                    await asyncio.sleep(0.1)
                return proxy

            tasks.append(wait_for(node_proxy))

        for node_task in asyncio.as_completed(tasks):
            node_proxy = await node_task
            self.logger.debug('Started node %s' % node_proxy.uid)

        for node_uid in self._nodes.keys():
            self._comms.start_heartbeat(node_uid)
            self.logger.debug('Started heartbeat with node %s' % node_uid)

        self.logger.info('Listening at <NODE:%d-%d | '
                         'WORKER:0:0-%d:%d address=%s>' % (0, num_nodes,
                                                           num_nodes, num_workers,
                                                           ', '.join(node_list)))
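
    # For illustration, with hypothetical values (monitor at 10.0.0.1:3000,
    # pubsub port 3001, 2 nodes, 4 workers, 8 threads per worker, log level
    # 'info', reuse_head False), the per-node remote command built above reads:
    #
    #   mrun --node -i 0 --monitor-address 10.0.0.1 --monitor-port 3000
    #        --pubsub-port 3001 -n 2 -nw 4 -nth 8 --cluster --info --free-head
    #
    # and is then wrapped in either `srun` (when running under SLURM) or `ssh`.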

    def init_file(self, runtime_config):
        """
        Write the monitor address and the runtime configuration to a
        ``mosaic-workspace/monitor.key`` file for the head runtime to pick up.

        Parameters
        ----------
        runtime_config : dict

        Returns
        -------

        """
        runtime_id = self.uid
        runtime_address = self.address
        runtime_port = self.port
        pubsub_port = self.pubsub_port

        # Store runtime ID, address and port in a tmp file for the
        # head to use
        path = os.path.join(os.getcwd(), 'mosaic-workspace')
        if not os.path.exists(path):
            os.makedirs(path)

        self._init_filename = os.path.join(path, 'monitor.key')

        with open(self._init_filename, 'w') as file:
            file.write('[ADDRESS]\n')
            file.write('UID=%s\n' % runtime_id)
            file.write('ADD=%s\n' % runtime_address)
            file.write('PRT=%s\n' % runtime_port)
            file.write('PUB=%s\n' % pubsub_port)

            file.write('[ARGS]\n')
            for key, value in runtime_config.items():
                if key in ['runtime_indices',
                           'address', 'port',
                           'monitor_address', 'monitor_port',
                           'node_list']:
                    continue

                if isinstance(value, str):
                    file.write('%s="%s"\n' % (key, value))
                else:
                    file.write('%s=%s\n' % (key, value))

        def _rm_dirs():
            os.remove(self._init_filename)

        at_exit.add(_rm_dirs)
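
    # For illustration, with hypothetical values (UID 'monitor', address
    # 127.0.0.1, port 3000, pubsub port 3001) and a runtime_config of
    # {'num_workers': 2, 'mode': 'local'}, the resulting
    # mosaic-workspace/monitor.key file would contain:
    #
    #   [ADDRESS]
    #   UID=monitor
    #   ADD=127.0.0.1
    #   PRT=3000
    #   PUB=3001
    #   [ARGS]
    #   num_workers=2
    #   mode="local"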

    def set_logger(self):
        """
        Set up logging.

        Returns
        -------

        """
        self.logger = LoggerManager()

        if self.mode == 'interactive':
            self.logger.set_remote(runtime_id='head', format=self.mode)
        else:
            self.logger.set_local(format=self.mode)

    def set_profiler(self):
        """
        Set up profiling.

        Returns
        -------

        """
        global_profiler.set_local()

        self._loop.interval(self.append_description, interval=10)

    def update_node(self, sender_id, update, sub_resources):
        if sender_id not in self._monitored_nodes:
            self._monitored_nodes[sender_id] = MonitoredResource(sender_id)

        node = self._monitored_nodes[sender_id]
        node.update(update, **sub_resources)

        self._monitor_strategy.update_node(node)

    def add_tessera_event(self, sender_id, msgs):
        msgs = [msgs] if not isinstance(msgs, list) else msgs
        for msg in msgs:
            self._add_tessera_event(sender_id, **msg)

    def add_task_event(self, sender_id, msgs):
        msgs = [msgs] if not isinstance(msgs, list) else msgs
        for msg in msgs:
            self._add_task_event(sender_id, **msg)

    def add_tessera_profile(self, sender_id, msgs):
        msgs = [msgs] if not isinstance(msgs, list) else msgs
        for msg in msgs:
            self._add_tessera_profile(sender_id, **msg)

    def add_task_profile(self, sender_id, msgs):
        msgs = [msgs] if not isinstance(msgs, list) else msgs
        for msg in msgs:
            self._add_task_profile(sender_id, **msg)

    def _add_tessera_event(self, sender_id, runtime_id, uid, **kwargs):
        if uid not in self._monitored_tessera:
            self._monitored_tessera[uid] = MonitoredObject(runtime_id, uid)

        obj = self._monitored_tessera[uid]
        obj.add_event(sender_id, **kwargs)

        self._monitor_strategy.update_tessera(obj)
        self._dirty_tessera.add(uid)

    def _add_task_event(self, sender_id, runtime_id, uid, tessera_id, **kwargs):
        if uid not in self._monitored_tasks:
            self._monitored_tasks[uid] = MonitoredObject(runtime_id, uid, tessera_id=tessera_id)

        obj = self._monitored_tasks[uid]
        obj.add_event(sender_id, **kwargs)

        self._monitor_strategy.update_task(obj)
        self._dirty_tasks.add(uid)

    def _add_tessera_profile(self, sender_id, runtime_id, uid, profile):
        if uid not in self._monitored_tessera:
            self._monitored_tessera[uid] = MonitoredObject(runtime_id, uid)

        obj = self._monitored_tessera[uid]
        obj.add_profile(sender_id, profile)

        self._dirty_tessera.add(uid)

    def _add_task_profile(self, sender_id, runtime_id, uid, tessera_id, profile):
        if uid not in self._monitored_tasks:
            self._monitored_tasks[uid] = MonitoredObject(runtime_id, uid, tessera_id=tessera_id)

        obj = self._monitored_tasks[uid]
        obj.add_profile(sender_id, profile)

        self._dirty_tasks.add(uid)

    def append_description(self):
        if not profiler.tracing:
            return

        if not len(self._dirty_tessera) and not len(self._dirty_tasks):
            return

        description = {
            'monitored_tessera': {},
            'monitored_tasks': {},
        }

        for uid in self._dirty_tessera:
            tessera = self._monitored_tessera[uid]
            description['monitored_tessera'][uid] = tessera.append()

        for uid in self._dirty_tasks:
            task = self._monitored_tasks[uid]
            description['monitored_tasks'][uid] = task.append()

        self._append_description(description)

        self._dirty_tessera = set()
        self._dirty_tasks = set()

    def _append_description(self, description):
        if not h5.file_exists(filename=self._profile_filename):
            description['start_t'] = self._start_t

            with h5.HDF5(filename=self._profile_filename, mode='w') as file:
                file.dump(description)

        else:
            with h5.HDF5(filename=self._profile_filename, mode='a') as file:
                file.append(description)
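
    # For reference, the '<timestamp>.profile.h5' file written here accumulates
    # 'start_t' when first created, entries under 'monitored_tessera' and
    # 'monitored_tasks' on each periodic append, and 'end_t' when the runtime
    # stops.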

    async def stop(self, sender_id=None):
        """
        Stop runtime.

        Parameters
        ----------
        sender_id : str

        Returns
        -------

        """
        # Delete files
        if self._init_filename is not None:
            try:
                os.remove(self._init_filename)
            except Exception:
                pass

        # Get final profile updates before closing
        if profiler.tracing:
            profiler.stop()

            self._end_t = time.time()

            description = {
                'end_t': self._end_t,
                'monitored_tessera': {},
                'monitored_tasks': {},
            }

            for uid, tessera in self._monitored_tessera.items():
                tessera.collect()
                description['monitored_tessera'][tessera.uid] = tessera.append()

            for uid, task in self._monitored_tasks.items():
                task.collect()
                description['monitored_tasks'][task.uid] = task.append()

            self._append_description(description)

        # Close warehouse
        await self._local_warehouse.stop()
        self._local_warehouse.subprocess.join_process()

        # Close nodes
        for node_id, node in self._nodes.items():
            await node.stop()

            if hasattr(node.subprocess, 'stop_process'):
                node.subprocess.join_process()

            if isinstance(node.subprocess, cmd_subprocess.Popen):
                ps_process = psutil.Process(node.subprocess.pid)

                for child in ps_process.children(recursive=True):
                    child.kill()

                ps_process.kill()

        await super().stop(sender_id)

    async def select_worker(self, sender_id):
        """
        Select an appropriate worker on which to allocate a tessera.

        Parameters
        ----------
        sender_id : str

        Returns
        -------
        str
            UID of the selected worker.

        """
        while not len(self._monitored_nodes.keys()):
            await asyncio.sleep(0.1)

        return self._monitor_strategy.select_worker(sender_id)

    async def barrier(self, sender_id, timeout=None):
        """
        Wait until all pending tasks are done. If no timeout is
        provided, the barrier will wait indefinitely.

        Parameters
        ----------
        sender_id : str
        timeout : float, optional

        Returns
        -------

        """
        pending_tasks = [task for task in self._monitored_tasks.values()
                         if task.state not in ['done', 'failed', 'collected']]

        self.logger.info('Pending barrier tasks %d' % len(pending_tasks))

        tic = time.time()
        while pending_tasks:
            await asyncio.sleep(0.5)

            # Rebuild the list instead of removing in place, so that no task
            # is skipped while iterating
            pending_tasks = [task for task in pending_tasks
                             if task.state not in ['done', 'failed', 'collected']]

            if timeout is not None and (time.time() - tic) > timeout:
                break