From ecf836c23505774e49db6784696a766ff652a1f6 Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 12 Feb 2024 14:57:00 -0600 Subject: [PATCH] Zeroconf offline-handling improvements (#77) * Add benchmark.py * Add cpu / disk benchmark APIs * Add cpu_benchmark method to distributed_job_manager.py * Do a better job of storing hostnames = * Remove hostname from Zeroconf cache if server goes offline * Add cpu / disk benchmark APIs * Add cpu_benchmark method to distributed_job_manager.py * Do a better job of storing hostnames = * Remove hostname from Zeroconf cache if server goes offline * Wrap main code in try finally block to always stop zeroconf * Add missing import --- requirements.txt | 11 ++++---- src/api/server_proxy.py | 10 ++++++- src/init.py | 48 ++++++++++++++++++-------------- src/utilities/zeroconf_server.py | 17 ++++++----- 4 files changed, 50 insertions(+), 36 deletions(-) diff --git a/requirements.txt b/requirements.txt index 845e035..efec5e2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,10 +2,9 @@ PyQt6>=6.6.1 psutil>=5.9.8 requests>=2.31.0 Pillow>=10.2.0 -json2html>=1.3.0 PyYAML>=6.0.1 -flask>=3.0.1 -tqdm>=4.66.1 +flask>=3.0.2 +tqdm>=4.66.2 werkzeug>=3.0.1 Pypubsub>=4.0.3 zeroconf>=0.131.0 @@ -22,7 +21,6 @@ matplotlib>=3.8.2 MarkupSafe>=2.1.4 python-dateutil>=2.8.2 certifi>=2023.11.17 -PySide6>=6.6.1 shiboken6>=6.6.1 Pygments>=2.17.2 cycler>=0.12.1 @@ -32,4 +30,7 @@ fonttools>=4.47.2 Jinja2>=3.1.3 pyparsing>=3.1.1 kiwisolver>=1.4.5 -attrs>=23.2.0 \ No newline at end of file +attrs>=23.2.0 +lxml>=5.1.0 +click>=8.1.7 +requests_toolbelt>=1.0.0 \ No newline at end of file diff --git a/src/api/server_proxy.py b/src/api/server_proxy.py index 6cca41f..0536446 100644 --- a/src/api/server_proxy.py +++ b/src/api/server_proxy.py @@ -10,6 +10,7 @@ from requests_toolbelt.multipart import MultipartEncoder, MultipartEncoderMonito from src.utilities.misc_helper import is_localhost from src.utilities.status_utils import RenderStatus +from src.utilities.zeroconf_server import ZeroconfServer status_colors = {RenderStatus.ERROR: "red", RenderStatus.CANCELLED: 'orange1', RenderStatus.COMPLETED: 'green', RenderStatus.NOT_STARTED: "yellow", RenderStatus.SCHEDULED: 'purple', @@ -19,7 +20,7 @@ categories = [RenderStatus.RUNNING, RenderStatus.WAITING_FOR_SUBJOBS, RenderStat RenderStatus.SCHEDULED, RenderStatus.COMPLETED, RenderStatus.CANCELLED, RenderStatus.UNDEFINED] logger = logging.getLogger() -OFFLINE_MAX = 2 +OFFLINE_MAX = 4 LOOPBACK = '127.0.0.1' @@ -87,6 +88,13 @@ class RenderServerProxy: self.__offline_flags = self.__offline_flags + 1 except Exception as e: logger.exception(f"Uncaught exception: {e}") + + # If server unexpectedly drops off the network, remove from Zeroconf list + if self.__offline_flags > OFFLINE_MAX: + try: + ZeroconfServer.client_cache.pop(self.hostname) + except KeyError: + pass return None def request(self, payload, timeout=5): diff --git a/src/init.py b/src/init.py index 3ca5402..7026a37 100644 --- a/src/init.py +++ b/src/init.py @@ -24,32 +24,38 @@ def run() -> int: int: The exit status code. """ - # Load Config YAML - Config.setup_config_dir() - Config.load_config(system_safe_path(os.path.join(Config.config_dir(), 'config.yaml'))) - logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', datefmt='%d-%b-%y %H:%M:%S', - level=Config.server_log_level.upper()) + try: + # Load Config YAML + Config.setup_config_dir() + Config.load_config(system_safe_path(os.path.join(Config.config_dir(), 'config.yaml'))) + logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', datefmt='%d-%b-%y %H:%M:%S', + level=Config.server_log_level.upper()) - app: QApplication = QApplication(sys.argv) + app: QApplication = QApplication(sys.argv) - # Start server in background - background_server = threading.Thread(target=start_server) - background_server.daemon = True - background_server.start() + # Start server in background + background_server = threading.Thread(target=start_server) + background_server.daemon = True + background_server.start() - # Setup logging for console ui - buffer_handler = BufferingHandler() - buffer_handler.setFormatter(logging.getLogger().handlers[0].formatter) - logger = logging.getLogger() - logger.addHandler(buffer_handler) + # Setup logging for console ui + buffer_handler = BufferingHandler() + buffer_handler.setFormatter(logging.getLogger().handlers[0].formatter) + logger = logging.getLogger() + logger.addHandler(buffer_handler) - window: MainWindow = MainWindow() - window.buffer_handler = buffer_handler - window.show() + window: MainWindow = MainWindow() + window.buffer_handler = buffer_handler + window.show() - return_code = app.exec() - RenderQueue.prepare_for_shutdown() - return sys.exit(return_code) + return_code = app.exec() + + except Exception as e: + logging.error(f"Unhandled exception: {e}") + return_code = 1 + finally: + RenderQueue.prepare_for_shutdown() + return sys.exit(return_code) class BufferingHandler(logging.Handler, QObject): diff --git a/src/utilities/zeroconf_server.py b/src/utilities/zeroconf_server.py index 50c3340..e896b33 100644 --- a/src/utilities/zeroconf_server.py +++ b/src/utilities/zeroconf_server.py @@ -56,7 +56,7 @@ class ZeroconfServer: cls.service_info = info cls.zeroconf.register_service(info) logger.info(f"Registered zeroconf service: {cls.service_info.name}") - except NonUniqueNameException as e: + except (NonUniqueNameException, socket.gaierror) as e: logger.error(f"Error establishing zeroconf: {e}") @classmethod @@ -74,17 +74,17 @@ class ZeroconfServer: @classmethod def _on_service_discovered(cls, zeroconf, service_type, name, state_change): info = zeroconf.get_service_info(service_type, name) - logger.debug(f"Zeroconf: {name} {state_change}") + hostname = name.split(f'.{cls.service_type}')[0] + logger.debug(f"Zeroconf: {hostname} {state_change}") if service_type == cls.service_type: if state_change == ServiceStateChange.Added or state_change == ServiceStateChange.Updated: - cls.client_cache[name] = info + cls.client_cache[hostname] = info else: - cls.client_cache.pop(name) - pub.sendMessage('zeroconf_state_change', hostname=name, state_change=state_change, info=info) + cls.client_cache.pop(hostname) + pub.sendMessage('zeroconf_state_change', hostname=hostname, state_change=state_change) @classmethod def found_hostnames(cls): - fetched_hostnames = [x.split(f'.{cls.service_type}')[0] for x in cls.client_cache.keys()] local_hostname = socket.gethostname() def sort_key(hostname): @@ -92,13 +92,12 @@ class ZeroconfServer: return False if hostname == local_hostname else True # Sort the list with the local hostname first - sorted_hostnames = sorted(fetched_hostnames, key=sort_key) + sorted_hostnames = sorted(cls.client_cache.keys(), key=sort_key) return sorted_hostnames @classmethod def get_hostname_properties(cls, hostname): - new_key = hostname + '.' + cls.service_type - server_info = cls.client_cache.get(new_key).properties + server_info = cls.client_cache.get(hostname).properties decoded_server_info = {key.decode('utf-8'): value.decode('utf-8') for key, value in server_info.items()} return decoded_server_info