Zeroconf offline-handling improvements (#77)

* Add benchmark.py

* Add cpu / disk benchmark APIs

* Add cpu_benchmark method to distributed_job_manager.py

* Do a better job of storing hostnames

* Remove hostname from Zeroconf cache if server goes offline

* Add cpu / disk benchmark APIs

* Add cpu_benchmark method to distributed_job_manager.py

* Do a better job of storing hostnames

* Remove hostname from Zeroconf cache if server goes offline

* Wrap main code in try finally block to always stop zeroconf

* Add missing import
This commit is contained in:
2024-02-12 14:57:00 -06:00
committed by GitHub
parent a31fe98964
commit ecf836c235
4 changed files with 50 additions and 36 deletions

View File

@@ -2,10 +2,9 @@ PyQt6>=6.6.1
psutil>=5.9.8 psutil>=5.9.8
requests>=2.31.0 requests>=2.31.0
Pillow>=10.2.0 Pillow>=10.2.0
json2html>=1.3.0
PyYAML>=6.0.1 PyYAML>=6.0.1
flask>=3.0.1 flask>=3.0.2
tqdm>=4.66.1 tqdm>=4.66.2
werkzeug>=3.0.1 werkzeug>=3.0.1
Pypubsub>=4.0.3 Pypubsub>=4.0.3
zeroconf>=0.131.0 zeroconf>=0.131.0
@@ -22,7 +21,6 @@ matplotlib>=3.8.2
MarkupSafe>=2.1.4 MarkupSafe>=2.1.4
python-dateutil>=2.8.2 python-dateutil>=2.8.2
certifi>=2023.11.17 certifi>=2023.11.17
PySide6>=6.6.1
shiboken6>=6.6.1 shiboken6>=6.6.1
Pygments>=2.17.2 Pygments>=2.17.2
cycler>=0.12.1 cycler>=0.12.1
@@ -33,3 +31,6 @@ Jinja2>=3.1.3
pyparsing>=3.1.1 pyparsing>=3.1.1
kiwisolver>=1.4.5 kiwisolver>=1.4.5
attrs>=23.2.0 attrs>=23.2.0
lxml>=5.1.0
click>=8.1.7
requests_toolbelt>=1.0.0

View File

@@ -10,6 +10,7 @@ from requests_toolbelt.multipart import MultipartEncoder, MultipartEncoderMonito
from src.utilities.misc_helper import is_localhost from src.utilities.misc_helper import is_localhost
from src.utilities.status_utils import RenderStatus from src.utilities.status_utils import RenderStatus
from src.utilities.zeroconf_server import ZeroconfServer
status_colors = {RenderStatus.ERROR: "red", RenderStatus.CANCELLED: 'orange1', RenderStatus.COMPLETED: 'green', status_colors = {RenderStatus.ERROR: "red", RenderStatus.CANCELLED: 'orange1', RenderStatus.COMPLETED: 'green',
RenderStatus.NOT_STARTED: "yellow", RenderStatus.SCHEDULED: 'purple', RenderStatus.NOT_STARTED: "yellow", RenderStatus.SCHEDULED: 'purple',
@@ -19,7 +20,7 @@ categories = [RenderStatus.RUNNING, RenderStatus.WAITING_FOR_SUBJOBS, RenderStat
RenderStatus.SCHEDULED, RenderStatus.COMPLETED, RenderStatus.CANCELLED, RenderStatus.UNDEFINED] RenderStatus.SCHEDULED, RenderStatus.COMPLETED, RenderStatus.CANCELLED, RenderStatus.UNDEFINED]
logger = logging.getLogger() logger = logging.getLogger()
OFFLINE_MAX = 2 OFFLINE_MAX = 4
LOOPBACK = '127.0.0.1' LOOPBACK = '127.0.0.1'
@@ -87,6 +88,13 @@ class RenderServerProxy:
self.__offline_flags = self.__offline_flags + 1 self.__offline_flags = self.__offline_flags + 1
except Exception as e: except Exception as e:
logger.exception(f"Uncaught exception: {e}") logger.exception(f"Uncaught exception: {e}")
# If server unexpectedly drops off the network, remove from Zeroconf list
if self.__offline_flags > OFFLINE_MAX:
try:
ZeroconfServer.client_cache.pop(self.hostname)
except KeyError:
pass
return None return None
def request(self, payload, timeout=5): def request(self, payload, timeout=5):

View File

@@ -24,6 +24,7 @@ def run() -> int:
int: The exit status code. int: The exit status code.
""" """
try:
# Load Config YAML # Load Config YAML
Config.setup_config_dir() Config.setup_config_dir()
Config.load_config(system_safe_path(os.path.join(Config.config_dir(), 'config.yaml'))) Config.load_config(system_safe_path(os.path.join(Config.config_dir(), 'config.yaml')))
@@ -48,6 +49,11 @@ def run() -> int:
window.show() window.show()
return_code = app.exec() return_code = app.exec()
except Exception as e:
logging.error(f"Unhandled exception: {e}")
return_code = 1
finally:
RenderQueue.prepare_for_shutdown() RenderQueue.prepare_for_shutdown()
return sys.exit(return_code) return sys.exit(return_code)

View File

@@ -56,7 +56,7 @@ class ZeroconfServer:
cls.service_info = info cls.service_info = info
cls.zeroconf.register_service(info) cls.zeroconf.register_service(info)
logger.info(f"Registered zeroconf service: {cls.service_info.name}") logger.info(f"Registered zeroconf service: {cls.service_info.name}")
except NonUniqueNameException as e: except (NonUniqueNameException, socket.gaierror) as e:
logger.error(f"Error establishing zeroconf: {e}") logger.error(f"Error establishing zeroconf: {e}")
@classmethod @classmethod
@@ -74,17 +74,17 @@ class ZeroconfServer:
@classmethod @classmethod
def _on_service_discovered(cls, zeroconf, service_type, name, state_change): def _on_service_discovered(cls, zeroconf, service_type, name, state_change):
info = zeroconf.get_service_info(service_type, name) info = zeroconf.get_service_info(service_type, name)
logger.debug(f"Zeroconf: {name} {state_change}") hostname = name.split(f'.{cls.service_type}')[0]
logger.debug(f"Zeroconf: {hostname} {state_change}")
if service_type == cls.service_type: if service_type == cls.service_type:
if state_change == ServiceStateChange.Added or state_change == ServiceStateChange.Updated: if state_change == ServiceStateChange.Added or state_change == ServiceStateChange.Updated:
cls.client_cache[name] = info cls.client_cache[hostname] = info
else: else:
cls.client_cache.pop(name) cls.client_cache.pop(hostname)
pub.sendMessage('zeroconf_state_change', hostname=name, state_change=state_change, info=info) pub.sendMessage('zeroconf_state_change', hostname=hostname, state_change=state_change)
@classmethod @classmethod
def found_hostnames(cls): def found_hostnames(cls):
fetched_hostnames = [x.split(f'.{cls.service_type}')[0] for x in cls.client_cache.keys()]
local_hostname = socket.gethostname() local_hostname = socket.gethostname()
def sort_key(hostname): def sort_key(hostname):
@@ -92,13 +92,12 @@ class ZeroconfServer:
return False if hostname == local_hostname else True return False if hostname == local_hostname else True
# Sort the list with the local hostname first # Sort the list with the local hostname first
sorted_hostnames = sorted(fetched_hostnames, key=sort_key) sorted_hostnames = sorted(cls.client_cache.keys(), key=sort_key)
return sorted_hostnames return sorted_hostnames
@classmethod @classmethod
def get_hostname_properties(cls, hostname): def get_hostname_properties(cls, hostname):
new_key = hostname + '.' + cls.service_type server_info = cls.client_cache.get(hostname).properties
server_info = cls.client_cache.get(new_key).properties
decoded_server_info = {key.decode('utf-8'): value.decode('utf-8') for key, value in server_info.items()} decoded_server_info = {key.decode('utf-8'): value.decode('utf-8') for key, value in server_info.items()}
return decoded_server_info return decoded_server_info