Zeroconf offline-handling improvements (#77)

* Add benchmark.py

* Add cpu / disk benchmark APIs

* Add cpu_benchmark method to distributed_job_manager.py

* Do a better job of storing hostnames

* Remove hostname from Zeroconf cache if server goes offline

* Add cpu / disk benchmark APIs

* Add cpu_benchmark method to distributed_job_manager.py

* Do a better job of storing hostnames

* Remove hostname from Zeroconf cache if server goes offline

* Wrap main code in a try/finally block to always stop zeroconf

* Add missing import
This commit is contained in:
2024-02-12 14:57:00 -06:00
committed by GitHub
parent a31fe98964
commit ecf836c235
4 changed files with 50 additions and 36 deletions

View File

@@ -10,6 +10,7 @@ from requests_toolbelt.multipart import MultipartEncoder, MultipartEncoderMonito
from src.utilities.misc_helper import is_localhost
from src.utilities.status_utils import RenderStatus
from src.utilities.zeroconf_server import ZeroconfServer
status_colors = {RenderStatus.ERROR: "red", RenderStatus.CANCELLED: 'orange1', RenderStatus.COMPLETED: 'green',
RenderStatus.NOT_STARTED: "yellow", RenderStatus.SCHEDULED: 'purple',
@@ -19,7 +20,7 @@ categories = [RenderStatus.RUNNING, RenderStatus.WAITING_FOR_SUBJOBS, RenderStat
RenderStatus.SCHEDULED, RenderStatus.COMPLETED, RenderStatus.CANCELLED, RenderStatus.UNDEFINED]
logger = logging.getLogger()
OFFLINE_MAX = 2
OFFLINE_MAX = 4
LOOPBACK = '127.0.0.1'
@@ -87,6 +88,13 @@ class RenderServerProxy:
self.__offline_flags = self.__offline_flags + 1
except Exception as e:
logger.exception(f"Uncaught exception: {e}")
# If server unexpectedly drops off the network, remove from Zeroconf list
if self.__offline_flags > OFFLINE_MAX:
try:
ZeroconfServer.client_cache.pop(self.hostname)
except KeyError:
pass
return None
def request(self, payload, timeout=5):

View File

@@ -24,32 +24,38 @@ def run() -> int:
int: The exit status code.
"""
# Load Config YAML
Config.setup_config_dir()
Config.load_config(system_safe_path(os.path.join(Config.config_dir(), 'config.yaml')))
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', datefmt='%d-%b-%y %H:%M:%S',
level=Config.server_log_level.upper())
try:
# Load Config YAML
Config.setup_config_dir()
Config.load_config(system_safe_path(os.path.join(Config.config_dir(), 'config.yaml')))
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', datefmt='%d-%b-%y %H:%M:%S',
level=Config.server_log_level.upper())
app: QApplication = QApplication(sys.argv)
app: QApplication = QApplication(sys.argv)
# Start server in background
background_server = threading.Thread(target=start_server)
background_server.daemon = True
background_server.start()
# Start server in background
background_server = threading.Thread(target=start_server)
background_server.daemon = True
background_server.start()
# Setup logging for console ui
buffer_handler = BufferingHandler()
buffer_handler.setFormatter(logging.getLogger().handlers[0].formatter)
logger = logging.getLogger()
logger.addHandler(buffer_handler)
# Setup logging for console ui
buffer_handler = BufferingHandler()
buffer_handler.setFormatter(logging.getLogger().handlers[0].formatter)
logger = logging.getLogger()
logger.addHandler(buffer_handler)
window: MainWindow = MainWindow()
window.buffer_handler = buffer_handler
window.show()
window: MainWindow = MainWindow()
window.buffer_handler = buffer_handler
window.show()
return_code = app.exec()
RenderQueue.prepare_for_shutdown()
return sys.exit(return_code)
return_code = app.exec()
except Exception as e:
logging.error(f"Unhandled exception: {e}")
return_code = 1
finally:
RenderQueue.prepare_for_shutdown()
return sys.exit(return_code)
class BufferingHandler(logging.Handler, QObject):

View File

@@ -56,7 +56,7 @@ class ZeroconfServer:
cls.service_info = info
cls.zeroconf.register_service(info)
logger.info(f"Registered zeroconf service: {cls.service_info.name}")
except NonUniqueNameException as e:
except (NonUniqueNameException, socket.gaierror) as e:
logger.error(f"Error establishing zeroconf: {e}")
@classmethod
@@ -74,17 +74,17 @@ class ZeroconfServer:
@classmethod
def _on_service_discovered(cls, zeroconf, service_type, name, state_change):
info = zeroconf.get_service_info(service_type, name)
logger.debug(f"Zeroconf: {name} {state_change}")
hostname = name.split(f'.{cls.service_type}')[0]
logger.debug(f"Zeroconf: {hostname} {state_change}")
if service_type == cls.service_type:
if state_change == ServiceStateChange.Added or state_change == ServiceStateChange.Updated:
cls.client_cache[name] = info
cls.client_cache[hostname] = info
else:
cls.client_cache.pop(name)
pub.sendMessage('zeroconf_state_change', hostname=name, state_change=state_change, info=info)
cls.client_cache.pop(hostname)
pub.sendMessage('zeroconf_state_change', hostname=hostname, state_change=state_change)
@classmethod
def found_hostnames(cls):
fetched_hostnames = [x.split(f'.{cls.service_type}')[0] for x in cls.client_cache.keys()]
local_hostname = socket.gethostname()
def sort_key(hostname):
@@ -92,13 +92,12 @@ class ZeroconfServer:
return False if hostname == local_hostname else True
# Sort the list with the local hostname first
sorted_hostnames = sorted(fetched_hostnames, key=sort_key)
sorted_hostnames = sorted(cls.client_cache.keys(), key=sort_key)
return sorted_hostnames
@classmethod
def get_hostname_properties(cls, hostname):
new_key = hostname + '.' + cls.service_type
server_info = cls.client_cache.get(new_key).properties
server_info = cls.client_cache.get(hostname).properties
decoded_server_info = {key.decode('utf-8'): value.decode('utf-8') for key, value in server_info.items()}
return decoded_server_info