mirror of
https://github.com/blw1138/Zordon.git
synced 2025-12-17 16:58:12 +00:00
Zeroconf offline-handling improvements (#77)
* Add benchmark.py * Add cpu / disk benchmark APIs * Add cpu_benchmark method to distributed_job_manager.py * Do a better job of storing hostnames = * Remove hostname from Zeroconf cache if server goes offline * Add cpu / disk benchmark APIs * Add cpu_benchmark method to distributed_job_manager.py * Do a better job of storing hostnames = * Remove hostname from Zeroconf cache if server goes offline * Wrap main code in try finally block to always stop zeroconf * Add missing import
This commit is contained in:
@@ -10,6 +10,7 @@ from requests_toolbelt.multipart import MultipartEncoder, MultipartEncoderMonito
|
||||
|
||||
from src.utilities.misc_helper import is_localhost
|
||||
from src.utilities.status_utils import RenderStatus
|
||||
from src.utilities.zeroconf_server import ZeroconfServer
|
||||
|
||||
status_colors = {RenderStatus.ERROR: "red", RenderStatus.CANCELLED: 'orange1', RenderStatus.COMPLETED: 'green',
|
||||
RenderStatus.NOT_STARTED: "yellow", RenderStatus.SCHEDULED: 'purple',
|
||||
@@ -19,7 +20,7 @@ categories = [RenderStatus.RUNNING, RenderStatus.WAITING_FOR_SUBJOBS, RenderStat
|
||||
RenderStatus.SCHEDULED, RenderStatus.COMPLETED, RenderStatus.CANCELLED, RenderStatus.UNDEFINED]
|
||||
|
||||
logger = logging.getLogger()
|
||||
OFFLINE_MAX = 2
|
||||
OFFLINE_MAX = 4
|
||||
LOOPBACK = '127.0.0.1'
|
||||
|
||||
|
||||
@@ -87,6 +88,13 @@ class RenderServerProxy:
|
||||
self.__offline_flags = self.__offline_flags + 1
|
||||
except Exception as e:
|
||||
logger.exception(f"Uncaught exception: {e}")
|
||||
|
||||
# If server unexpectedly drops off the network, remove from Zeroconf list
|
||||
if self.__offline_flags > OFFLINE_MAX:
|
||||
try:
|
||||
ZeroconfServer.client_cache.pop(self.hostname)
|
||||
except KeyError:
|
||||
pass
|
||||
return None
|
||||
|
||||
def request(self, payload, timeout=5):
|
||||
|
||||
48
src/init.py
48
src/init.py
@@ -24,32 +24,38 @@ def run() -> int:
|
||||
int: The exit status code.
|
||||
"""
|
||||
|
||||
# Load Config YAML
|
||||
Config.setup_config_dir()
|
||||
Config.load_config(system_safe_path(os.path.join(Config.config_dir(), 'config.yaml')))
|
||||
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', datefmt='%d-%b-%y %H:%M:%S',
|
||||
level=Config.server_log_level.upper())
|
||||
try:
|
||||
# Load Config YAML
|
||||
Config.setup_config_dir()
|
||||
Config.load_config(system_safe_path(os.path.join(Config.config_dir(), 'config.yaml')))
|
||||
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', datefmt='%d-%b-%y %H:%M:%S',
|
||||
level=Config.server_log_level.upper())
|
||||
|
||||
app: QApplication = QApplication(sys.argv)
|
||||
app: QApplication = QApplication(sys.argv)
|
||||
|
||||
# Start server in background
|
||||
background_server = threading.Thread(target=start_server)
|
||||
background_server.daemon = True
|
||||
background_server.start()
|
||||
# Start server in background
|
||||
background_server = threading.Thread(target=start_server)
|
||||
background_server.daemon = True
|
||||
background_server.start()
|
||||
|
||||
# Setup logging for console ui
|
||||
buffer_handler = BufferingHandler()
|
||||
buffer_handler.setFormatter(logging.getLogger().handlers[0].formatter)
|
||||
logger = logging.getLogger()
|
||||
logger.addHandler(buffer_handler)
|
||||
# Setup logging for console ui
|
||||
buffer_handler = BufferingHandler()
|
||||
buffer_handler.setFormatter(logging.getLogger().handlers[0].formatter)
|
||||
logger = logging.getLogger()
|
||||
logger.addHandler(buffer_handler)
|
||||
|
||||
window: MainWindow = MainWindow()
|
||||
window.buffer_handler = buffer_handler
|
||||
window.show()
|
||||
window: MainWindow = MainWindow()
|
||||
window.buffer_handler = buffer_handler
|
||||
window.show()
|
||||
|
||||
return_code = app.exec()
|
||||
RenderQueue.prepare_for_shutdown()
|
||||
return sys.exit(return_code)
|
||||
return_code = app.exec()
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Unhandled exception: {e}")
|
||||
return_code = 1
|
||||
finally:
|
||||
RenderQueue.prepare_for_shutdown()
|
||||
return sys.exit(return_code)
|
||||
|
||||
|
||||
class BufferingHandler(logging.Handler, QObject):
|
||||
|
||||
@@ -56,7 +56,7 @@ class ZeroconfServer:
|
||||
cls.service_info = info
|
||||
cls.zeroconf.register_service(info)
|
||||
logger.info(f"Registered zeroconf service: {cls.service_info.name}")
|
||||
except NonUniqueNameException as e:
|
||||
except (NonUniqueNameException, socket.gaierror) as e:
|
||||
logger.error(f"Error establishing zeroconf: {e}")
|
||||
|
||||
@classmethod
|
||||
@@ -74,17 +74,17 @@ class ZeroconfServer:
|
||||
@classmethod
|
||||
def _on_service_discovered(cls, zeroconf, service_type, name, state_change):
|
||||
info = zeroconf.get_service_info(service_type, name)
|
||||
logger.debug(f"Zeroconf: {name} {state_change}")
|
||||
hostname = name.split(f'.{cls.service_type}')[0]
|
||||
logger.debug(f"Zeroconf: {hostname} {state_change}")
|
||||
if service_type == cls.service_type:
|
||||
if state_change == ServiceStateChange.Added or state_change == ServiceStateChange.Updated:
|
||||
cls.client_cache[name] = info
|
||||
cls.client_cache[hostname] = info
|
||||
else:
|
||||
cls.client_cache.pop(name)
|
||||
pub.sendMessage('zeroconf_state_change', hostname=name, state_change=state_change, info=info)
|
||||
cls.client_cache.pop(hostname)
|
||||
pub.sendMessage('zeroconf_state_change', hostname=hostname, state_change=state_change)
|
||||
|
||||
@classmethod
|
||||
def found_hostnames(cls):
|
||||
fetched_hostnames = [x.split(f'.{cls.service_type}')[0] for x in cls.client_cache.keys()]
|
||||
local_hostname = socket.gethostname()
|
||||
|
||||
def sort_key(hostname):
|
||||
@@ -92,13 +92,12 @@ class ZeroconfServer:
|
||||
return False if hostname == local_hostname else True
|
||||
|
||||
# Sort the list with the local hostname first
|
||||
sorted_hostnames = sorted(fetched_hostnames, key=sort_key)
|
||||
sorted_hostnames = sorted(cls.client_cache.keys(), key=sort_key)
|
||||
return sorted_hostnames
|
||||
|
||||
@classmethod
|
||||
def get_hostname_properties(cls, hostname):
|
||||
new_key = hostname + '.' + cls.service_type
|
||||
server_info = cls.client_cache.get(new_key).properties
|
||||
server_info = cls.client_cache.get(hostname).properties
|
||||
decoded_server_info = {key.decode('utf-8'): value.decode('utf-8') for key, value in server_info.items()}
|
||||
return decoded_server_info
|
||||
|
||||
|
||||
Reference in New Issue
Block a user