Refactor: Move all initialization logic out of api_server and into init (#91)

* Zeroconf logging improvements

* Ignore RuntimeErrors in background threads - Prevents issues during shutdown

* Migrate start up code from api_server.py to init.py

* Add error handlers to the API server to handle detached instances

* Integrate RenderQueue eval loop into RenderQueue object

* Silently catch RuntimeErrors on evaluate_queue

* Stop background queue updates in prepare_for_shutdown
This commit is contained in:
2024-08-08 04:47:22 -05:00
committed by GitHub
parent 6afb6e65a6
commit 3600eeb21b
8 changed files with 290 additions and 177 deletions

View File

@@ -2,14 +2,12 @@
import concurrent.futures
import json
import logging
import multiprocessing
import os
import pathlib
import shutil
import socket
import ssl
import tempfile
import threading
import time
from datetime import datetime
from zipfile import ZipFile
@@ -17,10 +15,10 @@ from zipfile import ZipFile
import psutil
import yaml
from flask import Flask, request, send_file, after_this_request, Response, redirect, url_for, abort
from sqlalchemy.orm.exc import DetachedInstanceError
from src.api.add_job_helpers import handle_uploaded_project_files, process_zipped_project
from src.api.preview_manager import PreviewManager
from src.api.serverproxy_manager import ServerProxyManager
from src.distributed_job_manager import DistributedJobManager
from src.engines.core.base_worker import string_to_status, RenderStatus
from src.engines.engine_manager import EngineManager
@@ -39,6 +37,29 @@ categories = [RenderStatus.RUNNING, RenderStatus.ERROR, RenderStatus.NOT_STARTED
RenderStatus.COMPLETED, RenderStatus.CANCELLED]
# -- Error Handlers --
@server.errorhandler(JobNotFoundError)
def handle_job_not_found(job_error):
return str(job_error), 400
@server.errorhandler(DetachedInstanceError)
def handle_detached_instance(error):
# logger.debug(f"detached instance: {error}")
return "Unavailable", 503
@server.errorhandler(Exception)
def handle_general_error(general_error):
err_msg = f"Server error: {general_error}"
logger.error(err_msg)
return err_msg, 500
# -- Jobs --
def sorted_jobs(all_jobs, sort_by_date=True):
if not sort_by_date:
sorted_job_list = []
@@ -60,9 +81,11 @@ def jobs_json():
job_cache_int = int(json.dumps(all_jobs).__hash__())
job_cache_token = num_to_alphanumeric(job_cache_int)
return {'jobs': all_jobs, 'token': job_cache_token}
except DetachedInstanceError as e:
raise e
except Exception as e:
logger.error(f"Error fetching jobs_json: {e}")
return {}, 500
raise e
@server.get('/api/jobs_long_poll')
@@ -78,9 +101,11 @@ def long_polling_jobs():
if time.time() - start_time > 30:
return {}, 204
time.sleep(1)
except DetachedInstanceError as e:
raise e
except Exception as e:
logger.error(f"Error fetching long_polling_jobs: {e}")
return {}, 500
raise e
@server.route('/api/job/<job_id>/thumbnail')
@@ -107,7 +132,7 @@ def job_thumbnail(job_id):
file_mime_type = mime_types.get(preview_to_send['kind'], 'unknown')
return send_file(preview_to_send['filename'], mimetype=file_mime_type)
except Exception as e:
logger.exception(f'Error getting thumbnail: {e}')
logger.error(f'Error getting thumbnail: {e}')
return f'Error getting thumbnail: {e}', 500
return "No thumbnail available", 404
@@ -145,11 +170,6 @@ def subjob_update_notification(job_id):
return "Job not found", 404
@server.errorhandler(JobNotFoundError)
def handle_job_not_found(job_error):
return f'Cannot find job with ID {job_error.job_id}', 400
@server.get('/api/job/<job_id>')
def get_job_status(job_id):
return RenderQueue.job_with_id(job_id).json()
@@ -488,75 +508,24 @@ def get_disk_benchmark():
return {'write_speed': results[0], 'read_speed': results[-1]}
def start_server():
def eval_loop(delay_sec=1):
while True:
try:
RenderQueue.evaluate_queue()
except Exception as e:
logger.error(f"Uncaught error while evaluating queue: {e}")
time.sleep(delay_sec)
def start_server(hostname=None):
try:
Config.setup_config_dir()
Config.load_config(system_safe_path(os.path.join(Config.config_dir(), 'config.yaml')))
# suppress requests logging
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
# get hostname
# get hostname
if not hostname:
local_hostname = socket.gethostname()
local_hostname = local_hostname + (".local" if not local_hostname.endswith(".local") else "")
hostname = local_hostname + (".local" if not local_hostname.endswith(".local") else "")
# load flask settings
server.config['HOSTNAME'] = local_hostname
server.config['PORT'] = int(Config.port_number)
server.config['UPLOAD_FOLDER'] = system_safe_path(os.path.expanduser(Config.upload_folder))
server.config['MAX_CONTENT_PATH'] = Config.max_content_path
server.config['enable_split_jobs'] = Config.enable_split_jobs
# load flask settings
server.config['HOSTNAME'] = hostname
server.config['PORT'] = int(Config.port_number)
server.config['UPLOAD_FOLDER'] = system_safe_path(os.path.expanduser(Config.upload_folder))
server.config['MAX_CONTENT_PATH'] = Config.max_content_path
server.config['enable_split_jobs'] = Config.enable_split_jobs
# Setup storage directories
EngineManager.engines_path = system_safe_path(os.path.join(os.path.join(os.path.expanduser(Config.upload_folder),
'engines')))
os.makedirs(EngineManager.engines_path, exist_ok=True)
PreviewManager.storage_path = system_safe_path(os.path.join(os.path.expanduser(Config.upload_folder), 'previews'))
# disable most Flask logging
flask_log = logging.getLogger('werkzeug')
flask_log.setLevel(Config.flask_log_level.upper())
server.config['THUMBS_FOLDER'] = PreviewManager.storage_path # todo: remove this
# Debug info
logger.debug(f"Upload directory: {server.config['UPLOAD_FOLDER']}")
logger.debug(f"Thumbs directory: {PreviewManager.storage_path}")
logger.debug(f"Engines directory: {EngineManager.engines_path}")
# disable most Flask logging
flask_log = logging.getLogger('werkzeug')
flask_log.setLevel(Config.flask_log_level.upper())
# check for updates for render engines if configured or on first launch
if Config.update_engines_on_launch or not EngineManager.get_engines():
EngineManager.update_all_engines()
# Set up the RenderQueue object
RenderQueue.load_state(database_directory=server.config['UPLOAD_FOLDER'])
ServerProxyManager.subscribe_to_listener()
DistributedJobManager.subscribe_to_listener()
thread = threading.Thread(target=eval_loop, kwargs={'delay_sec': Config.queue_eval_seconds}, daemon=True)
thread.start()
logger.info(f"Starting Zordon Render Server - Hostname: '{server.config['HOSTNAME']}:'")
ZeroconfServer.configure("_zordon._tcp.local.", server.config['HOSTNAME'], server.config['PORT'])
ZeroconfServer.properties = {'system_cpu': current_system_cpu(), 'system_cpu_cores': multiprocessing.cpu_count(),
'system_os': current_system_os(),
'system_os_version': current_system_os_version()}
ZeroconfServer.start()
try:
server.run(host='0.0.0.0', port=server.config['PORT'], debug=Config.flask_debug_enable,
use_reloader=False, threaded=True)
finally:
RenderQueue.save_state()
finally:
ZeroconfServer.stop()
logger.debug('Starting API server')
server.run(host='0.0.0.0', port=server.config['PORT'], debug=Config.flask_debug_enable, use_reloader=False,
threaded=True)

View File

@@ -1,22 +1,27 @@
''' app/init.py '''
import logging
import multiprocessing
import os
import socket
import sys
import threading
import time
from collections import deque
from PyQt6.QtCore import QObject, pyqtSignal
from PyQt6.QtWidgets import QApplication
from src.api.api_server import start_server
from src.api.preview_manager import PreviewManager
from src.api.serverproxy_manager import ServerProxyManager
from src.distributed_job_manager import DistributedJobManager
from src.engines.engine_manager import EngineManager
from src.render_queue import RenderQueue
from src.ui.main_window import MainWindow
from src.utilities.config import Config
from src.utilities.misc_helper import system_safe_path
from src.utilities.misc_helper import system_safe_path, current_system_cpu, current_system_os, current_system_os_version
from src.utilities.zeroconf_server import ZeroconfServer
logger = logging.getLogger()
def run() -> int:
def run(server_only=False) -> int:
"""
Initializes the application and runs it.
@@ -24,55 +29,130 @@ def run() -> int:
int: The exit status code.
"""
# setup logging
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', datefmt='%d-%b-%y %H:%M:%S',
level=Config.server_log_level.upper())
logging.getLogger("requests").setLevel(logging.WARNING) # suppress noisy requests/urllib3 logging
logging.getLogger("urllib3").setLevel(logging.WARNING)
# Setup logging for console ui
buffer_handler = __setup_buffer_handler() if not server_only else None
logger.info(f"Starting Zordon Render Server")
return_code = 0
try:
# Load Config YAML
Config.setup_config_dir()
Config.load_config(system_safe_path(os.path.join(Config.config_dir(), 'config.yaml')))
# configure default paths
EngineManager.engines_path = system_safe_path(
os.path.join(os.path.join(os.path.expanduser(Config.upload_folder),
'engines')))
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', datefmt='%d-%b-%y %H:%M:%S',
level=Config.server_log_level.upper())
os.makedirs(EngineManager.engines_path, exist_ok=True)
PreviewManager.storage_path = system_safe_path(
os.path.join(os.path.expanduser(Config.upload_folder), 'previews'))
app: QApplication = QApplication(sys.argv)
# Debug info
logger.debug(f"Upload directory: {os.path.expanduser(Config.upload_folder)}")
logger.debug(f"Thumbs directory: {PreviewManager.storage_path}")
logger.debug(f"Engines directory: {EngineManager.engines_path}")
# Start server in background
background_server = threading.Thread(target=start_server)
background_server.daemon = True
background_server.start()
# Set up the RenderQueue object
RenderQueue.load_state(database_directory=system_safe_path(os.path.expanduser(Config.upload_folder)))
ServerProxyManager.subscribe_to_listener()
DistributedJobManager.subscribe_to_listener()
# Setup logging for console ui
buffer_handler = BufferingHandler()
buffer_handler.setFormatter(logging.getLogger().handlers[0].formatter)
logger = logging.getLogger()
logger.addHandler(buffer_handler)
# check for updates for render engines if configured or on first launch
if Config.update_engines_on_launch or not EngineManager.get_engines():
EngineManager.update_all_engines()
window: MainWindow = MainWindow()
window.buffer_handler = buffer_handler
window.show()
# get hostname
local_hostname = socket.gethostname()
local_hostname = local_hostname + (".local" if not local_hostname.endswith(".local") else "")
return_code = app.exec()
# configure and start API server
api_server = threading.Thread(target=start_server, args=(local_hostname,))
api_server.daemon = True
api_server.start()
# start zeroconf server
ZeroconfServer.configure("_zordon._tcp.local.", local_hostname, Config.port_number)
ZeroconfServer.properties = {'system_cpu': current_system_cpu(),
'system_cpu_cores': multiprocessing.cpu_count(),
'system_os': current_system_os(),
'system_os_version': current_system_os_version()}
ZeroconfServer.start()
logger.info(f"Zordon Render Server started - Hostname: {local_hostname}")
RenderQueue.evaluation_inverval = Config.queue_eval_seconds
RenderQueue.start()
# start in gui or server only (cli) mode
logger.debug(f"Launching in {'server only' if server_only else 'GUI'} mode")
if server_only: # CLI only
api_server.join()
else: # GUI
return_code = __show_gui(buffer_handler)
except KeyboardInterrupt:
pass
except Exception as e:
logging.error(f"Unhandled exception: {e}")
return_code = 1
finally:
RenderQueue.prepare_for_shutdown()
# shut down gracefully
logger.info(f"Zordon Render Server is preparing to shut down")
try:
RenderQueue.prepare_for_shutdown()
except Exception as e:
logger.exception(f"Exception during prepare for shutdown: {e}")
ZeroconfServer.stop()
logger.info(f"Zordon Render Server has shut down")
return sys.exit(return_code)
class BufferingHandler(logging.Handler, QObject):
new_record = pyqtSignal(str)
def __setup_buffer_handler():
# lazy load GUI frameworks
from PyQt6.QtCore import QObject, pyqtSignal
def __init__(self, capacity=100):
logging.Handler.__init__(self)
QObject.__init__(self)
self.buffer = deque(maxlen=capacity) # Define a buffer with a fixed capacity
class BufferingHandler(logging.Handler, QObject):
new_record = pyqtSignal(str)
def emit(self, record):
msg = self.format(record)
self.buffer.append(msg) # Add message to the buffer
self.new_record.emit(msg) # Emit signal
def __init__(self, capacity=100):
logging.Handler.__init__(self)
QObject.__init__(self)
self.buffer = deque(maxlen=capacity) # Define a buffer with a fixed capacity
def get_buffer(self):
return list(self.buffer) # Return a copy of the buffer
def emit(self, record):
try:
msg = self.format(record)
self.buffer.append(msg) # Add message to the buffer
self.new_record.emit(msg) # Emit signal
except RuntimeError:
pass
def get_buffer(self):
return list(self.buffer) # Return a copy of the buffer
buffer_handler = BufferingHandler()
buffer_handler.setFormatter(logging.getLogger().handlers[0].formatter)
logger = logging.getLogger()
logger.addHandler(buffer_handler)
return buffer_handler
def __show_gui(buffer_handler):
# lazy load GUI frameworks
from PyQt6.QtWidgets import QApplication
# load application
app: QApplication = QApplication(sys.argv)
# configure main window
from src.ui.main_window import MainWindow
window: MainWindow = MainWindow()
window.buffer_handler = buffer_handler
window.show()
return app.exec()

View File

@@ -1,13 +1,15 @@
import logging
import os
import threading
import time
from datetime import datetime
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm.exc import DetachedInstanceError
from src.utilities.status_utils import RenderStatus
from src.engines.engine_manager import EngineManager
from src.engines.core.base_worker import Base
from src.utilities.status_utils import RenderStatus
logger = logging.getLogger()
@@ -17,6 +19,9 @@ class JobNotFoundError(Exception):
super().__init__(args)
self.job_id = job_id
def __str__(self):
return f"Cannot find job with ID: {self.job_id}"
class RenderQueue:
engine = None
@@ -24,9 +29,36 @@ class RenderQueue:
job_queue = []
maximum_renderer_instances = {'blender': 1, 'aerender': 1, 'ffmpeg': 4}
last_saved_counts = {}
is_running = False
__eval_thread = None
evaluation_inverval = 1
def __init__(self):
pass
# --------------------------------------------
# Start / Stop Background Updates
# --------------------------------------------
@classmethod
def start(cls):
cls.is_running = True
cls.__eval_thread = threading.Thread(target=cls.__eval_loop, daemon=True)
cls.__eval_thread.start()
@classmethod
def __eval_loop(cls):
while cls.is_running:
try:
RenderQueue.evaluate_queue()
except Exception as e:
logger.exception(f"Uncaught error while evaluating queue: {e}")
time.sleep(cls.evaluation_inverval)
@classmethod
def stop(cls):
cls.is_running = False
# --------------------------------------------
# Queue Management
# --------------------------------------------
@classmethod
def add_to_render_queue(cls, render_job, force_start=False):
@@ -89,6 +121,7 @@ class RenderQueue:
@classmethod
def prepare_for_shutdown(cls):
logger.debug("Closing session")
cls.stop()
running_jobs = cls.jobs_with_status(RenderStatus.RUNNING) # cancel all running jobs
[cls.cancel_job(job) for job in running_jobs]
cls.save_state()
@@ -105,19 +138,22 @@ class RenderQueue:
@classmethod
def evaluate_queue(cls):
not_started = cls.jobs_with_status(RenderStatus.NOT_STARTED, priority_sorted=True)
for job in not_started:
if cls.is_available_for_job(job.renderer, job.priority):
cls.start_job(job)
try:
not_started = cls.jobs_with_status(RenderStatus.NOT_STARTED, priority_sorted=True)
for job in not_started:
if cls.is_available_for_job(job.renderer, job.priority):
cls.start_job(job)
scheduled = cls.jobs_with_status(RenderStatus.SCHEDULED, priority_sorted=True)
for job in scheduled:
if job.scheduled_start <= datetime.now():
logger.debug(f"Starting scheduled job: {job}")
cls.start_job(job)
scheduled = cls.jobs_with_status(RenderStatus.SCHEDULED, priority_sorted=True)
for job in scheduled:
if job.scheduled_start <= datetime.now():
logger.debug(f"Starting scheduled job: {job}")
cls.start_job(job)
if cls.last_saved_counts != cls.job_counts():
cls.save_state()
if cls.last_saved_counts != cls.job_counts():
cls.save_state()
except DetachedInstanceError:
pass
@classmethod
def start_job(cls, job):

View File

@@ -1,4 +1,3 @@
import sys
import logging
from PyQt6.QtGui import QFont
@@ -16,7 +15,10 @@ class QSignalHandler(logging.Handler, QObject):
def emit(self, record):
msg = self.format(record)
self.new_record.emit(msg) # Emit signal
try:
self.new_record.emit(msg) # Emit signal
except RuntimeError:
pass
class ConsoleWindow(QMainWindow):

View File

@@ -183,8 +183,13 @@ class MainWindow(QMainWindow):
def __background_update(self):
while True:
self.update_servers()
self.fetch_jobs()
try:
self.update_servers()
self.fetch_jobs()
except RuntimeError:
pass
except Exception as e:
logger.error(f"Uncaught exception in background update: {e}")
time.sleep(0.5)
def closeEvent(self, event):
@@ -372,30 +377,36 @@ class MainWindow(QMainWindow):
def load_image_path(self, image_path):
# Load and set the image using QPixmap
pixmap = QPixmap(image_path)
if not pixmap:
logger.error("Error loading image")
return
self.image_label.setPixmap(pixmap)
try:
pixmap = QPixmap(image_path)
if not pixmap:
logger.error("Error loading image")
return
self.image_label.setPixmap(pixmap)
except Exception as e:
logger.error(f"Error loading image path: {e}")
def load_image_data(self, pillow_image):
# Convert the Pillow Image to a QByteArray (byte buffer)
byte_array = QByteArray()
buffer = QBuffer(byte_array)
buffer.open(QIODevice.OpenModeFlag.WriteOnly)
pillow_image.save(buffer, "PNG")
buffer.close()
try:
# Convert the Pillow Image to a QByteArray (byte buffer)
byte_array = QByteArray()
buffer = QBuffer(byte_array)
buffer.open(QIODevice.OpenModeFlag.WriteOnly)
pillow_image.save(buffer, "PNG")
buffer.close()
# Create a QImage from the QByteArray
image = QImage.fromData(byte_array)
# Create a QImage from the QByteArray
image = QImage.fromData(byte_array)
# Create a QPixmap from the QImage
pixmap = QPixmap.fromImage(image)
# Create a QPixmap from the QImage
pixmap = QPixmap.fromImage(image)
if not pixmap:
logger.error("Error loading image")
return
self.image_label.setPixmap(pixmap)
if not pixmap:
logger.error("Error loading image")
return
self.image_label.setPixmap(pixmap)
except Exception as e:
logger.error(f"Error loading image data: {e}")
def update_servers(self):
found_servers = list(set(ZeroconfServer.found_hostnames() + self.added_hostnames))

View File

@@ -32,20 +32,23 @@ class StatusBar(QStatusBar):
# Check for status change every 1s on background thread
while True:
new_status = proxy.status()
new_image_name = image_names.get(new_status, 'Synchronize.png')
image_path = os.path.join(resources_dir(), new_image_name)
self.label.setPixmap((QPixmap(image_path).scaled(16, 16, Qt.AspectRatioMode.KeepAspectRatio)))
try:
# update status label - get download status
new_status = proxy.status()
if EngineManager.download_tasks:
if len(EngineManager.download_tasks) == 1:
task = EngineManager.download_tasks[0]
new_status = f"{new_status} | Downloading {task.engine.capitalize()} {task.version}..."
else:
new_status = f"{new_status} | Downloading {len(EngineManager.download_tasks)} engines"
self.messageLabel.setText(new_status)
# add download status
if EngineManager.download_tasks:
if len(EngineManager.download_tasks) == 1:
task = EngineManager.download_tasks[0]
new_status = f"{new_status} | Downloading {task.engine.capitalize()} {task.version}..."
else:
new_status = f"{new_status} | Downloading {len(EngineManager.download_tasks)} engines"
self.messageLabel.setText(new_status)
# update status image
new_image_name = image_names.get(new_status, 'Synchronize.png')
new_image_path = os.path.join(resources_dir(), new_image_name)
self.label.setPixmap((QPixmap(new_image_path).scaled(16, 16, Qt.AspectRatioMode.KeepAspectRatio)))
except RuntimeError: # ignore runtime errors during shutdown
pass
time.sleep(1)
background_thread = threading.Thread(target=background_update,)

View File

@@ -2,7 +2,8 @@ import logging
import socket
from pubsub import pub
from zeroconf import Zeroconf, ServiceInfo, ServiceBrowser, ServiceStateChange, NonUniqueNameException
from zeroconf import Zeroconf, ServiceInfo, ServiceBrowser, ServiceStateChange, NonUniqueNameException, \
NotRunningException
logger = logging.getLogger()
@@ -31,12 +32,14 @@ class ZeroconfServer:
def start(cls, listen_only=False):
if not cls.service_type:
raise RuntimeError("The 'configure' method must be run before starting the zeroconf server")
logger.debug("Starting zeroconf service")
if not listen_only:
cls._register_service()
cls._browse_services()
@classmethod
def stop(cls):
logger.debug("Stopping zeroconf service")
cls._unregister_service()
cls.zeroconf.close()
@@ -73,15 +76,18 @@ class ZeroconfServer:
@classmethod
def _on_service_discovered(cls, zeroconf, service_type, name, state_change):
info = zeroconf.get_service_info(service_type, name)
hostname = name.split(f'.{cls.service_type}')[0]
logger.debug(f"Zeroconf: {hostname} {state_change}")
if service_type == cls.service_type:
if state_change == ServiceStateChange.Added or state_change == ServiceStateChange.Updated:
cls.client_cache[hostname] = info
else:
cls.client_cache.pop(hostname)
pub.sendMessage('zeroconf_state_change', hostname=hostname, state_change=state_change)
try:
info = zeroconf.get_service_info(service_type, name)
hostname = name.split(f'.{cls.service_type}')[0]
logger.debug(f"Zeroconf: {hostname} {state_change}")
if service_type == cls.service_type:
if state_change == ServiceStateChange.Added or state_change == ServiceStateChange.Updated:
cls.client_cache[hostname] = info
else:
cls.client_cache.pop(hostname)
pub.sendMessage('zeroconf_state_change', hostname=hostname, state_change=state_change)
except NotRunningException:
pass
@classmethod
def found_hostnames(cls):
@@ -104,9 +110,15 @@ class ZeroconfServer:
# Example usage:
if __name__ == "__main__":
import time
logging.basicConfig(level=logging.DEBUG)
ZeroconfServer.configure("_zordon._tcp.local.", "foobar.local", 8080)
try:
ZeroconfServer.start()
input("Server running - Press enter to end")
while True:
time.sleep(0.1)
except KeyboardInterrupt:
pass
finally:
ZeroconfServer.stop()