Refactor: Move all initialization logic out of api_server and into init (#91)

* Zeroconf logging improvements

* Ignore RuntimeErrors in background threads to prevent issues during shutdown

* Migrate startup code from api_server.py to init.py

* Add error handlers to the API server to handle detached instances

* Integrate the RenderQueue eval loop into the RenderQueue object (sketched after this list)

* Silently catch RuntimeErrors on evaluate_queue

* Stop background queue updates in prepare_for_shutdown
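
The RenderQueue-side changes are not part of the api_server.py diff below. As a minimal sketch of the pattern the RuntimeError and prepare_for_shutdown bullets describe, assuming a class-level RenderQueue with the evaluate_queue() seen in this diff and the prepare_for_shutdown() named above (the stop event and thread handling are illustrative, not the actual implementation):

```python
import logging
import threading

logger = logging.getLogger(__name__)


class RenderQueue:
    """Illustrative sketch: background queue evaluation owned by the queue object."""

    _stop_event = threading.Event()
    _eval_thread = None

    @classmethod
    def start_background_updates(cls, delay_sec=1):
        # Re-evaluate the queue on a daemon thread until shutdown is requested.
        def _loop():
            while not cls._stop_event.is_set():
                try:
                    cls.evaluate_queue()
                except RuntimeError:
                    # Silently ignore RuntimeErrors, which can fire while the
                    # interpreter or session is tearing down during shutdown.
                    pass
                except Exception as e:
                    logger.error(f"Uncaught error while evaluating queue: {e}")
                cls._stop_event.wait(delay_sec)

        cls._eval_thread = threading.Thread(target=_loop, daemon=True)
        cls._eval_thread.start()

    @classmethod
    def prepare_for_shutdown(cls):
        # Stop background queue updates before the process exits.
        cls._stop_event.set()
        if cls._eval_thread is not None:
            cls._eval_thread.join(timeout=5)

    @classmethod
    def evaluate_queue(cls):
        ...  # existing queue-evaluation logic lives here
```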
2024-08-08 04:47:22 -05:00 · committed by GitHub
parent 6afb6e65a6 · commit 3600eeb21b
8 changed files with 290 additions and 177 deletions

api_server.py

@@ -2,14 +2,12 @@
import concurrent.futures
import json
import logging
import multiprocessing
import os
import pathlib
import shutil
import socket
import ssl
import tempfile
import threading
import time
from datetime import datetime
from zipfile import ZipFile
@@ -17,10 +15,10 @@ from zipfile import ZipFile
import psutil
import yaml
from flask import Flask, request, send_file, after_this_request, Response, redirect, url_for, abort
from sqlalchemy.orm.exc import DetachedInstanceError
from src.api.add_job_helpers import handle_uploaded_project_files, process_zipped_project
from src.api.preview_manager import PreviewManager
from src.api.serverproxy_manager import ServerProxyManager
from src.distributed_job_manager import DistributedJobManager
from src.engines.core.base_worker import string_to_status, RenderStatus
from src.engines.engine_manager import EngineManager
@@ -39,6 +37,29 @@ categories = [RenderStatus.RUNNING, RenderStatus.ERROR, RenderStatus.NOT_STARTED
RenderStatus.COMPLETED, RenderStatus.CANCELLED]
# -- Error Handlers --
@server.errorhandler(JobNotFoundError)
def handle_job_not_found(job_error):
return str(job_error), 400
@server.errorhandler(DetachedInstanceError)
def handle_detached_instance(error):
# logger.debug(f"detached instance: {error}")
return "Unavailable", 503
@server.errorhandler(Exception)
def handle_general_error(general_error):
err_msg = f"Server error: {general_error}"
logger.error(err_msg)
return err_msg, 500
# -- Jobs --
def sorted_jobs(all_jobs, sort_by_date=True):
if not sort_by_date:
sorted_job_list = []
@@ -60,9 +81,11 @@ def jobs_json():
job_cache_int = int(json.dumps(all_jobs).__hash__())
job_cache_token = num_to_alphanumeric(job_cache_int)
return {'jobs': all_jobs, 'token': job_cache_token}
except DetachedInstanceError as e:
raise e
except Exception as e:
logger.error(f"Error fetching jobs_json: {e}")
return {}, 500
raise e
@server.get('/api/jobs_long_poll')
@@ -78,9 +101,11 @@ def long_polling_jobs():
if time.time() - start_time > 30:
return {}, 204
time.sleep(1)
except DetachedInstanceError as e:
raise e
except Exception as e:
logger.error(f"Error fetching long_polling_jobs: {e}")
return {}, 500
raise e
@server.route('/api/job/<job_id>/thumbnail')
@@ -107,7 +132,7 @@ def job_thumbnail(job_id):
file_mime_type = mime_types.get(preview_to_send['kind'], 'unknown')
return send_file(preview_to_send['filename'], mimetype=file_mime_type)
except Exception as e:
logger.exception(f'Error getting thumbnail: {e}')
logger.error(f'Error getting thumbnail: {e}')
return f'Error getting thumbnail: {e}', 500
return "No thumbnail available", 404
@@ -145,11 +170,6 @@ def subjob_update_notification(job_id):
return "Job not found", 404
@server.errorhandler(JobNotFoundError)
def handle_job_not_found(job_error):
return f'Cannot find job with ID {job_error.job_id}', 400
@server.get('/api/job/<job_id>')
def get_job_status(job_id):
return RenderQueue.job_with_id(job_id).json()
@@ -488,75 +508,24 @@ def get_disk_benchmark():
return {'write_speed': results[0], 'read_speed': results[-1]}
def start_server():
def eval_loop(delay_sec=1):
while True:
try:
RenderQueue.evaluate_queue()
except Exception as e:
logger.error(f"Uncaught error while evaluating queue: {e}")
time.sleep(delay_sec)
def start_server(hostname=None):
try:
Config.setup_config_dir()
Config.load_config(system_safe_path(os.path.join(Config.config_dir(), 'config.yaml')))
# suppress requests logging
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
# get hostname
# get hostname
if not hostname:
local_hostname = socket.gethostname()
local_hostname = local_hostname + (".local" if not local_hostname.endswith(".local") else "")
hostname = local_hostname + (".local" if not local_hostname.endswith(".local") else "")
# load flask settings
server.config['HOSTNAME'] = local_hostname
server.config['PORT'] = int(Config.port_number)
server.config['UPLOAD_FOLDER'] = system_safe_path(os.path.expanduser(Config.upload_folder))
server.config['MAX_CONTENT_PATH'] = Config.max_content_path
server.config['enable_split_jobs'] = Config.enable_split_jobs
# load flask settings
server.config['HOSTNAME'] = hostname
server.config['PORT'] = int(Config.port_number)
server.config['UPLOAD_FOLDER'] = system_safe_path(os.path.expanduser(Config.upload_folder))
server.config['MAX_CONTENT_PATH'] = Config.max_content_path
server.config['enable_split_jobs'] = Config.enable_split_jobs
# Setup storage directories
EngineManager.engines_path = system_safe_path(os.path.join(os.path.join(os.path.expanduser(Config.upload_folder),
'engines')))
os.makedirs(EngineManager.engines_path, exist_ok=True)
PreviewManager.storage_path = system_safe_path(os.path.join(os.path.expanduser(Config.upload_folder), 'previews'))
# disable most Flask logging
flask_log = logging.getLogger('werkzeug')
flask_log.setLevel(Config.flask_log_level.upper())
server.config['THUMBS_FOLDER'] = PreviewManager.storage_path # todo: remove this
# Debug info
logger.debug(f"Upload directory: {server.config['UPLOAD_FOLDER']}")
logger.debug(f"Thumbs directory: {PreviewManager.storage_path}")
logger.debug(f"Engines directory: {EngineManager.engines_path}")
# disable most Flask logging
flask_log = logging.getLogger('werkzeug')
flask_log.setLevel(Config.flask_log_level.upper())
# check for updates for render engines if configured or on first launch
if Config.update_engines_on_launch or not EngineManager.get_engines():
EngineManager.update_all_engines()
# Set up the RenderQueue object
RenderQueue.load_state(database_directory=server.config['UPLOAD_FOLDER'])
ServerProxyManager.subscribe_to_listener()
DistributedJobManager.subscribe_to_listener()
thread = threading.Thread(target=eval_loop, kwargs={'delay_sec': Config.queue_eval_seconds}, daemon=True)
thread.start()
logger.info(f"Starting Zordon Render Server - Hostname: '{server.config['HOSTNAME']}:'")
ZeroconfServer.configure("_zordon._tcp.local.", server.config['HOSTNAME'], server.config['PORT'])
ZeroconfServer.properties = {'system_cpu': current_system_cpu(), 'system_cpu_cores': multiprocessing.cpu_count(),
'system_os': current_system_os(),
'system_os_version': current_system_os_version()}
ZeroconfServer.start()
try:
server.run(host='0.0.0.0', port=server.config['PORT'], debug=Config.flask_debug_enable,
use_reloader=False, threaded=True)
finally:
RenderQueue.save_state()
finally:
ZeroconfServer.stop()
logger.debug('Starting API server')
server.run(host='0.0.0.0', port=server.config['PORT'], debug=Config.flask_debug_enable, use_reloader=False,
threaded=True)
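
The init.py side of the refactor is not included in this diff; the hunk above only shows start_server() shrinking to start_server(hostname=None) and losing its Zeroconf, eval-loop, and hostname setup. A hedged sketch of what the migrated startup orchestration might look like, where the start_app name, the import paths, and the exact ordering are assumptions for illustration:

```python
import multiprocessing
import socket

# Import paths below are assumptions for illustration; only
# start_server(hostname=...) is taken from the diff above.
from src.api import api_server
from src.api.zeroconf_server import ZeroconfServer  # hypothetical path
from src.render_queue import RenderQueue            # hypothetical path


def start_app(port=8080):
    """Illustrative init-side orchestration of the startup steps moved out of api_server."""
    # Resolve the advertised hostname here instead of inside the API server.
    hostname = socket.gethostname()
    if not hostname.endswith(".local"):
        hostname += ".local"

    # Advertise the service over Zeroconf before the Flask server blocks.
    ZeroconfServer.configure("_zordon._tcp.local.", hostname, port)
    ZeroconfServer.properties = {'system_cpu_cores': multiprocessing.cpu_count()}
    ZeroconfServer.start()

    try:
        # start_server() now only configures and runs Flask.
        api_server.start_server(hostname=hostname)
    finally:
        # Mirror the cleanup that previously lived in api_server.start_server().
        RenderQueue.prepare_for_shutdown()
        RenderQueue.save_state()
        ZeroconfServer.stop()
```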