Files
Zordon/lib/server/api_server.py
Brett Williams 34fbdaa4d9 Refactor: DistributedJobManager with pub/sub status change notifications (#25)
* Add pubsub to render_queue and base_worker

* Refactor: Convert ZeroconfServer to Singleton with Class Methods

* New API for subjob servers to notify parent job servers of status changes

* Refactor: Move all subjob related methods to distributed_job_manager.py

* Rewrite for wait_for_subjobs

* Fix: DistributedJobManager.find_available_servers() takes 1 positional argument but 3 were given

* DistributedJobManager should now notify / be notified abotu background job changes

* Fix the make_ready api. Change children keyname to be id@hostname so it can be unique

* Fixes

* Image sequence to movie needs to find the actual start frame

* Fix: subjob_status_change did not return a valid response

* Fix client renderer selection

* Small fix for subjob status checking

* Fix issue with divide_frames_equally

* Fix issue where downloads were not occurring

* Fix issue where old status was being reported

* Add docstrings and code cleanup
2023-06-30 19:49:57 -05:00

595 lines
25 KiB
Python
Executable File

#!/usr/bin/env python3
import json
import logging
import os
import pathlib
import platform
import shutil
import socket
import ssl
import threading
import time
import zipfile
from datetime import datetime
from urllib.request import urlretrieve
from zipfile import ZipFile
import json2html
import psutil
import yaml
from flask import Flask, request, render_template, send_file, after_this_request, Response, redirect, url_for, abort
from werkzeug.utils import secure_filename
from lib.distributed_job_manager import DistributedJobManager
from lib.render_queue import RenderQueue, JobNotFoundError
from lib.server.server_proxy import RenderServerProxy
from lib.server.zeroconf_server import ZeroconfServer
from lib.utilities.server_helper import generate_thumbnail_for_job
from lib.workers.base_worker import string_to_status, RenderStatus
from lib.workers.worker_factory import RenderWorkerFactory
logger = logging.getLogger()
server = Flask(__name__, template_folder='templates', static_folder='static')
ssl._create_default_https_context = ssl._create_unverified_context # disable SSL for downloads
categories = [RenderStatus.RUNNING, RenderStatus.ERROR, RenderStatus.NOT_STARTED, RenderStatus.SCHEDULED,
RenderStatus.COMPLETED, RenderStatus.CANCELLED]
def sorted_jobs(all_jobs, sort_by_date=True):
if not sort_by_date:
sorted_job_list = []
if all_jobs:
for status_category in categories:
found_jobs = [x for x in all_jobs if x.status == status_category.value]
if found_jobs:
sorted_found_jobs = sorted(found_jobs, key=lambda d: d.date_created, reverse=True)
sorted_job_list.extend(sorted_found_jobs)
else:
sorted_job_list = sorted(all_jobs, key=lambda d: d.date_created, reverse=True)
return sorted_job_list
@server.route('/')
@server.route('/index')
def index():
with open('config/presets.yaml') as f:
presets = yaml.load(f, Loader=yaml.FullLoader)
return render_template('index.html', all_jobs=sorted_jobs(RenderQueue.all_jobs()),
hostname=server.config['HOSTNAME'], renderer_info=renderer_info(),
render_clients=[server.config['HOSTNAME']], preset_list=presets)
@server.get('/api/jobs')
def jobs_json():
try:
hash_token = request.args.get('token', None)
all_jobs = [x.json() for x in RenderQueue.all_jobs()]
job_cache_token = str(json.dumps(all_jobs).__hash__())
if hash_token and hash_token == job_cache_token:
return [], 204 # no need to update
else:
return {'jobs': all_jobs, 'token': job_cache_token}
except Exception as e:
logger.exception(f"Exception fetching all_jobs_cached: {e}")
return [], 500
@server.route('/ui/job/<job_id>/full_details')
def job_detail(job_id):
found_job = RenderQueue.job_with_id(job_id)
table_html = json2html.json2html.convert(json=found_job.json(),
table_attributes='class="table is-narrow is-striped is-fullwidth"')
media_url = None
if found_job.file_list() and found_job.status == RenderStatus.COMPLETED:
media_basename = os.path.basename(found_job.file_list()[0])
media_url = f"/api/job/{job_id}/file/{media_basename}"
return render_template('details.html', detail_table=table_html, media_url=media_url,
hostname=server.config['HOSTNAME'], job_status=found_job.status.value.title(),
job=found_job, renderer_info=renderer_info())
@server.route('/api/job/<job_id>/thumbnail')
def job_thumbnail(job_id):
big_thumb = request.args.get('size', False) == "big"
video_ok = request.args.get('video_ok', False)
found_job = RenderQueue.job_with_id(job_id, none_ok=True)
if found_job:
os.makedirs(server.config['THUMBS_FOLDER'], exist_ok=True)
thumb_video_path = os.path.join(server.config['THUMBS_FOLDER'], found_job.id + '.mp4')
thumb_image_path = os.path.join(server.config['THUMBS_FOLDER'], found_job.id + '.jpg')
big_video_path = os.path.join(server.config['THUMBS_FOLDER'], found_job.id + '_big.mp4')
big_image_path = os.path.join(server.config['THUMBS_FOLDER'], found_job.id + '_big.jpg')
# generate regular thumb if it doesn't exist
if not os.path.exists(thumb_video_path) and not os.path.exists(thumb_video_path + '_IN-PROGRESS') and \
found_job.status not in [RenderStatus.CANCELLED, RenderStatus.ERROR]:
generate_thumbnail_for_job(found_job, thumb_video_path, thumb_image_path, max_width=240)
# generate big thumb if it doesn't exist
if not os.path.exists(big_video_path) and not os.path.exists(big_image_path + '_IN-PROGRESS') and \
found_job.status not in [RenderStatus.CANCELLED, RenderStatus.ERROR]:
generate_thumbnail_for_job(found_job, big_video_path, big_image_path, max_width=800)
# generated videos
if video_ok:
if big_thumb and os.path.exists(big_video_path) and not os.path.exists(
big_video_path + '_IN-PROGRESS'):
return send_file(big_video_path, mimetype="video/mp4")
elif os.path.exists(thumb_video_path) and not os.path.exists(thumb_video_path + '_IN-PROGRESS'):
return send_file(thumb_video_path, mimetype="video/mp4")
# Generated thumbs
if big_thumb and os.path.exists(big_image_path):
return send_file(big_image_path, mimetype='image/jpeg')
elif os.path.exists(thumb_image_path):
return send_file(thumb_image_path, mimetype='image/jpeg')
# Misc status icons
if found_job.status == RenderStatus.RUNNING:
return send_file('static/images/gears.png', mimetype="image/png")
elif found_job.status == RenderStatus.CANCELLED:
return send_file('static/images/cancelled.png', mimetype="image/png")
elif found_job.status == RenderStatus.SCHEDULED:
return send_file('static/images/scheduled.png', mimetype="image/png")
elif found_job.status == RenderStatus.NOT_STARTED:
return send_file('static/images/not_started.png', mimetype="image/png")
# errors
return send_file('static/images/error.png', mimetype="image/png")
# Get job file routing
@server.route('/api/job/<job_id>/file/<filename>', methods=['GET'])
def get_job_file(job_id, filename):
found_job = RenderQueue.job_with_id(job_id)
try:
for full_path in found_job.file_list():
if filename in full_path:
return send_file(path_or_file=full_path)
except FileNotFoundError:
abort(404)
@server.get('/api/jobs/<status_val>')
def filtered_jobs_json(status_val):
state = string_to_status(status_val)
jobs = [x.json() for x in RenderQueue.jobs_with_status(state)]
if jobs:
return jobs
else:
return f'Cannot find jobs with status {status_val}', 400
@server.post('/api/job/<job_id>/notify_parent_of_status_change')
def subjob_status_change(job_id):
try:
subjob_details = request.json
logger.info(f"Subjob to job id: {job_id} is now {subjob_details['status']}")
DistributedJobManager.handle_subjob_status_change(RenderQueue.job_with_id(job_id), subjob_data=subjob_details)
return Response(status=200)
except JobNotFoundError:
return "Job not found", 404
@server.errorhandler(JobNotFoundError)
def handle_job_not_found(job_error):
return f'Cannot find job with ID {job_error.job_id}', 400
@server.get('/api/job/<job_id>')
def get_job_status(job_id):
return RenderQueue.job_with_id(job_id).json()
@server.get('/api/job/<job_id>/logs')
def get_job_logs(job_id):
found_job = RenderQueue.job_with_id(job_id)
log_path = found_job.log_path()
log_data = None
if log_path and os.path.exists(log_path):
with open(log_path) as file:
log_data = file.read()
return Response(log_data, mimetype='text/plain')
@server.get('/api/job/<job_id>/file_list')
def get_file_list(job_id):
return RenderQueue.job_with_id(job_id).file_list()
@server.get('/api/job/<job_id>/make_ready')
def make_job_ready(job_id):
try:
found_job = RenderQueue.job_with_id(job_id)
if found_job.status in [RenderStatus.CONFIGURING, RenderStatus.NOT_STARTED]:
if found_job.children:
for child_key in found_job.children.keys():
child_id = child_key.split('@')[0]
hostname = child_key.split('@')[-1]
RenderServerProxy(hostname).request_data(f'job/{child_id}/make_ready')
found_job.status = RenderStatus.NOT_STARTED
RenderQueue.save_state()
return found_job.json(), 200
except Exception as e:
return "Error making job ready: {e}", 500
return "Not valid command", 405
@server.route('/api/job/<job_id>/download_all')
def download_all(job_id):
zip_filename = None
@after_this_request
def clear_zip(response):
if zip_filename and os.path.exists(zip_filename):
os.remove(zip_filename)
return response
found_job = RenderQueue.job_with_id(job_id)
output_dir = os.path.dirname(found_job.output_path)
if os.path.exists(output_dir):
zip_filename = os.path.join('/tmp', pathlib.Path(found_job.input_path).stem + '.zip')
with ZipFile(zip_filename, 'w') as zipObj:
for f in os.listdir(output_dir):
zipObj.write(filename=os.path.join(output_dir, f),
arcname=os.path.basename(f))
return send_file(zip_filename, mimetype="zip", as_attachment=True, )
else:
return f'Cannot find project files for job {job_id}', 500
@server.get('/api/presets')
def presets():
with open('config/presets.yaml') as f:
presets = yaml.load(f, Loader=yaml.FullLoader)
return presets
@server.get('/api/full_status')
def full_status():
full_results = {'timestamp': datetime.now().isoformat(), 'servers': {}}
try:
snapshot_results = snapshot()
server_data = {'status': snapshot_results.get('status', {}), 'jobs': snapshot_results.get('jobs', {}),
'is_online': True}
full_results['servers'][server.config['HOSTNAME']] = server_data
except Exception as e:
logger.error(f"Exception fetching full status: {e}")
return full_results
@server.get('/api/snapshot')
def snapshot():
server_status = status()
server_jobs = [x.json() for x in RenderQueue.all_jobs()]
server_data = {'status': server_status, 'jobs': server_jobs, 'timestamp': datetime.now().isoformat()}
return server_data
@server.get('/api/_detected_clients')
def detected_clients():
# todo: dev/debug only. Should not ship this - probably.
return ZeroconfServer.found_clients()
@server.post('/api/add_job')
def add_job_handler():
# initial handling of raw data
try:
if request.is_json:
jobs_list = [request.json] if not isinstance(request.json, list) else request.json
elif request.form.get('json', None):
jobs_list = json.loads(request.form['json'])
else:
# Cleanup flat form data into nested structure
form_dict = {k: v for k, v in dict(request.form).items() if v}
args = {}
arg_keys = [k for k in form_dict.keys() if '-arg_' in k]
for server_hostname in arg_keys:
if form_dict['renderer'] in server_hostname or 'AnyRenderer' in server_hostname:
cleaned_key = server_hostname.split('-arg_')[-1]
args[cleaned_key] = form_dict[server_hostname]
form_dict.pop(server_hostname)
args['raw'] = form_dict.get('raw_args', None)
form_dict['args'] = args
jobs_list = [form_dict]
except Exception as e:
err_msg = f"Error processing job data: {e}"
logger.error(err_msg)
return err_msg, 500
# start handling project files
try:
# handle uploaded files
logger.debug(f"Incoming new job request: {jobs_list}")
uploaded_project = request.files.get('file', None)
project_url = jobs_list[0].get('url', None)
local_path = jobs_list[0].get('local_path', None)
renderer = jobs_list[0].get('renderer')
downloaded_file_url = None
if uploaded_project and uploaded_project.filename:
referred_name = os.path.basename(uploaded_project.filename)
elif project_url:
# download and save url - have to download first to know filename due to redirects
logger.info(f"Attempting to download URL: {project_url}")
try:
downloaded_file_url, info = urlretrieve(project_url)
referred_name = info.get_filename() or os.path.basename(project_url)
except Exception as e:
err_msg = f"Error downloading file: {e}"
logger.error(err_msg)
return err_msg, 406
elif local_path and os.path.exists(local_path):
referred_name = os.path.basename(local_path)
else:
return "Cannot find any valid project paths", 400
# prep local filepath
cleaned_path_name = os.path.splitext(referred_name)[0].replace(' ', '_')
job_dir = os.path.join(server.config['UPLOAD_FOLDER'], '-'.join(
[datetime.now().strftime("%Y.%m.%d_%H.%M.%S"), renderer, cleaned_path_name]))
os.makedirs(job_dir, exist_ok=True)
upload_dir = os.path.join(job_dir, 'source')
os.makedirs(upload_dir, exist_ok=True)
# move projects to their work directories
loaded_project_local_path = None
if uploaded_project and uploaded_project.filename:
loaded_project_local_path = os.path.join(upload_dir, secure_filename(uploaded_project.filename))
uploaded_project.save(loaded_project_local_path)
logger.info(f"Transfer complete for {loaded_project_local_path.split(server.config['UPLOAD_FOLDER'])[-1]}")
elif project_url:
loaded_project_local_path = os.path.join(upload_dir, referred_name)
shutil.move(downloaded_file_url, loaded_project_local_path)
logger.info(f"Download complete for {loaded_project_local_path.split(server.config['UPLOAD_FOLDER'])[-1]}")
elif local_path:
loaded_project_local_path = os.path.join(upload_dir, referred_name)
shutil.copy(local_path, loaded_project_local_path)
logger.info(f"Import complete for {loaded_project_local_path.split(server.config['UPLOAD_FOLDER'])[-1]}")
# process uploaded zip files
zip_path = loaded_project_local_path if loaded_project_local_path.lower().endswith('.zip') else None
if zip_path:
zip_path = loaded_project_local_path
work_path = os.path.dirname(zip_path)
try:
with zipfile.ZipFile(zip_path, 'r') as myzip:
myzip.extractall(os.path.dirname(zip_path))
project_files = [x for x in os.listdir(work_path) if os.path.isfile(os.path.join(work_path, x))]
project_files = [x for x in project_files if '.zip' not in x]
supported_exts = RenderWorkerFactory.class_for_name(renderer).engine.supported_extensions
if supported_exts:
project_files = [file for file in project_files if
any(file.endswith(ext) for ext in supported_exts)]
if len(project_files) != 1: # we have to narrow down to 1 main project file, otherwise error
return {'error': f'Cannot find valid project file in {os.path.basename(zip_path)}'}, 400
extracted_project_path = os.path.join(work_path, project_files[0])
logger.info(f"Extracted zip file to {extracted_project_path}")
loaded_project_local_path = extracted_project_path
except (zipfile.BadZipFile, zipfile.LargeZipFile) as e:
err_msg = f"Error processing zip file: {e}"
logger.error(err_msg)
return err_msg, 500
# create and add jobs to render queue
results = []
for job_data in jobs_list:
try:
# prepare output paths
output_dir = os.path.join(job_dir, job_data.get('name') if len(jobs_list) > 1 else 'output')
os.makedirs(output_dir, exist_ok=True)
# get new output path in output_dir
job_data['output_path'] = os.path.join(output_dir, os.path.basename(
job_data.get('output_path', None) or loaded_project_local_path
))
# create & configure jobs
worker = RenderWorkerFactory.create_worker(renderer=job_data['renderer'],
input_path=loaded_project_local_path,
output_path=job_data["output_path"],
args=job_data.get('args', {}))
worker.status = job_data.get("initial_status", worker.status)
worker.parent = job_data.get("parent", worker.parent)
worker.name = job_data.get("name", worker.name)
worker.priority = int(job_data.get('priority', worker.priority))
worker.start_frame = int(job_data.get("start_frame", worker.start_frame))
worker.end_frame = int(job_data.get("end_frame", worker.end_frame))
# determine if we can / should split the job
if server.config.get('enable_split_jobs', False) and (worker.total_frames > 1) and not worker.parent:
DistributedJobManager.split_into_subjobs(worker, job_data, zip_path or loaded_project_local_path)
RenderQueue.add_to_render_queue(worker, force_start=job_data.get('force_start', False))
if not worker.parent:
make_job_ready(worker.id)
results.append(worker.json())
except Exception as e:
err_msg = f"Exception creating render job: {e}"
logger.exception(err_msg)
results.append({'error': err_msg})
# return any errors from results list
for response in results:
if response.get('error', None):
return results, 400
# redirect to index if requested
if request.args.get('redirect', False):
return redirect(url_for('index'))
else:
return results, 200
except Exception as e:
logger.exception(f"Unknown error adding job: {e}")
return 'unknown error', 500
@server.get('/api/job/<job_id>/cancel')
def cancel_job(job_id):
if not request.args.get('confirm', False):
return 'Confirmation required to cancel job', 400
if RenderQueue.cancel_job(RenderQueue.job_with_id(job_id)):
if request.args.get('redirect', False):
return redirect(url_for('index'))
else:
return "Job cancelled"
else:
return "Unknown error", 500
@server.route('/api/job/<job_id>/delete', methods=['POST', 'GET'])
def delete_job(job_id):
try:
if not request.args.get('confirm', False):
return 'Confirmation required to delete job', 400
# Check if we can remove the 'output' directory
found_job = RenderQueue.job_with_id(job_id)
output_dir = os.path.dirname(found_job.output_path)
if server.config['UPLOAD_FOLDER'] in output_dir and os.path.exists(output_dir):
shutil.rmtree(output_dir)
# Remove any thumbnails
for filename in os.listdir(server.config['THUMBS_FOLDER']):
if job_id in filename:
os.remove(os.path.join(server.config['THUMBS_FOLDER'], filename))
thumb_path = os.path.join(server.config['THUMBS_FOLDER'], found_job.id + '.mp4')
if os.path.exists(thumb_path):
os.remove(thumb_path)
# See if we own the project_dir (i.e. was it uploaded)
project_dir = os.path.dirname(os.path.dirname(found_job.input_path))
if server.config['UPLOAD_FOLDER'] in project_dir and os.path.exists(project_dir):
# check to see if any other projects are sharing the same project file
project_dir_files = [f for f in os.listdir(project_dir) if not f.startswith('.')]
if len(project_dir_files) == 0 or (len(project_dir_files) == 1 and 'source' in project_dir_files[0]):
logger.info(f"Removing project directory: {project_dir}")
shutil.rmtree(project_dir)
RenderQueue.delete_job(found_job)
if request.args.get('redirect', False):
return redirect(url_for('index'))
else:
return "Job deleted", 200
except Exception as e:
logger.error(f"Error deleting job: {e}")
return f"Error deleting job: {e}", 500
@server.get('/api/clear_history')
def clear_history():
RenderQueue.clear_history()
return 'success'
@server.route('/api/status')
def status():
renderer_data = {}
for render_class in RenderWorkerFactory.supported_classes():
if render_class.engine.renderer_path(): # only return renderers installed on host
renderer_data[render_class.engine.name()] = \
{'version': render_class.engine.version(),
'is_available': RenderQueue.is_available_for_job(render_class.engine.name())
}
return {"timestamp": datetime.now().isoformat(),
"platform": platform.platform(),
"cpu_percent": psutil.cpu_percent(percpu=False),
"cpu_percent_per_cpu": psutil.cpu_percent(percpu=True),
"cpu_count": psutil.cpu_count(logical=False),
"memory_total": psutil.virtual_memory().total,
"memory_available": psutil.virtual_memory().available,
"memory_percent": psutil.virtual_memory().percent,
"job_counts": RenderQueue.job_counts(),
"renderers": renderer_data,
"hostname": server.config['HOSTNAME'],
"port": server.config['PORT']
}
@server.get('/api/renderer_info')
def renderer_info():
renderer_data = {}
for r in RenderWorkerFactory.supported_renderers():
engine = RenderWorkerFactory.class_for_name(r).engine
if engine.renderer_path():
renderer_data[r] = {'is_available': RenderQueue.is_available_for_job(engine.name()),
'version': engine.version(),
'supported_extensions': engine.supported_extensions,
'supported_export_formats': engine.get_output_formats(),
'path': engine.renderer_path()}
return renderer_data
@server.route('/upload')
def upload_file_page():
return render_template('upload.html', supported_renderers=RenderWorkerFactory.supported_renderers())
def start_server(background_thread=False):
def eval_loop(delay_sec=1):
while True:
RenderQueue.evaluate_queue()
time.sleep(delay_sec)
with open('config/config.yaml') as f:
config = yaml.load(f, Loader=yaml.FullLoader)
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', datefmt='%d-%b-%y %H:%M:%S',
level=config.get('server_log_level', 'INFO').upper())
# get hostname
local_hostname = socket.gethostname()
local_hostname = local_hostname + (".local" if not local_hostname.endswith(".local") else "")
# load flask settings
server.config['HOSTNAME'] = local_hostname
server.config['PORT'] = int(config.get('port_number', 8080))
server.config['UPLOAD_FOLDER'] = os.path.expanduser(config['upload_folder'])
server.config['THUMBS_FOLDER'] = os.path.join(os.path.expanduser(config['upload_folder']), 'thumbs')
server.config['MAX_CONTENT_PATH'] = config['max_content_path']
server.config['enable_split_jobs'] = config.get('enable_split_jobs', False)
# disable most Flask logging
flask_log = logging.getLogger('werkzeug')
flask_log.setLevel(config.get('flask_log_level', 'ERROR').upper())
# Set up the RenderQueue object
RenderQueue.start_queue()
DistributedJobManager.start()
thread = threading.Thread(target=eval_loop, kwargs={'delay_sec': config.get('queue_eval_seconds', 1)}, daemon=True)
thread.start()
logging.info(f"Starting Zordon Render Server - Hostname: '{server.config['HOSTNAME']}:'")
ZeroconfServer.configure("_zordon._tcp.local.", server.config['HOSTNAME'], server.config['PORT'])
ZeroconfServer.start()
try:
if background_thread:
server_thread = threading.Thread(
target=lambda: server.run(host='0.0.0.0', port=server.config['PORT'], debug=False, use_reloader=False))
server_thread.start()
server_thread.join()
else:
server.run(host='0.0.0.0', port=server.config['PORT'], debug=config.get('flask_debug_enable', False),
use_reloader=False, threaded=True)
finally:
RenderQueue.save_state()
ZeroconfServer.stop()