mirror of
https://github.com/blw1138/Zordon.git
synced 2025-12-17 16:58:12 +00:00
Multi client jobs (#15)
* Add API to expose if RenderQueue is available to take new jobs for a given renderer and priority * Fix issue with calculating Blender percent complete when not starting at 1 * Rename owner / client properties to parent / children * Add make_ready method to API * Create and submit subjobs to other servers * Update make_ready to update children jobs and some misc fixes * Misc GUI cleanup
This commit is contained in:
@@ -20,10 +20,11 @@ from flask import Flask, request, render_template, send_file, after_this_request
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
from lib.render_queue import RenderQueue, JobNotFoundError
|
||||
from lib.workers.base_worker import string_to_status, RenderStatus
|
||||
from lib.workers.worker_factory import RenderWorkerFactory
|
||||
from lib.server.server_proxy import RenderServerProxy
|
||||
from lib.server.zeroconf_server import ZeroconfServer
|
||||
from lib.utilities.server_helper import generate_thumbnail_for_job
|
||||
from lib.workers.base_worker import string_to_status, RenderStatus
|
||||
from lib.workers.worker_factory import RenderWorkerFactory
|
||||
|
||||
logger = logging.getLogger()
|
||||
server = Flask(__name__, template_folder='templates', static_folder='static')
|
||||
@@ -186,6 +187,23 @@ def get_file_list(job_id):
|
||||
return RenderQueue.job_with_id(job_id).file_list()
|
||||
|
||||
|
||||
@server.get('/api/job/<job_id>/make_ready')
def make_job_ready(job_id):
    """Promote a job from NOT_READY (or NOT_STARTED) to NOT_STARTED so the queue can run it.

    If the job has children (subjobs on other servers, stored as a CSV of
    "child_id@hostname"), each child server is asked to make its subjob
    ready first.

    Returns:
        (job JSON, 200) on success, 405 if the job is not in a promotable
        state, or 500 on any error (including an unknown job id).
    """
    try:
        found_job = RenderQueue.job_with_id(job_id)
        if found_job.status in [RenderStatus.NOT_READY, RenderStatus.NOT_STARTED]:
            if found_job.children:
                for child_name in found_job.children.split(','):
                    child_id, hostname = child_name.split('@')
                    # Fix: the original passed the literal text '<child_id>'
                    # (missing f-string interpolation) and prefixed '/api/',
                    # unlike every other request_data() caller which uses
                    # paths relative to the API root (e.g. 'job/{id}/cancel').
                    RenderServerProxy(hostname).request_data(f'job/{child_id}/make_ready')
            found_job.status = RenderStatus.NOT_STARTED
            RenderQueue.save_state()
            return found_job.json(), 200
    except Exception as e:
        # Fix: missing f-prefix meant the literal "{e}" was returned to callers.
        return f"Error making job ready: {e}", 500
    return "Not valid command", 405
|
||||
|
||||
|
||||
@server.route('/api/job/<job_id>/download_all')
|
||||
def download_all(job_id):
|
||||
zip_filename = None
|
||||
@@ -239,9 +257,31 @@ def snapshot():
|
||||
return server_data
|
||||
|
||||
|
||||
@server.get('/api/_detected_clients')
def detected_clients():
    """Return the client hostnames discovered via Zeroconf.

    TODO(dev): debug-only endpoint — remove or gate before shipping.
    """
    zeroconf = server.config['ZEROCONF_SERVER']
    return zeroconf.found_clients()
|
||||
|
||||
|
||||
@server.route('/api/is_available_for_job', methods=['POST', 'GET'])
def available_for_job():
    """
    Check queue to see if it can take a job with a given renderer and priority.

    Query params:
        renderer: must be one of RenderWorkerFactory.supported_renderers()
        priority: integer priority level

    Returns:
        200 with {"is_available": bool, "renderer": ..., "priority": ...},
        or 400 when a parameter is missing or invalid.
    """
    renderer = request.args.get('renderer')
    priority = request.args.get('priority')

    if not renderer or not priority:
        return {"error": "Both 'renderer' and 'priority' parameters are required"}, 400
    if renderer not in RenderWorkerFactory.supported_renderers():
        return {"error": f"Unsupported renderer: {renderer}"}, 400
    try:
        # Fix: query-string values are strings, but priority is treated as an
        # int everywhere else (e.g. int(job_data.get('priority', ...)) in
        # add_job_handler and the proxy's priority=2 default).
        priority = int(priority)
    except ValueError:
        return {"error": f"Invalid priority: {priority}"}, 400
    return {"is_available": RenderQueue.is_available_for_job(renderer, priority),
            'renderer': renderer, 'priority': priority}, 200
|
||||
|
||||
|
||||
@server.post('/api/add_job')
|
||||
def add_job_handler():
|
||||
|
||||
# initial handling of raw data
|
||||
try:
|
||||
if request.is_json:
|
||||
@@ -253,11 +293,11 @@ def add_job_handler():
|
||||
form_dict = {k: v for k, v in dict(request.form).items() if v}
|
||||
args = {}
|
||||
arg_keys = [k for k in form_dict.keys() if '-arg_' in k]
|
||||
for key in arg_keys:
|
||||
if form_dict['renderer'] in key or 'AnyRenderer' in key:
|
||||
cleaned_key = key.split('-arg_')[-1]
|
||||
args[cleaned_key] = form_dict[key]
|
||||
form_dict.pop(key)
|
||||
for server_hostname in arg_keys:
|
||||
if form_dict['renderer'] in server_hostname or 'AnyRenderer' in server_hostname:
|
||||
cleaned_key = server_hostname.split('-arg_')[-1]
|
||||
args[cleaned_key] = form_dict[server_hostname]
|
||||
form_dict.pop(server_hostname)
|
||||
args['raw'] = form_dict.get('raw_args', None)
|
||||
form_dict['args'] = args
|
||||
jobs_list = [form_dict]
|
||||
@@ -269,6 +309,7 @@ def add_job_handler():
|
||||
# start handling project files
|
||||
try:
|
||||
# handle uploaded files
|
||||
logger.debug(f"Incoming new job request: {jobs_list}")
|
||||
uploaded_project = request.files.get('file', None)
|
||||
project_url = jobs_list[0].get('url', None)
|
||||
input_path = jobs_list[0].get('input_path', None)
|
||||
@@ -342,31 +383,36 @@ def add_job_handler():
|
||||
|
||||
# create and add jobs to render queue
|
||||
results = []
|
||||
for job in jobs_list:
|
||||
for job_data in jobs_list:
|
||||
try:
|
||||
# prepare output paths
|
||||
output_dir = os.path.join(job_dir, job.get('name', None) or 'output')
|
||||
output_dir = os.path.join(job_dir, job_data.get('name', None) or 'output')
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
# get new output path in output_dir
|
||||
job['output_path'] = os.path.join(output_dir, os.path.basename(
|
||||
job.get('name', None) or job.get('output_path', None) or loaded_project_local_path
|
||||
job_data['output_path'] = os.path.join(output_dir, os.path.basename(
|
||||
job_data.get('name', None) or job_data.get('output_path', None) or loaded_project_local_path
|
||||
))
|
||||
|
||||
# create & configure jobs
|
||||
render_job = RenderWorkerFactory.create_worker(renderer=job['renderer'],
|
||||
worker = RenderWorkerFactory.create_worker(renderer=job_data['renderer'],
|
||||
input_path=loaded_project_local_path,
|
||||
output_path=job["output_path"],
|
||||
args=job.get('args', {}))
|
||||
render_job.client = server.config['HOSTNAME']
|
||||
render_job.owner = job.get("owner", render_job.owner)
|
||||
render_job.name = job.get("name", render_job.name)
|
||||
render_job.priority = int(job.get('priority', render_job.priority))
|
||||
render_job.start_frame = job.get("start_frame", render_job.start_frame)
|
||||
render_job.end_frame = job.get("end_frame", render_job.end_frame)
|
||||
output_path=job_data["output_path"],
|
||||
args=job_data.get('args', {}))
|
||||
worker.status = job_data.get("initial_status", worker.status)
|
||||
worker.parent = job_data.get("parent", worker.parent)
|
||||
worker.name = job_data.get("name", worker.name)
|
||||
worker.priority = int(job_data.get('priority', worker.priority))
|
||||
worker.start_frame = job_data.get("start_frame", worker.start_frame)
|
||||
worker.end_frame = job_data.get("end_frame", worker.end_frame)
|
||||
|
||||
RenderQueue.add_to_render_queue(render_job, force_start=job.get('force_start', False))
|
||||
results.append(render_job.json())
|
||||
# determine if we can / should split the job
|
||||
if server.config.get('enable_split_jobs', False) and (worker.total_frames > 1) and not worker.parent:
|
||||
create_subjobs(worker, job_data, loaded_project_local_path)
|
||||
|
||||
RenderQueue.add_to_render_queue(worker, force_start=job_data.get('force_start', False))
|
||||
make_job_ready(worker.id)
|
||||
results.append(worker.json())
|
||||
except Exception as e:
|
||||
err_msg = f"Error creating render job: {e}"
|
||||
logger.error(err_msg)
|
||||
@@ -388,6 +434,89 @@ def add_job_handler():
|
||||
return 'unknown error', 500
|
||||
|
||||
|
||||
def create_subjobs(worker, job_data, project_path):
    """Split *worker*'s frame range across other available render servers.

    Posts one subjob per remote server (copies of *job_data* with adjusted
    frame ranges), truncates the local worker's frame range to its share,
    and records the children as a CSV of "child_id@hostname" on the worker.
    If any remote post fails, all successfully-created subjobs are cancelled
    and the worker is left untouched.

    NOTE(review): mutates *worker* (end_frame, children, name) in place;
    assumes worker.start_frame/end_frame are ints — TODO confirm callers
    guarantee that.
    """
    # Check availability: every discovered client except ourselves that
    # reports capacity for this renderer/priority.
    local_hostname = server.config['HOSTNAME']
    found_servers = [x for x in server.config['ZEROCONF_SERVER'].found_clients() if local_hostname not in x]
    available_servers = [local_hostname] + [hostname for hostname in found_servers if
                                            RenderServerProxy(hostname).is_available_for_job(renderer=worker.renderer,
                                                                                            priority=worker.priority)]

    if len(available_servers) <= 1:
        logger.debug("No available servers to split job with. Skipping subjob creation.")
        return

    logger.info(f"Found {len(available_servers) - 1} additional available servers | "
                f"Breaking up job into {len(available_servers)} jobs")
    logger.debug(f"Available servers: {available_servers}")

    def divide_frames(start_frame, end_frame, num_servers):
        # Partition the inclusive frame range into contiguous (start, end)
        # chunks, distributing any remainder one extra frame at a time to the
        # earliest servers. Servers left with no frames get no range at all.
        frame_range = end_frame - start_frame + 1
        frames_per_server = frame_range // num_servers
        leftover_frames = frame_range % num_servers

        ranges = []
        current_start = start_frame
        for i in range(num_servers):
            current_end = current_start + frames_per_server - 1
            if leftover_frames > 0:
                current_end += 1
                leftover_frames -= 1
            if current_start <= current_end:
                ranges.append((current_start, current_end))
            current_start = current_end + 1

        return ranges

    # Calculate respective frames for each server. The local host is index 0,
    # so it always receives the first (and possibly only) range.
    server_frame_ranges = {}
    for idx, frame_range in enumerate(divide_frames(worker.start_frame, worker.end_frame, len(available_servers))):
        server_frame_ranges[available_servers[idx]] = frame_range

    logger.info(f"Job {worker.id} split plan: {server_frame_ranges}")

    # Prep and submit these sub-jobs to the remote servers.
    submission_results = {}
    try:
        for server_hostname, frame_range in server_frame_ranges.items():
            if server_hostname != local_hostname:
                subjob = job_data.copy()
                subjob['name'] = f"{worker.name}[{frame_range[0]}-{frame_range[-1]}]"
                # Children find their way back to us via "parent_id@hostname".
                subjob['parent'] = f"{worker.id}@{local_hostname}"
                subjob['start_frame'] = frame_range[0]
                subjob['end_frame'] = frame_range[-1]

                logger.debug(f"Posting subjob with frames {subjob['start_frame']}-"
                             f"{subjob['end_frame']} to {server_hostname}")
                post_results = RenderServerProxy(server_hostname).post_job_to_server(
                    input_path=project_path, job_list=[subjob])
                if post_results.ok:
                    # presumably the response body is a list of created job
                    # dicts; we keep the first — TODO confirm API shape.
                    submission_results[server_hostname] = post_results.json()[0]
                else:
                    logger.error(f"Failed to create subjob on {server_hostname}")
                    break

        # check that job posts were all successful (every server except us).
        if len(submission_results) != (len(server_frame_ranges) - 1):
            raise ValueError("Failed to create all subjobs")  # look into recalculating job numbers and using existing jobs

        # truncate parent render_job to only the locally-assigned range
        worker.end_frame = min(server_frame_ranges[local_hostname][-1], worker.end_frame)
        logger.info(f"Local job now rendering from {worker.start_frame} to {worker.end_frame}")

        # start subjobs: record children so make_job_ready can fan out to them
        logger.debug(f"Starting {len(server_frame_ranges) - 1} attempted subjobs")
        worker.children = ",".join([f"{results['id']}@{hostname}" for hostname, results in submission_results.items()])
        worker.name = f"{worker.name}[{worker.start_frame}-{worker.end_frame}]"

    except Exception as e:
        # cancel all the subjobs that were successfully created (best-effort
        # compensation; the local worker has not been mutated at this point)
        logger.error(f"Failed to split job into subjobs: {e}")
        logger.debug(f"Cancelling {len(server_frame_ranges) - 1} attempted subjobs")
        [RenderServerProxy(hostname).cancel_job(results['id'], confirm=True) for hostname, results in submission_results.items()]
|
||||
|
||||
|
||||
@server.get('/api/job/<job_id>/cancel')
|
||||
def cancel_job(job_id):
|
||||
if not request.args.get('confirm', False):
|
||||
@@ -506,6 +635,7 @@ def start_server(background_thread=False):
|
||||
server.config['UPLOAD_FOLDER'] = os.path.expanduser(config['upload_folder'])
|
||||
server.config['THUMBS_FOLDER'] = os.path.join(os.path.expanduser(config['upload_folder']), 'thumbs')
|
||||
server.config['MAX_CONTENT_PATH'] = config['max_content_path']
|
||||
server.config['enable_split_jobs'] = config.get('enable_split_jobs', False)
|
||||
|
||||
# disable most Flask logging
|
||||
flask_log = logging.getLogger('werkzeug')
|
||||
@@ -520,6 +650,7 @@ def start_server(background_thread=False):
|
||||
logging.info(f"Starting Zordon Render Server - Hostname: '{server.config['HOSTNAME']}:'")
|
||||
zeroconf_server = ZeroconfServer("_zordon._tcp.local.", server.config['HOSTNAME'], server.config['PORT'])
|
||||
zeroconf_server.start()
|
||||
server.config['ZEROCONF_SERVER'] = zeroconf_server
|
||||
|
||||
try:
|
||||
if background_thread:
|
||||
@@ -531,4 +662,5 @@ def start_server(background_thread=False):
|
||||
server.run(host='0.0.0.0', port=server.config['PORT'], debug=config.get('flask_debug_enable', False),
|
||||
use_reloader=False, threaded=True)
|
||||
finally:
|
||||
RenderQueue.save_state()
|
||||
zeroconf_server.stop()
|
||||
|
||||
@@ -84,7 +84,7 @@ class RenderServerProxy:
|
||||
self.__update_in_background = False
|
||||
|
||||
def get_jobs(self, timeout=5, ignore_token=False):
|
||||
if not self.__update_in_background:
|
||||
if not self.__update_in_background or ignore_token:
|
||||
self.__update_job_cache(timeout, ignore_token)
|
||||
return self.__jobs_cache.copy() if self.__jobs_cache else None
|
||||
|
||||
@@ -104,6 +104,14 @@ class RenderServerProxy:
|
||||
all_data = self.request_data('full_status', timeout=timeout)
|
||||
return all_data
|
||||
|
||||
def cancel_job(self, job_id, confirm=False):
    """RenderServerProxy method: ask the remote server to cancel *job_id*.

    The remote endpoint only acts when confirm is truthy; the query value is
    the stringified Python bool (e.g. 'confirm=True').
    """
    endpoint = f'job/{job_id}/cancel?confirm={confirm}'
    return self.request_data(endpoint)
|
||||
|
||||
def is_available_for_job(self, renderer, priority=2):
    """RenderServerProxy method: ask the remote queue whether it can take a job.

    Returns False when the server is unreachable (falsy response) or when the
    reply lacks an 'is_available' field.
    """
    query = f'is_available_for_job?renderer={renderer}&priority={priority}'
    response = self.request_data(query, timeout=1)
    if not response:
        return False
    return response.get('is_available', False)
|
||||
|
||||
def post_job_to_server(self, input_path, job_list, callback=None):
|
||||
# Prepare the form data
|
||||
encoder = MultipartEncoder({
|
||||
|
||||
Reference in New Issue
Block a user