Create subjobs after submission - #54 (#79)

* Force start in render queue only starts NOT_STARTED and SCHEDULED jobs

* Refactor adding jobs / subjobs

* Remove dead code

* Fixed issue with bulk job submission

* Cancel job now cancels all subjobs

* Misc fixes

* JSON now returns job hostname

* Add hostname as optional column in DB

* Misc fixes

* Error handling for removing zip file after download

* Clean up imports

* Fixed issue where worker child information would not be saved
This commit is contained in:
2024-07-30 19:22:38 -05:00
committed by GitHub
parent 6d33f262b3
commit 8a3e74660c
8 changed files with 138 additions and 142 deletions

View File

@@ -10,10 +10,6 @@ import requests
from tqdm import tqdm
from werkzeug.utils import secure_filename
from src.distributed_job_manager import DistributedJobManager
from src.engines.engine_manager import EngineManager
from src.render_queue import RenderQueue
logger = logging.getLogger()
@@ -153,70 +149,3 @@ def process_zipped_project(zip_path):
logger.error(f"Error processing zip file: {e}")
raise ValueError(f"Error processing zip file: {e}")
return extracted_project_path
def create_render_jobs(jobs_list, loaded_project_local_path):
"""
Creates render jobs.
This method takes a list of job data, a local path to a loaded project, and a job directory. It creates a render
job for each job data in the list and appends the result to a list. The list of results is then returned.
Args:
jobs_list (list): A list of job data.
loaded_project_local_path (str): The local path to the loaded project.
Returns:
list: A list of results from creating the render jobs.
"""
results = []
for job_data in jobs_list:
try:
# get new output path in output_dir
output_path = job_data.get('output_path')
if not output_path:
loaded_project_filename = os.path.basename(loaded_project_local_path)
output_filename = os.path.splitext(loaded_project_filename)[0]
else:
output_filename = os.path.basename(output_path)
# Prepare output path
output_dir = os.path.join(os.path.dirname(os.path.dirname(loaded_project_local_path)), 'output')
output_path = os.path.join(output_dir, output_filename)
os.makedirs(output_dir, exist_ok=True)
logger.debug(f"New job output path: {output_path}")
# create & configure jobs
worker = EngineManager.create_worker(renderer=job_data['renderer'],
input_path=loaded_project_local_path,
output_path=output_path,
engine_version=job_data.get('engine_version'),
args=job_data.get('args', {}))
worker.status = job_data.get("initial_status", worker.status)
worker.parent = job_data.get("parent", worker.parent)
worker.name = job_data.get("name", worker.name)
worker.priority = int(job_data.get('priority', worker.priority))
worker.start_frame = int(job_data.get("start_frame", worker.start_frame))
worker.end_frame = int(job_data.get("end_frame", worker.end_frame))
# determine if we can / should split the job
if job_data.get("enable_split_jobs", False) and (worker.total_frames > 1) and not worker.parent:
DistributedJobManager.split_into_subjobs(worker, job_data, loaded_project_local_path)
else:
logger.debug("Not splitting into subjobs")
RenderQueue.add_to_render_queue(worker, force_start=job_data.get('force_start', False))
if not worker.parent:
from src.api.api_server import make_job_ready
make_job_ready(worker.id)
results.append(worker.json())
except FileNotFoundError as e:
err_msg = f"Cannot create job: {e}"
logger.error(err_msg)
results.append({'error': err_msg})
except Exception as e:
err_msg = f"Exception creating render job: {e}"
logger.exception(err_msg)
results.append({'error': err_msg})
return results

View File

@@ -17,7 +17,7 @@ import psutil
import yaml
from flask import Flask, request, send_file, after_this_request, Response, redirect, url_for, abort
from src.api.add_job_helpers import handle_uploaded_project_files, process_zipped_project, create_render_jobs
from src.api.add_job_helpers import handle_uploaded_project_files, process_zipped_project
from src.api.serverproxy_manager import ServerProxyManager
from src.distributed_job_manager import DistributedJobManager
from src.engines.core.base_worker import string_to_status, RenderStatus
@@ -182,24 +182,6 @@ def get_file_list(job_id):
return RenderQueue.job_with_id(job_id).file_list()
@server.get('/api/job/<job_id>/make_ready')
def make_job_ready(job_id):
try:
found_job = RenderQueue.job_with_id(job_id)
if found_job.status in [RenderStatus.CONFIGURING, RenderStatus.NOT_STARTED]:
if found_job.children:
for child_key in found_job.children.keys():
child_id = child_key.split('@')[0]
hostname = child_key.split('@')[-1]
ServerProxyManager.get_proxy_for_hostname(hostname).request_data(f'job/{child_id}/make_ready')
found_job.status = RenderStatus.NOT_STARTED
RenderQueue.save_state()
return found_job.json(), 200
except Exception as e:
return f"Error making job ready: {e}", 500
return "Not valid command", 405
@server.route('/api/job/<job_id>/download_all')
def download_all(job_id):
zip_filename = None
@@ -207,7 +189,10 @@ def download_all(job_id):
@after_this_request
def clear_zip(response):
if zip_filename and os.path.exists(zip_filename):
os.remove(zip_filename)
try:
os.remove(zip_filename)
except Exception as e:
logger.warning(f"Error removing zip file '{zip_filename}': {e}")
return response
found_job = RenderQueue.job_with_id(job_id)
@@ -283,13 +268,13 @@ def add_job_handler():
if loaded_project_local_path.lower().endswith('.zip'):
loaded_project_local_path = process_zipped_project(loaded_project_local_path)
results = create_render_jobs(jobs_list, loaded_project_local_path)
for response in results:
if response.get('error', None):
return results, 400
results = []
for new_job_data in jobs_list:
new_job = DistributedJobManager.create_render_job(new_job_data, loaded_project_local_path)
results.append(new_job.json())
return results, 200
except Exception as e:
logger.exception(f"Unknown error adding job: {e}")
logger.exception(f"Error adding job: {e}")
return 'unknown error', 500

View File

@@ -17,7 +17,8 @@ status_colors = {RenderStatus.ERROR: "red", RenderStatus.CANCELLED: 'orange1', R
RenderStatus.RUNNING: 'cyan', RenderStatus.WAITING_FOR_SUBJOBS: 'blue'}
categories = [RenderStatus.RUNNING, RenderStatus.WAITING_FOR_SUBJOBS, RenderStatus.ERROR, RenderStatus.NOT_STARTED,
RenderStatus.SCHEDULED, RenderStatus.COMPLETED, RenderStatus.CANCELLED, RenderStatus.UNDEFINED]
RenderStatus.SCHEDULED, RenderStatus.COMPLETED, RenderStatus.CANCELLED, RenderStatus.UNDEFINED,
RenderStatus.CONFIGURING]
logger = logging.getLogger()
OFFLINE_MAX = 4