Wait for subjob completion and download render files to host (#17)

* Fix Blender image sequence -> video conversion and change video to use ProRes

* Wait for child jobs to complete

* Download and extract render files from subjobs

* Fix issue where zip was not removed

* Update client to use new method names in server proxy

* Fix minor download issue

committed by GitHub on 2023-06-15 17:44:34 -05:00
parent 0a0a228731
commit e6eb344d19
6 changed files with 154 additions and 103 deletions


@@ -10,6 +10,7 @@ from PIL import Image, ImageTk
 from lib.client.new_job_window import NewJobWindow
 from lib.server.server_proxy import RenderServerProxy
 from lib.server.zeroconf_server import ZeroconfServer
+from lib.workers.base_worker import RenderStatus
 from lib.utilities.misc_helper import launch_url, file_exists_in_mounts, get_time_elapsed

 logger = logging.getLogger()
@@ -82,7 +83,7 @@ class DashboardWindow:
         # Setup the Tree
         self.job_tree = ttk.Treeview(server_frame, show="headings")
-        self.job_tree.tag_configure('running', background='lawn green', font=('', 0, 'bold'))
+        self.job_tree.tag_configure(RenderStatus.RUNNING.value, background='lawn green', font=('', 0, 'bold'))
         self.job_tree.bind("<<TreeviewSelect>>", self.job_picked)
         self.job_tree["columns"] = ("id", "Name", "Renderer", "Priority", "Status", "Time Elapsed", "Frames",
                                     "Date Added", "Parent", "")
@@ -206,7 +207,7 @@
     def delete_job(self):
         job_ids = self.selected_job_ids()
         if len(job_ids) == 1:
-            job = next((d for d in self.current_server_proxy.get_jobs() if d.get('id') == job_ids[0]), None)
+            job = next((d for d in self.current_server_proxy.get_all_jobs() if d.get('id') == job_ids[0]), None)
             display_name = job['name'] or os.path.basename(job['input_path'])
             message = f"Are you sure you want to delete the job:\n{display_name}?"
         else:
@@ -250,9 +251,9 @@
             self.set_image(self.default_image)

         # update button status
-        current_jobs = self.current_server_proxy.get_jobs() or []
+        current_jobs = self.current_server_proxy.get_all_jobs() or []
         job = next((d for d in current_jobs if d.get('id') == job_id), None)
-        stop_button_state = 'normal' if job and job['status'] == 'running' else 'disabled'
+        stop_button_state = 'normal' if job and job['status'] == RenderStatus.RUNNING.value else 'disabled'
         self.stop_button.config(state=stop_button_state)
         generic_button_state = 'normal' if job else 'disabled'
@@ -261,16 +262,16 @@
         self.logs_button.config(state=generic_button_state)

     def show_files(self):
-        output_path = None
-        if self.selected_job_ids():
-            job = next((d for d in self.current_server_proxy.get_jobs() if d.get('id') == self.selected_job_ids()[0]), None)
-            output_path = os.path.dirname(job['output_path'])  # check local filesystem
-            if not os.path.exists(output_path):
-                output_path = file_exists_in_mounts(output_path)  # check any attached network shares
-        if output_path:
-            launch_url(output_path)
-        else:
-            messagebox.showerror("File Not Found", "The file could not be found. Check your network mounts.")
+        if not self.selected_job_ids():
+            return
+        job = next((d for d in self.current_server_proxy.get_all_jobs() if d.get('id') == self.selected_job_ids()[0]), None)
+        output_path = os.path.dirname(job['output_path'])  # check local filesystem
+        if not os.path.exists(output_path):
+            output_path = file_exists_in_mounts(output_path)  # check any attached network shares
+        if not output_path:
+            return messagebox.showerror("File Not Found", "The file could not be found. Check your network mounts.")
+        launch_url(output_path)

     def open_logs(self):
         if self.selected_job_ids():
@@ -338,21 +339,24 @@
         if clear_table:
             self.job_tree.delete(*self.job_tree.get_children())
-        job_fetch = self.current_server_proxy.get_jobs(ignore_token=clear_table)
+        job_fetch = self.current_server_proxy.get_all_jobs(ignore_token=clear_table)
         if job_fetch:
             for job in job_fetch:
-                display_status = job['status'] if job['status'] != 'running' else \
+                display_status = job['status'] if job['status'] != RenderStatus.RUNNING.value else \
                     ('%.0f%%' % (job['percent_complete'] * 100))  # if running, show percent, otherwise just show status
                 tags = (job['status'],)
                 start_time = datetime.datetime.fromisoformat(job['start_time']) if job['start_time'] else None
                 end_time = datetime.datetime.fromisoformat(job['end_time']) if job['end_time'] else None
+                time_elapsed = "" if (job['status'] != RenderStatus.RUNNING.value and not end_time) else \
+                    get_time_elapsed(start_time, end_time)
                 values = (job['id'],
                           job['name'] or os.path.basename(job['input_path']),
                           job['renderer'] + "-" + job['renderer_version'],
                           job['priority'],
                           display_status,
-                          get_time_elapsed(start_time, end_time),
+                          time_elapsed,
                           job['total_frames'],
                           job['date_created'],
                           job['parent'])
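
The new time_elapsed expression encodes a small display rule: the elapsed-time column stays blank unless a job is currently running or already has an end_time. A minimal sketch of that rule in isolation (the helper name is illustrative, not part of the commit):

    def row_time_fields(job):
        # Running jobs show a percentage; everything else shows the raw status string.
        running = job['status'] == RenderStatus.RUNNING.value
        display_status = '%.0f%%' % (job['percent_complete'] * 100) if running else job['status']
        start = datetime.datetime.fromisoformat(job['start_time']) if job['start_time'] else None
        end = datetime.datetime.fromisoformat(job['end_time']) if job['end_time'] else None
        # Blank unless the job is running or has actually finished (has an end_time).
        time_elapsed = get_time_elapsed(start, end) if (running or end) else ""
        return display_status, time_elapsed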


@@ -193,8 +193,7 @@ def make_job_ready(job_id):
     found_job = RenderQueue.job_with_id(job_id)
     if found_job.status in [RenderStatus.NOT_READY, RenderStatus.NOT_STARTED]:
         if found_job.children:
-            for child_name in found_job.children.split(','):
-                child_id, hostname = child_name.split('@')
+            for hostname, child_id in found_job.children.items():
                 RenderServerProxy(hostname).request_data(f'/api/job/<child_id>/make_ready')
         found_job.status = RenderStatus.NOT_STARTED
         RenderQueue.save_state()
@@ -323,7 +322,7 @@ def add_job_handler():
     logger.info(f"Attempting to download URL: {project_url}")
     try:
         downloaded_file_url, info = urlretrieve(project_url)
-        referred_name = info.get_filename()
+        referred_name = info.get_filename() or os.path.basename(project_url)
     except Exception as e:
         err_msg = f"Error downloading file: {e}"
         logger.error(err_msg)
@@ -507,7 +506,8 @@ def create_subjobs(worker, job_data, project_path):
         # start subjobs
         logger.debug(f"Starting {len(server_frame_ranges) - 1} attempted subjobs")
-        worker.children = ",".join([f"{results['id']}@{hostname}" for hostname, results in submission_results.items()])
+        for hostname, results in submission_results.items():
+            worker.children[hostname] = results['id']
         worker.name = f"{worker.name}[{worker.start_frame}-{worker.end_frame}]"
     except Exception as e:
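
For reference, the shape of worker.children before and after this change (hostnames and job ids below are made-up examples, not values from the repository):

    # Old format: one comma-separated string of "<child_id>@<hostname>" entries
    children_old = "42@render-node-01,43@render-node-02"

    # New format: a JSON-backed dict keyed by hostname, so .items() iteration and per-host lookups are direct
    children_new = {"render-node-01": 42, "render-node-02": 43}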


@@ -83,7 +83,10 @@ class RenderServerProxy:
     def stop_background_update(self):
         self.__update_in_background = False

-    def get_jobs(self, timeout=5, ignore_token=False):
+    def get_job_info(self, job_id, timeout=5):
+        return self.request_data(f'job/{job_id}', timeout=timeout)
+
+    def get_all_jobs(self, timeout=5, ignore_token=False):
         if not self.__update_in_background or ignore_token:
             self.__update_job_cache(timeout, ignore_token)
         return self.__jobs_cache.copy() if self.__jobs_cache else None
@@ -130,3 +133,17 @@
         response = requests.post(f'http://{self.hostname}:{self.port}/api/add_job', data=monitor, headers=headers)
         return response
+
+    def get_job_files(self, job_id, save_path):
+        url = f"http://{self.hostname}:{self.port}/api/job/{job_id}/download_all"
+        return self.download_file(url, filename=save_path)
+
+    @staticmethod
+    def download_file(url, filename):
+        with requests.get(url, stream=True) as r:
+            r.raise_for_status()
+            with open(filename, 'wb') as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        return filename
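
A minimal sketch of how a parent job is expected to combine the new proxy calls (the host, job id, save path, and status check below are placeholders, not values from the repository):

    proxy = RenderServerProxy("render-node-01")      # hypothetical subjob host
    info = proxy.get_job_info(42)                    # per-job status lookup added in this commit
    if info and info.get('status') == 'completed':
        # Streams /api/job/42/download_all to disk in 8 KB chunks and returns the saved path
        proxy.get_job_files(42, "/tmp/job_42_output.zip")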


@@ -2,10 +2,9 @@ import subprocess
 from lib.engines.ffmpeg_engine import FFMPEG


-def image_sequence_to_video(source_glob_pattern, output_path, framerate=24, encoder="libx264", pix_fmt="yuv420p"):
+def image_sequence_to_video(source_glob_pattern, output_path, framerate=24, encoder="prores_ks", profile=4):
     subprocess.run([FFMPEG.renderer_path(), "-framerate", str(framerate), "-i", f"{source_glob_pattern}",
-                    "-c:v", encoder, "-pix_fmt", pix_fmt, output_path], stdout=subprocess.DEVNULL,
-                    stderr=subprocess.DEVNULL, check=True)
+                    "-c:v", encoder, "-profile:v", str(profile), '-pix_fmt', 'yuva444p10le', output_path], check=True)


 def save_first_frame(source_path, dest_path, max_width=1280):
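
With the new defaults the helper assembles, in effect, ffmpeg -framerate 24 -i <pattern> -c:v prores_ks -profile:v 4 -pix_fmt yuva444p10le <output>; profile 4 is ProRes 4444 and yuva444p10le keeps a 10-bit alpha channel. A usage sketch (the input pattern and output name are examples):

    # Writes a ProRes 4444 .mov preview from a zero-padded frame sequence
    image_sequence_to_video("render-%04d.png", "preview.mov", framerate=24)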


@@ -1,18 +1,20 @@
 #!/usr/bin/env python3
 import io
+import json
 import logging
 import os
 import subprocess
 import threading
-import json
+import time
+import zipfile
 from datetime import datetime
 from enum import Enum

-from sqlalchemy import Column, Integer, String, DateTime
-from sqlalchemy.ext.declarative import declarative_base
-from lib.utilities.misc_helper import get_time_elapsed
 import psutil
+from sqlalchemy import Column, Integer, String, DateTime, JSON
+from sqlalchemy.ext.declarative import declarative_base
+
+from lib.utilities.misc_helper import get_time_elapsed

 logger = logging.getLogger()
 Base = declarative_base()
@@ -25,6 +27,7 @@ class RenderStatus(Enum):
     CANCELLED = "cancelled"
     ERROR = "error"
     SCHEDULED = "scheduled"
+    WAITING = "waiting"
     NOT_READY = "not_ready"
     UNDEFINED = "undefined"
@@ -52,7 +55,7 @@ class BaseRenderWorker(Base):
     start_frame = Column(Integer)
     end_frame = Column(Integer, nullable=True)
     parent = Column(String, nullable=True)
-    children = Column(String, nullable=True)
+    children = Column(JSON)
     name = Column(String)
     file_hash = Column(String)
     _status = Column(String)
@@ -84,7 +87,7 @@
         self.renderer_version = self.engine.version()
         self.priority = priority
         self.parent = parent
-        self.children = None
+        self.children = {}
         self.name = name or os.path.basename(input_path)

         # Frame Ranges
@@ -101,14 +104,11 @@
         self.status = RenderStatus.NOT_READY
         self.warnings = []
         self.errors = []
-        self.failed_attempts = 0
-        self.maximum_attempts = 1

         # Threads and processes
         self.__thread = threading.Thread(target=self.run, args=())
         self.__thread.daemon = True
         self.__process = None
-        self.is_finished = False
         self.last_output = None

     @property
@@ -192,11 +192,6 @@
         log_dir = os.path.dirname(self.log_path())
         os.makedirs(log_dir, exist_ok=True)

-        while self.failed_attempts < self.maximum_attempts and self.status is not RenderStatus.COMPLETED:
-            if self.failed_attempts:
-                logger.info(f'Attempt #{self.failed_attempts} failed. Starting attempt #{self.failed_attempts + 1}')
-
         # Start process and get updates
         subprocess_cmds = self.generate_subprocess()
         logger.debug("Renderer commands generated - {}".format(" ".join(subprocess_cmds)))
@@ -220,28 +215,65 @@
             self.end_time = datetime.now()

             # Return early if job was cancelled
             if self.status in [RenderStatus.CANCELLED, RenderStatus.ERROR]:
-                self.is_finished = True
                 return

             if return_code:
                 message = f"{self.engine.name()} render failed with return_code {return_code} after {self.time_elapsed()}"
                 logger.error(message)
-                self.failed_attempts = self.failed_attempts + 1
-            else:
-                message = f"{self.engine.name()} render completed successfully in {self.time_elapsed()}"
-                logger.info(message)
-                self.status = RenderStatus.COMPLETED
-            f.write(message)
-
-        if self.failed_attempts >= self.maximum_attempts and self.status is not RenderStatus.CANCELLED:
-            logger.error('{} Render of {} failed after {} attempts'.format(self.engine.name(), self.input_path,
-                                                                           self.failed_attempts))
-            self.status = RenderStatus.ERROR
-            if not self.errors:
-                self.errors = [self.last_output]
-        self.is_finished = True
+                f.write(message)
+                self.status = RenderStatus.ERROR
+                if not self.errors:
+                    self.errors = [message]
+                return
+
+            message = f"{self.engine.name()} render completed successfully in {self.time_elapsed()}"
+            logger.info(message)
+            f.write(message)
+
+        from lib.server.server_proxy import RenderServerProxy
+
+        # Wait on children jobs, if necessary
+        if self.children:
+            self.status = RenderStatus.WAITING
+            subjobs_still_running = self.children.copy()
+            while len(subjobs_still_running):
+                for hostname, job_id in subjobs_still_running.copy().items():
+                    proxy = RenderServerProxy(hostname)
+                    response = proxy.get_job_info(job_id)
+                    if not response:
+                        logger.warning(f"No response from: {hostname}")
+                    else:
+                        status = string_to_status(response.get('status', ''))
+                        status_msg = f"Subjob {job_id}@{hostname} | Status: {status} | {response.get('percent_complete')}%"
+                        if status in [RenderStatus.CANCELLED, RenderStatus.ERROR, RenderStatus.COMPLETED]:
+                            logger.info(f"Downloading completed subjob files from {hostname} to localhost")
+                            try:
+                                zip_file_path = self.output_path + f'_{hostname}_{job_id}.zip'
+                                proxy.get_job_files(job_id, zip_file_path)
+                                logger.debug("Zip file download successfully - Preparing to unzip.")
+                                extract_path = os.path.dirname(zip_file_path)
+                                with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
+                                    zip_ref.extractall(extract_path)
+                                logger.info(f"Successfully extracted zip to: {extract_path}")
+                                os.remove(zip_file_path)
+                            except Exception as e:
+                                err_msg = f"Error transferring output from subjob {job_id}@{hostname}: {e}"
+                                logger.exception(err_msg)
+                                self.errors.append(err_msg)
+                            finally:
+                                subjobs_still_running.pop(hostname)
+                        else:
+                            logger.debug(status_msg)
+                logger.debug(f"Waiting on {len(subjobs_still_running)} subjobs on {', '.join(list(subjobs_still_running.keys()))}")
+                time.sleep(5)
+            logger.info("All subjobs complete")
+
+        # Post Render Work
+        logger.debug("Starting post-processing work")
         self.post_processing()
+        self.status = RenderStatus.COMPLETED

     def post_processing(self):
         pass
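
The wait loop calls string_to_status to turn the status string from a subjob's JSON back into a RenderStatus. That helper is not part of this diff; a minimal sketch of what it needs to do, assuming a plain value lookup on the enum, would be:

    def string_to_status(value):
        # Assumed implementation: match on the enum value (e.g. "running"), fall back to UNDEFINED
        try:
            return RenderStatus(value)
        except ValueError:
            return RenderStatus.UNDEFINED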
@@ -260,7 +292,6 @@
     def stop(self, is_error=False):
         if hasattr(self, '__process'):
             try:
-                self.maximum_attempts = 0
                 process = psutil.Process(self.__process.pid)
                 for proc in process.children(recursive=True):
                     proc.kill()
@@ -285,10 +316,13 @@
         return get_time_elapsed(self.start_time, self.end_time)

     def file_list(self):
-        job_dir = os.path.dirname(self.output_path)
-        file_list = [os.path.join(job_dir, file) for file in os.listdir(job_dir)]
-        file_list.sort()
-        return file_list
+        try:
+            job_dir = os.path.dirname(self.output_path)
+            file_list = [os.path.join(job_dir, file) for file in os.listdir(job_dir)]
+            file_list.sort()
+            return file_list
+        except FileNotFoundError:
+            return []

     def json(self):
         job_dict = {


@@ -1,12 +1,10 @@
 #!/usr/bin/env python3
-import json
 import re
+from collections import Counter

-try:
-    from .base_worker import *
-except ImportError:
-    from base_worker import *
-from ..engines.blender_engine import Blender
+from lib.engines.blender_engine import Blender
+from lib.utilities.ffmpeg_helper import image_sequence_to_video
+from lib.workers.base_worker import *


 class BlenderRenderWorker(BaseRenderWorker):
@@ -42,9 +40,7 @@
         if self.camera:
             cmd.extend(['--python-expr', f"import bpy;bpy.context.scene.camera = bpy.data.objects['{self.camera}'];"])

-        # add dash at end of given path to separate frame numbers
-        path_with_ending_dash = os.path.splitext(self.output_path)[0] + "-" + os.path.splitext(self.output_path)[1]
-        cmd.extend(['-E', self.blender_engine, '-o', path_with_ending_dash, '-F', self.export_format])
+        cmd.extend(['-E', self.blender_engine, '-o', self.output_path, '-F', self.export_format])

         # set frame range
         cmd.extend(['-s', self.start_frame, '-e', self.end_frame, '-a'])
@@ -117,19 +113,20 @@
         return max(total_percent, 0)

     def post_processing(self):
-        output_dir = os.listdir(os.path.dirname(self.output_path))
-        if self.total_frames > 1 and len(output_dir) > 1:
-            from ..utilities.ffmpeg_helper import image_sequence_to_video
+        def most_common_extension(file_paths):
+            extensions = [os.path.splitext(path)[1] for path in file_paths]
+            counter = Counter(extensions)
+            most_common_ext, _ = counter.most_common(1)[0]
+            return most_common_ext
+
+        output_dir_files = os.listdir(os.path.dirname(self.output_path))
+        if self.total_frames > 1 and len(output_dir_files) > 1:
             logger.info("Generating preview for image sequence")
-            # get proper file extension
-            path_with_ending_dash = os.path.splitext(self.output_path)[0] + "-"
-            found_output = next(obj for obj in output_dir if os.path.basename(path_with_ending_dash) in obj)
-            glob_pattern = path_with_ending_dash + '%04d' + ('.' + found_output.split('.')[-1] if found_output else "")
             try:
-                image_sequence_to_video(source_glob_pattern=glob_pattern,
-                                        output_path=self.output_path + '.mp4',
+                pattern = self.output_path + "%04d" + most_common_extension(output_dir_files)
+                image_sequence_to_video(source_glob_pattern=pattern,
+                                        output_path=self.output_path + '.mov',
                                         framerate=self.scene_info['fps'])
                 logger.info('Successfully generated preview video from image sequence')
             except Exception as e:
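
To make the new pattern construction concrete, with illustrative file names only: a directory of mostly PNG frames resolves to a %04d.png input pattern, and the ProRes preview is written next to the frames.

    frames = ['shot010_0001.png', 'shot010_0002.png', 'shot010_0003.png', 'render.log']
    most_common_extension(frames)   # -> '.png'
    # If self.output_path were '/renders/shot010_', ffmpeg would then receive:
    #   source_glob_pattern = '/renders/shot010_%04d.png'
    #   output_path         = '/renders/shot010_.mov'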