Integrate watchdog into render worker (#88)

* Add a watchdog to base_worker

* Logging cleanup

* Prevent multiple watchdogs from running if render process restarts

* Add process timeout parameter to Config

* Refactor

* Add error handling to process output parsing

* Fix issue where start_time was not getting set consistently
This commit is contained in:
2024-08-06 10:48:24 -05:00
committed by GitHub
parent 90d5e9b7af
commit 6afb6e65a6
5 changed files with 99 additions and 41 deletions

View File

@@ -14,6 +14,7 @@ from src.api.preview_manager import PreviewManager
from src.api.server_proxy import RenderServerProxy
from src.engines.engine_manager import EngineManager
from src.render_queue import RenderQueue
from src.utilities.config import Config
from src.utilities.misc_helper import get_file_size_human
from src.utilities.status_utils import RenderStatus, string_to_status
from src.utilities.zeroconf_server import ZeroconfServer
@@ -135,8 +136,7 @@ class DistributedJobManager:
"""
Creates render jobs.
This method takes a list of job data, a local path to a loaded project, and a job directory. It creates a render
job for each job data in the list and appends the result to a list. The list of results is then returned.
This method takes job data and a local path to a loaded project. It creates and returns a new render job.
Args:
job_data (dict): Job data.
@@ -172,6 +172,7 @@ class DistributedJobManager:
worker.priority = int(job_data.get('priority', worker.priority))
worker.start_frame = int(job_data.get("start_frame", worker.start_frame))
worker.end_frame = int(job_data.get("end_frame", worker.end_frame))
worker.watchdog_timeout = Config.worker_process_timeout
worker.hostname = socket.gethostname()
# determine if we can / should split the job