Integrate watchdog into render worker (#88)

* Add a watchdog to base_worker
* Logging cleanup
* Prevent multiple watchdogs from running if render process restarts
* Add process timeout parameter to Config
* Refactor
* Add error handling to process output parsing
* Fix issue where start_time was not getting set consistently
2026-02-05 13:46:10 +00:00 · 2024-08-06 10:48:24 -05:00
parent 90d5e9b7af
commit 6afb6e65a6
5 changed files with 99 additions and 41 deletions
--- a/src/distributed_job_manager.py
+++ b/src/distributed_job_manager.py
@@ -14,6 +14,7 @@ from src.api.preview_manager import PreviewManager
 from src.api.server_proxy import RenderServerProxy
 from src.engines.engine_manager import EngineManager
 from src.render_queue import RenderQueue
+from src.utilities.config import Config
 from src.utilities.misc_helper import get_file_size_human
 from src.utilities.status_utils import RenderStatus, string_to_status
 from src.utilities.zeroconf_server import ZeroconfServer
@@ -135,8 +136,7 @@ class DistributedJobManager:
        """
        Creates render jobs.

-        This method takes a list of job data, a local path to a loaded project, and a job directory. It creates a render
-        job for each job data in the list and appends the result to a list. The list of results is then returned.
+        This method takes job data and a local path to a loaded project. It creates and returns a new render job.

        Args:
            job_data (dict): Job data.
@@ -172,6 +172,7 @@ class DistributedJobManager:
        worker.priority = int(job_data.get('priority', worker.priority))
        worker.start_frame = int(job_data.get("start_frame", worker.start_frame))
        worker.end_frame = int(job_data.get("end_frame", worker.end_frame))
+        worker.watchdog_timeout = Config.worker_process_timeout
        worker.hostname = socket.gethostname()

        # determine if we can / should split the job