Fix processes not ending when stopped (#98)

* Fix processes not ending when stopped

* Fix error when removing a job

* Better error handling

* Refactored the kill-process code and fixed Windows support (sketched below)

* Improved error handling

* Add try/except around the code that deletes project files

* Wait for the thread to finish after killing the process

* Don't try to stop process multiple times

* Misc cleanup
2024-08-13 11:16:31 -05:00, committed by GitHub
parent 94a40c46dc
commit e2333c4451
2 changed files with 61 additions and 29 deletions

View File

@@ -320,30 +320,34 @@ def delete_job(job_id):
         # Check if we can remove the 'output' directory
         found_job = RenderQueue.job_with_id(job_id)
-        project_dir = os.path.dirname(os.path.dirname(found_job.input_path))
         output_dir = os.path.dirname(found_job.output_path)
-        if server.config['UPLOAD_FOLDER'] in output_dir and os.path.exists(output_dir):
-            shutil.rmtree(output_dir)
+        found_job.stop()
         try:
             PreviewManager.delete_previews_for_job(found_job)
         except Exception as e:
             logger.error(f"Error deleting previews for {found_job}: {e}")
-        # See if we own the project_dir (i.e. was it uploaded)
-        if server.config['UPLOAD_FOLDER'] in project_dir and os.path.exists(project_dir):
-            # check to see if any other projects are sharing the same project file
-            project_dir_files = [f for f in os.listdir(project_dir) if not f.startswith('.')]
-            if len(project_dir_files) == 0 or (len(project_dir_files) == 1 and 'source' in project_dir_files[0]):
-                logger.info(f"Removing project directory: {project_dir}")
-                shutil.rmtree(project_dir)
+        # finally delete the job
+        project_dir = os.path.dirname(os.path.dirname(found_job.input_path))
         RenderQueue.delete_job(found_job)
-        if request.args.get('redirect', False):
-            return redirect(url_for('index'))
-        else:
-            return "Job deleted", 200
+        # delete the output_dir
+        if server.config['UPLOAD_FOLDER'] in output_dir and os.path.exists(output_dir):
+            shutil.rmtree(output_dir)
+        # See if we own the project_dir (i.e. was it uploaded) - if so delete the directory
+        try:
+            if server.config['UPLOAD_FOLDER'] in project_dir and os.path.exists(project_dir):
+                # check to see if any other projects are sharing the same project file
+                project_dir_files = [f for f in os.listdir(project_dir) if not f.startswith('.')]
+                if len(project_dir_files) == 0 or (len(project_dir_files) == 1 and 'source' in project_dir_files[0]):
+                    logger.info(f"Removing project directory: {project_dir}")
+                    shutil.rmtree(project_dir)
+        except Exception as e:
+            logger.error(f"Error removing project files: {e}")
+        return "Job deleted", 200
     except Exception as e:
         logger.error(f"Error deleting job: {e}")
         return f"Error deleting job: {e}", 500

View File

@@ -3,6 +3,7 @@ import io
 import json
 import logging
 import os
+import signal
 import subprocess
 import threading
 import time
@@ -285,6 +286,7 @@ class BaseRenderWorker(Base):
             message = f"{self.engine.name()} render ended with status '{self.status.value}' " \
                       f"after {self.time_elapsed()}"
             self.log_and_print(message, log_file)
+            log_file.close()
             return

         # Post Render Work
@@ -307,7 +309,7 @@ class BaseRenderWorker(Base):
                 time_since_last_update = time.time() - self.__last_output_time
                 if time_since_last_update > self.watchdog_timeout:
                     logger.error(f"Process for {self} terminated due to exceeding timeout ({self.watchdog_timeout}s)")
-                    self.__process.kill()
+                    self.__kill_process()
                     break
                 # logger.debug(f'Watchdog for {self} - Time since last update: {time_since_last_update}')
                 time.sleep(1)
@@ -320,8 +322,13 @@ class BaseRenderWorker(Base):
         try:
             # Start process and get updates
-            self.__process = subprocess.Popen(subprocess_cmds, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-                                              universal_newlines=False)
+            if os.name == 'posix':  # linux / mac
+                self.__process = subprocess.Popen(subprocess_cmds, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                                                  universal_newlines=False, preexec_fn=os.setsid)
+            else:  # windows
+                self.__process = subprocess.Popen(subprocess_cmds, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                                                  universal_newlines=False,
+                                                  creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)

             # Start watchdog
             self.__last_output_time = time.time()
@@ -350,7 +357,7 @@ class BaseRenderWorker(Base):
                 message = f'Uncaught error running render process: {e}'
                 f.write(message)
                 logger.exception(message)
-                self.__process.kill()
+                self.__kill_process()

         # let watchdog end before continuing - prevents multiple watchdogs running when process restarts
         if watchdog_thread.is_alive():
@@ -358,11 +365,32 @@ class BaseRenderWorker(Base):
         return return_code

+    def __kill_process(self):
+        try:
+            if self.__process.poll():
+                return
+            logger.debug(f"Trying to kill process {self.__process}")
+            self.__process.terminate()
+            self.__process.kill()
+            if os.name == 'posix':  # linux / macos
+                os.killpg(os.getpgid(self.__process.pid), signal.SIGTERM)
+                os.killpg(os.getpgid(self.__process.pid), signal.SIGKILL)
+            else:  # windows
+                parent = psutil.Process(self.__process.pid)
+                for child in parent.children(recursive=True):
+                    child.kill()
+            self.__process.wait(timeout=5)
+            logger.debug(f"Process ended with status {self.__process.poll()}")
+        except (ProcessLookupError, AttributeError, psutil.NoSuchProcess):
+            pass
+        except Exception as e:
+            logger.error(f"Error stopping the process: {e}")
+
     def post_processing(self):
         pass

     def is_running(self):
-        if self.__thread:
+        if hasattr(self, '__thread'):
             return self.__thread.is_alive()
         return False
@@ -373,15 +401,11 @@ class BaseRenderWorker(Base):
         self.stop(is_error=True)

     def stop(self, is_error=False):
-        if hasattr(self, '__process'):
-            try:
-                process = psutil.Process(self.__process.pid)
-                for proc in process.children(recursive=True):
-                    proc.kill()
-                process.kill()
-            except Exception as e:
-                logger.debug(f"Error stopping the process: {e}")
-        if self.status in [RenderStatus.RUNNING, RenderStatus.NOT_STARTED, RenderStatus.SCHEDULED]:
+        logger.debug(f"Stopping {self}")
+        # cleanup status
+        if self.status in [RenderStatus.RUNNING, RenderStatus.NOT_STARTED, RenderStatus.SCHEDULED,
+                           RenderStatus.CONFIGURING]:
             if is_error:
                 err_message = self.errors[-1] if self.errors else 'Unknown error'
                 logger.error(f"Halting render due to error: {err_message}")
@@ -389,6 +413,10 @@ class BaseRenderWorker(Base):
             else:
                 self.status = RenderStatus.CANCELLED

+        self.__kill_process()
+        if self.is_running():  # allow the log files to close
+            self.__thread.join(timeout=5)
+
     def percent_complete(self):
         if self.status == RenderStatus.COMPLETED:
             return 1.0
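The join in the last hunk is easy to miss but is what pairs with the "allow the log files to close" comment: killing the process only unblocks the worker thread, and the thread still has to run to the end of its loop before its log file handle is released. A tiny illustration of that stop-then-join ordering; the Worker class and render.log path are hypothetical stand-ins, not code from this repository, with the bounded join mirroring the diff's self.__thread.join(timeout=5).

import threading
import time


class Worker:
    """Hypothetical worker: writes to a log file until told to stop."""

    def __init__(self):
        self._stop = threading.Event()
        self._thread = threading.Thread(target=self._run, daemon=True)

    def start(self):
        self._thread.start()

    def _run(self):
        with open('render.log', 'a') as log_file:  # closed only when this method returns
            while not self._stop.is_set():
                log_file.write('tick\n')
                time.sleep(0.1)
            log_file.write('stopped\n')  # final write before the file closes

    def stop(self):
        self._stop.set()                 # stand-in for killing the render process
        if self._thread.is_alive():      # allow the log file to close
            self._thread.join(timeout=5)


if __name__ == '__main__':
    w = Worker()
    w.start()
    time.sleep(0.3)
    w.stop()
    print('thread alive after stop:', w._thread.is_alive())  # False: safe to delete its files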