Fix processes not ending when stopped (#98)

* Fix processes not ending when stopped

* Fix error when removing a job

* Better error handling

* Refactored the kill-process code and fixed Windows support

* Improved error handling

* Add try/except around the code that deletes project files

* Wait for the thread to finish after killing the process

* Don't try to stop process multiple times

* Misc cleanup
This commit is contained in:
2024-08-13 11:16:31 -05:00
committed by GitHub
parent 94a40c46dc
commit e2333c4451
2 changed files with 61 additions and 29 deletions

View File

@@ -320,30 +320,34 @@ def delete_job(job_id):
# Check if we can remove the 'output' directory
found_job = RenderQueue.job_with_id(job_id)
project_dir = os.path.dirname(os.path.dirname(found_job.input_path))
output_dir = os.path.dirname(found_job.output_path)
if server.config['UPLOAD_FOLDER'] in output_dir and os.path.exists(output_dir):
shutil.rmtree(output_dir)
found_job.stop()
try:
PreviewManager.delete_previews_for_job(found_job)
except Exception as e:
logger.error(f"Error deleting previews for {found_job}: {e}")
# See if we own the project_dir (i.e. was it uploaded)
project_dir = os.path.dirname(os.path.dirname(found_job.input_path))
if server.config['UPLOAD_FOLDER'] in project_dir and os.path.exists(project_dir):
# check to see if any other projects are sharing the same project file
project_dir_files = [f for f in os.listdir(project_dir) if not f.startswith('.')]
if len(project_dir_files) == 0 or (len(project_dir_files) == 1 and 'source' in project_dir_files[0]):
logger.info(f"Removing project directory: {project_dir}")
shutil.rmtree(project_dir)
# finally delete the job
RenderQueue.delete_job(found_job)
if request.args.get('redirect', False):
return redirect(url_for('index'))
else:
return "Job deleted", 200
# delete the output_dir
if server.config['UPLOAD_FOLDER'] in output_dir and os.path.exists(output_dir):
shutil.rmtree(output_dir)
# See if we own the project_dir (i.e. was it uploaded) - if so delete the directory
try:
if server.config['UPLOAD_FOLDER'] in project_dir and os.path.exists(project_dir):
# check to see if any other projects are sharing the same project file
project_dir_files = [f for f in os.listdir(project_dir) if not f.startswith('.')]
if len(project_dir_files) == 0 or (len(project_dir_files) == 1 and 'source' in project_dir_files[0]):
logger.info(f"Removing project directory: {project_dir}")
shutil.rmtree(project_dir)
except Exception as e:
logger.error(f"Error removing project files: {e}")
return "Job deleted", 200
except Exception as e:
logger.error(f"Error deleting job: {e}")
return f"Error deleting job: {e}", 500

View File

@@ -3,6 +3,7 @@ import io
import json
import logging
import os
import signal
import subprocess
import threading
import time
@@ -285,6 +286,7 @@ class BaseRenderWorker(Base):
message = f"{self.engine.name()} render ended with status '{self.status.value}' " \
f"after {self.time_elapsed()}"
self.log_and_print(message, log_file)
log_file.close()
return
# Post Render Work
@@ -307,7 +309,7 @@ class BaseRenderWorker(Base):
time_since_last_update = time.time() - self.__last_output_time
if time_since_last_update > self.watchdog_timeout:
logger.error(f"Process for {self} terminated due to exceeding timeout ({self.watchdog_timeout}s)")
self.__process.kill()
self.__kill_process()
break
# logger.debug(f'Watchdog for {self} - Time since last update: {time_since_last_update}')
time.sleep(1)
@@ -320,8 +322,13 @@ class BaseRenderWorker(Base):
try:
# Start process and get updates
self.__process = subprocess.Popen(subprocess_cmds, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
universal_newlines=False)
if os.name == 'posix': # linux / mac
self.__process = subprocess.Popen(subprocess_cmds, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
universal_newlines=False, preexec_fn=os.setsid)
else: # windows
self.__process = subprocess.Popen(subprocess_cmds, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
universal_newlines=False,
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
# Start watchdog
self.__last_output_time = time.time()
@@ -350,7 +357,7 @@ class BaseRenderWorker(Base):
message = f'Uncaught error running render process: {e}'
f.write(message)
logger.exception(message)
self.__process.kill()
self.__kill_process()
# let watchdog end before continuing - prevents multiple watchdogs running when process restarts
if watchdog_thread.is_alive():
@@ -358,11 +365,32 @@ class BaseRenderWorker(Base):
return return_code
def __kill_process(self):
    """Forcefully stop the render subprocess together with the processes it spawned.

    Returns immediately if the subprocess has already exited. This method never
    raises: a process that is already gone (or was never started) is silently
    ignored, and any other failure is logged as an error.
    """
    try:
        # poll() returns None only while the process is still running. Compare against
        # None explicitly: an exit code of 0 is falsy but still means "already finished".
        if self.__process.poll() is not None:
            return
        logger.debug(f"Trying to kill process {self.__process}")
        if os.name == 'posix':  # linux / macos
            # Resolve the group id once, up front, while the process is known to be alive.
            # The subprocess itself is a member of this group, so it is signalled as well.
            process_group = os.getpgid(self.__process.pid)
            os.killpg(process_group, signal.SIGTERM)
            os.killpg(process_group, signal.SIGKILL)
        else:  # windows
            # Snapshot the descendants *before* killing the parent; once the parent is
            # gone the lookup by pid can fail and the children would be left running.
            descendants = psutil.Process(self.__process.pid).children(recursive=True)
            self.__process.kill()
            for child in descendants:
                try:
                    child.kill()
                except psutil.NoSuchProcess:
                    # this child already exited - keep going with the remaining ones
                    continue
        self.__process.wait(timeout=5)
        logger.debug(f"Process ended with status {self.__process.poll()}")
    except (ProcessLookupError, AttributeError, psutil.NoSuchProcess):
        # the process is already gone, or no process was ever started
        pass
    except Exception as e:
        logger.error(f"Error stopping the process: {e}")
def post_processing(self):
    """Do nothing.

    NOTE(review): presumably a hook for subclasses to run work after a render
    finishes - confirm against the callers.
    """
def is_running(self):
    """Return True while the worker's thread is alive, otherwise False.

    False is also returned when no thread has been assigned yet (the attribute is
    missing or is None).
    """
    # A hasattr(self, '__thread') guard cannot work inside the class: the attribute is
    # stored under its name-mangled form, while the string literal is not mangled, so
    # the check is always False. Accessing the attribute directly avoids that trap.
    try:
        return self.__thread.is_alive()
    except AttributeError:
        # no thread attribute yet, or it is still None
        return False
@@ -373,15 +401,11 @@ class BaseRenderWorker(Base):
self.stop(is_error=True)
def stop(self, is_error=False):
if hasattr(self, '__process'):
try:
process = psutil.Process(self.__process.pid)
for proc in process.children(recursive=True):
proc.kill()
process.kill()
except Exception as e:
logger.debug(f"Error stopping the process: {e}")
if self.status in [RenderStatus.RUNNING, RenderStatus.NOT_STARTED, RenderStatus.SCHEDULED]:
logger.debug(f"Stopping {self}")
# cleanup status
if self.status in [RenderStatus.RUNNING, RenderStatus.NOT_STARTED, RenderStatus.SCHEDULED,
RenderStatus.CONFIGURING]:
if is_error:
err_message = self.errors[-1] if self.errors else 'Unknown error'
logger.error(f"Halting render due to error: {err_message}")
@@ -389,6 +413,10 @@ class BaseRenderWorker(Base):
else:
self.status = RenderStatus.CANCELLED
self.__kill_process()
if self.is_running(): # allow the log files to close
self.__thread.join(timeout=5)
def percent_complete(self):
if self.status == RenderStatus.COMPLETED:
return 1.0