Files
Zordon/dashboard.py
Brett Williams 34fbdaa4d9 Refactor: DistributedJobManager with pub/sub status change notifications (#25)
* Add pubsub to render_queue and base_worker

* Refactor: Convert ZeroconfServer to Singleton with Class Methods

* New API for subjob servers to notify parent job servers of status changes

* Refactor: Move all subjob related methods to distributed_job_manager.py

* Rewrite for wait_for_subjobs

* Fix: DistributedJobManager.find_available_servers() takes 1 positional argument but 3 were given

* DistributedJobManager should now notify / be notified abotu background job changes

* Fix the make_ready api. Change children keyname to be id@hostname so it can be unique

* Fixes

* Image sequence to movie needs to find the actual start frame

* Fix: subjob_status_change did not return a valid response

* Fix client renderer selection

* Small fix for subjob status checking

* Fix issue with divide_frames_equally

* Fix issue where downloads were not occurring

* Fix issue where old status was being reported

* Add docstrings and code cleanup
2023-06-30 19:49:57 -05:00

281 lines
11 KiB
Python
Executable File

#!/usr/bin/env python3
import datetime
import os.path
import socket
import threading
import time
import traceback
import requests
from rich import box
from rich.console import Console
from rich.layout import Layout
from rich.live import Live
from rich.panel import Panel
from rich.table import Column
from rich.table import Table
from rich.text import Text
from rich.tree import Tree
from lib.workers.base_worker import RenderStatus, string_to_status
from lib.server.server_proxy import RenderServerProxy
from lib.utilities.misc_helper import get_time_elapsed
from start_server import start_server
"""
The RenderDashboard is designed to be run on a remote machine or on the local server
This provides a detailed status of all jobs running on the server
"""
status_colors = {RenderStatus.ERROR: "red", RenderStatus.CANCELLED: 'orange1', RenderStatus.COMPLETED: 'green',
RenderStatus.NOT_STARTED: "yellow", RenderStatus.SCHEDULED: 'purple',
RenderStatus.RUNNING: 'cyan'}
categories = [RenderStatus.RUNNING, RenderStatus.ERROR, RenderStatus.NOT_STARTED, RenderStatus.SCHEDULED,
RenderStatus.COMPLETED, RenderStatus.CANCELLED, RenderStatus.UNDEFINED]
renderer_colors = {'ffmpeg': '[magenta]', 'blender': '[orange1]', 'aerender': '[purple]'}
local_hostname = socket.gethostname()
def status_string_to_color(status_string):
job_status = string_to_status(status_string)
job_color = '[{}]'.format(status_colors[job_status])
return job_color
def sorted_jobs(all_jobs):
sort_by_date = True
if not sort_by_date:
sorted_job_list = []
if all_jobs:
for status_category in categories:
found_jobs = [x for x in all_jobs if x['status'] == status_category.value]
if found_jobs:
sorted_found_jobs = sorted(found_jobs, key=lambda d: datetime.datetime.fromisoformat(d['date_created']), reverse=True)
sorted_job_list.extend(sorted_found_jobs)
else:
sorted_job_list = sorted(all_jobs, key=lambda d: datetime.datetime.fromisoformat(d['date_created']), reverse=True)
return sorted_job_list
def create_node_tree(all_server_data) -> Tree:
main_tree = Tree("[magenta]Server Cluster")
for server_host, server_data in all_server_data['servers'].items():
node_title_local = f"[cyan bold]{server_host}[/] [yellow](This Computer)[default]"
node_title_remote = f"[cyan]{server_host} [magenta](Remote)[default]"
node_tree_text = node_title_local if (server_host == local_hostname) else node_title_remote
if server_data.get('is_online', False):
node_tree_text = node_tree_text + " - [green]Running"
node_tree = Tree(node_tree_text)
stats_text = f"CPU: [yellow]{server_data['status']['cpu_percent']}% [default]| RAM: " \
f"[yellow]{server_data['status']['memory_percent']}% [default]| Cores: " \
f"[yellow]{server_data['status']['cpu_count']} [default]| " \
f"{server_data['status']['platform'].split('-')[0]}"
node_tree.add(Tree(stats_text))
running_jobs = [job for job in server_data['jobs'] if job['status'] == RenderStatus.RUNNING.value]
not_started = [job for job in server_data['jobs'] if job['status'] == RenderStatus.NOT_STARTED.value]
scheduled = [job for job in server_data['jobs'] if job['status'] == RenderStatus.SCHEDULED.value]
jobs_to_display = running_jobs + not_started + scheduled
jobs_tree = Tree(f"Running: [green]{len(running_jobs)} [default]| Queued: [cyan]{len(not_started)}"
f"[default] | Scheduled: [cyan]{len(scheduled)}")
for job in jobs_to_display:
renderer = f"{renderer_colors[job['renderer']]}{job['renderer']}[default]"
filename = os.path.basename(job['input_path']).split('.')[0]
if job['status'] == RenderStatus.RUNNING.value:
jobs_tree.add(f"[bold]{renderer} {filename} ({job['id']}) - {status_string_to_color(job['status'])}{(float(job['percent_complete']) * 100):.1f}%")
else:
jobs_tree.add(f"{filename} ({job['id']}) - {status_string_to_color(job['status'])}{job['status'].title()}")
if not jobs_to_display:
jobs_tree.add("[italic]No running jobs")
node_tree.add(jobs_tree)
main_tree.add(node_tree)
else:
# if server is offline
node_tree_text = node_tree_text + " - [red]Offline"
node_tree = Tree(node_tree_text)
main_tree.add(node_tree)
return main_tree
def create_jobs_table(all_server_data) -> Table:
table = Table("ID", "Name", "Renderer", Column(header="Priority", justify="center"),
Column(header="Status", justify="center"), Column(header="Time Elapsed", justify="right"),
Column(header="# Frames", justify="right"), "Client", show_lines=True,
box=box.HEAVY_HEAD)
all_jobs = []
for server_name, server_data in all_server_data['servers'].items():
for job in server_data['jobs']:
#todo: clean this up
all_jobs.append(job)
all_jobs = sorted_jobs(all_jobs)
for job in all_jobs:
job_status = string_to_status(job['status'])
job_color = '[{}]'.format(status_colors[job_status])
job_text = f"{job_color}" + job_status.value.title()
if job_status == RenderStatus.ERROR and job['errors']:
job_text = job_text + "\n" + "\n".join(job['errors'])
# Project name
project_name = job_color + (job['name'] or os.path.basename(job['input_path']))
elapsed_time = get_time_elapsed(datetime.datetime.fromisoformat(job['start_time']),
datetime.datetime.fromisoformat(job['end_time']))
if job_status == RenderStatus.RUNNING:
job_text = f"{job_color}[bold]Running - {float(job['percent_complete']) * 100:.1f}%"
elapsed_time = "[bold]" + elapsed_time
project_name = "[bold]" + project_name
elif job_status == RenderStatus.CANCELLED or job_status == RenderStatus.ERROR:
project_name = "[strike]" + project_name
# Priority
priority_color = ["red", "yellow", "cyan"][(job['priority'] - 1)]
client_name = job['client'] or 'unknown'
client_colors = {'unknown': '[red]', local_hostname: '[yellow]'}
client_title = client_colors.get(client_name, '[magenta]') + client_name
table.add_row(
job['id'],
project_name,
renderer_colors.get(job['renderer'], '[cyan]') + job['renderer'] + '[default]-' + job['renderer_version'],
f"[{priority_color}]{job['priority']}",
job_text,
elapsed_time,
str(max(int(job['total_frames']), 1)),
client_title
)
return table
def create_status_panel(all_server_data):
for key, value in all_server_data['servers'].items():
if key == local_hostname:
return str(value['status'])
return "no status"
class KeyboardThread(threading.Thread):
def __init__(self, input_cbk = None, name='keyboard-input-thread'):
self.input_cbk = input_cbk
super(KeyboardThread, self).__init__(name=name)
self.start()
def run(self):
while True:
self.input_cbk(input()) #waits to get input + Return
def my_callback(inp):
#evaluate the keyboard input
print('You Entered:', inp)
if __name__ == '__main__':
get_server_ip = input("Enter server IP or None for local: ") or local_hostname
server_proxy = RenderServerProxy(get_server_ip, "8080")
if not server_proxy.connect():
if server_proxy.hostname == local_hostname:
start_server_input = input("Local server not running. Start server? (y/n) ")
if start_server_input and start_server_input[0].lower() == "y":
# Startup the local server
start_server(background_thread=True)
test = server_proxy.connect()
print(f"connected? {test}")
else:
print(f"\nUnable to connect to server: {server_proxy.hostname}")
print("\nVerify IP address is correct and server is running")
exit(1)
# start the Keyboard thread
# kthread = KeyboardThread(my_callback)
# Console Layout
console = Console()
layout = Layout()
# Divide the "screen" in to three parts
layout.split(
Layout(name="header", size=3),
Layout(ratio=1, name="main")
# Layout(size=10, name="footer"),
)
# Divide the "main" layout in to "side" and "body"
layout["main"].split_row(
Layout(name="side"),
Layout(name="body",
ratio=3))
# Divide the "side" layout in to two
layout["side"].split(Layout(name="side_top"), Layout(name="side_bottom"))
# Server connection header
header_text = Text(f"Connected to server: ")
header_text.append(f"{server_proxy.hostname} ", style="green")
if server_proxy.hostname == local_hostname:
header_text.append("(This Computer)", style="yellow")
else:
header_text.append("(Remote)", style="magenta")
# background process to update server data independent of the UI
def fetch_server_data(server):
while True:
fetched_data = server.get_data(timeout=5)
if fetched_data:
server.fetched_status_data = fetched_data
time.sleep(1)
x = threading.Thread(target=fetch_server_data, args=(server_proxy,))
x.daemon = True
x.start()
# draw and update the UI
with Live(console=console, screen=False, refresh_per_second=1, transient=True) as live:
while True:
try:
if server_proxy.fetched_status_data:
server_online = False
if server_proxy.fetched_status_data.get('timestamp', None):
timestamp = datetime.datetime.fromisoformat(server_proxy.fetched_status_data['timestamp'])
time_diff = datetime.datetime.now() - timestamp
server_online = time_diff.seconds < 10 # client is offline if not updated in certain time
layout["body"].update(create_jobs_table(server_proxy.fetched_status_data))
layout["side_top"].update(Panel(create_node_tree(server_proxy.fetched_status_data)))
layout["side_bottom"].update(Panel(create_status_panel(server_proxy.fetched_status_data)))
online_text = "Online" if server_online else "Offline"
online_color = "green" if server_online else "red"
layout["header"].update(Panel(Text(f"Zordon Render Client - Version 0.0.1 alpha - {online_text}",
justify="center", style=online_color)))
live.update(layout, refresh=False)
except Exception as e:
print(f"Exception updating table: {e}")
traceback.print_exception(e)
time.sleep(1)
# # # todo: Add input prompt to manage running jobs (ie add, cancel, get info, etc)