CPU benchmarks #48 (#76)

* Add benchmark.py

* Add cpu / disk benchmark APIs

* Add cpu_benchmark method to distributed_job_manager.py

* Make sure cpu_benchmark is an int

* Improve distributed_job_manager test
2024-02-11 05:19:24 -06:00
committed by GitHub
parent 79db960383
commit a31fe98964
3 changed files with 163 additions and 10 deletions
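
benchmark.py and the new cpu/disk benchmark API endpoints live in the other changed files and are not shown in the view below. A minimal sketch of the kind of integer CPU score the commit message describes, assuming a hypothetical cpu_benchmark() helper and scoring scheme (not code from this commit):

import time

def cpu_benchmark(duration=1.0):
    # Count loop iterations completed in a fixed time window and return an int,
    # matching the "make sure cpu_benchmark is an int" note above.
    deadline = time.perf_counter() + duration
    iterations = 0
    while time.perf_counter() < deadline:
        iterations += 1
    return iterations // 1000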


@@ -4,8 +4,10 @@ import socket
import time
import zipfile
import requests
from plyer import notification
from pubsub import pub
from concurrent.futures import ThreadPoolExecutor
from src.api.server_proxy import RenderServerProxy
from src.render_queue import RenderQueue
@@ -291,7 +293,7 @@ class DistributedJobManager:
return post_results
@staticmethod
def distribute_server_work(start_frame, end_frame, available_servers, method='cpu_count'):
def distribute_server_work(start_frame, end_frame, available_servers, method='cpu_benchmark'):
"""
Splits the frame range among the available servers in proportion to their performance (CPU benchmark score or CPU count), or evenly.
@@ -300,8 +302,9 @@ class DistributedJobManager:
end_frame (int): The end frame number of the animation to be rendered.
available_servers (list): A list of available server dictionaries. Each server dictionary should include
'hostname' and 'cpu_count' keys (see find_available_servers).
method (str, optional): Specifies the distribution method. Possible values are 'cpu_count' and 'equally'.
Defaults to 'cpu_count'.
method (str, optional): Specifies the distribution method. Possible values are 'cpu_benchmark', 'cpu_count'
and 'evenly'.
Defaults to 'cpu_benchmark'.
Returns:
list: A list of server dictionaries where each dictionary includes the frame range and total number of
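# Hedged usage sketch of the signature documented above; the hostnames, CPU
# counts and frame range below are made-up values, not part of this diff:
example_servers = [{'hostname': 'render01.local', 'cpu_count': 16},
                   {'hostname': 'render02.local', 'cpu_count': 8}]
example_breakdown = DistributedJobManager.distribute_server_work(
    1, 240, example_servers, method='cpu_count')  # 'cpu_count' needs no HTTP fetch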
@@ -310,7 +313,7 @@ class DistributedJobManager:
# Calculate respective frames for each server
def divide_frames_by_cpu_count(frame_start, frame_end, servers):
total_frames = frame_end - frame_start + 1
total_performance = sum(server['cpu_count'] for server in servers)
total_cpus = sum(server['cpu_count'] for server in servers)
frame_ranges = {}
current_frame = frame_start
@@ -321,7 +324,47 @@ class DistributedJobManager:
# Give all remaining frames to the last server
num_frames = total_frames - allocated_frames
else:
num_frames = round((server['cpu_count'] / total_performance) * total_frames)
num_frames = round((server['cpu_count'] / total_cpus) * total_frames)
allocated_frames += num_frames
frame_end_for_server = current_frame + num_frames - 1
if current_frame <= frame_end_for_server:
frame_ranges[server['hostname']] = (current_frame, frame_end_for_server)
current_frame = frame_end_for_server + 1
return frame_ranges
def divide_frames_by_benchmark(frame_start, frame_end, servers):
def fetch_benchmark(server):
try:
benchmark = requests.get(f'http://{server["hostname"]}:{ZeroconfServer.server_port}'
f'/api/cpu_benchmark').text
server['cpu_benchmark'] = benchmark
logger.debug(f'Benchmark for {server["hostname"]}: {benchmark}')
except requests.exceptions.RequestException as e:
logger.error(f'Error fetching benchmark for {server["hostname"]}: {e}')
# Number of threads to use (can adjust based on your needs or number of servers)
threads = len(servers)
with ThreadPoolExecutor(max_workers=threads) as executor:
executor.map(fetch_benchmark, servers)
total_frames = frame_end - frame_start + 1
total_performance = sum(int(server['cpu_benchmark']) for server in servers)
frame_ranges = {}
current_frame = frame_start
allocated_frames = 0
for i, server in enumerate(servers):
if i == len(servers) - 1: # if it's the last server
# Give all remaining frames to the last server
num_frames = total_frames - allocated_frames
else:
num_frames = round((int(server['cpu_benchmark']) / total_performance) * total_frames)
allocated_frames += num_frames
frame_end_for_server = current_frame + num_frames - 1
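# Worked sketch of the proportional split above, with made-up benchmark scores
# (illustrative only, not part of this diff): every server except the last gets
# a rounded share of the frames and the last server absorbs the remainder.
example_scores = {'fast-host': 300, 'slow-host': 100}
example_total_frames = 100                                                       # frames 1-100
fast_frames = round(300 / sum(example_scores.values()) * example_total_frames)   # 75 -> frames (1, 75)
slow_frames = example_total_frames - fast_frames                                 # 25 -> frames (76, 100)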
@@ -350,12 +393,18 @@ class DistributedJobManager:
return frame_ranges
if method == 'equally':
breakdown = divide_frames_equally(start_frame, end_frame, available_servers)
# elif method == 'benchmark_score': # todo: implement benchmark score
# pass
if len(available_servers) == 1:
breakdown = {available_servers[0]['hostname']: (start_frame, end_frame)}
else:
breakdown = divide_frames_by_cpu_count(start_frame, end_frame, available_servers)
logger.debug(f'Splitting between {len(available_servers)} servers by {method} method')
if method == 'evenly':
breakdown = divide_frames_equally(start_frame, end_frame, available_servers)
elif method == 'cpu_benchmark':
breakdown = divide_frames_by_benchmark(start_frame, end_frame, available_servers)
elif method == 'cpu_count':
breakdown = divide_frames_by_cpu_count(start_frame, end_frame, available_servers)
else:
raise ValueError(f"Invalid distribution method: {method}")
server_breakdown = [server for server in available_servers if breakdown.get(server['hostname']) is not None]
for server in server_breakdown:
@@ -381,3 +430,17 @@ class DistributedJobManager:
available_servers.append(response)
return available_servers
if __name__ == '__main__':
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
ZeroconfServer.configure("_zordon._tcp.local.", 'testing', 8080)
ZeroconfServer.start(listen_only=True)
print("Starting Zeroconf...")
time.sleep(2)
available_servers = DistributedJobManager.find_available_servers('blender')
print(f"AVAILABLE SERVERS: {available_servers}")
results = DistributedJobManager.distribute_server_work(1, 100, available_servers)
print(f"RESULTS: {results}")
ZeroconfServer.stop()
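
The fetch_benchmark helper above expects each render server to answer GET /api/cpu_benchmark with a plain-text integer. That endpoint is added in one of the other changed files and is not shown in this view; a minimal sketch of such a handler, assuming a Flask-style app and the hypothetical cpu_benchmark() helper sketched earlier (both assumptions, not code from this commit):

from flask import Flask

app = Flask(__name__)

@app.route('/api/cpu_benchmark')
def api_cpu_benchmark():
    # Serve the score as plain text so callers can parse it with int()
    return str(cpu_benchmark())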