Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 65 additions & 1 deletion api/system/metrics.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import tracemalloc

from fastapi import Query
from fastapi.responses import PlainTextResponse

Expand Down Expand Up @@ -66,7 +68,69 @@ async def get_system_metrics(
result += f'ayon_user_requests{{name="{name}"}} {num_requests}\n'

# Get system metrics

result += await system_metrics.render_prometheus()

return PlainTextResponse(result)


#
# Tracemalloc metrics
#

# Baseline tracemalloc snapshot for this process. It stays None until the
# GET /metrics/tracemalloc handler takes the first snapshot, and is reset
# to None by the DELETE handler when tracing is stopped.
snapshot: tracemalloc.Snapshot | None = None


@router.get("/metrics/tracemalloc", dependencies=[NoTraces])
async def get_tracemalloc_metrics(
    user: CurrentUserOptional,
    api_key: ApiKey,
) -> PlainTextResponse:
    """Get tracemalloc metrics in Prometheus format.

    Access is granted to admin users, or to any caller presenting an API
    key equal to ``ayonconfig.metrics_api_key``.

    The first authorized call starts tracemalloc and stores a baseline
    snapshot in the module-level ``snapshot``. Every call (including the
    first) takes a fresh snapshot, compares it to the baseline grouped by
    source line, and reports the first ten entries of that comparison as
    ``tracemalloc_allocations{file=..., line=...} <size_diff>``.

    Raises:
        ForbiddenException: the caller is neither an admin nor holds the
            metrics API key.
    """
    global snapshot

    # Non-admin users get no special access; they must present the API key
    # just like anonymous callers.
    if user is not None and not user.is_admin:
        user = None

    if user is None:
        if api_key is None:
            raise ForbiddenException("Access denied")
        if api_key != ayonconfig.metrics_api_key:
            raise ForbiddenException("Access denied")

    if snapshot is None:
        tracemalloc.start()
        snapshot = tracemalloc.take_snapshot()

    current_snapshot = tracemalloc.take_snapshot()
    top_stats = current_snapshot.compare_to(snapshot, "lineno")

    lines = ["# Tracemalloc metrics"]
    for stat in top_stats[:10]:
        frame = stat.traceback[0]
        # Prometheus label values must have backslash, double quote and
        # line feed escaped; file names (e.g. Windows paths) can contain
        # them and would otherwise yield an unparsable exposition line.
        filename = (
            frame.filename.replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )
        lines.append(
            f'tracemalloc_allocations{{file="{filename}", '
            f'line="{frame.lineno}"}} {stat.size_diff}'
        )

    return PlainTextResponse("".join(f"{line}\n" for line in lines))


@router.delete("/metrics/tracemalloc", dependencies=[NoTraces])
async def stop_tracemalloc_metrics(
    user: CurrentUserOptional,
    api_key: ApiKey,
) -> PlainTextResponse:
    """Stop tracemalloc metrics collection.

    Allowed for admin users, or for callers whose API key equals
    ``ayonconfig.metrics_api_key``; anyone else gets a
    ``ForbiddenException``. Clears the stored baseline snapshot and
    stops tracemalloc.
    """
    global snapshot

    is_admin = user is not None and user.is_admin
    if not is_admin:
        # Without admin rights the metrics API key is the only way in.
        key_matches = (
            api_key is not None and api_key == ayonconfig.metrics_api_key
        )
        if not key_matches:
            raise ForbiddenException("Access denied")

    # Drop the baseline first so the next GET starts a fresh session.
    snapshot = None
    tracemalloc.stop()

    return PlainTextResponse("Tracemalloc metrics collection stopped")
7 changes: 7 additions & 0 deletions ayon_server/metrics/system.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import asyncio
import os
import time
from typing import Any

Expand Down Expand Up @@ -94,15 +96,20 @@ async def status(self) -> list[Metric]:
mem = psutil.virtual_memory()
mem_usage = 100 * ((mem.total - mem.available) / mem.total)

process = psutil.Process(os.getpid())

redis_size = await Redis.get_total_size()

return [
Metric("cpu_usage", psutil.cpu_percent()),
Metric("memory_usage", mem_usage),
Metric("process_memory_rss", process.memory_info().rss),
Metric("process_memory_vms", process.memory_info().vms),
Metric("swap_usage", psutil.swap_memory().percent),
Metric("uptime_seconds", time.time() - self.boot_time),
Metric("runtime_seconds", time.time() - self.run_time),
Metric("redis_size_total", redis_size),
Metric("asyncio_tasks_total", len(asyncio.all_tasks())),
]

async def render_prometheus(self) -> str:
Expand Down