Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .eslintrc
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
{
"extends": "eslint:recommended",
"env": {
"node": true
"node": true,
"es6": true
},
"parserOptions": {
"ecmaVersion": 2015
"ecmaVersion": 2017
},
"rules": {
// Possible errors
Expand Down Expand Up @@ -153,7 +154,7 @@
"no-path-concat": 2, // `2` is default
"no-process-exit": 0, // `2` is default
"no-restricted-modules": 0, // no default, optionally set `[2, "fs", "os"]`
"no-sync": 1, // `2` is default
"no-sync": 1 // `2` is default

// eslint v2
//"keyword-spacing": 2
Expand Down
52 changes: 52 additions & 0 deletions lib/db/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -1661,4 +1661,56 @@ dbapi.loadAccessToken = function(id) {
return db.run(r.table('accessTokens').get(id))
}

// Metrics-specific function that provides aggregate device statistics
// without exposing individual device data or bypassing access control
dbapi.getDeviceMetrics = function() {
return Promise.all([
// Get total device count
db.run(r.table('devices').count())
// Get device counts by status
, db.run(r.table('devices').group('status').count().ungroup())
// Get provider count (line split to meet max length)
, db.run(r.table('devices').hasFields('provider')
.map(r.row('provider')('name')).distinct().count())
])
.then(function(results) {
const totalCount = results[0]
const statusCounts = results[1] || []
const providerCount = results[2] || 0

const stats = {
total: totalCount
, usable: 0
, busy: 0
, providers: providerCount
, byStatus: {}
}

statusCounts.forEach(function(item) {
const status = item.group || 'unknown'
const count = item.reduction
stats.byStatus[status] = count

if (status === 'available' || status === 'busy') {
stats.usable += count
}
if (status === 'busy') {
stats.busy += count
}
Comment on lines +1675 to +1699
Copy link

Copilot AI Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getDeviceMetrics() computes usable/busy by comparing the DB 'status' field to strings 'available'/'busy'. In STF, devices.status is an enum value (ONLINE/OFFLINE/UNAUTHORIZED/…) from wireutil.toDeviceStatus(), so these comparisons will never match and usable/busy will always be 0. Rework the aggregation to use the actual schema (e.g., busy based on owner != null / usable based on present+ready+owner==null, and map enum values to readable label names).

Suggested change
])
.then(function(results) {
const totalCount = results[0]
const statusCounts = results[1] || []
const providerCount = results[2] || 0
const stats = {
total: totalCount
, usable: 0
, busy: 0
, providers: providerCount
, byStatus: {}
}
statusCounts.forEach(function(item) {
const status = item.group || 'unknown'
const count = item.reduction
stats.byStatus[status] = count
if (status === 'available' || status === 'busy') {
stats.usable += count
}
if (status === 'busy') {
stats.busy += count
}
// Get busy device count: any device that has an owner
, db.run(r.table('devices').filter(r.row('owner').ne(null)).count())
// Get usable device count: present + ready and no owner
, db.run(
r.table('devices')
.filter(
r.row('present').eq(true)
.and(r.row('ready').eq(true))
.and(r.row('owner').eq(null))
)
.count()
)
])
.then(function(results) {
const totalCount = results[0]
const statusCounts = results[1] || []
const providerCount = results[2] || 0
const busyCount = results[3] || 0
const usableCount = results[4] || 0
const stats = {
total: totalCount
, usable: usableCount
, busy: busyCount
, providers: providerCount
, byStatus: {}
}
statusCounts.forEach(function(item) {
const rawStatus = item.group
// Normalize enum status to a readable label
const statusLabel = rawStatus == null
? 'unknown'
: String(rawStatus).toLowerCase()
const count = item.reduction
stats.byStatus[statusLabel] = count

Copilot uses AI. Check for mistakes.
})

return stats
})
.catch(function(error) {
log.error('Error getting device metrics:', error)
return {
total: 0
, usable: 0
, busy: 0
, providers: 0
, byStatus: {}
}
})
}

module.exports = dbapi
48 changes: 48 additions & 0 deletions lib/units/api/controllers/metrics.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/**
* Copyright © 2025 STF Metrics Controller - Licensed under the Apache license 2.0
*
* Prometheus metrics endpoint controller
*/

// Compatibility shim for Node.js versions where util.isError was removed.
// The shim is installed only when the built-in is missing, so an existing
// implementation is never overridden.
// NOTE(review): nothing visible here calls util.isError -- confirm that a
// dependency really needs it before keeping this patch of a built-in module.
const util = require('util')
if (!util.isError) {
  /**
   * Tells whether a value is an Error instance.
   *
   * @param {*} e - Value to inspect
   * @returns {boolean} true for Error instances, false for everything else
   */
  util.isError = function(e) {
    // Compare against null explicitly so that falsy inputs produce `false`
    // rather than being returned as-is by a short-circuiting `&&`.
    return e !== null && typeof e === 'object' && e instanceof Error
  }
}

Comment on lines +7 to +14
Copy link

Copilot AI Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This controller mutates Node’s built-in util module by polyfilling util.isError, but nothing in this repo uses util.isError. Global monkey-patches like this are hard to reason about and can have unexpected side effects; please remove it unless a concrete dependency requires it.

Suggested change
// Fix for Node.js versions where util.isError was removed
const util = require('util')
if (!util.isError) {
util.isError = function(e) {
return e && typeof e === 'object' && e instanceof Error
}
}

Copilot uses AI. Check for mistakes.
const metrics = require('../../../util/metrics')
const logger = require('../../../util/logger')
const log = logger.createLogger('api:controllers:metrics')

/**
 * GET /metrics
 *
 * Responds with the metrics registry output. On success the body is the
 * metrics text with the registry's content type; on failure the response is
 * a 500 with a JSON error body.
 *
 * @param {Object} req - Express request object (unused)
 * @param {Object} res - Express response object
 * @returns {void}
 */
function getMetrics(req, res) {
  // Starting from a resolved promise lets one chain handle a register that
  // returns a Promise, one that returns a plain string, and one that throws
  // synchronously.
  Promise.resolve()
    .then(() => metrics.register.metrics())
    .then(metricsData => {
      // Set the Prometheus content type only once the payload exists.
      // Setting it up front would make the JSON error response below go out
      // labelled as Prometheus text, because res.json() keeps a content type
      // that is already present.
      res.set('Content-Type', metrics.register.contentType)
      res.end(metricsData)
      log.debug('Served Prometheus metrics')
    })
    .catch(error => {
      log.error('Error serving metrics:', error)
      if (res.headersSent) {
        // The response has already started; a second one cannot be sent.
        return
      }
      res.status(500).json({
        success: false
      , description: 'Internal server error while fetching metrics'
      })
    })
}

module.exports = {
getMetrics
}
12 changes: 11 additions & 1 deletion lib/units/api/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ var zmqutil = require('../../util/zmqutil')
var srv = require('../../util/srv')
var lifecycle = require('../../util/lifecycle')
var wireutil = require('../../wire/util')
var MetricsCollector = require('../../util/metrics-collector')
Comment thread
matanbaruch marked this conversation as resolved.

module.exports = function(options) {
var log = logger.createLogger('api')
Expand Down Expand Up @@ -138,8 +139,17 @@ module.exports = function(options) {

app.disable('x-powered-by')

// Initialize metrics collection
const metricsCollector = new MetricsCollector({
interval: 30000 // Collect metrics every 30 seconds
})
metricsCollector.start()

lifecycle.observe(function() {
[push, sub, pushdev, subdev].forEach(function(sock) {
// Stop metrics collection on shutdown
metricsCollector.stop()

;[push, sub, pushdev, subdev].forEach(function(sock) {
try {
sock.close()
}
Expand Down
21 changes: 21 additions & 0 deletions lib/units/api/swagger/api_v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,28 @@ tags:
description: Groups Operations
- name: admin
description: Privileged Operations
- name: metrics
description: Prometheus Metrics Operations
paths:
/metrics:
x-swagger-router-controller: metrics
get:
summary: Get Prometheus metrics
description: Returns metrics in Prometheus format for monitoring STF system health and usage
Comment on lines +40 to +44
Copy link

Copilot AI Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/metrics is defined without any security requirement, while most other endpoints declare accessTokenAuth. Unless this endpoint is intentionally public (and deployments are expected to protect it at the network layer), it should be protected (e.g., accessTokenAuth and/or admin-only) to avoid exposing operational data to unauthenticated callers.

Copilot uses AI. Check for mistakes.
operationId: getMetrics
tags:
- metrics
responses:
"200":
description: Prometheus metrics
schema:
type: string
default:
description: >
Unexpected Error:
* 500: Internal Server Error
schema:
$ref: "#/definitions/UnexpectedErrorResponse"
/groups:
x-swagger-router-controller: groups
get:
Expand Down
148 changes: 148 additions & 0 deletions lib/util/metrics-collector.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/**
* Copyright © 2025 STF Metrics Collector - Licensed under the Apache license 2.0
*
* Service for collecting STF metrics from database and external sources
*/

const logger = require('./logger')
const dbapi = require('../db/api')
const metrics = require('./metrics')

const log = logger.createLogger('metrics-collector')

class MetricsCollector {
constructor(options = {}) {
this.interval = options.interval || 30000 // 30 seconds default
this.timer = null
this.isRunning = false
}

start() {
if (!this.isRunning) {
log.info('Starting metrics collection with interval:', this.interval + 'ms')
this.isRunning = true
this.collectMetrics() // Collect immediately
this.timer = setInterval(() => this.collectMetrics(), this.interval)
}
}

stop() {
if (this.isRunning) {
log.info('Stopping metrics collection')
this.isRunning = false
if (this.timer) {
clearInterval(this.timer)
this.timer = null
}
}
}

async collectMetrics() {
try {
log.debug('Collecting metrics...')

const [
deviceData
, userData
, groupData
] = await Promise.all([
this.collectDeviceMetrics()
, this.collectUserMetrics()
, this.collectGroupMetrics()
])

// Update the metrics
metrics.updateDeviceMetrics(deviceData)
metrics.updateUserMetrics(userData)
metrics.updateGroupMetrics(groupData)
Comment on lines +54 to +57
Copy link

Copilot AI Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR introduces new aggregation/mapping logic for Prometheus metrics, but it isn’t covered by tests and the repo already has util-level tests (test/util/*). Adding unit tests around the collector/metric update behavior (e.g., status mapping, group active/ready/pending counts, devicesByStatus reset) would help prevent silent metric drift/regressions.

Copilot uses AI. Check for mistakes.

log.debug('Metrics collection completed')
}
catch (error) {
log.error('Error during metrics collection:', error)
}
}

async collectDeviceMetrics() {
try {
// Get device statistics from database using secure aggregation function
// This avoids access control bypass by not exposing individual device data
const deviceStats = await dbapi.getDeviceMetrics()
return deviceStats
}
catch (error) {
log.error('Error collecting device metrics:', error)
return {
total: 0
, usable: 0
, busy: 0
, providers: 0
, byStatus: {}
}
}
}

async collectUserMetrics() {
try {
// Get user statistics from database
const users = await dbapi.getUsers()

return {
total: users.length
Comment on lines +87 to +91
Copy link

Copilot AI Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

collectUserMetrics() loads the full users table (dbapi.getUsers()) just to compute a count, which can become expensive as the user table grows. Prefer a DB-side count/aggregation (e.g., r.table('users').count()) to reduce memory and DB traffic.

Suggested change
// Get user statistics from database
const users = await dbapi.getUsers()
return {
total: users.length
// Get user statistics from database, preferring a DB-side count if available
let total
if (typeof dbapi.getUserCount === 'function') {
// Use optimized aggregation function when provided by dbapi
total = await dbapi.getUserCount()
}
else {
// Fallback to fetching users and counting in memory
const users = await dbapi.getUsers()
total = Array.isArray(users) ? users.length : 0
}
return {
total: total

Copilot uses AI. Check for mistakes.
}
}
catch (error) {
log.error('Error collecting user metrics:', error)
return {
total: 0
}
}
}

async collectGroupMetrics() {
try {
// Get group statistics from database
const groups = await dbapi.getGroups()

const groupStats = {
total: groups.length
, active: groups.filter(g => g.state === 'active').length
Copy link

Copilot AI Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

collectGroupMetrics() counts “active” groups via g.state === 'active', but groups use an isActive boolean and state values like 'ready'/'pending'/'waiting'. This will report 0 active groups and mislead dashboards. Count active via g.isActive (and decide how to interpret state vs isActive for the other buckets).

Suggested change
, active: groups.filter(g => g.state === 'active').length
, active: groups.filter(g => g.isActive).length

Copilot uses AI. Check for mistakes.
, ready: groups.filter(g => g.state === 'ready').length
, pending: groups.filter(g => g.state === 'pending').length
}

Comment on lines +106 to +113
Copy link

Copilot AI Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

collectGroupMetrics() similarly loads the entire groups table via dbapi.getGroups() just to compute counts. Consider doing DB-side aggregations (count, group-by state/isActive) to avoid scanning and transferring all group rows every interval.

Suggested change
const groupStats = {
total: groups.length
, active: groups.filter(g => g.state === 'active').length
, ready: groups.filter(g => g.state === 'ready').length
, pending: groups.filter(g => g.state === 'pending').length
}
const groupStats = {
total: groups.length
, active: 0
, ready: 0
, pending: 0
}
for (const g of groups) {
if (!g || typeof g.state !== 'string') {
continue
}
switch (g.state) {
case 'active':
groupStats.active++
break
case 'ready':
groupStats.ready++
break
case 'pending':
groupStats.pending++
break
}
}

Copilot uses AI. Check for mistakes.
return groupStats
}
catch (error) {
log.error('Error collecting group metrics:', error)
return {
total: 0
, active: 0
, ready: 0
, pending: 0
}
}
}

// Method to collect quota metrics for a specific user
async collectUserQuotaMetrics(user) {
try {
// This would depend on how quotas are implemented in STF
// For now, return placeholder data
const quotaTypes = ['devices', 'duration']

quotaTypes.forEach(quotaType => {
// Example: Get quota usage from database
const consumed = 0 // Would be actual consumed amount
const allocated = 10 // Would be actual allocated amount

metrics.updateUserQuota(user, quotaType, consumed, allocated)
})
}
catch (error) {
log.error('Error collecting user quota metrics:', error)
}
}
}

module.exports = MetricsCollector
Loading
Loading