Prometheus client for Julia
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

267 lines
11 KiB

# SPDX-License-Identifier: MIT
#################################
# ProcessCollector <: Collector #
#################################
mutable struct ProcessCollector <: Collector
@const pid::Function
@atomic initialized::Ptr{Nothing}
@atomic system_boot_time::Int
@atomic clock_ticks_per_second::Int
@atomic pagesize::Int
function ProcessCollector(
pid::Function = () -> "self";
registry::Union{CollectorRegistry, Nothing} = DEFAULT_REGISTRY,
)
procc = new(pid, C_NULL, 0, 0, 0)
if registry !== nothing
register(registry, procc)
end
return procc
end
end
# Initialize the ProcessCollector on first use in a given process. This is necessary because
# typically collectors are defined as global variables which may have been cached during
# precompilation. The struct field initialized::Ptr is used to detect this: if it is NULL,
# then either the collector was constructed in this session (since it is set to null in the
# inner constructor), or it was deserialized from a cache file (since pointers are zeroed in
# the precompilation serialize/deserialize process). Important to note is that this property
# holds even if the collector was initialized in the process that output the serialized
# file. This would not be hold for e.g. a initialized::Bool field.
function initialize_process_collector(procc::ProcessCollector)
if procc.initialized !== C_NULL
return
end
system_boot_time = 0
try
proc_stat = read("/proc/stat", String)
m = match(r"^btime\s+(\d+)"m, proc_stat)::RegexMatch
system_boot_time = parse(Int, m.captures[1]::AbstractString)
catch e
@debug "ProcessCollector: /proc is not available or not readable, disabling." e
end
# Fetch clock ticks per second
clock_ticks_per_second = 0
try
cmd = pipeline(`getconf CLK_TCK`, stderr = devnull)
str = read(cmd, String)
clock_ticks_per_second = parse(Int, strip(str))
catch e
if system_boot_time > 0
@debug "ProcessCollector: /proc is available but could not read " *
"CLK_TCK from getconf, partially disabling." e
end
end
# Fetch pagesize
pagesize = 0
try
cmd = pipeline(`getconf PAGESIZE`, stderr = devnull)
str = read(cmd, String)
pagesize = parse(Int, strip(str))
catch e
if system_boot_time > 0
@debug "ProcessCollector: /proc is available but could not read " *
"PAGESIZE from getconf, partially disabling." e
end
end
# Set the values and return
@atomic procc.system_boot_time = system_boot_time
@atomic procc.clock_ticks_per_second = clock_ticks_per_second
@atomic procc.pagesize = pagesize
@atomic procc.initialized = Ptr{Nothing}(0xdeadbeef % UInt)
return
end
"""
Prometheus.ProcessCollector(pid; registry=DEFAULT_REGISTRY)
Create a process collector for the process id given by the `pid` function. The collector
exposes metrics about the process' CPU time, start time, memory usage, file usage, and I/O
operations.
**Arguments**
- `pid :: Function`: a function returning a process id as a string or integer for which to
collect metrics. By default the `"self"` pid is used, i.e. the current process.
**Keyword arguments**
- `registry :: Prometheus.CollectorRegistry`: the registry in which to register the
collector. The default registry is used by default. Pass `registry = nothing` to skip
registration.
!!! note
A `ProcessCollector` for the current process is registered automatically with the
default registry. If necessary it can be removed by calling
```julia
Prometheus.unregister(Prometheus.DEFAULT_REGISTRY, Prometheus.PROCESS_COLLECTOR)
```
!!! note
The process collector is currently only available on Linux since it requires the `/proc`
file system. On Windows and macOS this collector will not expose any metrics.
"""
ProcessCollector(::Function; kwargs...)
function metric_names(::ProcessCollector)
return (
"process_cpu_seconds_total", "process_start_time_seconds",
"process_virtual_memory_bytes", "process_resident_memory_bytes", "process_open_fds",
"process_io_rchar_bytes_total", "process_io_wchar_bytes_total",
"process_io_syscr_total", "process_io_syscw_total", "process_io_read_bytes_total",
"process_io_write_bytes_total",
)
end
function collect!(metrics::Vector, procc::ProcessCollector)
initialize_process_collector(procc)
@assert procc.initialized !== C_NULL
# Unpack variables
system_boot_time = procc.system_boot_time
clock_ticks_per_second = procc.clock_ticks_per_second
pagesize = procc.pagesize
# If reading the system boot time from /proc/stat failed then that is used as an
# indicator for a missing or unreadable /proc fs so then return early
procc.system_boot_time == 0 && return metrics
# Fetch the pid
pid = try
String(strip(string(procc.pid()::Union{AbstractString, Integer})))::String
catch e
@error "ProcessCollector: could not look up the pid from the lambda" e
return metrics
end
if isempty(pid) || !isdir("/proc/$(pid)")
@error "ProcessCollector: invalid pid '$(pid)' from lamba: /proc/$(pid)/ does not exist"
return metrics
end
# Read /proc/$(pid)/stat
proc_stat = nothing
try
proc_stat = read("/proc/$(pid)/stat", String)
catch e
@error "ProcessCollector: could not read /proc/$(pid)/stat" e
end
if proc_stat !== nothing
fields = split(split(proc_stat, ')')[end]) # This strips off the first two fields
# CPU time and start time requires clock_ticks_per_second
if clock_ticks_per_second > 0
utime = parse(Int, fields[14 - 2]) / clock_ticks_per_second
stime = parse(Int, fields[15 - 2]) / clock_ticks_per_second
label_names = LabelNames(("mode",))
proc_cpu_seconds = Metric(
"counter", "process_cpu_seconds_total",
"Total CPU time (user and system mode) in seconds.",
[
Sample(nothing, label_names, LabelValues(("system",)), stime),
Sample(nothing, label_names, LabelValues(("user",)), utime),
],
)
push!(metrics, proc_cpu_seconds)
# Process start time
starttime = parse(Int, fields[22 - 2]) / clock_ticks_per_second
proc_start_time = Metric(
"gauge", "process_start_time_seconds",
"Start time since unix epoch in seconds.",
Sample(nothing, nothing, nothing, starttime + system_boot_time),
)
push!(metrics, proc_start_time)
end
# Virtual memory
vsize = parse(Int, fields[23 - 2])
proc_virtual_memory = Metric(
"gauge", "process_virtual_memory_bytes", "Virtual memory size in bytes.",
Sample(nothing, nothing, nothing, vsize),
)
push!(metrics, proc_virtual_memory)
if pagesize > 0
# Resident memory
rss = parse(Int, fields[24 - 2])
proc_resident_memory = Metric(
"gauge", "process_resident_memory_bytes",
"Resident memory size (RSS) in bytes.",
Sample(nothing, nothing, nothing, rss * pagesize),
)
push!(metrics, proc_resident_memory)
end
end
# Read /proc/$(pid)/fds
proc_fd = nothing
try
proc_fd = length(readdir("/proc/$(pid)/fd"))
catch e
@error "ProcessCollector: could not read /proc/$(pid)/fd" e
end
if proc_fd !== nothing
# Open file descriptors
proc_open_fds = Metric(
"gauge", "process_open_fds",
"Number of open file descriptors.",
Sample(nothing, nothing, nothing, proc_fd),
)
push!(metrics, proc_open_fds)
# TODO: Maybe add maximum open fds from /proc/$(pid)/limits like the Python client
end
# Read /proc/$(pid)/io
proc_io = nothing
try
proc_io = read("/proc/$(pid)/io", String)
catch e
@error "ProcessCollector: could not read /proc/$(pid)/io" e
end
if proc_io !== nothing
rchar = match(r"rchar:\s+(\d+)", proc_io)
if rchar !== nothing
proc_io_rchar = Metric(
"counter", "process_io_rchar_bytes_total",
"Total number of bytes read in bytes (rchar from /proc/[pid]/io).",
Sample(nothing, nothing, nothing, parse(Int, rchar.captures[1]::AbstractString)),
)
push!(metrics, proc_io_rchar)
end
wchar = match(r"wchar:\s+(\d+)", proc_io)
if wchar !== nothing
proc_io_wchar = Metric(
"counter", "process_io_wchar_bytes_total",
"Total number of bytes written in bytes (wchar from /proc/[pid]/io).",
Sample(nothing, nothing, nothing, parse(Int, wchar.captures[1]::AbstractString)),
)
push!(metrics, proc_io_wchar)
end
syscr = match(r"syscr:\s+(\d+)", proc_io)
if syscr !== nothing
proc_io_syscr = Metric(
"counter", "process_io_syscr_total",
"Total number of read I/O operations (syscalls) (syscr from /proc/[pid]/io).",
Sample(nothing, nothing, nothing, parse(Int, syscr.captures[1]::AbstractString)),
)
push!(metrics, proc_io_syscr)
end
syscw = match(r"syscw:\s+(\d+)", proc_io)
if syscw !== nothing
proc_io_syscw = Metric(
"counter", "process_io_syscw_total",
"Total number of write I/O operations (syscalls) (syscw from /proc/[pid]/io).",
Sample(nothing, nothing, nothing, parse(Int, syscw.captures[1]::AbstractString)),
)
push!(metrics, proc_io_syscw)
end
read_bytes = match(r"read_bytes:\s+(\d+)", proc_io)
if read_bytes !== nothing
proc_io_read_bytes = Metric(
"counter", "process_io_read_bytes_total",
"Total number of bytes read from the file system (read_bytes from /proc/[pid]/io).",
Sample(nothing, nothing, nothing, parse(Int, read_bytes.captures[1]::AbstractString)),
)
push!(metrics, proc_io_read_bytes)
end
write_bytes = match(r"write_bytes:\s+(\d+)", proc_io)
if write_bytes !== nothing
proc_io_write_bytes = Metric(
"counter", "process_io_write_bytes_total",
"Total number of bytes written to the file system (write_bytes from /proc/[pid]/io).",
Sample(nothing, nothing, nothing, parse(Int, write_bytes.captures[1]::AbstractString)),
)
push!(metrics, proc_io_write_bytes)
end
end
return metrics
end