Browse Source

ProcessCollector: initialize on first use

This patch changes how the ProcessCollector is initialized. Previously
some values were read from the /proc filesystem in the constructor, but
since a collector is typically defined as a global variable this results
in values being cached from precompilation. With this patch the
collector is initialized on first use (first `Prometheus.collect!`)
instead.
pull/14/head
Fredrik Ekre 2 years ago
parent
commit
78379253b0
  1. 4
      CHANGELOG.md
  2. 118
      src/process_collector.jl

4
CHANGELOG.md

@ -10,6 +10,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `Base.getindex` is overloaded for the `Prometheus.Family` collector to have the same - `Base.getindex` is overloaded for the `Prometheus.Family` collector to have the same
meaning as `Prometheus.labels`. `family[labels]` is equivalent to meaning as `Prometheus.labels`. `family[labels]` is equivalent to
`Prometheus.labels(family, labels)`. ([#13][github-13]) `Prometheus.labels(family, labels)`. ([#13][github-13])
### Fixed
- The `ProcessCollector` is now initialized on first use in a given process. This fixes a
bug where values cached during precompilation (e.g. system boot time) would be used
instead of the current values. ([#14][github-14])
## [1.2.0] - 2023-11-22 ## [1.2.0] - 2023-11-22
### Added ### Added

118
src/process_collector.jl

@ -4,48 +4,15 @@
mutable struct ProcessCollector <: Collector mutable struct ProcessCollector <: Collector
@const pid::Function @const pid::Function
@const system_boot_time::Int @atomic initialized::Ptr{Nothing}
@const clock_ticks_per_second::Int @atomic system_boot_time::Int
@const pagesize::Int @atomic clock_ticks_per_second::Int
@atomic pagesize::Int
function ProcessCollector( function ProcessCollector(
pid::Function = () -> "self"; pid::Function = () -> "self";
registry::Union{CollectorRegistry, Nothing}=DEFAULT_REGISTRY, registry::Union{CollectorRegistry, Nothing}=DEFAULT_REGISTRY,
) )
# Read boot time as a way to check if /proc is available and readable procc = new(pid, C_NULL, 0, 0, 0)
system_boot_time = 0
try
proc_stat = read("/proc/stat", String)
m = match(r"^btime\s+(\d+)"m, proc_stat)::RegexMatch
system_boot_time = parse(Int, m.captures[1]::AbstractString)
catch e
@debug "ProcessCollector: /proc is not available or not readable, disabling." e
end
# Fetch clock ticks per second
clock_ticks_per_second = 0
try
cmd = pipeline(`getconf CLK_TCK`, stderr=devnull)
str = read(cmd, String)
clock_ticks_per_second = parse(Int, strip(str))
catch e
if system_boot_time > 0
@debug "ProcessCollector: /proc is available but could not read " *
"CLK_TCK from getconf, partially disabling." e
end
end
# Fetch pagesize
pagesize = 0
try
cmd = pipeline(`getconf PAGESIZE`, stderr=devnull)
str = read(cmd, String)
pagesize = parse(Int, strip(str))
catch e
if system_boot_time > 0
@debug "ProcessCollector: /proc is available but could not read " *
"PAGESIZE from getconf, partially disabling." e
end
end
# Create the collector
procc = new(pid, system_boot_time, clock_ticks_per_second, pagesize)
if registry !== nothing if registry !== nothing
register(registry, procc) register(registry, procc)
end end
@ -53,6 +20,58 @@ mutable struct ProcessCollector <: Collector
end end
end end
# Initialize the ProcessCollector on first use in a given process. This is necessary because
# typically collectors are defined as global variables which may have been cached during
# precompilation. The struct field initialized::Ptr is used to detect this: if it is NULL,
# then either the collector was constructed in this session (since it is set to null in the
# inner constructor), or it was deserialized from a cache file (since pointers are zeroed in
# the precompilation serialize/deserialize process). Important to note is that this property
# holds even if the collector was initialized in the process that output the serialized
# file. This would not hold for e.g. an initialized::Bool field.
"""
    initialize_process_collector(procc::ProcessCollector)

Populate `procc.system_boot_time`, `procc.clock_ticks_per_second` and `procc.pagesize` the
first time the collector is used in the current process, then mark the collector as
initialized by storing a non-null pointer in `procc.initialized`. Subsequent calls return
immediately.

Each value is left at `0` if it cannot be determined:
- `system_boot_time` is parsed from the `btime` line of `/proc/stat`,
- `clock_ticks_per_second` is parsed from the output of `getconf CLK_TCK`,
- `pagesize` is parsed from the output of `getconf PAGESIZE`.

Failures are never rethrown; they are only reported through `@debug`. Returns `nothing`.
"""
function initialize_process_collector(procc::ProcessCollector)
    # Fast path: already initialized in this process. The acquire load pairs with the
    # atomic store of `initialized` at the end of this function, so a task that observes
    # the non-null marker also observes the three values that were stored before it.
    if (@atomic :acquire procc.initialized) !== C_NULL
        return
    end
    # Read boot time as a way to check if /proc is available and readable
    system_boot_time = 0
    try
        proc_stat = read("/proc/stat", String)
        m = match(r"^btime\s+(\d+)"m, proc_stat)::RegexMatch
        system_boot_time = parse(Int, m.captures[1]::AbstractString)
    catch e
        @debug "ProcessCollector: /proc is not available or not readable, disabling." e
    end
    # Fetch clock ticks per second
    clock_ticks_per_second = 0
    try
        cmd = pipeline(`getconf CLK_TCK`, stderr=devnull)
        str = read(cmd, String)
        clock_ticks_per_second = parse(Int, strip(str))
    catch e
        # Only report this when /proc was readable; otherwise the collector is already
        # reported as disabled above.
        if system_boot_time > 0
            @debug "ProcessCollector: /proc is available but could not read " *
                "CLK_TCK from getconf, partially disabling." e
        end
    end
    # Fetch pagesize
    pagesize = 0
    try
        cmd = pipeline(`getconf PAGESIZE`, stderr=devnull)
        str = read(cmd, String)
        pagesize = parse(Int, strip(str))
    catch e
        # Same reasoning as for CLK_TCK above
        if system_boot_time > 0
            @debug "ProcessCollector: /proc is available but could not read " *
                "PAGESIZE from getconf, partially disabling." e
        end
    end
    # Set the values and return. The `initialized` marker must be stored last so that it
    # is never observed before the values it guards. The pointer is an arbitrary non-null
    # marker value; it is only ever compared against C_NULL here.
    @atomic procc.system_boot_time = system_boot_time
    @atomic procc.clock_ticks_per_second = clock_ticks_per_second
    @atomic procc.pagesize = pagesize
    @atomic procc.initialized = Ptr{Nothing}(0xdeadbeef % UInt)
    return
end
""" """
Prometheus.ProcessCollector(pid; registry=DEFAULT_REGISTRY) Prometheus.ProcessCollector(pid; registry=DEFAULT_REGISTRY)
@ -92,7 +111,14 @@ function metric_names(::ProcessCollector)
end end
function collect!(metrics::Vector, procc::ProcessCollector) function collect!(metrics::Vector, procc::ProcessCollector)
# If we could not read /proc just return early initialize_process_collector(procc)
@assert procc.initialized !== C_NULL
# Unpack variables
system_boot_time = procc.system_boot_time
clock_ticks_per_second = procc.clock_ticks_per_second
pagesize = procc.pagesize
# If reading the system boot time from /proc/stat failed then that is used as an
# indicator for a missing or unreadable /proc fs so then return early
procc.system_boot_time == 0 && return metrics procc.system_boot_time == 0 && return metrics
# Fetch the pid # Fetch the pid
pid = try pid = try
@ -115,9 +141,9 @@ function collect!(metrics::Vector, procc::ProcessCollector)
if proc_stat !== nothing if proc_stat !== nothing
fields = split(split(proc_stat, ')')[end]) # This strips off the first two fields fields = split(split(proc_stat, ')')[end]) # This strips off the first two fields
# CPU time and start time requires clock_ticks_per_second # CPU time and start time requires clock_ticks_per_second
if procc.clock_ticks_per_second > 0 if clock_ticks_per_second > 0
utime = parse(Int, fields[14 - 2]) / procc.clock_ticks_per_second utime = parse(Int, fields[14 - 2]) / clock_ticks_per_second
stime = parse(Int, fields[15 - 2]) / procc.clock_ticks_per_second stime = parse(Int, fields[15 - 2]) / clock_ticks_per_second
label_names = LabelNames(("mode",)) label_names = LabelNames(("mode",))
proc_cpu_seconds = Metric( proc_cpu_seconds = Metric(
"counter", "process_cpu_seconds_total", "counter", "process_cpu_seconds_total",
@ -129,11 +155,11 @@ function collect!(metrics::Vector, procc::ProcessCollector)
) )
push!(metrics, proc_cpu_seconds) push!(metrics, proc_cpu_seconds)
# Process start time # Process start time
starttime = parse(Int, fields[22 - 2]) / procc.clock_ticks_per_second starttime = parse(Int, fields[22 - 2]) / clock_ticks_per_second
proc_start_time = Metric( proc_start_time = Metric(
"gauge", "process_start_time_seconds", "gauge", "process_start_time_seconds",
"Start time since unix epoch in seconds.", "Start time since unix epoch in seconds.",
Sample(nothing, nothing, nothing, starttime + procc.system_boot_time), Sample(nothing, nothing, nothing, starttime + system_boot_time),
) )
push!(metrics, proc_start_time) push!(metrics, proc_start_time)
end end
@ -144,13 +170,13 @@ function collect!(metrics::Vector, procc::ProcessCollector)
Sample(nothing, nothing, nothing, vsize), Sample(nothing, nothing, nothing, vsize),
) )
push!(metrics, proc_virtual_memory) push!(metrics, proc_virtual_memory)
if procc.pagesize > 0 if pagesize > 0
# Resident memory # Resident memory
rss = parse(Int, fields[24 - 2]) rss = parse(Int, fields[24 - 2])
proc_resident_memory = Metric( proc_resident_memory = Metric(
"gauge", "process_resident_memory_bytes", "gauge", "process_resident_memory_bytes",
"Resident memory size (RSS) in bytes.", "Resident memory size (RSS) in bytes.",
Sample(nothing, nothing, nothing, rss * procc.pagesize), Sample(nothing, nothing, nothing, rss * pagesize),
) )
push!(metrics, proc_resident_memory) push!(metrics, proc_resident_memory)
end end

Loading…
Cancel
Save