3 * UCD SNMP module for systemStats section of UCD-SNMP-MIB for SunOS/Solaris
4 * Jochen Kmietsch <kmietsch@jochen.de>
5 * with fixes and additions from the UCD-SNMP community
6 * Uses some ideas from xosview and top
7 * Some comments paraphrased from the SUN man pages
8 * Version 0.1 initial release (Dec 1999)
9 * Version 0.2 added support for multiprocessor machines (Jan 2000)
10 * Version 0.3 some reliability enhancements and compile time fixes (Feb 2000)
11 * Version 0.4 portability issue and raw cpu value support (Jun 2000)
12 * Version 0.5 64-bit Solaris support and new data gathering routine (Aug 2000)
13 * Version 0.6 Memory savings, rollover precautions and lint checks (Aug 2000)
14 * Version 0.7 More raw counters and some cosmetic changes (Jan 2001)
19 * To make lint skip the debug code and stop complaining
22 #define SNMP_NO_DEBUGGING 1
34 #include <sys/types.h>
39 * kstat and sysinfo structs
42 #include <sys/sysinfo.h>
44 #include <net-snmp/net-snmp-config.h>
45 #include <net-snmp/net-snmp-includes.h>
46 #include <net-snmp/agent/net-snmp-agent-includes.h>
49 #include "util_funcs.h"
52 * Header file for this module
55 #include "vmstat_solaris2.h"
63 * Global structures start here
67 * A structure to save data gathered from the kernel kstat interface to.
70 * We used to have the sys/sysinfo.h cpu_stat_t here but we did not need
73 * all of it, some in a different size and some additional ones so we build
/*
 * Accumulator for one sample of the kernel counters this module reports.
 * take_snapshot() clears the structure and then adds the values of every
 * cpu_stat kstat it reads, so each counter is a sum over all CPUs found.
 * NOTE(review): the functions below also use a css_time member (time of the
 * sample); its declaration and the closing brace of this struct are not
 * visible in this excerpt.
 */
78 struct cpu_stat_snapshot {
/* Number of CPUs whose data was summed into this snapshot */
80 unsigned int css_cpus;
/* Paging counters, read from cpu_vminfo in take_snapshot() */
81 unsigned long long css_swapin;
82 unsigned long long css_swapout;
/* Counters read from cpu_sysinfo (bread, bwrite, intr, pswitch) */
83 unsigned long long css_blocks_read;
84 unsigned long long css_blocks_write;
85 unsigned long long css_interrupts;
86 unsigned long long css_context_sw;
/* One counter per CPU state, read from cpu_sysinfo.cpu[] */
87 unsigned long long css_cpu[CPU_STATES];
91 * Global structures end here
96 * Global variables start here
103 * Provides access to the kernel statistics library by
106 * initializing a kstat control structure and returning a pointer
109 * to this structure. This pointer must be used as the kc argument in
112 * following function calls from libkstat (here kc is called kstat_fd).
115 * Pointer to structure to be opened with kstat_open in main procedure.
118 * We share this one with memory_solaris2 and kernel_sunos5, where it's
/*
 * Shared kstat control handle; defined outside this file.
 * init_vmstat_solaris2() calls kstat_open() itself when it is still NULL.
 */
123 extern kstat_ctl_t *kstat_fd;
126 * Variables for the calculated values, filled in update_stats
129 * Need to be global since we need them in more than one function
/*
 * Computed values that var_extensible_vmstat() hands out by address.
 * NOTE(review): var_extensible_vmstat() also returns &swapin, whose
 * declaration is not visible in this excerpt.
 */
132 static ulong swapout;
133 static ulong blocks_read;
134 static ulong blocks_write;
135 static ulong interrupts;
136 static ulong context_sw;
139 * Since MIB wants CPU_SYSTEM, which is CPU_KERNEL + CPU_WAIT
/*
 * Percentages per CPU state; the one extra slot beyond CPU_STATES is used
 * as cpu_perc[CPU_SYSTEM] in update_stats().
 */
141 static long cpu_perc[CPU_STATES + 1];
144 * How many snapshots we have already taken, needed for the first
147 * POLL_INTERVAL * POLL_VALUES seconds of agent running
/* Incremented by update_stats() until it reaches POLL_VALUES */
149 static unsigned int number_of_snapshots;
152 * The place to store the snapshots of system data in
/*
 * Slot 0 receives the newest sample; update_stats() then shifts the array
 * one place down, so higher indices hold older samples.
 */
154 static struct cpu_stat_snapshot snapshot[POLL_VALUES + 1];
157 * And one for the raw counters, which we fill when the raw values are
160 * requested, as opposed to the absolute values, which are taken every
163 * POLL_INTERVAL seconds and calculated over POLL_INTERVAL * POLL_VALUES time
/* Filled by take_snapshot(&raw_values) from var_extensible_vmstat() */
165 static struct cpu_stat_snapshot raw_values;
168 * Global variables end here
173 * Functions start here
/*
 * Forward declarations of the two file-local helpers.
 * NOTE(review): the parameter list of update_stats continues on a line that
 * is not visible in this excerpt; its definition takes
 * (unsigned int registrationNumber, void *clientarg).
 */
179 static void update_stats(unsigned int registrationNumber,
181 static int take_snapshot(struct cpu_stat_snapshot *css);
184 * init_vmstat_solaris2 starts here
187 * Init function for this module, from prototype
190 * Defines variables handled by this module, defines root OID for
193 * this module and registers it with the agent
/*
 * Module initialisation.
 * Builds the table of served variables, registers it under
 * { UCDAVIS_MIB, 11 }, makes sure a kstat control handle is available,
 * takes a first sample and schedules update_stats() through the agent's
 * alarm facility (once after 5 seconds, then every POLL_INTERVAL seconds).
 * Takes no arguments; the return type line is not visible in this excerpt.
 */
/* Handler for every entry of the table below; defined later in this file */
196 FindVarMethod var_extensible_vmstat;
199 init_vmstat_solaris2(void)
203 * Which variables do we service ?
/*
 * Calculated values are registered as ASN_INTEGER, the raw counters as
 * ASN_COUNTER; everything is read-only and served by var_extensible_vmstat.
 */
205 struct variable2 extensible_vmstat_variables[] = {
206 {MIBINDEX, ASN_INTEGER, RONLY, var_extensible_vmstat, 1,
208 {ERRORNAME, ASN_OCTET_STR, RONLY, var_extensible_vmstat, 1,
210 {SWAPIN, ASN_INTEGER, RONLY, var_extensible_vmstat, 1, {SWAPIN}},
211 {SWAPOUT, ASN_INTEGER, RONLY, var_extensible_vmstat, 1, {SWAPOUT}},
212 {IOSENT, ASN_INTEGER, RONLY, var_extensible_vmstat, 1, {IOSENT}},
213 {IORECEIVE, ASN_INTEGER, RONLY, var_extensible_vmstat, 1,
215 {SYSINTERRUPTS, ASN_INTEGER, RONLY, var_extensible_vmstat, 1,
217 {SYSCONTEXT, ASN_INTEGER, RONLY, var_extensible_vmstat, 1,
219 {CPUUSER, ASN_INTEGER, RONLY, var_extensible_vmstat, 1, {CPUUSER}},
220 {CPUSYSTEM, ASN_INTEGER, RONLY, var_extensible_vmstat, 1,
222 {CPUIDLE, ASN_INTEGER, RONLY, var_extensible_vmstat, 1, {CPUIDLE}},
223 {CPURAWUSER, ASN_COUNTER, RONLY, var_extensible_vmstat, 1,
225 {CPURAWSYSTEM, ASN_COUNTER, RONLY, var_extensible_vmstat, 1,
227 {CPURAWIDLE, ASN_COUNTER, RONLY, var_extensible_vmstat, 1,
229 {CPURAWWAIT, ASN_COUNTER, RONLY, var_extensible_vmstat, 1,
231 {CPURAWKERNEL, ASN_COUNTER, RONLY, var_extensible_vmstat, 1,
233 {IORAWSENT, ASN_COUNTER, RONLY, var_extensible_vmstat, 1,
235 {IORAWRECEIVE, ASN_COUNTER, RONLY, var_extensible_vmstat, 1,
237 {SYSRAWINTERRUPTS, ASN_COUNTER, RONLY, var_extensible_vmstat, 1,
239 {SYSRAWCONTEXT, ASN_COUNTER, RONLY, var_extensible_vmstat, 1,
245 * {ERRORFLAG, ASN_INTEGER, RONLY, var_extensible_vmstat, 1, {ERRORFLAG }},
246 * {ERRORMSG, ASN_OCTET_STR, RONLY, var_extensible_vmstat, 1, {ERRORMSG }}
251 * Define the OID pointer to the top of the mib tree that we're
254 * registering underneath
256 oid vmstat_variables_oid[] = { UCDAVIS_MIB, 11 };
259 * register ourselves with the agent to handle our mib tree
262 * LINTED Trust me, I know what I'm doing
264 REGISTER_MIB("ucd-snmp/vmstat", extensible_vmstat_variables, variable2,
265 vmstat_variables_oid);
268 * First check whether shared kstat contol is NULL, if so, try to open our
/* Only open a handle of our own when nobody else has provided one yet */
273 if (kstat_fd == NULL) {
274 kstat_fd = kstat_open();
278 * Then check whether either shared kstat was found or we succeeded in
/*
 * NOTE(review): update_stats() below ends up dereferencing kstat_fd in
 * take_snapshot(); confirm that the lines not visible here leave the
 * function when kstat_fd is still NULL.
 */
283 if (kstat_fd == NULL) {
285 "vmstat_solaris2 (init): kstat_open() failed and no shared kstat control found.\n");
289 * Start with some useful data
/* Direct call with dummy arguments; both are ignored by the visible code */
291 update_stats(0, NULL);
294 * update_stats is run every POLL_INTERVAL seconds using this routine
297 * (see 'man snmp_alarm')
300 * This is only executed once to get some useful data in the beginning
/*
 * NOTE(review): the second argument is NULL here while the repeating
 * registration below passes the flag SA_REPEAT in the same position; if
 * that parameter is an integer flag word, a literal 0 would state
 * "no flags" more clearly - confirm against the snmp_alarm API.
 * A return value of 0 is treated as failure.
 */
302 if (snmp_alarm_register(5, NULL, update_stats, NULL) == 0) {
303 snmp_log(LOG_WARNING,
304 "vmstat_solaris2 (init): snmp_alarm_register failed.\n");
307 * This is the one that runs update_stats every POLL_INTERVAL seconds
309 if (snmp_alarm_register(POLL_INTERVAL, SA_REPEAT, update_stats, NULL)
312 "vmstat_solaris2 (init): snmp_alarm_register failed, cannot service requests.\n");
315 } /* init_vmstat_solaris2 ends here */
318 * Data collection function take_snapshot starts here
321 * Get data from kernel and save into the snapshot structs
324 * Argument is the snapshot struct to save to. Global anyway, but looks nicer
/*
 * Fill *css with the summed counters of all cpu_stat kstats.
 *
 * css: snapshot to fill. It is re-read from the kernel only when it is
 *      empty (css_time == 0) or older than 2000000000 time units of
 *      gethrtime() (2 seconds according to the comments below); otherwise
 *      its previous contents are left untouched.
 *
 * Returns 0 when cpu_num > 0, -1 otherwise.
 *
 * NOTE(review): cpu_num starts at 0 and is only meaningful after the kstat
 * walk. If the final return sits outside the freshness if-clause (the
 * closing brace is not visible in this excerpt), a call that reuses the
 * cached data returns -1 although css holds valid values - confirm.
 */
327 take_snapshot(struct cpu_stat_snapshot *css)
330 * Variables start here
334 * From sys/kstat.h (included from kstat.h):
337 * Pointer to current kstat
/* Number of cpu_stat kstats read during this call */
344 unsigned int cpu_num = 0;
347 * High resolution time counter
349 hrtime_t current_time;
352 * see sys/sysinfo.h, holds CPU data
357 * The usual stuff to count on, err, by
366 * Function starts here
372 current_time = gethrtime();
375 * If we have just gotten the data, return the values from last run (skip if-clause)
378 * This happens on a snmpwalk request. No need to read the kstat again
381 * if we just did it less than 2 seconds ago
384 * Jumps into if-clause either when snapshot is empty or when too old
387 if ((css->css_time == 0)
388 || (current_time > css->css_time + 2000000000)) {
390 * Make sure we clean up before we put new data into snapshot
/* All sums start from zero because the loop below accumulates with += */
392 memset(css, 0, sizeof *css);
397 css->css_time = current_time;
400 * Look thru all the cpu slots on the machine whether they holds a CPU
403 * and if so, get the data from that CPU
406 * We walk through the whole kstat chain and sum up all the found cpu_stat kstats,
409 * there's one for every CPU in a machine
411 for (ksp = kstat_fd->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
413 * If we encounter an invalid kstat, skip it and continue with next one
415 if (ksp->ks_flags & KSTAT_FLAG_INVALID) {
/* Entries are selected by module name only */
419 if (strcmp(ksp->ks_module, "cpu_stat") == 0) {
421 * Yeah, we found a CPU.
426 * Read data from kstat into cs structure
429 * kstat_fd is the control structure, ksp the kstat we are reading
432 * and cs the buffer we are writing to.
/*
 * Type and size are checked before kstat_read() so that the kernel data
 * is only copied into cs when it has exactly the size of cs.
 */
434 if ((ksp->ks_type != KSTAT_TYPE_RAW) ||
435 (ksp->ks_data_size != sizeof cs) ||
436 (kstat_read(kstat_fd, ksp, &cs) == -1)) {
438 "vmstat_solaris2 (take_snapshot): could not read cs structure.\n");
443 * Get the data from the cs structure and sum it up in our own structure
446 (unsigned long long) cs.cpu_vminfo.swapin;
448 (unsigned long long) cs.cpu_vminfo.swapout;
449 css->css_blocks_read +=
450 (unsigned long long) cs.cpu_sysinfo.bread;
451 css->css_blocks_write +=
452 (unsigned long long) cs.cpu_sysinfo.bwrite;
453 css->css_interrupts +=
454 (unsigned long long) cs.cpu_sysinfo.intr;
455 css->css_context_sw +=
456 (unsigned long long) cs.cpu_sysinfo.pswitch;
459 * We need a for-loop for the CPU values
461 for (i = 0; i < CPU_STATES; i++) {
463 (unsigned long long) cs.cpu_sysinfo.cpu[i];
469 * Increment number of CPUs we gathered data from, for future use
/* Remember how many CPUs contributed; callers divide by this value */
471 css->css_cpus = cpu_num;
475 * All engines running at warp speed, no problems (if there are any engines, that is)
477 return (cpu_num > 0 ? 0 : -1);
478 } /* take_snapshot ends here */
481 * This gets called every POLL_INTERVAL seconds to update the snapshots.
482 * It takes a new snapshot and drops the oldest one. This way we move
483 * the time window so we always take the values over
484 * POLL_INTERVAL * POLL_VALUES seconds and update the data used every
485 * POLL_INTERVAL seconds
486 * The alarm timer is in the init function of this module (snmp_alarm_register)
/*
 * Alarm callback: take a new sample into snapshot[0], recompute the
 * per-second values and CPU percentages against the oldest stored sample,
 * then shift the snapshot array one place down.
 *
 * registrationNumber, clientarg: neither is used by the visible code; the
 *      init function also calls this directly with (0, NULL).
 *
 * The results are stored in the file-scope variables returned by
 * var_extensible_vmstat(). The return type line is not visible in this
 * excerpt; the prototype declares it static void.
 */
492 update_stats(unsigned int registrationNumber, void *clientarg)
495 * The time between the samples we compare
500 * Easier to use these than the snapshots, short hand pointers
502 struct cpu_stat_snapshot *css_old, *css_new;
505 * The usual stuff to count on, err, by
510 * Kstat chain id, to check whether kstat chain changed
515 * The sum of the CPU ticks that have passed on the different CPU states, so we can calculate
518 * the percentages of each state
520 unsigned long long cpu_sum = 0;
522 DEBUGMSGTL(("ucd-snmp/vmstat_solaris2.c:update_stats",
523 "updating stats\n"));
526 * Just in case someone added (or removed) some CPUs during operation (or other kstat chain changes)
528 kid = kstat_chain_update(kstat_fd);
531 snmp_log(LOG_WARNING,
532 "vmstat_solaris2 (update_stats): Could not update kstat chain.\n");
535 * On some machines this floods the logfile, thus commented out
538 * snmp_log(LOG_INFO, "vmstat_solaris2 (update_stats): Kstat chain changed.");
544 * Take the current snapshot
/*
 * snapshot[0] is zeroed at the end of every run, so its css_time is 0 here
 * and take_snapshot() always reads fresh data for it.
 */
546 if (take_snapshot(&snapshot[0]) == -1) {
547 snmp_log(LOG_WARNING,
548 "vmstat_solaris2 (update_stats): Something went wrong with take_snapshot.\n");
553 * Do we have some data we can use ? An issue right after the start of the agent
555 if (number_of_snapshots > 0) {
557 * Huh, the number of CPUs changed during run time. That is indeed s.th. worth noting, we
560 * output a humorous (more or less) syslog message and need to retake the snapshots
/* Sums over a different number of CPUs cannot be compared with each other */
562 if (snapshot[0].css_cpus != snapshot[1].css_cpus) {
563 if (snapshot[0].css_cpus > snapshot[1].css_cpus) {
565 "vmstat_solaris2 (update_stats): Cool ! Number of CPUs increased, must be hot-pluggable.\n");
568 "vmstat_solaris2 (update_stats): Lost at least one CPU, RIP.\n");
571 * Make all snapshots but the current one invalid
573 number_of_snapshots = 1;
575 * Move the current one in the "first" [1] slot
577 memmove(&snapshot[1], &snapshot[0], sizeof snapshot[0]);
579 * Erase the current one
581 memset(&snapshot[0], 0, sizeof snapshot[0]);
583 * Try to get a new snapshot in five seconds so we can return s.th. useful
/*
 * NOTE(review): what follows this one-shot registration (presumably an
 * early return) is not visible in this excerpt - confirm.
 */
585 if (snmp_alarm_register(5, NULL, update_stats, NULL) == 0) {
586 snmp_log(LOG_WARNING,
587 "vmstat_solaris2 (update_stats): snmp_alarm_register failed.\n");
593 * Short hand pointers
/* Newest sample versus the oldest sample that is valid so far */
595 css_new = &snapshot[0];
596 css_old = &snapshot[number_of_snapshots];
599 * How much time has passed between the snapshots we get the values from ?
602 * Time is in nanoseconds so a few zeros here to juggle with
605 * But the hrtime is not subject to change (s.b. setting the clock), unlike the normal time
/*
 * Nanoseconds divided by 1000000 gives milliseconds; the rates below
 * multiply by 1000 again to arrive at "per second".
 * NOTE(review): nothing visible guards against the result being 0, in
 * which case every division by time_diff below divides by zero - confirm.
 */
608 (snapshot[0].css_time -
609 snapshot[number_of_snapshots].css_time) / 1000000;
611 DEBUGMSGTL(("ucd-snmp/vmstat_solaris2.c:update_stats",
612 "time_diff: %lld\n", time_diff));
615 * swapin and swapout are in pages, MIB wants kB/s,so we just need to get kB and seconds
618 * For the others we need to get value per second
621 * getpagesize() returns pagesize in bytes
624 * decided to use sysconf(_SC_PAGESIZE) instead to get around an #ifndef (I don't like those)
627 * that was needed b/c some old Solaris versions don't have getpagesize()
630 * LINTED cast needed, really
/* pages * 1000 * pagesize / 1024 / milliseconds */
633 (uint_t) ((css_new->css_swapin -
634 css_old->css_swapin) * (hrtime_t) 1000 *
635 sysconf(_SC_PAGESIZE) / 1024 / time_diff);
637 * LINTED cast needed, really
640 (uint_t) ((css_new->css_swapout -
641 css_old->css_swapout) * (hrtime_t) 1000 *
642 sysconf(_SC_PAGESIZE) / 1024 / time_diff);
644 * LINTED cast needed, really
647 (uint_t) ((css_new->css_blocks_read -
648 css_old->css_blocks_read) * (hrtime_t) 1000 /
651 * LINTED cast needed, really
654 (uint_t) ((css_new->css_blocks_write -
655 css_old->css_blocks_write) * (hrtime_t) 1000 /
658 * LINTED cast needed, really
661 (uint_t) ((css_new->css_interrupts -
662 css_old->css_interrupts) * (hrtime_t) 1000 /
665 * LINTED cast needed, really
668 (uint_t) ((css_new->css_context_sw -
669 css_old->css_context_sw) * (hrtime_t) 1000 /
673 * Loop thru all the CPU_STATES and get the differences
675 for (i = 0; i < CPU_STATES; i++) {
676 cpu_sum += (css_new->css_cpu[i] - css_old->css_cpu[i]);
680 * Now calculate the absolute percentage values
683 * Looks somewhat complicated sometimes but tries to get around using floats to increase speed
/*
 * Adding cpu_sum / 2 before the division rounds to the nearest integer.
 * NOTE(review): cpu_sum is 0 when none of the per-state counters changed
 * between the two samples; no visible check prevents the divisions by
 * cpu_sum below in that case - confirm.
 */
685 for (i = 0; i < CPU_STATES; i++) {
687 * Since we don't return fractions we use + 0.5 to get between 99 and 101 percent adding the values
690 * together, otherwise we would get less than 100 most of the time
693 * LINTED has to be 'long'
696 (long) (((css_new->css_cpu[i] -
697 css_old->css_cpu[i]) * 100 +
698 (cpu_sum / 2)) / cpu_sum);
702 * As said before, MIB wants CPU_SYSTEM which is CPU_KERNEL + CPU_WAIT
705 * LINTED has to be 'long'
707 cpu_perc[CPU_SYSTEM] =
708 (long) ((((css_new->css_cpu[CPU_KERNEL] -
709 css_old->css_cpu[CPU_KERNEL])
710 + (css_new->css_cpu[CPU_WAIT] -
711 css_old->css_cpu[CPU_WAIT]))
712 * 100 + (cpu_sum / 2)) / cpu_sum);
716 * Make the current one the first one and move the whole thing one place down
/*
 * The byte count is the distance from snapshot[0] to snapshot[POLL_VALUES],
 * i.e. POLL_VALUES elements, which lands in snapshot[1..POLL_VALUES] and
 * drops the former last element. memmove is needed because source and
 * destination overlap.
 */
718 memmove(&snapshot[1], &snapshot[0],
719 (size_t) (((char *) &snapshot[POLL_VALUES]) -
720 ((char *) &snapshot[0])));
723 * Erase the current one
/* Leaves css_time at 0 so the next run reads fresh data into slot 0 */
725 memset(&snapshot[0], 0, sizeof snapshot[0]);
728 * Only important on start up, we keep track of how many snapshots we have taken so far
730 if (number_of_snapshots < POLL_VALUES) {
731 number_of_snapshots++;
733 } /* update_stats ends here */
736 * *var_extensible_vmstat starts here
739 * The guts of the module, this routine gets called to service a request
/*
 * Request handler for every variable registered by init_vmstat_solaris2().
 *
 * vp, var_len, write_method (and the parameters on lines not visible in
 * this excerpt) are passed on to header_generic() for validation.
 *
 * Returns a pointer to the requested value. All returned pointers refer to
 * static storage (long_ret, errmsg or the file-scope result variables), so
 * the value stays valid after the function returns and is overwritten by
 * the next request or the next update_stats() run.
 *
 * NOTE(review): the switch statement and most case labels are on lines not
 * visible in this excerpt; the notes below only describe the visible
 * statements.
 */
742 var_extensible_vmstat(struct variable *vp,
746 size_t * var_len, WriteMethod ** write_method)
749 * Needed for returning the values
751 static long long_ret;
752 static char errmsg[300];
755 * set to 0 as default
760 * generic check whether the options passed make sense and whether the
763 * right variable is requested
765 if (header_generic(vp, name, length, exact, var_len, write_method) !=
771 * The function that actually returns s.th.
776 return ((u_char *) (&long_ret));
777 case ERRORNAME: /* dummy name */
/* Constant string, well below the 300 bytes of errmsg */
778 sprintf(errmsg, "systemStats");
779 *var_len = strlen(errmsg);
780 return ((u_char *) (errmsg));
/* The following values were computed by the last update_stats() run */
782 return ((u_char *) (&swapin));
784 return ((u_char *) (&swapout));
786 return ((u_char *) (&blocks_write));
788 return ((u_char *) (&blocks_read));
790 return ((u_char *) (&interrupts));
792 return ((u_char *) (&context_sw));
794 return ((u_char *) (&cpu_perc[CPU_USER]));
796 return ((u_char *) (&cpu_perc[CPU_SYSTEM]));
798 return ((u_char *) (&cpu_perc[CPU_IDLE]));
/*
 * Raw CPU counters: the sum over all CPUs is divided by the CPU count.
 * NOTE(review): the result of take_snapshot() is ignored in every case
 * below. If no cpu_stat kstat was read, css_cpus is 0 and the division
 * divides by zero - confirm and consider checking the return value.
 */
800 take_snapshot(&raw_values);
802 * LINTED has to be 'long'
805 (long) (raw_values.css_cpu[CPU_USER] / raw_values.css_cpus);
806 return ((u_char *) (&long_ret));
808 * We are missing CPURAWNICE, Solaris does not account for this in the kernel so this OID can not
811 * be returned. Also, these values will roll over sooner or later and then return inaccurate data
814 * but the MIB wants Integer32 so we cannot put a counter here
817 * (Has been changed to Counter32 in the latest MIB version!)
820 take_snapshot(&raw_values);
822 * LINTED has to be 'long'
825 (long) ((raw_values.css_cpu[CPU_KERNEL] +
826 raw_values.css_cpu[CPU_WAIT]) / raw_values.css_cpus);
827 return ((u_char *) (&long_ret));
829 take_snapshot(&raw_values);
831 * LINTED has to be 'long'
834 (long) (raw_values.css_cpu[CPU_IDLE] / raw_values.css_cpus);
835 return ((u_char *) (&long_ret));
837 take_snapshot(&raw_values);
839 * LINTED has to be 'long'
842 (long) (raw_values.css_cpu[CPU_WAIT] / raw_values.css_cpus);
843 return ((u_char *) (&long_ret));
845 take_snapshot(&raw_values);
847 * LINTED has to be 'long'
850 (long) (raw_values.css_cpu[CPU_KERNEL] / raw_values.css_cpus);
851 return ((u_char *) (&long_ret));
/*
 * NOTE(review): unlike the raw CPU cases, no take_snapshot() call is
 * visible before the four raw I/O and system counters below, so they
 * appear to return whatever raw_values held after the last raw CPU
 * request (all zero if there was none) - confirm against the full file.
 */
853 long_ret = (long) (raw_values.css_blocks_write);
854 return ((u_char *) (&long_ret));
856 long_ret = (long) (raw_values.css_blocks_read);
857 return ((u_char *) (&long_ret));
858 case SYSRAWINTERRUPTS:
859 long_ret = (long) (raw_values.css_interrupts);
860 return ((u_char *) (&long_ret));
862 long_ret = (long) (raw_values.css_context_sw);
863 return ((u_char *) (&long_ret));
866 * reserved for future use
870 * return((u_char *) (&long_ret));
872 * return((u_char *) (&long_ret));
876 "vmstat_solaris2: Error in request, no match found.\n");
879 } /* *var_extensible_vmstat ends here */