diff -ur nagios-2.9.orig/base/freshness.c nagios-2.9/base/freshness.c
--- nagios-2.9.orig/base/freshness.c 2007-09-26 09:41:32.000000000 +0100
+++ nagios-2.9/base/freshness.c 2007-09-26 09:45:12.000000000 +0100
@@ -28,8 +28,10 @@
extern int interval_length;
extern int additional_freshness_latency;
+extern int max_service_check_spread;
+extern int max_host_check_spread;
-extern time_t program_start;
+extern time_t monitoring_start;
int calculate_service_freshness_threshold(service *svc){
int temp_threshold;
@@ -46,8 +46,9 @@
* Sets an arbitrary 1 hour threshold on the check interval of services that are pushed because
* if the check_interval is a day and a reload occurs at least once a day, the service would never
* go stale. A reload every hour is unlikely */
- if(svc->has_been_checked==TRUE && program_start>svc->last_check && svc->check_interval*interval_length<=3600)
- temp_threshold=temp_threshold+(int)(program_start-svc->last_check);
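+ /* Altinity patch: after a (re)start, also extend the threshold by max_service_check_spread scaled by interval_length. NOTE(review): the spread option is documented in minutes, so this is only a minute count when interval_length is 60 - confirm */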
+ if(svc->has_been_checked==TRUE && monitoring_start>svc->last_check && svc->check_interval*interval_length<=3600)
+ temp_threshold=temp_threshold+(int)(monitoring_start-svc->last_check)+(max_service_check_spread*interval_length);
}
else
temp_threshold=svc->freshness_threshold;
@@ -64,8 +65,9 @@
if(hst->freshness_threshold==0){
temp_threshold=(hst->check_interval*interval_length)+hst->latency+additional_freshness_latency;
- if(hst->has_been_checked==TRUE && program_start>hst->last_check)
- temp_threshold=temp_threshold+(int)(program_start-hst->last_check);
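+ /* Altinity patch: after a (re)start, also extend the threshold by max_host_check_spread scaled by interval_length. NOTE(review): the spread option is documented in minutes, so this is only a minute count when interval_length is 60 - confirm */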
+ if(hst->has_been_checked==TRUE && monitoring_start>hst->last_check)
+ temp_threshold=temp_threshold+(int)(monitoring_start-hst->last_check)+(max_host_check_spread*interval_length);
}
else
temp_threshold=hst->freshness_threshold;
diff -ur nagios-2.9.orig/base/nagios.c nagios-2.9/base/nagios.c
--- nagios-2.9.orig/base/nagios.c 2007-09-26 09:41:34.000000000 +0100
+++ nagios-2.9/base/nagios.c 2007-09-26 09:48:07.000000000 +0100
@@ -163,6 +163,7 @@
int currently_running_service_checks=0;
time_t program_start=0L;
+time_t monitoring_start=0L;
int nagios_pid=0;
int enable_notifications=TRUE;
int execute_service_checks=TRUE;
@@ -757,6 +758,16 @@
broker_program_state(NEBTYPE_PROCESS_EVENTLOOPSTART,NEBFLAG_NONE,NEBATTR_NONE,NULL);
#endif
+ /* get monitoring (re)start time and save as macro */
+ monitoring_start=time(NULL);
+ if(macro_x[MACRO_MONITORINGSTARTTIME]!=NULL)
+ free(macro_x[MACRO_MONITORINGSTARTTIME]);
+ macro_x[MACRO_MONITORINGSTARTTIME]=(char *)malloc(MAX_DATETIME_LENGTH);
+ if(macro_x[MACRO_MONITORINGSTARTTIME]!=NULL){
+ snprintf(macro_x[MACRO_MONITORINGSTARTTIME],MAX_DATETIME_LENGTH,"%lu",(unsigned long)monitoring_start);
+ macro_x[MACRO_MONITORINGSTARTTIME][MAX_DATETIME_LENGTH-1]='\x0';
+ }
+
/***** start monitoring all services *****/
/* (doesn't return until a restart or shutdown signal is encountered) */
event_execution_loop();
diff -ur nagios-2.9.orig/base/utils.c nagios-2.9/base/utils.c
--- nagios-2.9.orig/base/utils.c 2007-09-26 09:41:32.000000000 +0100
+++ nagios-2.9/base/utils.c 2007-09-26 09:49:29.000000000 +0100
@@ -2198,6 +2198,7 @@
case MACRO_HOSTPERFDATAFILE:
case MACRO_SERVICEPERFDATAFILE:
case MACRO_PROCESSSTARTTIME:
+ case MACRO_MONITORINGSTARTTIME:
break;
default:
if(macro_x[x]!=NULL){
@@ -2265,6 +2266,7 @@
case MACRO_HOSTPERFDATAFILE:
case MACRO_SERVICEPERFDATAFILE:
case MACRO_PROCESSSTARTTIME:
+ case MACRO_MONITORINGSTARTTIME:
if(macro_x[x]!=NULL){
free(macro_x[x]);
macro_x[x]=NULL;
@@ -2395,6 +2397,7 @@
add_macrox_name(MACRO_PROCESSSTARTTIME,"PROCESSSTARTTIME");
add_macrox_name(MACRO_HOSTCHECKTYPE,"HOSTCHECKTYPE");
add_macrox_name(MACRO_SERVICECHECKTYPE,"SERVICECHECKTYPE");
+ add_macrox_name(MACRO_MONITORINGSTARTTIME,"MONITORINGSTARTTIME");
#ifdef DEBUG0
printf("init_macrox_names() end\n");
diff -ur nagios-2.9.orig/html/docs/macros.html nagios-2.9/html/docs/macros.html
--- nagios-2.9.orig/html/docs/macros.html 2005-10-23 17:37:08.000000000 +0100
+++ nagios-2.9/html/docs/macros.html 2007-09-26 09:50:58.000000000 +0100
@@ -641,6 +641,11 @@
Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
+| $MONITORINGSTARTTIME$ |
+Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
+
+
+
| $ADMINEMAIL$ |
Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
@@ -1277,6 +1282,12 @@
+
+| $MONITORINGSTARTTIME$ |
+Time stamp in time_t format (seconds since the UNIX epoch) indicating when the Nagios process last (re)started monitoring. You can determine the number of seconds that Nagios has been monitoring (since it was last restarted) by subtracting $MONITORINGSTARTTIME$ from $TIMET$. The length of time Nagios takes to initialise can be deduced from '$MONITORINGSTARTTIME$ - $PROCESSSTARTTIME$'. |
+
+
+
| $ADMINEMAIL$ |
Global administrative email address. This value is taken from the admin_email directive. |
diff -ur nagios-2.9.orig/include/nagios.h.in nagios-2.9/include/nagios.h.in
--- nagios-2.9.orig/include/nagios.h.in 2007-09-26 09:41:32.000000000 +0100
+++ nagios-2.9/include/nagios.h.in 2007-09-26 09:51:23.000000000 +0100
@@ -49,7 +49,7 @@
#define MACRO_ENV_VAR_PREFIX "NAGIOS_"
-#define MACRO_X_COUNT 99 /* size of macro_x[] array */
+#define MACRO_X_COUNT 100 /* size of macro_x[] array */
#define MACRO_HOSTNAME 0
#define MACRO_HOSTALIAS 1
@@ -150,6 +150,7 @@
#define MACRO_PROCESSSTARTTIME 96
#define MACRO_HOSTCHECKTYPE 97
#define MACRO_SERVICECHECKTYPE 98
+#define MACRO_MONITORINGSTARTTIME 99
diff -ur nagios-2.9.orig/t/test_freshness.c nagios-2.9/t/test_freshness.c
--- nagios-2.9.orig/t/test_freshness.c 2007-09-26 09:36:25.000000000 +0100
+++ nagios-2.9/t/test_freshness.c 2007-09-26 09:55:16.000000000 +0100
@@ -26,7 +26,9 @@
int interval_length=60;
int additional_freshness_latency=15;
-time_t program_start=1234567890;
+time_t monitoring_start=1234567890;
+int max_host_check_spread=30;
+int max_service_check_spread=15;
service *create_base_test_service() {
service *temp_service;
@@ -82,8 +84,8 @@
service->state_type=SOFT_STATE; /* I don't think this is valid combination, but test anyway */
ok( calculate_service_freshness_threshold(service) == (int)((5*60)+53.7+15), "Soft OK state should have higher freshness threshold");
- program_start = (time_t)1234569111; /* Simulate a restart */
- ok( calculate_service_freshness_threshold(service) == (int)((5*60)+53.7+15+111), "Added extra allowance if program_start is greater than last_check to allow slaves more time to send status");
+ monitoring_start = (time_t)1234569111; /* Simulate a restart */
+ ok( calculate_service_freshness_threshold(service) == (int)((5*60)+53.7+15+111+(15*60)), "Added extra allowance if monitoring_start is greater than last_check to allow slaves more time to send status");
/* If check_interval is > hour, don't add that extra 111 seconds */
service->check_interval=61;
@@ -95,7 +97,7 @@
#ifdef DIAG
diag("Running service checks with modified additional_freshness_latency");
#endif
- program_start = (time_t)1234567890;
+ monitoring_start = (time_t)1234567890;
additional_freshness_latency=30;
service=create_base_test_service();
@@ -117,8 +119,8 @@
service->state_type=SOFT_STATE; /* I don't think this is valid combination, but test anyway */
ok( calculate_service_freshness_threshold(service) == (int)((5*60)+53.7+30), "Soft OK state should have higher freshness threshold");
- program_start = (time_t)1234569111; /* Simulate a restart */
- ok( calculate_service_freshness_threshold(service) == (int)((5*60)+53.7+30+111), "Added extra allowance if program_start is greater than last_check to allow slaves more time to send status");
+ monitoring_start = (time_t)1234569111; /* Simulate a restart */
+ ok( calculate_service_freshness_threshold(service) == (int)((5*60)+53.7+30+111+(15*60)), "Added extra allowance if monitoring_start is greater than last_check to allow slaves more time to send status");
/* If check_interval is > hour, don't add that extra 111 seconds */
service->check_interval=61;
@@ -129,7 +131,7 @@
#ifdef DIAG
diag("Running hosts checks");
#endif
- program_start = (time_t)1234567890;
+ monitoring_start = (time_t)1234567890;
additional_freshness_latency=15;
host=create_base_test_host();
@@ -144,15 +146,15 @@
host->last_check=(time_t)1234569000;
ok( calculate_host_freshness_threshold(host) == (int)((10*60)+82.9+15), "Freshness after a check result");
- program_start = (time_t)1234569111; /* Simulate a restart */
- ok( calculate_host_freshness_threshold(host) == (int)((10*60)+82.9+15+111), "Added extra allowance if program_start is greater than last_check to allow slaves more time to send status");
+ monitoring_start = (time_t)1234569111; /* Simulate a restart */
+ ok( calculate_host_freshness_threshold(host) == (int)((10*60)+82.9+15+111+(30*60)), "Added extra allowance if monitoring_start is greater than last_check to allow slaves more time to send status");
free(host);
#ifdef DIAG
diag("Running hosts checks with modified additional_freshness_latency");
#endif
- program_start = (time_t)1234567890;
+ monitoring_start = (time_t)1234567890;
additional_freshness_latency=30;
host=create_base_test_host();
@@ -167,8 +169,8 @@
host->last_check=(time_t)1234569000;
ok( calculate_host_freshness_threshold(host) == (int)((10*60)+82.9+30), "Freshness after a check result");
- program_start = (time_t)1234569111; /* Simulate a restart */
- ok( calculate_host_freshness_threshold(host) == (int)((10*60)+82.9+30+111), "Added extra allowance if program_start is greater than last_check to allow slaves more time to send status");
+ monitoring_start = (time_t)1234569111; /* Simulate a restart */
+ ok( calculate_host_freshness_threshold(host) == (int)((10*60)+82.9+30+111+(30*60)), "Added extra allowance if monitoring_start is greater than last_check to allow slaves more time to send status");
free(host);