IBM Tivoli Monitoring, Version 6.3 Fix Pack 2

Private situation examples

Define private situations for monitoring criteria that are pertinent to your local agent environment and that neither depend on nor are relevant to the enterprise environment. These examples can be used as templates for your private situations.

Tip: Sample private situation configuration files are provided on the Tivoli® Monitoring Agent installation media in the PrivateConfigSamples directory.

Linux OS lz_situations.xml

<PRIVATECONFIGURATION>
<!-- Sample private situations for the Linux OS agent.
     Each PRIVATESIT below names one situation (SITUATION), gives its
     formula inside a CDATA section (CRITERIA), and sets an INTERVAL.
     NOTE(review): the INTERVAL values look like HHMMSS (001500 would be
     15 minutes); confirm against the private situation XML specification. -->
<!--  Situation Description: Percentage of time the processor is busy
is extremely high. The formula tests for idle CPU below 10 on the
row whose CPU_ID is Aggregate. --> 
<PRIVATESIT>
  <SITUATION>Linux_High_CPU_Overload_pr</SITUATION>
  <CRITERIA>
   <![CDATA[  *VALUE Linux_CPU.Idle_CPU *LT 10  *AND  *VALUE Linux_CPU.CPU_ID 
   *EQ Aggregate ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
  </PRIVATESIT>
<!-- Situation Description: Percentage of packet collisions during data
transmission is high (Collision_Percent greater than 10).
NOTE(review): "Collisons" in the situation name looks like a misspelling
of "Collisions"; the name is left unchanged here because it is the
identifier of the situation. Confirm before renaming. --> 
<PRIVATESIT>
  <SITUATION>Linux_High_Packet_Collisons_pr</SITUATION>
  <CRITERIA>
   <![CDATA[  *VALUE Linux_Network.Collision_Percent *GT 10 ]]> 
  </CRITERIA>
  <INTERVAL>000500</INTERVAL> 
</PRIVATESIT>
<!-- Situation Description: Percentage of available i-nodes is low.
The formula expresses this as Inodes_Used_Percent greater than 80. --> 
<PRIVATESIT>
  <SITUATION>Linux_Low_Pct_Inodes_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE Linux_Disk.Inodes_Used_Percent *GT 80 ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
<!-- Situation Description: Percentage of space available on a file system
is low (Space_Available_Percent less than 15). --> 
<PRIVATESIT>
  <SITUATION>Linux_Low_Pct_Space_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE Linux_Disk.Space_Available_Percent *LT 15 ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
<!-- Situation Description: Tests whether the SSH daemon, sshd, is up and
running. The formula applies *MISSING to the process command name with
the single entry "/usr/sbin/sshd". This formula starts with *IF while most
others in this file do not; NOTE(review): *IF appears to be optional,
confirm against the private situation XML specification. --> 
<PRIVATESIT>
  <SITUATION>Linux_Process_Missing_sshd_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *IF *MISSING Linux_Process.Process_Command_Name 
   *EQ ("/usr/sbin/sshd") ]]> 
  </CRITERIA>
  <INTERVAL>001000</INTERVAL>
</PRIVATESIT>
<!-- Situation Description: Percentage of processor time used by
a process is high (Busy_CPU_Pct greater than 60). --> 
<PRIVATESIT>
  <SITUATION>Linux_Process_High_CPU_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE Linux_Process.Busy_CPU_Pct *GT 60 ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
<!-- Situation Description: High number of stopped processes on this system.
The formula matches a process whose State is none of Running, Sleeping,
Disk, or Trace; no count threshold appears in the criteria itself. -->
<PRIVATESIT>
  <SITUATION>Linux_Process_Stopped_pr</SITUATION>
  <CRITERIA>
   <![CDATA[  *VALUE Linux_Process.State *NE Running  *AND  
   *VALUE Linux_Process.State *NE Sleeping  *AND  
   *VALUE Linux_Process.State *NE Disk  *AND  
   *VALUE Linux_Process.State *NE Trace ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL>
</PRIVATESIT>
<!--  Situation Description: Percentage of rejected RPC server or
client calls is high. The formula is true when either
RPC_Client_Calls_Retransmitted or RPC_Server_Calls_Rejected is
greater than 30. --> 
<PRIVATESIT>
  <SITUATION>Linux_RPC_Bad_Calls_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE Linux_RPC_Statistics.RPC_Client_Calls_Retransmitted *GT 30
   *OR  *VALUE Linux_RPC_Statistics.RPC_Server_Calls_Rejected *GT 30 ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: The swap space paging activity on this system
is extremely high (pages paged out per second or pages paged in per
second greater than 400). --> 
<PRIVATESIT>
  <SITUATION>Linux_System_Thrashing_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE Linux_System_Statistics.Pages_paged_out_per_sec *GT 400  
   *OR  *VALUE Linux_System_Statistics.Pages_paged_in_per_sec *GT 400 ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
</PRIVATECONFIGURATION>

UNIX OS ux_situations.xml

<PRIVATECONFIGURATION>
<!-- Sample private situations for the UNIX OS agent.
     Each PRIVATESIT below names one situation (SITUATION), gives its
     formula inside a CDATA section (CRITERIA), and sets an INTERVAL.
     NOTE(review): the INTERVAL values look like HHMMSS (001000 would be
     10 minutes); confirm against the private situation XML specification. -->
<!--  Situation Description: Reports high-CPU processes
(Process.CPU_Utilization greater than 95). --> 
<PRIVATESIT>
  <SITUATION>UNIX_CMD_Runaway_Process_pr</SITUATION>
  <CRITERIA>
   <![CDATA[  *IF *VALUE Process.CPU_Utilization *GT 95 ]]> 
  </CRITERIA>
  <INTERVAL>001000</INTERVAL>
</PRIVATESIT>
<!--  Situation Description: Process CPU utilization is greater than
or equal to 85%. Processes whose command is kproc or swapper are
excluded by the formula. --> 
<PRIVATESIT>
  <SITUATION>UNIX_CPU_Critical_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *IF *VALUE Process.CPU_Utilization *GE 85 *AND *VALUE 
   Process.Command *NE kproc *AND *VALUE Process.Command *NE swapper ]]> 
  </CRITERIA>
  <INTERVAL>001000</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Notes typical I/O bound processor (NFS);
true when System.Wait_I/O is greater than 20.
NOTE(review): this name ends in "_prv" while every other situation in
this file ends in "_pr"; left unchanged because the name is the
identifier of the situation. Confirm whether the suffix is intended. --> 
<PRIVATESIT>
  <SITUATION>UNIX_HD_Exces_IO_Wait_prv</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE System.Wait_I/O *GT 20  ]]> 
  </CRITERIA>
  <INTERVAL>000200</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Tests whether the Internet Services Daemon,
inetd, is up and running. The formula applies *MISSING to Process.Command
with the single entry "/usr/sbin/inetd". --> 
<PRIVATESIT>
  <SITUATION>UNIX_Process_Missing_inetd_pr</SITUATION>
  <CRITERIA>
   <![CDATA[  *MISSING Process.Command *EQ  ("/usr/sbin/inetd") ]]> 
  </CRITERIA>
  <INTERVAL>001000</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Checks the System CPU, Idle, I/O Wait,
and Load Averages for the Busy state. All four conditions must hold:
System_CPU greater than 50, Idle_CPU greater than 0, Wait_I/O greater
than 0, and Load_Average_5_Min greater than 1. --> 
<PRIVATESIT>
  <SITUATION>UNIX_System_Busy_Warning_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE System.System_CPU *GT 50 *AND 
   *VALUE System.Idle_CPU *GT 0 *AND *VALUE System.Wait_I/O *GT 0 *AND 
   *VALUE System.Load_Average_5_Min *GT 1 ]]> 
  </CRITERIA>
  <INTERVAL>000200</INTERVAL>
</PRIVATESIT>
</PRIVATECONFIGURATION>

Windows OS nt_situations.xml

<PRIVATECONFIGURATION>
<!-- Sample private situations for the Windows OS agent.
     Each PRIVATESIT below names one situation (SITUATION), gives its
     formula inside a CDATA section (CRITERIA), and sets an INTERVAL.
     Several situations come in Warning/Critical pairs whose threshold
     ranges are written so that they do not overlap.
     NOTE(review): the INTERVAL values look like HHMMSS (001500 would be
     15 minutes); confirm against the private situation XML specification. -->
<!--  Situation Description: One of the NT Logs is close to capacity
(%_Usage greater than or equal to 95). --> 
<PRIVATESIT>
  <SITUATION>NT_Log_Space_Low_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE NT_Monitored_Logs_Report.%_Usage *GE 95 ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
<!-- Situation Description: Tests whether the NT Scheduler process is
running. The formula applies *MISSING to NT_Process.Process_Name with the
single entry "schedule". --> 
<PRIVATESIT>
  <SITUATION>NT_Missing_Scheduler_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *MISSING NT_Process.Process_Name *EQ ("schedule") ]]> 
  </CRITERIA>
  <INTERVAL>001000</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Percent of the Page File in use is too high
(%_Usage greater than or equal to 80). --> 
<PRIVATESIT>
  <SITUATION>NT_Paging_File_Critical_pr</SITUATION>
  <CRITERIA>
   <![CDATA[  *VALUE NT_Paging_File.%_Usage *GE 80 ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Percent of the Page File in use is rising
(%_Usage at least 75 and less than 80, the range just below the
Critical threshold). --> 
<PRIVATESIT>
  <SITUATION>NT_Paging_File_Warning_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE NT_Paging_File.%_Usage *GE 75 *AND 
   *VALUE NT_Paging_File.%_Usage *LT 80  ]]> 
  </CRITERIA>
  <INTERVAL>001000</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Percent of the time the disk drive is busy
is too high (%_Disk_Time greater than 90). The row whose Disk_Name is
_Total is excluded by the formula. --> 
<PRIVATESIT>
  <SITUATION>NT_Phys_Disk_Busy_Crit_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE NT_Physical_Disk.%_Disk_Time *GT 90 *AND 
   *VALUE NT_Physical_Disk.Disk_Name *NE _Total ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Percent of the time the disk drive is busy
is rising (%_Disk_Time greater than 80 and at most 90). The row whose
Disk_Name is _Total is excluded by the formula. --> 
<PRIVATESIT>
  <SITUATION>NT_Phys_Disk_Busy_Warn_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE NT_Physical_Disk.%_Disk_Time *GT 80 *AND 
   *VALUE NT_Physical_Disk.%_Disk_Time *LE 90 *AND 
   *VALUE NT_Physical_Disk.Disk_Name *NE _Total  ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Percentage of processor time used is too high
(%_Processor_Time greater than or equal to 65). Rows with Priority_Base 0
and the row named _Total are excluded by the formula. --> 
<PRIVATESIT>
  <SITUATION>NT_Proc_CPU_Critical_pr</SITUATION>
  <CRITERIA>
   <![CDATA[  *VALUE NT_Process.%_Processor_Time *GE 65 *AND *VALUE 
   NT_Process.Priority_Base *NE 0 *AND *VALUE NT_Process.Process_Name 
   *NE _Total  ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Percentage of processor time used is high
(%_Processor_Time at least 50 and less than 65). Rows with Priority_Base 0
and the row named _Total are excluded by the formula. --> 
<PRIVATESIT>
  <SITUATION>NT_Proc_CPU_Warn_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE NT_Process.%_Processor_Time *GE 50 *AND 
   *VALUE NT_Process.%_Processor_Time *LT 65 *AND 
   *VALUE NT_Process.Priority_Base *NE 0 *AND 
   *VALUE NT_Process.Process_Name *NE _Total  ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: A Service Error was reported (an event log
entry whose Source is "Service Control Manager" and whose Type is Error). --> 
<PRIVATESIT>
  <SITUATION>NT_Service_Error_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE NT_Event_Log.Source *EQ "Service Control Manager" 
   *AND *VALUE NT_Event_Log.Type *EQ Error  ]]> 
  </CRITERIA>
  <INTERVAL>001000</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Rate of operations to file system devices
per second is too high (File_Data_Operations/Sec greater than or equal
to 100000). --> 
<PRIVATESIT>
  <SITUATION>NT_System_File_Critical_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE NT_System.File_Data_Operations/Sec *GE 100000 ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Rate of operations to file system devices
per second is rising (File_Data_Operations/Sec at least 10000 and less
than 100000). --> 
<PRIVATESIT>
  <SITUATION>NT_System_File_Warn_pr</SITUATION>
  <CRITERIA>
   <![CDATA[  *VALUE NT_System.File_Data_Operations/Sec *GE 10000 *AND 
   *VALUE NT_System.File_Data_Operations/Sec *LT 100000  ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
</PRIVATECONFIGURATION>

Tivoli Data Warehouse Summarization and Pruning sy_situations.xml

<PRIVATECONFIGURATION>
<!-- Sample private situations for the Summarization and Pruning agent.
     Each PRIVATESIT below names one situation (SITUATION), gives its
     formula inside a CDATA section (CRITERIA), and sets an INTERVAL.
     NOTE(review): the two failure situations use INTERVAL 000000 while the
     connectivity situations use 001500; presumably 000000 means no sampling
     interval (an event-driven situation). Confirm against the private
     situation XML specification. -->
<!--  Situation Description: No connectivity to the Warehouse database
(DB_Connectivity equals No). --> 
<PRIVATESIT>
  <SITUATION>KSY_DB_Connectivity_Fail_pr</SITUATION>
  <CRITERIA>
   <![CDATA[  *VALUE KSY_CONNECTIVITY.DB_Connectivity *EQ No ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Failures occurred in pruning
(Pruning_Failures greater than 0). --> 
<PRIVATESIT>
  <SITUATION>KSY_Pruning_Failures_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE KSY_SUMMARIZATION_STATISTICS.Pruning_Failures *GT 0 ]]> 
  </CRITERIA>
  <INTERVAL>000000</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Failures occurred in summarization
(Summarization_Failures greater than 0). --> 
<PRIVATESIT>
  <SITUATION>KSY_Summ_Failures_pr</SITUATION> 
  <CRITERIA>
   <![CDATA[  *VALUE KSY_SUMMARIZATION_STATISTICS.Summarization_Failures 
   *GT 0  ]]> 
  </CRITERIA>
  <INTERVAL>000000</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: No connectivity to the
Tivoli Enterprise Portal Server (TEPS_Connectivity equals No). --> 
<PRIVATESIT>
  <SITUATION>KSY_TEPS_Conn_Fail_pr</SITUATION>
  <CRITERIA>
   <![CDATA[  *VALUE KSY_CONNECTIVITY.TEPS_Connectivity *EQ No  ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
</PRIVATECONFIGURATION>

Tivoli Data Warehouse warehouse_situations.xml

<PRIVATECONFIGURATION>
<!-- Sample private situations for the Warehouse Proxy agent.
     Each PRIVATESIT below names one situation (SITUATION), gives its
     formula inside a CDATA section (CRITERIA), and sets an INTERVAL.
     NOTE(review): the two error situations use INTERVAL 000000 while the
     connectivity situation uses 001500; presumably 000000 means no sampling
     interval (an event-driven situation). Confirm against the private
     situation XML specification. -->
<!--  Situation Description: No connectivity to the warehouse database
(DB_Connectivity equals No). --> 
<PRIVATESIT>
  <SITUATION>KHD_DB_Connectivity_pr</SITUATION>
  <CRITERIA>
   <![CDATA[  *VALUE KHD_DB_INFO.DB_Connectivity *EQ No ]]> 
  </CRITERIA>
  <INTERVAL>001500</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Critical errors during the execution
of the Warehouse Proxy (Error_Severity equals Critical). --> 
<PRIVATESIT>
  <SITUATION>KHD_Error_Critical_pr</SITUATION>
  <CRITERIA>
   <![CDATA[  *VALUE KHD_LAST_ERROR_DETAILS.Error_Severity *EQ Critical ]]> 
  </CRITERIA>
  <INTERVAL>000000</INTERVAL> 
</PRIVATESIT>
<!--  Situation Description: Fatal errors during the execution
of the Warehouse Proxy (Error_Severity equals Fatal). --> 
<PRIVATESIT>
  <SITUATION>KHD_Error_Fatal_pr</SITUATION>
  <CRITERIA>
   <![CDATA[  *VALUE KHD_LAST_ERROR_DETAILS.Error_Severity *EQ Fatal ]]> 
  </CRITERIA>
  <INTERVAL>000000</INTERVAL> 
</PRIVATESIT>
</PRIVATECONFIGURATION>


Feedback