@@ -163,6 +163,7 @@ package check_postgres;
163163 ' listening' => q{ listening} ,
164164 ' locks-msg' => q{ total "$1" locks: $2} ,
165165 ' locks-msg2' => q{ total locks: $1} ,
166+ ' lockwait-msg' => q{ $1: $2($3) blocking $4($5) for $6 blocked statement "$7"} ,
166167 ' logfile-bad' => q{ Invalid logfile "$1"} ,
167168 ' logfile-debug' => q{ Final logfile: $1} ,
168169 ' logfile-dne' => q{ logfile $1 does not exist!} ,
@@ -1902,6 +1903,7 @@ package check_postgres;
19021903 last_autovacuum => [0, ' Check the maximum time in seconds since any one table has been autovacuumed.' ],
19031904 listener => [0, ' Checks for specific listeners.' ],
19041905 locks => [0, ' Checks the number of locks.' ],
1906+ lockwait => [0, ' Checks for blocking locks.' ],
19051907 logfile => [1, ' Checks that the logfile is being written to correctly.' ],
19061908 new_version_bc => [0, ' Checks if a newer version of Bucardo is available.' ],
19071909 new_version_box => [0, ' Checks if a newer version of boxinfo is available.' ],
@@ -2709,6 +2711,9 @@ sub finishup {
27092711# # Check number and type of locks
27102712check_locks() if $action eq ' locks' ;
27112713
2714+ # # Check lock wait
2715+ check_lockwait() if $action eq ' lockwait' ;
2716+
27122717# # Logfile is being written to
27132718check_logfile() if $action eq ' logfile' ;
27142719
@@ -6177,6 +6182,63 @@ sub check_locks {
61776182
61786183} # # end of check_locks
61796184
sub check_lockwait {

    ## Check for sessions that are stuck waiting on a lock held by another session
    ## By default, checks all databases
    ## Can check specific databases with include
    ## Can ignore databases with exclude
    ## Warning and critical are times, compared against how long the blocked statement has been running
    ## Example: --warning='1 min' --critical='2 min'
    ## For MRTG, reports the number of distinct blocked sessions

    my ($warning, $critical) = validate_range
        ({
          type             => 'time',
          default_warning  => '1 min',
          default_critical => '2 min',
          });

    ## Returns one row per (blocked session, blocking session) pair.
    ## The wait is measured from the start of the blocked statement (query_start).
    ## pg_blocking_pids() needs PostgreSQL 9.6 or better.
    ## NOTE(review): pg_blocking_pids() reports a prepared transaction as pid 0, which has
    ## no row in pg_stat_activity, so such blockers look to be dropped by the join - confirm
    $SQL = qq{SELECT a.datname AS datname,
       bl.pid AS blocked_pid,
       a.usename AS blocked_user,
       ka.pid AS blocking_pid,
       ka.usename AS blocking_user,
       round(extract (epoch from current_timestamp - a.query_start)) AS waited_sec,
       a.query AS blocked_statement
  FROM pg_catalog.pg_locks bl
  JOIN pg_catalog.pg_stat_activity a ON a.pid = bl.pid
  JOIN pg_catalog.pg_stat_activity ka ON (ka.pid = ANY(pg_blocking_pids(bl.pid)))
 WHERE NOT bl.granted
};

    ## No rows at all is the healthy case, so an empty result is allowed
    my $info = run_command($SQL, { regex => qr{\w}, emptyok => 1 });

    ## Number of distinct blocked sessions, summed over every target
    my $n = 0;

    ## NOTE(review): $db is not declared in this sub; it is assumed to be a file-scoped
    ## variable that the add_* reporters read - confirm before turning it into a lexical
    for $db (@{$info->{db}}) {

        ## A session waiting behind several blockers produces several rows.
        ## Remember which blocked pids have already been counted for this target.
        my %counted;

        ROW: for my $r (@{$db->{slurp}}) {

            my ($dbname, $blocked_pid, $blocked_user, $blocking_pid,
                $blocking_user, $waited_sec, $blocked_statement)
                = @{$r}{qw/ datname blocked_pid blocked_user blocking_pid
                            blocking_user waited_sec blocked_statement /};

            ## May be forcibly skipping this database via arguments
            next ROW if skip_item($dbname);

            ## Every blocker/blocked pair gets its own line in the output
            my $msg = msg('lockwait-msg', $dbname, $blocking_user, $blocking_pid,
                          $blocked_user, $blocked_pid, pretty_time($waited_sec), $blocked_statement);

            if (length $critical and $waited_sec >= $critical) {
                add_critical($msg);
            }
            elsif (length $warning and $waited_sec >= $warning) {
                add_warning($msg);
            }
            else {
                add_ok($msg);
            }

            ## Count the blocked session once, no matter how many sessions are blocking it
            $n++ if ! $counted{$blocked_pid}++;
        }
    }

    add_ok('No blocking locks') if ! $n;
    do_mrtg({ one => $n }) if $MRTG;
    return;

} ## end of check_lockwait
61806242
61816243sub check_logfile {
61826244
@@ -10493,6 +10555,22 @@ =head2 B<locks>
1049310555
1049410556For MRTG output, returns the number of locks on the first line, and the name of the database on the fourth line.
1049510557
10558+ =head2 B<lockwait >
10559+
10560+ (C<symlink: check_postgres_lockwait>) Check if there are blocking locks and for how long. There is no
10561+ need to run this more than once per database cluster. Databases can be filtered
10562+ with the I<--include> and I<--exclude> options. See the L</"BASIC FILTERING"> section
10563+ for more details.
10564+
10565+ The I<--warning> and I<--critical> options are given as times,
10566+ and represent how long the lock has been blocking.
10567+
10568+ Example 1: Warn if a lock has been blocking for more than a minute, and give a critical if it has been blocking for more than 2 minutes
10569+
10570+ check_postgres_lockwait --host=garrett --warning='1 min' --critical='2 min'
10571+
10572+ For MRTG output, returns the number of blocked sessions.
10573+
1049610574=head2 B<logfile >
1049710575
1049810576(C<symlink: check_postgres_logfile > ) Ensures that the logfile is in the expected location and is being logged to.
0 commit comments