Skip to content
Snippets Groups Projects
Commit c4f31f45 authored by Jörg Sachse's avatar Jörg Sachse
Browse files

feat: add more sources for information on corrupt DB blocks

parent 4b834cb2
No related branches found
No related tags found
No related merge requests found
Pipeline #2777 passed
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
# REQUIREMENTS: # REQUIREMENTS:
# - Bash 4 or higher # - Bash 4 or higher
# - Oracle 11 DB or higher with sqlplus installed # - Oracle 11 DB or higher with sqlplus & rman installed
scriptname=$( basename "${0}" ".sh" ) scriptname=$( basename "${0}" ".sh" )
LOCKFILE="/var/lock/${scriptname}.lock" LOCKFILE="/var/lock/${scriptname}.lock"
...@@ -16,12 +16,90 @@ status=3 ...@@ -16,12 +16,90 @@ status=3
itemname='Oracle corrupt table blocks' itemname='Oracle corrupt table blocks'
perf_values="-" perf_values="-"
# check if sqlplus is installed # check if sqlplus & rman is installed
BINARY="$(su oracle -c 'command -v sqlplus')" SQLP_BINARY="$(su oracle -c 'command -v sqlplus')"
[[ ${BINARY} ]] || exit 1 [[ ${SQLP_BINARY} ]] || exit 1
RMAN_BINARY="$(su oracle -c 'command -v rman')"
[[ ${RMAN_BINARY} ]] || exit 1
create_commandlist(){ DATABASE_SID=$( sudo /bin/su - oracle -c'echo $ORACLE_SID' )
/bin/su - oracle -c"${BINARY} -S / as sysdba <<-\"EOF\"
#######################################
# Search one logfile for known Oracle block-corruption error codes.
# Globals:   ORACLE_OUTPUT (written) - set to 3 when one of the error codes is
#            found; left untouched otherwise.
# Arguments: $1 - path of the logfile to search (mandatory)
# Outputs:   a usage error on stderr when no logfile was passed
# Returns:   0 after searching; terminates the current (sub)shell with exit
#            status 1 when $1 is missing or empty.
#######################################
grep_for_errors() {
  if [[ -z "${1:-}" ]]; then
    # Diagnostics belong on stderr so they cannot be mistaken for regular
    # script output.
    echo "ERROR: You need to pass a logfile to grep_for_errors()." >&2
    exit 1
  fi
  local FILE="${1}"

  # THE FOLLOWING ERRORS ARE DETECTED:
  # - ORA-01210: data file header is media corrupt
  # (https://docs.oracle.com/database/121/ERRMG/ORA-00910.htm#ERRMG-GUID-D9EBDFFA-88C6-4185-BD2C-E1B959A97274)
  # - ORA-01578: ORACLE data block corrupted (file # string, block # string)
  # (https://docs.oracle.com/database/121/ERRMG/ORA-01500.htm#ERRMG-GUID-65B2B9E5-7075-4D53-91B8-FCAECA0AEE0E)
  # FEEL FREE TO ADD MORE ERRORS AS NECESSARY (one additional -e per code).
  #
  # A single grep with several -e patterns reads the file only once; "--"
  # keeps a filename starting with "-" from being parsed as an option.
  if grep -q -e "ORA-01210" -e "ORA-01578" -- "${FILE}"; then
    ORACLE_OUTPUT=3
  fi
}
# Runs 35 sec on Dev system as of 2022-09-15. That should be plenty fast.
# https://db.geeksinsight.com/2012/11/15/basics-corruptions-series-3-how-to-find-physical-corruptions-and-limitations-with-approach/
# All the limitations that apply to DBV are applicable to RMAN VALIDATE also:
# - DBV cannot understand any table / index rowcount mismatch.
# - DBV cannot understand any complex corruptions, especially corruptions
# below cache layer of a datablock.
# - Some blocks that may not be part of Oracle, they would have been dropped. But
# DBV will still report that block as corrupted. When you check with the
# query against dba_extents (given below) there won't be any rows returned.
# And this corrupt block will not affect normal database operations as
# Oracle is not going to read this block. But while performing RMAN backups
# we still report this block as corrupted.
# - Some types of corruptions that cannot be explored while we do an OS level
# read.
# So, this function is indeed fast, but at the cost of not finding as many
# possible corruptions.
# https://docs.oracle.com/en/database/oracle/oracle-database/19/rcmrf/BACKUP.html#GUID-73642FF2-43C5-48B2-9969-99001C52EB50
#######################################
# Pipe a "BACKUP VALIDATE CHECK LOGICAL DATABASE;" statement into rman, run as
# the oracle user in a login shell, and redirect rman's stdout to a logfile.
# Globals:   scriptname (read) - used to build the logfile name
# Outputs:   /tmp/${scriptname}_rman.log
#######################################
check_rman() {
  local rman_statement='BACKUP VALIDATE CHECK LOGICAL DATABASE;'
  local rman_log="/tmp/${scriptname}_rman.log"

  # Both values are expanded here, by the calling shell, so the oracle login
  # shell receives one fully assembled command string.
  /bin/su - oracle -c"echo '${rman_statement}' | rman target / > ${rman_log}"
}
#######################################
# Write the SQL*Plus script /tmp/${scriptname}.sqlview, which selects all rows
# from v$database_block_corruption.
#
# The script text is produced by "cat" with a here-document inside a login
# shell of the oracle user; the calling shell redirects that output into the
# script file.
#
# Quoting notes:
# - ${scriptname} is expanded by the calling shell (double-quoted string).
# - \$ reaches the inner shell as a literal "$"; the quoted here-document
#   delimiter ('EOF') keeps the inner shell from expanding it.
# - The here-document must be closed by a line reading exactly EOF. A
#   terminator written with quotes never matches, which makes the shell warn
#   about an unterminated here-document and leaks the terminator line into the
#   generated SQL file.
#
# NOTE(review): the SPOOL target below is the very file this function
# generates and that is later passed to SQL*Plus as the script. Together with
# SET TERMOUT OFF the query result presumably ends up in that file rather than
# on stdout - confirm that this is intended.
#
# Globals:   scriptname (read)
# Outputs:   /tmp/${scriptname}.sqlview
#######################################
create_sqlp_commandlist_view() {
  /bin/su - oracle -c"cat <<-'EOF'
SET TERMOUT OFF
SET PAGESIZE 0
/* SET FEEDBACK OFF makes sure that we only get output if there's actual data. */
SET FEEDBACK OFF
SET VERIFY OFF
SET TRIMSPOOL ON
/* write temporary file with all the instructions */
SPOOL /tmp/${scriptname}.sqlview
/* Show the complete table contents. */
select * from v\$database_block_corruption;
/*
For each row, find the segment impacted.
This can only be used if you have the absolute file number and block_id that
you get in alert log or by other means. We can't just call it and get a
result willy nilly without extraction identifiers from alert logs first.
We'll comment that out and just leave it in for documentation.
*/
/*
SELECT TABLESPACE_NAME, SEGMENT_NAME, PARTITION_NAME, SEGMENT_TYPE
FROM DBA_EXTENTS
WHERE FILE_ID=&FILE_ID AND
&BLOCK_ID BETWEEN BLOCK_ID AND BLOCK_ID + BLOCKS - 1;
*/
SPOOL OFF
exit;
EOF
" > "/tmp/${scriptname}.sqlview"
}
# For large databases, this function can take several hours. We'll leave it in
# for now because it's quite thorough, but it's not nice.
create_sqlp_commandlist_validate(){
/bin/su - oracle -c"${SQLP_BINARY} -S / as sysdba <<-\"EOF\"
/* /*
shamelessly stolen (and adapted) from: shamelessly stolen (and adapted) from:
https://oracle-base.com/dba/script?category=miscellaneous&file=analyze_all.sql https://oracle-base.com/dba/script?category=miscellaneous&file=analyze_all.sql
...@@ -55,26 +133,28 @@ create_commandlist(){ ...@@ -55,26 +133,28 @@ create_commandlist(){
echo "exit;" >> "/tmp/${scriptname}.tempsql" echo "exit;" >> "/tmp/${scriptname}.tempsql"
} }
# IMPORTANT: Set lock using "flock", NOT "touch"!!! It's atomic and doesn't have to be cleared after the script ran. # IMPORTANT: Set lock using "flock", NOT "touch"!!! It's atomic and doesn't
# have to be cleared after the script ran.
( (
flock -n 9 || exit 1 flock -n 9 || exit 1
check_rman
create_sqlp_commandlist_view
/bin/su - oracle -c"${SQLP_BINARY} -S / as sysdba @/tmp/${scriptname}.sqlview > /tmp/${scriptname}_view.log"
if [[ ( ! -e "/tmp/${scriptname}.tempsql" ) || \ if [[ ( ! -e "/tmp/${scriptname}.tempsql" ) || \
( $(date -r "/tmp/${scriptname}.tempsql" +%s) -lt $(date -d 'now - 14 days' +%s) ) ]]; then ( $(date -r "/tmp/${scriptname}.tempsql" +%s) -lt $(date -d 'now - 14 days' +%s) ) ]]; then
create_commandlist create_sqlp_commandlist_validate
fi fi
/bin/su - oracle -c"${SQLP_BINARY} -S / as sysdba @/tmp/${scriptname}.tempsql > /tmp/${scriptname}_validate.log"
/bin/su - oracle -c"${BINARY} -S / as sysdba @/tmp/${scriptname}.tempsql > /tmp/${scriptname}.log" LOGS="/tmp/${scriptname}_view.log
/tmp/${scriptname}_validate.log
# THE FOLLOWING ERRORS ARE DETECTED: /tmp/${scriptname}_rman.log
# - ORA-01210: data file header is media corrupt /exlibris/app/oracle/diag/rdbms/${DATABASE_SID}/${DATABASE_SID}/trace/alert_${DATABASE_SID}.log"
# (https://docs.oracle.com/database/121/ERRMG/ORA-00910.htm#ERRMG-GUID-D9EBDFFA-88C6-4185-BD2C-E1B959A97274) for LOG in ${LOGS}; do
# - ORA-01578: ORACLE data block corrupted (file # string, block # string) grep_for_errors "${LOG}"
# (https://docs.oracle.com/database/121/ERRMG/ORA-01500.htm#ERRMG-GUID-65B2B9E5-7075-4D53-91B8-FCAECA0AEE0E) done
# FEEL FREE TO ADD MORE ERRORS AS NECESSARY.
if grep -q "ORA-01210" "/tmp/${scriptname}.log" || \
grep -q "ORA-01578" "/tmp/${scriptname}.log"; then
ORACLE_OUTPUT=3
fi
if [[ ( ${ORACLE_OUTPUT} -eq 0 ) ]]; then if [[ ( ${ORACLE_OUTPUT} -eq 0 ) ]]; then
status=0 status=0
...@@ -90,6 +170,9 @@ flock -n 9 || exit 1 ...@@ -90,6 +170,9 @@ flock -n 9 || exit 1
) 9>"${LOCKFILE}" ) 9>"${LOCKFILE}"
rm -f "/tmp/${scriptname}.tempsql" rm -f "/tmp/${scriptname}.tempsql"
rm -f "/tmp/${scriptname}.log" rm -f "/tmp/${scriptname}_validate.log"
rm -f "/tmp/${scriptname}.sqlview"
rm -f "/tmp/${scriptname}_view.log"
rm -f "/tmp/${scriptname}_rman.log"
# Code checked by shellcheck (https://github.com/koalaman/shellcheck) on 2022-09-09 # Code checked by shellcheck (https://github.com/koalaman/shellcheck) on 2022-09-09
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment