Monday, December 15, 2008

Database Historical Trend Analysis using OS/WEB Tiers

Database Historical Trend Analysis using OS/WEB Tiers:
In this article, I will concentrate on capturing different levels of reports for the OS/DB/WEB (IIS/Apache) tiers to perform trend analysis.
These reports can be useful for troubleshooting the performance or capturing the baseline of the system.
In a troubleshooting approach, understanding the problem and measuring the results of a change are the keys to resolving the issue.


OS Trend Analysis:
To monitor performance effectively, use OS utilities. Using the correct utilities to measure CPU, memory and disk I/O is crucial.
Side Note: We can also use Oracle OS Watcher(OSW), it depends on how easy to refer/compare the days worth of old data.

CPU:
sar -u

-- Repository for sar CPU samples collected by sar.sh;
-- one row per sample per server.
CREATE TABLE system.osmon_sars (
    sample_date DATE,          -- when the sample was loaded (SYSDATE at insert)
    server_name VARCHAR2(50),  -- host name taken from uname
    user_cpu    NUMBER,        -- %usr column of sar -u
    system_cpu  NUMBER,        -- %sys column of sar -u
    waitio_cpu  NUMBER,        -- %wio column of sar -u
    idle_cpu    NUMBER         -- %idle column of sar -u
);


$ cat sar.sh

#!/bin/sh
############################################################
# File Name : sar.sh
# Author : Vijay Dumpa
# Usage : sar.sh ORACLE_SID
#
# Purpose : Capture one sar CPU sample (user/system/wait-IO/
#           idle) and store it in SYSTEM.OSMON_SARS for
#           historical trend analysis.
#
# Modification History
# Who When What
# -------- -------- ----------------------------------------
# review            restored the sqlplus here-document
#                   (the "<<EOF" was lost in publication)
#############################################################
##
# Set up environment for job
##
#set -x
ORACLE_SID=$1; export ORACLE_SID
# Resolve ORACLE_HOME for this SID from oratab, skipping comment lines.
ORACLE_HOME=`grep -v "#" /var/opt/oracle/oratab | grep $ORACLE_SID | cut -d ":" -f2`; export ORACLE_HOME
PATH=$ORACLE_HOME/bin:$PATH; export PATH
# uname -n prints only the node name (same as field 2 of uname -a).
SERVER_NAME=`uname -n`; export SERVER_NAME

sar -u > /tmp/msg$$

# Take the last complete sample line from the sar output and load it
# into the Oracle table through a silent sqlplus here-document.
cat /tmp/msg$$ | tail -3 | head -1 | awk '{printf("%s %s %s %s\n",$2,$3,$4,$5)}' | while read USER_CPU SYSTEM_CPU WAITIO_CPU IDLE_CPU
do
$ORACLE_HOME/bin/sqlplus -s system/xxxxxx@$1 <<EOF >/dev/null
insert into system.osmon_sars(sample_date, server_name, user_cpu, system_cpu, waitio_cpu, idle_cpu)
values (SYSDATE, '$SERVER_NAME', $USER_CPU, $SYSTEM_CPU, $WAITIO_CPU, $IDLE_CPU);
commit;
exit;
EOF
done

rm /tmp/msg$$


Memory:
Swap: swap -s or vmstat


-- Repository for vmstat samples collected by vmstat.sh;
-- one row per sample interval per server.
CREATE TABLE system.osmon_vmstats (
    sample_date DATE,          -- when the sample was loaded (SYSDATE at insert)
    sample_time NUMBER,        -- vmstat sampling interval in seconds
    server_name VARCHAR2(50),  -- host name taken from uname
    runque      NUMBER,        -- run-queue length (vmstat r column)
    free_swap   NUMBER,        -- free swap (vmstat swap column)
    free_memory NUMBER,        -- free memory (vmstat free column)
    page_in     NUMBER,        -- page-ins (vmstat pi column)
    page_out    NUMBER         -- page-outs (vmstat po column)
);


$ cat vmstat.sh

#!/bin/sh
############################################################
# File Name : vmstat.sh
# Author : Vijay Dumpa
# Usage : vmstat.sh ORACLE_SID
#
# Purpose : Capture one vmstat sample (run queue, paging,
#           free swap/memory) and store it in
#           SYSTEM.OSMON_VMSTATS for trend analysis.
#
# Modification History
# Who When What
# -------- -------- ----------------------------------------
# review            restored the sqlplus here-document
#                   (the "<<EOF" was lost in publication)
#############################################################
##
# Set up environment for job
##
#set -x
ORACLE_SID=$1; export ORACLE_SID
# Resolve ORACLE_HOME for this SID from oratab, skipping comment lines.
ORACLE_HOME=`grep -v "#" /var/opt/oracle/oratab | grep $ORACLE_SID | cut -d ":" -f2`; export ORACLE_HOME
PATH=$ORACLE_HOME/bin:$PATH; export PATH
# uname -n prints only the node name (same as field 2 of uname -a).
SERVER_NAME=`uname -n`; export SERVER_NAME

# Sample every 10 seconds, take 2 samples: the first vmstat line is the
# average since boot, so only the second (current) sample is loaded.
SAMPLE_TIME=10
vmstat ${SAMPLE_TIME} 2 > /tmp/msg$$

# Drop the two header lines plus the since-boot line, pick the Solaris
# column positions, and load the sample via a silent sqlplus here-document.
sed 1,3d /tmp/msg$$ | awk '{printf("%s %s %s %s %s\n",$1,$8,$9,$4,$5)}' | while read RUNQUE PAGE_IN PAGE_OUT FREE_SWAP FREE_MEMORY
do
$ORACLE_HOME/bin/sqlplus -s system/xxxxxx@$1 <<EOF >/dev/null
insert into system.osmon_vmstats(sample_date, sample_time, server_name, runque, page_in, page_out, free_swap, free_memory)
values (SYSDATE, $SAMPLE_TIME, '$SERVER_NAME', $RUNQUE, $PAGE_IN, $PAGE_OUT, $FREE_SWAP, $FREE_MEMORY);
commit;
exit;
EOF
done

rm /tmp/msg$$


Disk I/O:
iostat -xnp.

-- Repository for iostat samples collected by iostat.sh;
-- one row per monitored device per run.
CREATE TABLE system.osmon_iostats (
    sample_date        DATE,          -- when the sample was loaded (SYSDATE at insert)
    server_name        VARCHAR2(50),  -- host name taken from uname
    reads_per_sec      NUMBER,        -- r/s from iostat -xnp
    writes_per_sec     NUMBER,        -- w/s from iostat -xnp
    kb_reads_per_sec   NUMBER,        -- kr/s from iostat -xnp
    kb_writes_per_sec  NUMBER,        -- kw/s from iostat -xnp
    percent_trans_wait NUMBER,        -- %w from iostat -xnp
    percent_disk_busy  NUMBER,        -- %b from iostat -xnp
    file_system_device VARCHAR2(50),  -- physical device name
    mount_point        VARCHAR2(10)   -- mapped mount point (u01..u08 or 'new')
);


$ cat iostat.sh (Replace physical device names in the INSERT statement below)

#!/bin/sh
############################################################
# File Name : iostat.sh
# Author : Vijay Dumpa
# Usage : iostat.sh ORACLE_SID
#
# Purpose : Capture per-device iostat figures and store them
#           in SYSTEM.OSMON_IOSTATS, mapping each physical
#           device name to its mount point (u01..u08).
#
# Modification History
# Who When What
# -------- -------- ----------------------------------------
# review            restored the sqlplus here-document
#                   (the "<<EOF" was lost in publication)
#############################################################
##
# Set up environment for job
##
#set -x
ORACLE_SID=$1; export ORACLE_SID
# Resolve ORACLE_HOME for this SID from oratab, skipping comment lines.
ORACLE_HOME=`grep -v "#" /var/opt/oracle/oratab | grep $ORACLE_SID | cut -d ":" -f2`; export ORACLE_HOME
PATH=$ORACLE_HOME/bin:$PATH; export PATH
# uname -n prints only the node name (same as field 2 of uname -a).
SERVER_NAME=`uname -n`; export SERVER_NAME

iostat -xnp > /tmp/msg_io
# Keep only the s6 slice lines (whole-disk slices on Solaris).
grep s6 /tmp/msg_io > /tmp/msg$$

# Load each device line into the Oracle table; the CASE expression maps
# the physical device name to its mount point (replace the x..x names
# with your real devices), defaulting to 'new' for unknown devices.
cat /tmp/msg$$ | awk '{printf("%s %s %s %s %s %s %s\n",$1,$2,$3,$4,$9,$10,$11)}' | while read READS_PER_SEC WRITES_PER_SEC KB_READS_PER_SEC KB_WRITES_PER_SEC PERCENT_TRANS_WAIT PERCENT_DISK_BUSY FILE_SYSTEM_DEVICE
do
$ORACLE_HOME/bin/sqlplus -s system/xxxxxx@$1 <<EOF >/dev/null
insert into system.osmon_iostats (sample_date, server_name, reads_per_sec, writes_per_sec, kb_reads_per_sec, kb_writes_per_sec,
percent_trans_wait, percent_disk_busy, file_system_device, mount_point)
values (SYSDATE, '$SERVER_NAME', $READS_PER_SEC, $WRITES_PER_SEC, $KB_READS_PER_SEC, $KB_WRITES_PER_SEC,
$PERCENT_TRANS_WAIT, $PERCENT_DISK_BUSY, '$FILE_SYSTEM_DEVICE',
(case when '$FILE_SYSTEM_DEVICE' = 'xxxxxxxxxxxxxxxxxxxxxxxx1' then 'u01'
when '$FILE_SYSTEM_DEVICE' = 'xxxxxxxxxxxxxxxxxxxxxxxx2' then 'u02'
when '$FILE_SYSTEM_DEVICE' = 'xxxxxxxxxxxxxxxxxxxxxxxx3' then 'u03'
when '$FILE_SYSTEM_DEVICE' = 'xxxxxxxxxxxxxxxxxxxxxxxx4' then 'u04'
when '$FILE_SYSTEM_DEVICE' = 'xxxxxxxxxxxxxxxxxxxxxxxx5' then 'u05'
when '$FILE_SYSTEM_DEVICE' = 'xxxxxxxxxxxxxxxxxxxxxxxx6' then 'u06'
when '$FILE_SYSTEM_DEVICE' = 'xxxxxxxxxxxxxxxxxxxxxxxx7' then 'u07'
when '$FILE_SYSTEM_DEVICE' = 'xxxxxxxxxxxxxxxxxxxxxxxx8' then 'u08'
else 'new'
end)
);
commit;
exit;
EOF
done

rm /tmp/msg_io
rm /tmp/msg$$



DB Trend Analysis:
What STATSPACK, AWR Data is Best for Trend Analysis?
With hundreds of statistics available in STATSPACK, AWR the question arises as to what metrics are the most useful?

Sorts:
It is important to monitor the amount of sorting in the database, especially the amount of sorting that is done in the TEMP tablespace (sorts (disk)).

Physical disk reads:
The reduction of I/O is the primary goal of Oracle tuning, so a trend report showing hourly disk reads can be very useful for measuring the effect of table reorganizations that re-sequence rows into index order.

Physical disk writes:
The amount of disk writes has a great impact on the overall throughput of the Oracle database, and is especially important when rollback segment activity is critical.

I/O waits:
This is a very important metric that can be used to identify and correct I/O contention. High I/O waits occur when two or more data files are being accessed simultaneously and the read-write head movement of the DASD causes the disk to wait for the completion of I/O.

Buffer Busy Waits:
A buffer busy wait occurs when an Oracle data block resides in a buffer, but the requesting program must wait to retrieve the data block.
Buffer Busy Waits can commonly occur when a table has concurrent 'UPDATE' or 'INSERT DML' and only one freelist is defined for the table.

Redo log space requests:
Redo Log space requests occur when the log buffer is too small to hold the volume of redo log activity.

Latch pin hit ratio:
The pin hit ratios for the database instance give a good idea of the sizing of the shared_pool_size parameter in the init.ora file.

Table Fetch Continued Row:
This metric gives a general indication of database migrated/chained rows. When chaining becomes excessive, additional I/O is required to service a query, and the DBA should increase PCTFREE for the table and reorganize the table to remove the chains.



Web Page Load Trend Analysis:
It's very important to see how web pages benefit from a particular DB change, or from any other change (such as code, OS, etc.).

Idea is to use IIS, Apache log file to generate webpage load report for that day.
Something like this:

Total Pages served : 22606

Pages 0 to 5 secs : 20041 (88.65%)
Pages 6 to 10 secs : 1515 (6.70%)
Pages 11 to 30 secs : xxxx (3%)
Pages more than 31 secs : xxxx (2.65%)

and Display TOP 10 pages URL's.


How to generate a web page load report from IIS log:

Step 1:
As part of this, you have to enable extended logging for cs-uri-stem and time-taken fields and also make sure the positions of the fields are 6 and 15 in IIS log file.

Step 2:
Perl Script: gen_IIS_webpage_load_report.pl (Schedule below script to run daily from Windows scheduler at 11:50pm after modifying email names, subject and log file location)

#!perl
############################################################
# File Name : gen_IIS_webpage_load_report.pl
# Purpose   : Parse today's IIS extended log, bucket page
#             response times, find the top 10 slowest pages
#             and mail the result as an HTML report.
# Schedule  : daily at 11:50pm from the Windows scheduler.
#
# NOTE(review): the diamond reads (<LOG>, <TMPFILE0>, ...)
# and several literals were stripped from the published
# listing; they are restored/repaired below.
############################################################

use Time::localtime;
use Net::SMTP;
use Mail::Sender;

# --- Build a zero-padded yymmdd stamp matching the IIS log name ---
my $month = localtime->mon + 1;
my $day = localtime->mday;
my $year = localtime->year + 1900;
$year = sprintf("%02d", $year % 100);   # two-digit year
my $hour = localtime->hour;
my $min = localtime->min;
my $second = localtime->sec;

$month = "0$month" if ($month < 10);
$day = "0$day" if ($day < 10);
$hour = "0$hour" if ($hour < 10);
$min = "0$min" if ($min < 10);
$second = "0$second" if ($second < 10);

my $tmpDate = `date \/T`;               # Windows "date /T" output
my $tmpTime = $hour . ":" . $min . ":" . $second;

my $dateNow = $tmpDate . $tmpTime;

my $time = $year . $month . $day;

# Today's IIS log and the scratch files used while building the report.
my $alog = "c:\\perl_scripts\\windows\\" . "ex" . $time . ".log";

my $tmpFile0 = "c:\\perl_scripts\\windows\\tmplist0." . $time;   # "uri time" pairs (.asp pages only)
my $tmpFile1 = "c:\\perl_scripts\\windows\\tmplist1." . $time;   # unused; kept so cleanup stays harmless
my $tmpFile2 = "c:\\perl_scripts\\windows\\tmplist2." . $time;   # zero-padded times for string sorting
my $tmpFile3 = "c:\\perl_scripts\\windows\\tmplist3." . $time;   # uri/time lines matching the top-10 times
my $tmpFile4 = "c:\\perl_scripts\\windows\\tmplist4." . $time . ".html";  # final HTML report
my $tmpFile5 = "c:\\perl_scripts\\windows\\tmplist5." . $time;   # persisted bucket counters
my $tmpFile6 = "c:\\perl_scripts\\windows\\tmplist6." . $time;   # the top-10 sorted times
my $tmpFile7 = "c:\\perl_scripts\\windows\\tmplist7." . $time;   # "paddedtime:uri" pairs for final sort

# Response-time bucket counters.
# NOTE(review): the thresholds below (500, 1000, ...) are labelled
# "0 to 5 secs", "6 to 10 secs" in the report -- confirm the unit of
# the IIS time-taken field in your log configuration.
my $plusZero = 0;
my $plusFive = 0;
my $plusTen = 0;
my $plusFifteen = 0;
my $plusTwenty = 0;
my $plusTwentyFive = 0;
my $plusThirty = 0;
my $pageCnt = 0;
my $lineCnt = 0;

open (TMPFILE5, ">>$tmpFile5") or die "Cannot open $tmpFile5: $!";
open (TMPFILE0, ">$tmpFile0") or die "Cannot open $tmpFile0: $!";
open (LOG, "$alog") or die "Cannot open $alog: $!";
while (<LOG>) {
my $line = $_;
chomp $line;
$lineCnt++;
($date,$fileTime,$sitname,$computerName,$sip,$method,$uriStem,$query,$port,$username,$cip,$csVersion,$csUserAgent,$csCookie,$csReferer,$csHost,$scStatus,$subStatus,$win32Status,$scBytes,$csBytes,$timeTaken) = split(/\s+/, $line);
# Line 4 of an IIS log is the "#Fields:" header; abort (with an email
# notification) if time-taken is not the last logged field, because the
# split above depends on the field positions.
if ($lineCnt == 4) {
if ($line !~ /time-taken$/) {
sendMailMessage("message.xyzCompanyname.com", "firstname.lastname\@xyzCompanyname.com", "firstname.lastname\@xyzCompanyname.com", "$alog is not formatted as expected. Time taken field in $alog is blank.\n", "XYZsite Prod Report - Log not formatted correctly");
exit 1;
}
}
# Keep only .asp pages for the slow-page list (dot escaped: the
# published version used m/.asp/, which matched any character).
if ($uriStem =~ m/\.asp/) {
print TMPFILE0 $uriStem . " " . $timeTaken . "\n";
}
$pageCnt++;

# Bucket the page time.
if ($timeTaken < 500) {
$plusZero++;
} elsif ($timeTaken < 1000) {
$plusFive++;
} elsif ($timeTaken < 1500) {
$plusTen++;
} elsif ($timeTaken < 2000) {
$plusFifteen++;
} elsif ($timeTaken < 2500) {
$plusTwenty++;
} elsif ($timeTaken < 3000) {
$plusTwentyFive++;
} elsif ($timeTaken >= 3000) {
$plusThirty++;
}
}

# Persist the counters so the report stage can read them back.
print TMPFILE5 "PAGECNT:$pageCnt\n";
print TMPFILE5 "PLUSZERO:$plusZero\n";
print TMPFILE5 "PLUSFIVE:$plusFive\n";
print TMPFILE5 "PLUSTEN:$plusTen\n";
print TMPFILE5 "PLUSFIFTEEN:$plusFifteen\n";
print TMPFILE5 "PLUSTWENTY:$plusTwenty\n";
print TMPFILE5 "PLUSTWENTYFIVE:$plusTwentyFive\n";
print TMPFILE5 "PLUSTHIRTY:$plusThirty\n";

close(LOG);
close(TMPFILE0);
close(TMPFILE5);

# --- Collect the distinct times, zero-padded so a string sort ranks them ---
open (TMPFILE0, "$tmpFile0") or die "Cannot open $tmpFile0: $!";
open (TMPFILE2, ">$tmpFile2") or die "Cannot open $tmpFile2: $!";

my @unique = ();
my @long = ();

while (<TMPFILE0>) {
my $line2 = $_;
chomp($line2);
($uriStem, $timeTaken) = split(/\s+/, $line2);

# %i keys on the raw line, so each distinct uri+time pair counts once.
unless($i{$_}++) {
push(@long, $timeTaken);
}
}

foreach $time3 (@long) {
printf TMPFILE2 ("%09d", $time3);
print TMPFILE2 "\n";
}
close(TMPFILE2);

open (TMPFILE2, "$tmpFile2") or die "Cannot open $tmpFile2: $!";
my @long2 = <TMPFILE2>;
close(TMPFILE2);

# Zero-padded strings sort correctly as text; reverse => slowest first.
@sorted = reverse sort @long2;

my $count = 0;

open (TMPFILE6, ">$tmpFile6") or die "Cannot open $tmpFile6: $!";
foreach $sortedTime (@sorted) {
$count++;
if ($count <= 10) {
$sortedTime =~ s/^0*//;   # strip the padding back off
print TMPFILE6 $sortedTime;
}
}
close(TMPFILE6);
close(TMPFILE0);

open (TMPFILE6, "$tmpFile6") or die "Cannot open $tmpFile6: $!";
my @sortedTimes = <TMPFILE6>;
close(TMPFILE6);

# --- Pull the uri/time lines whose time is one of the top 10 ---
my @maxTime = ();
open (TMPFILE3, ">$tmpFile3") or die "Cannot open $tmpFile3: $!\n";
open (TMPFILE0, "$tmpFile0") or die "Cannot open $tmpFile0: $!";
my $match = "";

while (<TMPFILE0>) {
my $line2 = $_;
chomp($line2);
foreach $num (@sortedTimes) {
# $_ still carries its newline, matching the newline kept in $num.
if (/$num/) {
$match = $line2;
print TMPFILE3 "$match\n";
}
}
}

close(TMPFILE0);
close(TMPFILE3);

# --- Re-key the matches as "paddedtime:uri" for the final string sort ---
my @temp = ();
my @sortTemp = ();
open (TMPFILE7, ">$tmpFile7") or die "Cannot open $tmpFile7: $!";
open (TMPFILE3, "$tmpFile3") or die "Cannot open $tmpFile3: $!";
while (<TMPFILE3>) {
my $line4 = $_;
chomp($line4);   # fixed: the published version chomped $line by mistake
($uri, $ttaken) = split(/\s+/, $line4);
$ttaken2 = sprintf("%09d", $ttaken);
print TMPFILE7 "$ttaken2:$uri\n";
}
close(TMPFILE7);
close(TMPFILE3);

open (TMPFILE7, "$tmpFile7") or die "Cannot open $tmpFile7: $!";
@temp = <TMPFILE7>;
@sortTemp = reverse sort @temp;
close(TMPFILE7);

# --- Read the counters back from the scratch file ---
open (TMPFILE5, "$tmpFile5") or die "Cannot open $tmpFile5: $!";
while (<TMPFILE5>) {
my $line3 = $_;
chomp($line3);
($label, $field) = split (/:/, $line3);
if ($label eq "PAGECNT") {
$pageCnt = $field;
} elsif ($label eq "PLUSZERO") {
$plusZero = $field;
} elsif ($label eq "PLUSFIVE") {
$plusFive = $field;
} elsif ($label eq "PLUSTEN") {
$plusTen = $field;
} elsif ($label eq "PLUSFIFTEEN") {
$plusFifteen = $field;
} elsif ($label eq "PLUSTWENTY") {
$plusTwenty = $field;
} elsif ($label eq "PLUSTWENTYFIVE") {
$plusTwentyFive = $field;
} elsif ($label eq "PLUSTHIRTY") {
$plusThirty = $field;
}
}

close(TMPFILE5);

# Percentage of total pages in each bucket.
my $plusZeroCalc = ($plusZero/$pageCnt) * 100;
my $plusFiveCalc = ($plusFive/$pageCnt) * 100;
my $plusTenCalc = ($plusTen/$pageCnt) * 100;
my $plusFifteenCalc = ($plusFifteen/$pageCnt) * 100;
my $plusTwentyCalc = ($plusTwenty/$pageCnt) * 100;   # fixed: was $plusTwenyCalc, never read
my $plusTwentyFiveCalc = ($plusTwentyFive/$pageCnt) * 100;
my $plusThirtyCalc = ($plusThirty/$pageCnt) * 100;

# --- Emit the HTML report ---
open (TMPFILE4, ">>$tmpFile4") or die "Cannot open $tmpFile4: $!";
print TMPFILE4 "Content-Type: text/html; charset=\"US-ASCII\"";

print TMPFILE4 "<HTML>

Total Pages served :$pageCnt
";
print TMPFILE4 "
Pages 0 to 5 secs :$plusZero (";
printf TMPFILE4 ("%.2f",$plusZeroCalc);
print TMPFILE4 "\%)\n";
print TMPFILE4 "
Pages 6 to 10 secs :$plusFive (";
printf TMPFILE4 ("%.2f",$plusFiveCalc);
print TMPFILE4 "\%)\n";
print TMPFILE4 "
Pages 11 to 15 secs :$plusTen (";
printf TMPFILE4 ("%.2f",$plusTenCalc);
print TMPFILE4 "\%)\n";
print TMPFILE4 "
Pages 16 to 20 secs :$plusFifteen (";
printf TMPFILE4 ("%.2f",$plusFifteenCalc);
print TMPFILE4 "\%)\n";
print TMPFILE4 "
Pages 21 to 25 secs :$plusTwenty (";
printf TMPFILE4 ("%.2f",$plusTwentyCalc);
print TMPFILE4 "\%)\n";   # fixed: the published version omitted the ")"
print TMPFILE4 "
Pages 26 to 30 secs :$plusTwentyFive (";
printf TMPFILE4 ("%.2f",$plusTwentyFiveCalc);
print TMPFILE4 "\%)\n";
print TMPFILE4 "
Pages +31 secs :$plusThirty (";
printf TMPFILE4 ("%.2f",$plusThirtyCalc);
print TMPFILE4 "\%)\n";

print TMPFILE4 "

Top 10 Slow XYZsite Production Web Pages for $dateNow

";
foreach my $line5 (@sortTemp) {
($ttake, $url) = split(/:/, $line5);
my $tt = $ttake;
$tt =~ s/^0*//;   # un-pad for display
print TMPFILE4 "URL: $url\n";
print TMPFILE4 "
MilliSeconds: $tt\n

";
}
close(TMPFILE4);

# Mail the report as an attachment (Mail::Sender takes a hashref of
# options; the published listing had stray array brackets here).
$sender = new Mail::Sender {from => 'firstname.lastname@xyzCompanyname.com', to => 'firstname.lastname@xyzCompanyname.com'};
(ref ($sender->MailFile( {from => 'firstname.lastname@xyzCompanyname.com', to => 'firstname.lastname@xyzCompanyname.com, firstname2.lastname2@xyzCompanyname.com', smtp=> 'message.xyzCompanyname.com', subject=>"XYZsite Prod Web Page Report - $tmpDate", msg => "XYZsite Prod Web Page Report - $tmpDate\n\n", file => "$tmpFile4"})) and print "Mail sent OK.") or die "$Mail::Sender::Error\n";

# Remove the scratch files.
unlink "$tmpFile0";
unlink "$tmpFile1";
unlink "$tmpFile2";
unlink "$tmpFile3";
unlink "$tmpFile4";
unlink "$tmpFile5";
unlink "$tmpFile6";
unlink "$tmpFile7";

# Send a short plain-text notification through the given SMTP relay.
# Args: server name, from address, to address, message body, subject.
sub sendMailMessage {
my ($serverName, $fromAddress, $toAddress, $theMessage, $theSubject) = @_;
$smtp = Net::SMTP->new($serverName);
$smtp->mail($fromAddress);
$smtp->to($toAddress);
$smtp->data();
# Minimal headers: To and Subject, blank line, then the body.
$smtp->datasend("To: $toAddress");
$smtp->datasend("\n");
$smtp->datasend("Subject: $theSubject \n");
$smtp->datasend("\n");
$smtp->datasend($theMessage);
$smtp->dataend();
$smtp->quit;
}


How to generate a web page load report from Apache log:

Step 1:
Add the line below to the Apache configuration file.

LogFormat "%h %l %u %t \"%r\" %>s %b \"%{User-Agent}i\" %T"
Note the %T at the end. This is where those #'s come from. The "normal" apache config doesn't usually include that %T.

Step 2:
Shell Script: gen_apache_webpage_load_report.sh (Schedule the script below to run daily from cron at 11:50pm after modifying email names, subject and log file location)

$ cat gen_apache_webpage_load_report.sh

#!/bin/ksh
############################################################
# File Name : gen_apache_webpage_load_report.sh
# Author : Vijay Dumpa
# Usage : gen_apache_webpage_load_report.sh
#
# Purpose : Build a daily page load-time report from the
#           Apache access log (LogFormat must end with %T,
#           the request time in seconds) and mail it.
#
# Modification History
# Who When What
# -------- -------- ----------------------------------------
# review            escaped the dots in the egrep pattern,
#                   repaired the "<HTML>" literal, replaced
#                   `expr` with ksh arithmetic
#############################################################
##
# Set up environment for the job
##
#set -x
TMPFILE0=/tmp/tmplist0.$$   # "seconds url" pairs, sorted by url then time desc
TMPFILE1=/tmp/tmplist1.$$   # top 25 slowest urls, one line each
TMPFILE2=/tmp/tmplist2.$$   # all pairs, slowest first
TMPFILE3=/tmp/tmplist3.$$   # "times...:url" lines for the report
TMPFILE4=/tmp/tmplist4.$$   # the assembled report

ALOG=/local/apache/logs/https/access_log.`date +%y%m%d`
MAILEES=firstname.lastname@xyzCompanyname.com

PLUSZERO=0
PLUSFIVE=0
PLUSTEN=0
PLUSFIFTEEN=0
PLUSTWENTY=0
PLUSTWENTYFIVE=0
PLUSTHIRTY=0
PAGECNT=0

# Filter static assets out of the Apache access log, keep the %T time
# (last field) and the URL (7th field, query string stripped), then sort
# by URL and, within a URL, by time descending.
egrep -v '\.gif|\.css|\.js|\.jpg|\.htm|\.bmp' $ALOG | awk '{print $NF, " ", $7}' | awk -F\? '{print $1}' | sort -k 2,2 -k 1,1rn > $TMPFILE0

# Count every page view into 5-second buckets.
exec 3<&0 < $TMPFILE0
while read -r INPUT
do
PAGECNT=$((PAGECNT + 1))
NUM=`echo $INPUT | awk '{print $1}'`

if [ $NUM -lt 6 ]; then
PLUSZERO=$((PLUSZERO + 1))
elif [ $NUM -lt 11 ]; then
PLUSFIVE=$((PLUSFIVE + 1))
elif [ $NUM -lt 16 ]; then
PLUSTEN=$((PLUSTEN + 1))
elif [ $NUM -lt 21 ]; then
PLUSFIFTEEN=$((PLUSFIFTEEN + 1))
elif [ $NUM -lt 26 ]; then
PLUSTWENTY=$((PLUSTWENTY + 1))
elif [ $NUM -lt 31 ]; then
PLUSTWENTYFIVE=$((PLUSTWENTYFIVE + 1))
elif [ $NUM -ge 31 ]; then
PLUSTHIRTY=$((PLUSTHIRTY + 1))
fi
done
exec 0<&3 3<&-

# Keep only the highest time for each URL and take the top 25.
uniq -f 1 $TMPFILE0 | sort -rn | head -25 > $TMPFILE1

# Sort the full list of URL/time pairs, slowest first.
sort -rn $TMPFILE0 > $TMPFILE2

# For each of the top-25 URLs, gather up to 25 of its recorded timings.
exec 3<&0 < $TMPFILE1
while read -r INPUT
do
TMP1=`echo $INPUT | awk '{print $2}'`
TMP2=`grep " $TMP1" $TMPFILE2 | awk '{printf "%s, ",$1}'`
TMP3=`echo $TMP2 | cut -c 1-120 | awk -F, '{print $1" "$2" "$3" "$4" "$5" "$6" "$7" "$8" "$9" "$10" "$11" "$12" "$13" "$14" "$15" "$16" "$17" "$18" "$19" "$20" "$21" "$22" "$23" "$24" "$25}'`
echo $TMP3:$TMP1 >> $TMPFILE3
done
exec 0<&3 3<&-

# Assemble the mail: headers, the bucket summary, then the slow-page list.
echo "Subject:"XYZsite" Prod web page report - `date`" > $TMPFILE4
echo "Content-Type: text/html; charset=\"US-ASCII\"" >> $TMPFILE4

echo "<HTML>Total Pages served :$PAGECNT\n
" >> $TMPFILE4
echo "
Pages 0 to 5 secs :$PLUSZERO (`echo $PLUSZERO $PAGECNT | awk '{printf "%.2f",(($1/$2)*100)}'`%)" >> $TMPFILE4
echo "
Pages 6 to 10 secs :$PLUSFIVE (`echo $PLUSFIVE $PAGECNT | awk '{printf "%.2f",(($1/$2)*100)}'`%)" >> $TMPFILE4
echo "
Pages 11 to 15 secs :$PLUSTEN (`echo $PLUSTEN $PAGECNT | awk '{printf "%.2f",(($1/$2)*100)}'`%)" >> $TMPFILE4
echo "
Pages 16 to 20 secs :$PLUSFIFTEEN (`echo $PLUSFIFTEEN $PAGECNT | awk '{printf "%.2f",(($1/$2)*100)}'`%)" >> $TMPFILE4
echo "
Pages 21 to 25 secs :$PLUSTWENTY (`echo $PLUSTWENTY $PAGECNT | awk '{printf "%.2f",(($1/$2)*100)}'`%)" >> $TMPFILE4
echo "
Pages 26 to 30 secs :$PLUSTWENTYFIVE (`echo $PLUSTWENTYFIVE $PAGECNT | awk '{printf "%.2f",(($1/$2)*100)}'`%)" >> $TMPFILE4
echo "
Pages +31 secs :$PLUSTHIRTY (`echo $PLUSTHIRTY $PAGECNT | awk '{printf "%.2f",(($1/$2)*100)}'`%)\n" >> $TMPFILE4

echo "

Top 10 Page times of the top 25 Slow "XYZsite" Production Web Pages for `date`

" >> $TMPFILE4

# Re-sort the list with the slowest pages first and format each entry.
sort -rn $TMPFILE3 | awk -F: '{print "URL: "$2"\n
Seconds:
\n"$1"\n

"}' >> $TMPFILE4

/usr/lib/sendmail $MAILEES < $TMPFILE4

rm $TMPFILE0 $TMPFILE1 $TMPFILE2 $TMPFILE3 $TMPFILE4