[NCLUG] July 1, 2003 talk - Linux System Performance
jbass at dmsd.com
jbass at dmsd.com
Thu Jun 26 06:35:38 MDT 2003
Mucho thanks to mike at verinet.com and jens001 at attbi.com for help
in debugging the counter overflow problem in mbench's runtime predictor.
Getting counters cast to unsigned long, plus an inversion seems
to get the right results out of gcc so that cron and other light
background activity get reasonably averaged while the test is running.
Have fun!
John
Another sample:
Dual Intel 850MHz PIII w/256KB L2 cache 100MHz FSB,
Tyan MB with IBM registered ECC 512MB PC100 SDRAM
Linux reports:
Processor #0 Pentium(tm) Pro APIC version 17
Processor #1 Pentium(tm) Pro APIC version 17
Processors: 2
Initializing CPU#0
Detected 847.202 MHz processor.
Memory: 510756k/524288k available (1488k kernel code, 10968k reserved, 1099k data, 156k init, 0k highmem)
CPU: L1 I cache: 16K, L1 D cache: 16K
CPU: L2 cache: 256K
CPU: After generic, caps: 0383fbff 00000000 00000000 00000000
Interesting points to consider are the nature of the random/sequential
cache transitions, the performance penalty for working sets which
exceed L1/L2 cache, and the performance penalty on large working sets
for random vs. sequential access and the impact that has on common
programming methodology.
Looking forward to Mike Loseke's and Mike Jensen's GHz machine benchmarks,
as well as any fast non-Pentium numbers from Alpha, PA-RISC, SPARC or MIPS
platforms - especially big iron CPU farms with crossbar memory.
# gcc -O mbench.c -o mbench
# ./mbench
Mbench by John. L. Bass, DMS Design copyright 1985-1996
You are free to copy and use this program providing configuration info
and results are shared with the author by email to jbass at dmsd.com
Counts are per clock tick, presumed to be HZ=100
SetSize Random Sequential
-------- ------------- ------------- 0% 25% 50% 75% 100%
1024 2674680 100% 3441307 100% | | | |* @
1536 2672016 99% 3441312 100% | * @
2048 2680040 100% 3434443 99% | | | |* @
3072 2682715 100% 3434432 99% | * @
4096 2682720 100% 3441315 100% | | | |* @
6144 2677365 100% 3441312 100% | * @
8192 2674701 100% 3444757 100% | | | |* @
12288 2680048 100% 3434465 99% | * @
16384 2682747 100% 3434464 99% | | | |* @
24576 2421493 90% 1796439 52% | @ *
32768 2229760 83% 1798246 52% | | |@ * | |
49152 1999598 74% 1796444 52% | @ *
65536 1904464 71% 1778661 51% | | |@* | |
98304 1769603 66% 1802083 52% | @
131072 1714729 64% 1798483 52% | | *@ | |
196608 984215 36% 680905 19% | @ *
262144 645002 24% 496545 14% | @ * | | | |
393216 349373 13% 277131 8% | @
524288 231375 8% 239234 6% | @ | | | |
786432 152112 5% 239952 6% | *@
1048576 125502 4% 239936 6% | *@ | | | |
1572864 105364 3% 239936 6% |* @
2097152 96575 3% 240176 6% |* @ | | | |
3145728 90004 3% 240192 6% |* @
4194304 86888 3% 240192 6% |* @ | | | |
6291456 84105 3% 240192 6% |* @
8388608 83017 3% 239713 6% |* @ | | | |
12582912 81221 3% 240192 6% |* @
16777216 79952 2% 241157 7% |* @ | | | |
------------------------ mbench.c: gcc -O mbench.c -o mbench ---------------
/*
* mbench
* John L. Bass, DMS Design, Copyright 1984-1996
*/
#include <sys/types.h>
#include <sys/times.h>
#include <unistd.h>
#include <limits.h>
#include <stdio.h>
/*
 * CLK_TCK is the number of clock ticks per second; the timing code divides
 * differences between times() return values by it to get seconds.  If none
 * of the headers included above define it, fall back to 100 ticks per
 * second, which is the value main() reports as the presumed HZ.
 */
#ifndef CLK_TCK
#define CLK_TCK 100L
#endif
/*
 * Working set shared by all tests: 4M unsigned ints (a 16 MB span when
 * unsigned int is 4 bytes).  main() uses sizeof data as the largest set
 * size it measures.
 */
unsigned int data[1024*1024*4]; /* 16mb span */
/*
 * Memory access - random - exercise L1, L2 cache plus TLB's.
 *
 * Each pass of the loop follows a chain of 32 dependent loads through
 * data[]: the first index is count % words and every value loaded becomes
 * the index of the next load.  The chain only stays inside the working set
 * when data[] holds indices below the word count, which is what
 * init_data() stores when it is called with the same size.
 *
 * count - number of 32-load chains to run
 * size  - working set size in bytes (converted to unsigned int words here)
 *
 * Returns 0; the value carries no information, callers ignore it.
 *
 * NOTE: this name is also used by POSIX for a function declared in
 * <stdlib.h>, so that header cannot be included in this file while this
 * definition keeps its name.
 */
int
random(unsigned long count, unsigned long size)
{
    unsigned long words = size / sizeof (unsigned int);

    /* fewer bytes than one word would make count % words divide by zero */
    if (words == 0)
        return 0;

    /*
     * The comparison keeps the result of the load chain in use.  Indices
     * written by init_data() are below the word count of data[], so the
     * value 0xfffffff is not expected to appear and end the loop early.
     */
    while (count-- > 0 && 0xfffffff !=
        data[data[data[data[data[data[data[data[
        data[data[data[data[data[data[data[data[
        data[data[data[data[data[data[data[data[
        data[data[data[data[data[data[data[data[
            count % words
        ]]]]]]]]
        ]]]]]]]]
        ]]]]]]]]
        ]]]]]]]])
        ;

    return 0;
}
/*
 * Memory access - sequential
 *
 * data[] is viewed as an array of records of eight unsigned ints each
 * (presumably one cache line per record - 32 bytes with 4-byte ints).
 * Each pass of the loop picks the group of 32 consecutive records numbered
 * count % groups and reads the first word of every record in it.
 *
 * count - number of 32-record passes to run
 * size  - working set size in bytes; only whole groups of 32 records are
 *         used, so any remainder of the size is ignored
 *
 * The loop also stops early if all 32 words read in one pass are zero.
 *
 * Returns 0; the value carries no information, callers ignore it.
 */
int
sequential(unsigned long count, unsigned long size)
{
    struct cache_lines {
        unsigned int data;      /* the only word that is read */
        unsigned int fill[7];   /* padding up to the record size */
    } *linep, *cachep = (struct cache_lines *) data;
    unsigned long groups = size / ((sizeof (struct cache_lines)) * 32);

    /* less than one full group would make count % groups divide by zero */
    if (groups == 0)
        return 0;

    /* the 32 reads are written out in full so each pass is one expression */
    while (count-- > 0 && (linep = &cachep[(count % groups) * 32]) && (
        linep[0x00].data | linep[0x01].data | linep[0x02].data | linep[0x03].data |
        linep[0x04].data | linep[0x05].data | linep[0x06].data | linep[0x07].data |
        linep[0x08].data | linep[0x09].data | linep[0x0a].data | linep[0x0b].data |
        linep[0x0c].data | linep[0x0d].data | linep[0x0e].data | linep[0x0f].data |
        linep[0x10].data | linep[0x11].data | linep[0x12].data | linep[0x13].data |
        linep[0x14].data | linep[0x15].data | linep[0x16].data | linep[0x17].data |
        linep[0x18].data | linep[0x19].data | linep[0x1a].data | linep[0x1b].data |
        linep[0x1c].data | linep[0x1d].data | linep[0x1e].data | linep[0x1f].data
        ) != 0)
        ;

    return 0;
}
/*
 * init_data - fill the leading part of data[] with pseudo-random word
 * indices, each one below the number of words being initialised.
 *
 * bytes - size of the region to initialise, in bytes; values larger than
 *         data[] are clamped to the size of the array
 *
 * The generator is re-seeded with the same constant on every call, so the
 * sequence written is the same each time for a given C library.
 *
 * Returns 0; the value carries no information, callers ignore it.
 */
int
init_data(int bytes)
{
    /*
     * Declared here instead of including <stdlib.h>: on POSIX systems that
     * header also declares random(), which would clash with the random()
     * defined in this file.
     */
    extern int rand(void);
    extern void srand(unsigned int seed);

    const unsigned long capacity = sizeof data / sizeof data[0];
    unsigned long words;
    unsigned int *word, *end;

    srand(1234567);
    if (bytes <= 0)
        return 0;

    words = (unsigned long) bytes / sizeof (unsigned int);
    if (words > capacity)
        words = capacity;       /* never write past the end of data[] */

    /* with words == 0 the loop body, and so the modulo, is never reached */
    end = data + words;
    for (word = data; word < end; word++)
        *word = (unsigned int) ((unsigned long) rand() % words);

    return 0;
}
/*
 * Iterate random runs to find the count which matches the number of
 * seconds requested.
 *
 * The iteration count and elapsed ticks of the previous run are kept in
 * static storage, so every call starts from the rate measured last time.
 * Each attempt scales the count to the per-tick rate times CLK_TCK times
 * seconds, runs random(), and measures elapsed real time from the return
 * value of times().  Attempts repeat until the elapsed time, rounded to
 * whole seconds, equals the request.
 *
 * seconds - target run time in seconds; expected to be positive
 * size    - working set size in bytes; data[] is re-initialised for it
 *
 * Returns the number of memory accesses (32 per iteration) per clock tick,
 * rounded to nearest.
 */
unsigned long
do_random(int seconds, int size)
{
    static unsigned int first_time = 1;
    static unsigned long current, count;
    struct tms cpu_times;           /* required by times(); contents unused */
    clock_t real_start, real_stop;
    unsigned long wanted = (unsigned long) seconds;
    unsigned long rate;

    if (first_time) {
        current = CLK_TCK;
        count = 100000;
        first_time = 0;
    }

    init_data(size);

    do {
        rate = (count + (current / 2L)) / current;
        /* a rate rounded down to zero would pin count at zero for good */
        if (rate == 0)
            rate = 1;
        count = rate * CLK_TCK * wanted;

        real_start = times(&cpu_times);
        random(count, (unsigned long) size);
        real_stop = times(&cpu_times);

        current = (unsigned long) (real_stop - real_start);
        if (current == 0)
            current = 1;            /* avoid dividing by zero ticks */
    } while (((current + CLK_TCK / 2) / CLK_TCK) != wanted);

    /*
     * Same value as (32 * count + current / 2) / current, but split into
     * quotient and remainder so 32 * count cannot overflow a 32-bit
     * unsigned long.
     */
    return 32L * (count / current)
        + (32L * (count % current) + current / 2L) / current;
}
/*
 * Iterate sequential runs to find the count which matches the number of
 * seconds requested.
 *
 * The iteration count and elapsed ticks of the previous run are kept in
 * static storage, so every call starts from the rate measured last time.
 * Each attempt scales the count to the per-tick rate times CLK_TCK times
 * seconds, runs sequential(), and measures elapsed real time from the
 * return value of times().  Attempts repeat until the elapsed time,
 * rounded to whole seconds, equals the request.
 *
 * data[] is not initialised here; whatever an earlier call left in it is
 * read as is.
 *
 * seconds - target run time in seconds; expected to be positive
 * size    - working set size in bytes
 *
 * Returns the number of memory accesses (32 per iteration) per clock tick,
 * rounded to nearest.
 */
unsigned long
do_sequential(int seconds, int size)
{
    static unsigned int first_time = 1;
    static unsigned long current, count;
    struct tms cpu_times;           /* required by times(); contents unused */
    clock_t real_start, real_stop;
    unsigned long wanted = (unsigned long) seconds;
    unsigned long rate;

    if (first_time) {
        current = CLK_TCK;
        count = 100000;
        first_time = 0;
    }

    do {
        rate = (count + (current / 2L)) / current;
        /* a rate rounded down to zero would pin count at zero for good */
        if (rate == 0)
            rate = 1;
        count = rate * CLK_TCK * wanted;

        real_start = times(&cpu_times);
        sequential(count, (unsigned long) size);
        real_stop = times(&cpu_times);

        current = (unsigned long) (real_stop - real_start);
        if (current == 0)
            current = 1;            /* avoid dividing by zero ticks */
    } while (((current + CLK_TCK / 2) / CLK_TCK) != wanted);

    /*
     * Same value as (32 * count + current / 2) / current, but split into
     * quotient and remainder so 32 * count cannot overflow a 32-bit
     * unsigned long.
     */
    return 32L * (count / current)
        + (32L * (count % current) + current / 2L) / current;
}
/*
 * Search memory speed between 1K bytes and sizeof data (16M bytes).
 */

/* width in characters of the plot drawn at the end of each report row */
enum { MBENCH_PLOT_WIDTH = 46 };

/* val as a whole percentage of high; 0 when there is no baseline yet */
static unsigned long
mbench_percent(unsigned long val, unsigned long high)
{
    return high != 0 ? val * 100 / high : 0;
}

/*
 * Plot column for val on a scale where high lands on the 100% mark.
 * Results above the scale are clamped to the last column so a run that
 * beats the baseline cannot index past the end of the plot buffer.
 */
static unsigned long
mbench_column(unsigned long val, unsigned long high)
{
    unsigned long col;

    if (high == 0)
        return 0;
    col = (val * (MBENCH_PLOT_WIDTH - 2) + high / 2) / high;
    return col < MBENCH_PLOT_WIDTH ? col : MBENCH_PLOT_WIDTH - 1;
}

/*
 * Measure one set size and print its report row: size, random count and
 * percentage, sequential count and percentage, then the plot with '*' for
 * random and '@' for sequential.
 *
 * grid           - nonzero draws the 25/50/75/100% marks, zero only the 0% mark
 * rhigh, shigh   - baselines for the percentages; each is set from the
 *                  first measurement made while it is still zero
 */
static void
mbench_row(unsigned int size, int seconds, int grid,
           unsigned long *rhigh, unsigned long *shigh)
{
    char plot[MBENCH_PLOT_WIDTH];
    unsigned long rval, sval, phigh;
    int i, pct;

    for (i = 0; i < MBENCH_PLOT_WIDTH; i++)
        plot[i] = ' ';
    plot[0] = '|';
    if (grid) {
        for (pct = 25; pct <= 100; pct += 25)
            plot[(pct * (MBENCH_PLOT_WIDTH - 2)) / 100] = '|';
    }

    /* flush before each timed run so partial rows show up while it runs */
    printf("%8u ", size); fflush(stdout);

    rval = do_random(seconds, (int) size);
    printf("%8lu ", rval);
    if (*rhigh == 0)
        *rhigh = rval;
    printf("%3lu%% ", mbench_percent(rval, *rhigh)); fflush(stdout);

    sval = do_sequential(seconds, (int) size);
    printf("%8lu ", sval);
    if (*shigh == 0)
        *shigh = sval;
    printf("%3lu%% ", mbench_percent(sval, *shigh));

    /* both curves share one scale: the larger of the two baselines */
    phigh = *rhigh > *shigh ? *rhigh : *shigh;
    plot[mbench_column(rval, phigh)] = '*';
    plot[mbench_column(sval, phigh)] = '@';

    /* plot[] has no terminator; the precision limits how much is printed */
    printf("%*.*s\n", MBENCH_PLOT_WIDTH, MBENCH_PLOT_WIDTH, plot);
    fflush(stdout);
}

/*
 * Usage: mbench [seconds]
 *
 * seconds is the run time of every individual measurement (default 10).
 * Set sizes start at 1024 bytes and double up to sizeof data, with one
 * extra row at 1.5 times each size while that still fits.
 *
 * Returns 0 on success, 1 when the seconds argument is not a positive
 * integer.
 */
int
main(int argc, char *argv[])
{
    unsigned int size;
    unsigned long rhigh = 0, shigh = 0;
    int seconds = 10;

    /* a run time of zero would leave every baseline zero: reject it */
    if (argc > 1 && (sscanf(argv[1], "%d", &seconds) != 1 || seconds <= 0)) {
        fprintf(stderr, "usage: %s [seconds]\n", argv[0]);
        return 1;
    }

    printf("Mbench by John. L. Bass, DMS Design copyright 1985-1996\n");
    printf("You are free to copy and use this program providing configuration info\n");
    printf("and results are shared with the author by email to jbass at dmsd.com\n\n");
    printf("Counts are per clock tick, presumed to be HZ=%ld\n", (long) CLK_TCK);
    printf(" SetSize Random Sequential\n");
    printf("-------- ------------- ------------- 0%% 25%% 50%% 75%% 100%%\n");

    /* two short runs over the whole array set the initial iteration counts */
    do_random(2, (int) sizeof data);
    do_sequential(2, (int) sizeof data);

    for (size = 1024; size <= sizeof data; size <<= 1) {
        mbench_row(size, seconds, 1, &rhigh, &shigh);
        if ((size + size / 2) <= sizeof data)
            mbench_row(size + size / 2, seconds, 0, &rhigh, &shigh);
    }
    return 0;
}
More information about the NCLUG
mailing list