[NCLUG] July 1, 2003 talk - Linux System Performance

jbass at dmsd.com jbass at dmsd.com
Thu Jun 26 06:35:38 MDT 2003


Mucho thanks to mike at verinet.com and jens001 at attbi.com for help
in debugging the counter overflow problem in mbench's runtime predictor.

Getting counters cast to unsigned long, plus an inversion seems
to get the right results out of gcc so that cron and other light
background activity get reasonably averaged while the test is running.

Have fun!
John

Another sample:

	Dual Intel 850MHz PIII w/256KB L2 cache 100MHz FSB,
	Tyan MB with IBM registered ECC 512MB PC100 SDRAM

Linux reports:
	Processor #0 Pentium(tm) Pro APIC version 17
	Processor #1 Pentium(tm) Pro APIC version 17
	Processors: 2
	Initializing CPU#0
	Detected 847.202 MHz processor.
	Memory: 510756k/524288k available (1488k kernel code, 10968k reserved, 1099k data, 156k init, 0k highmem)
	CPU: L1 I cache: 16K, L1 D cache: 16K
	CPU: L2 cache: 256K
	CPU:     After generic, caps: 0383fbff 00000000 00000000 00000000

Interesting points to consider are the nature of the random/sequential
cache transitions, the performance penalty for working sets which
exceed L1/L2 cache, and the performance penalty on large working sets
for random vs. sequential access and the impact that has on common
programming methodology.

Looking forward to Mike Loseke's and Mike Jensen's GHz machine benchmarks,
as well as any fast non-pentium numbers from Alpha, PA Risc, Sparc or Mips
platforms - especially big iron cpu farms with cross bar memory.

# gcc -O mbench.c -o mbench
# ./mbench
Mbench by John. L. Bass, DMS Design copyright 1985-1996
You are free to copy and use this program providing configuration info
and results are shared with the author by email to jbass at dmsd.com

Counts are per clock tick, presumed to be HZ=100
 SetSize         Random    Sequential
--------  ------------- ------------- 0%        25%        50%        75%       100%
    1024  2674680 100%  3441307 100% |          |          |          |*         @
    1536  2672016  99%  3441312 100% |                                 *         @
    2048  2680040 100%  3434443  99% |          |          |          |*         @
    3072  2682715 100%  3434432  99% |                                 *         @
    4096  2682720 100%  3441315 100% |          |          |          |*         @
    6144  2677365 100%  3441312 100% |                                 *         @
    8192  2674701 100%  3444757 100% |          |          |          |*         @
   12288  2680048 100%  3434465  99% |                                 *         @
   16384  2682747 100%  3434464  99% |          |          |          |*         @
   24576  2421493  90%  1796439  52% |                      @       *
   32768  2229760  83%  1798246  52% |          |          |@     *   |          |
   49152  1999598  74%  1796444  52% |                      @  *
   65536  1904464  71%  1778661  51% |          |          |@*        |          |
   98304  1769603  66%  1802083  52% |                      @
  131072  1714729  64%  1798483  52% |          |          *@         |          |
  196608   984215  36%   680905  19% |        @   *
  262144   645002  24%   496545  14% |     @ *  |          |          |          |
  393216   349373  13%   277131   8% |   @
  524288   231375   8%   239234   6% |  @       |          |          |          |
  786432   152112   5%   239952   6% | *@
 1048576   125502   4%   239936   6% | *@       |          |          |          |
 1572864   105364   3%   239936   6% |* @
 2097152    96575   3%   240176   6% |* @       |          |          |          |
 3145728    90004   3%   240192   6% |* @
 4194304    86888   3%   240192   6% |* @       |          |          |          |
 6291456    84105   3%   240192   6% |* @
 8388608    83017   3%   239713   6% |* @       |          |          |          |
12582912    81221   3%   240192   6% |* @
16777216    79952   2%   241157   7% |* @       |          |          |          |

------------------------ mbench.c:  gcc -O mbench.c -o mbench ---------------
/*
 * mbench
 * John L. Bass, DMS Design, Copyright 1984-1996
 */
#include <sys/types.h>
#include <sys/times.h>
#include <unistd.h>
#include <limits.h>
#include <stdio.h>

#ifndef CLK_TCK
#define CLK_TCK 100L
#endif

unsigned int data[1024*1024*4]; /* 16mb span */

/*
 * Memory access - random - exercise L1, L2 cache plus TLB's.
 *
 * Performs `count` iterations; each iteration follows a chain of 32
 * dependent loads through data[], starting at word (count % words) where
 * words = size / sizeof (unsigned int).
 *
 *   count - number of 32-load chains to run
 *   size  - working-set size in BYTES (converted to words below)
 *
 * The words visited are whatever init_data() last stored: every value it
 * writes is an index below the word count it was given, so a chain that
 * starts inside the initialised range stays inside it.
 *
 * Defined with a prototype so that callers passing an int size have it
 * converted to unsigned long instead of relying on matching widths.
 */
void
random(unsigned long count, unsigned long size)
{

  size /= sizeof (unsigned int);

  /* fewer than one word requested: nothing to index, and % 0 is undefined */
  if (size == 0)
    return;

  /*
   * The loaded value is compared against 0xfffffff so that the result of
   * the chain feeds the loop condition (presumably to keep the optimiser
   * from discarding the loads).  Indices written by init_data() are below
   * the 4M-word array size, so the comparison is not expected to end the
   * loop early.
   */
  while(count-- > 0 && 0xfffffff !=
    data[data[data[data[data[data[data[data[data[data[data[data[data[data[data[
    data[data[data[data[data[data[data[data[data[data[data[data[data[data[data[
    data[data[
      count%size
    ]] ]]]]]]]]]]]]]]] ]]]]]]]]]]]]]]] )
      ;
}

/*
 * Memory access - sequential
 *
 * Performs `count` iterations; each iteration reads the first word of 32
 * consecutive 32-byte records (one 1024-byte group) of data[].  The group
 * used is (count % groups), where groups = size / 1024.
 *
 *   count - number of 32-record groups to read
 *   size  - working-set size in BYTES; rounded down to whole 1024-byte
 *           groups, so e.g. 1536 exercises only the first 1024 bytes
 *
 * The loop also stops if all 32 sampled words of a group are zero, so
 * data[] must hold non-zero content (see init_data) for the full count to
 * be executed.
 *
 * Defined with a prototype so that callers passing an int size have it
 * converted to unsigned long instead of relying on matching widths.
 */
void
sequential(unsigned long count, unsigned long size)
{
    /* one record per 32 bytes: the sampled word plus 28 bytes of padding */
    struct cache_lines {
        unsigned int data;
        unsigned int fill[7];
    } *linep, *cachep = (struct cache_lines *) data;

    size /= ((sizeof (struct cache_lines)) * 32);

    /* less than one full group requested: % 0 below would be undefined */
    if (size == 0)
        return;

    /*
     * The 32 loads are OR-ed together and tested so that their result
     * feeds the loop condition.
     */
    while(count-- > 0 && (linep = &cachep[(count%size)*32]) && (
        linep[0x00].data | linep[0x01].data | linep[0x02].data | linep[0x03].data |
        linep[0x04].data | linep[0x05].data | linep[0x06].data | linep[0x07].data |
        linep[0x08].data | linep[0x09].data | linep[0x0a].data | linep[0x0b].data |
        linep[0x0c].data | linep[0x0d].data | linep[0x0e].data | linep[0x0f].data |
        linep[0x10].data | linep[0x11].data | linep[0x12].data | linep[0x13].data |
        linep[0x14].data | linep[0x15].data | linep[0x16].data | linep[0x17].data |
        linep[0x18].data | linep[0x19].data | linep[0x1a].data | linep[0x1b].data |
        linep[0x1c].data | linep[0x1d].data | linep[0x1e].data | linep[0x1f].data
    ) != 0 )
        ;
}

/*
 * init_data - fill the first `bytes` bytes of data[] with pseudo-random
 * values, each reduced modulo the number of words being filled so that
 * every stored value is a valid index into the filled range.
 *
 *   bytes - size of the region to initialise, in bytes; values <= 0 do
 *           nothing and values larger than data[] are clamped to it
 *
 * The generator is re-seeded with a fixed seed on every call, so the same
 * `bytes` always produces the same contents.
 *
 * NOTE(review): if the platform's RAND_MAX is smaller than the word count,
 * indices above RAND_MAX are never produced.
 */

/*
 * rand/srand are declared here rather than via <stdlib.h>: this file
 * defines its own random(), and on POSIX systems <stdlib.h> may declare a
 * different random(), which would conflict.
 */
extern int rand(void);
extern void srand(unsigned int seed);

void
init_data(int bytes)
{
    unsigned int *word, *end;
    unsigned long words;

    if (bytes <= 0)
        return;

    words = (unsigned long) bytes / sizeof (unsigned int);
    if (words > sizeof data / sizeof data[0])
        words = sizeof data / sizeof data[0];   /* never write past data[] */

    srand(1234567);

    /* words == 0 (bytes < one word) leaves the loop, and the %, unexecuted */
    end = data + words;
    for (word = data; word < end; word++) {
        *word = rand() % words;
    }
}

/*
 * do_random - time random() over a working set of `size` bytes.
 *
 * Re-initialises data[] for `size`, then repeatedly runs random() with an
 * iteration count scaled from the previous run until the elapsed wall
 * time reported by times(), rounded to whole seconds, equals `seconds`.
 *
 *   seconds - target run length in seconds (must be >= 1)
 *   size    - working-set size in bytes
 *
 * Returns the number of memory accesses (32 per iteration) per clock
 * tick, rounded to nearest; returns 0 without running if seconds < 1.
 *
 * `count` and `current` are static, so each call starts its calibration
 * from the figures measured by the previous call.
 */

unsigned long
do_random(int seconds, int size)
{
    clock_t	real_start,	real_stop;
    struct tms	cpu_start,	cpu_stop;
    static unsigned int first_time = 1;
    static unsigned long current, count;
    unsigned long per_tick;

    /* a target of zero or less can never be matched by the loop below */
    if(seconds < 1)
        return(0);

    if(first_time) {
        current = CLK_TCK;
        count = 100000;
        first_time = 0;
    }

    init_data(size);

    do {
	/* iterations achieved per tick last time, rounded to nearest */
	per_tick = (count+(current/2L))/current;
	/* a rate of 0 would pin count at 0 and the loop would never end */
	if(per_tick == 0) per_tick = 1;
	count = per_tick*CLK_TCK*(unsigned long)seconds;
	real_start = times(&cpu_start);
	/* explicit cast: correct whether or not random() has a prototype */
	random(count, (unsigned long)size);
	real_stop = times(&cpu_stop);
	current = (real_stop - real_start);
        if(current == 0) current = 1;	/* avoid dividing by zero ticks */
    } while (((current+CLK_TCK/2)/CLK_TCK) != (unsigned long)seconds);

    /*
     * Same value as (32*count + current/2)/current, but split into
     * quotient and remainder so 32*count cannot overflow unsigned long.
     */
    return(32L*(count/current) + (32L*(count%current)+current/2L)/current);
}

/*
 * do_sequential - time sequential() over a working set of `size` bytes.
 *
 * Repeatedly runs sequential() with an iteration count scaled from the
 * previous run until the elapsed wall time reported by times(), rounded
 * to whole seconds, equals `seconds`.
 *
 *   seconds - target run length in seconds (must be >= 1)
 *   size    - working-set size in bytes
 *
 * Returns the number of memory accesses (32 per iteration) per clock
 * tick, rounded to nearest; returns 0 without running if seconds < 1.
 *
 * Unlike do_random() this does not initialise data[]; it reads whatever
 * the array currently holds.  `count` and `current` are static, so each
 * call starts its calibration from the figures measured by the previous
 * call.
 */

unsigned long
do_sequential(int seconds, int size)
{
    clock_t	real_start,	real_stop;
    struct tms	cpu_start,	cpu_stop;
    static unsigned int first_time = 1;
    static unsigned long current, count;
    unsigned long per_tick;

    /* a target of zero or less can never be matched by the loop below */
    if(seconds < 1)
        return(0);

    if(first_time) {
        current = CLK_TCK;
        count = 100000;
        first_time = 0;
    }

    do {
	/* iterations achieved per tick last time, rounded to nearest */
	per_tick = (count+(current/2L))/current;
	/* a rate of 0 would pin count at 0 and the loop would never end */
	if(per_tick == 0) per_tick = 1;
	count = per_tick*CLK_TCK*(unsigned long)seconds;
	real_start = times(&cpu_start);
	/* explicit cast: correct whether or not sequential() has a prototype */
	sequential(count, (unsigned long)size);
	real_stop = times(&cpu_stop);
	current = (real_stop - real_start);
        if(current == 0) current = 1;	/* avoid dividing by zero ticks */
    } while (((current+CLK_TCK/2)/CLK_TCK) != (unsigned long)seconds);

    /*
     * Same value as (32*count + current/2)/current, but split into
     * quotient and remainder so 32*count cannot overflow unsigned long.
     */
    return(32L*(count/current) + (32L*(count%current)+current/2L)/current);
}

/*
 * Search memory speed for working sets from 1K bytes up to sizeof data
 * (16M bytes), stepping through each power of two and the 1.5x point
 * between powers of two.
 *
 * Usage: mbench [seconds]
 *   seconds - length of each timed run (default 10, must be >= 1)
 *
 * Each output row shows the set size, the random and sequential access
 * counts per clock tick, each as a percentage of the first (1024-byte)
 * result of its kind, and a bar with '*' for random and '@' for
 * sequential scaled against the larger of the two first results.
 */

/* value as a whole percentage of base; 0 when there is no base yet */
static unsigned long
percent_of(unsigned long value, unsigned long base)
{
    return base ? value * 100 / base : 0;
}

/*
 * Column of the bar graph for `value`, where `full_scale` maps to column
 * width-2.  Clamped to the last column: a later measurement can exceed
 * the first one that set the scale, and must not index past the buffer.
 */
static size_t
plot_column(unsigned long value, unsigned long full_scale, size_t width)
{
    unsigned long col;

    if (full_scale == 0)
        return 0;
    col = (value * (width - 2) + full_scale / 2) / full_scale;
    return col < width ? (size_t) col : width - 1;
}

int
main(int argc, char *argv[])
{
    unsigned int size, half_step, seconds = 10;
    unsigned long rhigh, shigh, rval, sval, phigh;
    size_t i;
    char plot[46];      /* not NUL-terminated; always printed with %46.46s */
    int requested;

    /* a run length below one second can never be matched by the timers */
    if (argc > 1) {
        if (sscanf(argv[1], "%d", &requested) != 1 || requested < 1) {
            fprintf(stderr, "usage: %s [seconds]  (seconds must be >= 1)\n",
                    argv[0]);
            return 1;
        }
        seconds = (unsigned int) requested;
    }

    printf("Mbench by John. L. Bass, DMS Design copyright 1985-1996\n");
    printf("You are free to copy and use this program providing configuration info\n");
    printf("and results are shared with the author by email to jbass at dmsd.com\n\n");
    printf("Counts are per clock tick, presumed to be HZ=%ld\n", (long) CLK_TCK);
    printf(" SetSize         Random    Sequential\n");
    printf("--------  ------------- ------------- 0%%        25%%        50%%        75%%       100%%\n");

    /*
     * Untimed-for-output 2 second passes over the whole array: results
     * are discarded.  do_random() initialises all of data[] here, and both
     * functions keep their calibrated counts for the runs that follow.
     */
    do_random(2, (int) sizeof data);
    do_sequential(2, (int) sizeof data);
    rhigh = shigh = phigh = 0;

    for (size = 1024; size <= sizeof data; size <<= 1) {
        /* power-of-two row: blank bar with a grid mark every 25% */
        for (i = 0; i < sizeof plot; i++)
            plot[i] = ' ';
        plot[(100 * (sizeof plot - 2)) / 100] = '|';
        plot[( 75 * (sizeof plot - 2)) / 100] = '|';
        plot[( 50 * (sizeof plot - 2)) / 100] = '|';
        plot[( 25 * (sizeof plot - 2)) / 100] = '|';
        plot[(  0 * (sizeof plot - 2)) / 100] = '|';
        printf("%8u ", size); fflush(stdout);

        rval = do_random((int) seconds, (int) size);
        printf("%8lu ", rval);
        if (rhigh == 0)
            rhigh = rval;           /* first result becomes the 100% mark */
        printf("%3lu%% ", percent_of(rval, rhigh)); fflush(stdout);

        sval = do_sequential((int) seconds, (int) size);
        printf("%8lu ", sval);
        if (shigh == 0)
            shigh = sval;
        printf("%3lu%% ", percent_of(sval, shigh));

        phigh = rhigh > shigh ? rhigh : shigh;
        /* '@' is written second, so it wins when both land on one column */
        plot[plot_column(rval, phigh, sizeof plot)] = '*';
        plot[plot_column(sval, phigh, sizeof plot)] = '@';
        printf("%46.46s\n", plot); fflush(stdout);

        half_step = size + size / 2;
        if (half_step <= sizeof data) {
            /* 1.5x row: only the left edge of the bar is drawn */
            plot[0] = '|';
            for (i = 1; i < sizeof plot; i++)
                plot[i] = ' ';
            printf("%8u ", half_step); fflush(stdout);

            rval = do_random((int) seconds, (int) half_step);
            printf("%8lu ", rval);
            printf("%3lu%% ", percent_of(rval, rhigh)); fflush(stdout);

            sval = do_sequential((int) seconds, (int) half_step);
            printf("%8lu ", sval);
            printf("%3lu%% ", percent_of(sval, shigh));

            plot[plot_column(rval, phigh, sizeof plot)] = '*';
            plot[plot_column(sval, phigh, sizeof plot)] = '@';
            printf("%46.46s\n", plot); fflush(stdout);
        }
    }

    return 0;
}



More information about the NCLUG mailing list