Issue
My simple program:
//usage:
//indent ./a.c;gcc -O0 ./a.c
//./a.out max r/w repeat timeout
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
// Number of bytes in every benchmark block (1 MiB).
static const size_t block_length = (size_t) (1024u * 1024u);

// Length of the repeating byte pattern that is copied to/from the blocks.
enum { PATTERN_LENGTH = 3 };

// Returns nonzero once more than `timeout` seconds have passed since
// `start_time`.  A timeout of zero or less disables the limit.
static int
timed_out (time_t start_time, time_t timeout)
{
  return (timeout > ((time_t) (0)))
    && ((time (NULL) - start_time) > timeout);
}

// Fills one block with the repeating pattern held in `data`.
// The index stays `unsigned` and the modulus stays a compile-time constant
// so the per-byte work matches the original benchmark loop.
static void
write_block (unsigned char *block, const unsigned char *data)
{
  unsigned bi;
  for (bi = 0u; bi < block_length; bi++)
    block[bi] = data[bi % ((unsigned) (PATTERN_LENGTH))];
}

// Copies every byte of one block into the small `data` buffer.
// Note that this still performs one store per byte (into `data`), so the
// "read" pass is a memory read plus a store to a cached location.
static void
read_block (const unsigned char *block, unsigned char *data)
{
  unsigned bi;
  for (bi = 0u; bi < block_length; bi++)
    data[bi % ((unsigned) (PATTERN_LENGTH))] = block[bi];
}

// Memory read/write micro-benchmark.
//
// Arguments: max r/w repeat timeout
//   max     - number of 1 MiB blocks to allocate (must not be negative)
//   r/w     - 'r' runs the read pass, 'w' runs the write pass; anything else
//             skips the test and reports a score of 0
//   repeat  - number of passes over all blocks; zero or less means unlimited
//   timeout - wall-clock limit in seconds; zero or less means unlimited
//
// Prints "init ok" after the blocks are allocated and filled, then
// "score:N" where N is the number of blocks processed.  Returns 1 on a bad
// argument count, a negative max, or when the block table cannot be
// allocated; returns 0 otherwise.
int
main (int argc, char **argv)
{
  time_t const start_time = time (NULL);

  if (argc != 5)
    return 1;

  // atoi is kept on purpose: it preserves the original lenient parsing
  // (non-numeric text reads as 0).
  int const max = atoi (argv[1]);
  char const mode = argv[2][0];
  int repeat = atoi (argv[3]);
  time_t const timeout = ((time_t) (atoi (argv[4])));

  // A negative count would be converted to a huge unsigned loop bound.
  if (max < 0)
    {
      fputs ("max must not be negative\n", stderr);
      return 1;
    }
  unsigned const block_count = (unsigned) (max);

  // calloc zero-fills the table, so entries not yet allocated are NULL and
  // the cleanup loop below can free every slot unconditionally.
  unsigned char **block_array =
    calloc ((size_t) (block_count), sizeof *block_array);
  if ((NULL == block_array) && (block_count > 0u))
    {
      fputs ("cannot allocate block array\n", stderr);
      return 1;
    }

  unsigned char data[PATTERN_LENGTH] = { 'a', 'b', 'c' };
  unsigned score = 0u;
  unsigned i = 0u;

  // Allocate and fill every block, retrying a failed malloc until it
  // succeeds or the timeout expires.
  for (i = 0u; i < block_count; i++)
    {
      do
        {
          if (timed_out (start_time, timeout))
            {
              puts ("timeouted!");
              goto cleanup;
            }
          block_array[i] = malloc (block_length);
          if (block_array[i] != NULL)
            write_block (block_array[i], data);
          else
            printf ("%u error\n", i);
        }
      while (NULL == block_array[i]);
    }
  puts ("init ok");

  // Unknown mode: nothing to measure.
  if (('r' != mode) && ('w' != mode))
    goto show_score;

  // With no blocks the inner loop never runs, so the timeout would never be
  // checked and an unlimited repeat count would spin forever.
  if (0u == block_count)
    goto show_score;

  for (;;)
    {
      for (i = 0u; i < block_count; i++)
        {
          if (timed_out (start_time, timeout))
            {
              puts ("timeouted!");
              goto show_score;
            }
          if ('r' == mode)
            read_block (block_array[i], data);
          else
            write_block (block_array[i], data);
          score++;
        }
      // Only a positive repeat count is a limit; stop when it reaches zero.
      if ((repeat > 0) && (0 == --repeat))
        goto show_score;
    }

show_score:
  printf ("score:%u\n", score);

cleanup:
  for (i = 0u; i < block_count; i++)
    free (block_array[i]);
  free (block_array);
  return 0;
}
I also ran the same test on Debian Jessie (Linux 3.16; fewer runs) and Debian Stretch (Linux 4.9; more runs, to be sure).
I have already repeated the same test many times to confirm this, so I am only posting a short result.
Test result:
$ cat /proc/meminfo |grep SwapTotal
SwapTotal: 0 kB
$ time ./a.out 100 r 5 -1
init ok
score:500
real 0m2.689s
user 0m2.604s
sys 0m0.080s
$ time ./a.out 100 w 5 -1
init ok
score:500
real 0m2.567s
user 0m2.496s
sys 0m0.060s
$
Solution
The main assignment inside the loop in both the 'r' and 'w' cases reads from memory and writes back to memory i.e. they are essentially the same - you aren't really testing memory read versus memory write. This is borne out by the fact that the times in each case are pretty close.
The 'w' case may be slightly faster because the cache probably contains the value you want to read from memory, since you aren't changing the source address in that case.
Answered By - Murray Jensen