osdir.com
mailing list archive F.A.Q. -since 2001!



Subject: coreutils patch to align buffers better -
msg#00095

List: gnu.core-utils.bugs

Mail Archive Navigation:
by Date: Prev Next Date Index by Thread: Prev Next Thread Index

Following up on my previous email, here's a proposed performance patch
for coreutils. I can't measure any performance improvements on my
host, but I suspect that aligning I/O buffers can make a real
difference with some device drivers on some hosts, and it shouldn't
hurt on other hosts.

2004-04-13 Paul Eggert <eggert@xxxxxxxxxxx>

Use page-aligned buffers whenever we bother to do I/O using buffer
sizes that are tailored for the files.

* src/cat.c: Include getpagesize.h.
* src/copy.c: Likewise.
* src/shred.c: Likewise.
* src/split.c: Likewise.
* src/cat.c (main): Align I/O buffers to page boundaries.
* src/copy.c (copy_reg): Likewise.
* src/shred.c (dopass): Likewise.
* src/split.c (main): Likewise.
* src/dd (ROUND_UP_OFFSET, PTR_ALIGN): Remove.
All uses replaced by ptr_align.
* src/od.c (gcd, lcm): Remove; now in system.h.
* src/system.h (gcd, lcm, ptr_align): New functions.

Index: src/cat.c
===================================================================
RCS file: /home/meyering/coreutils/cu/src/cat.c,v
retrieving revision 1.93
diff -p -u -r1.93 cat.c
--- src/cat.c 21 Jan 2004 22:42:46 -0000 1.93
+++ src/cat.c 13 Apr 2004 20:58:09 -0000
@@ -33,6 +33,7 @@
#include "system.h"
#include "error.h"
#include "full-write.h"
+#include "getpagesize.h"
#include "safe-read.h"

/* The official name of this program (e.g., no `g' prefix). */
@@ -504,6 +505,8 @@ main (int argc, char **argv)
/* Optimal size of i/o operations of input. */
size_t insize;

+ size_t page_size = getpagesize ();
+
/* Pointer to the input buffer. */
char *inbuf;

@@ -806,15 +809,16 @@ main (int argc, char **argv)
if (options == 0)
{
insize = max (insize, outsize);
- inbuf = xmalloc (insize);
+ inbuf = xmalloc (insize + page_size - 1);

- simple_cat (inbuf, insize);
+ simple_cat (ptr_align (inbuf, page_size), insize);
}
else
{
- inbuf = xmalloc (insize + 1);
+ inbuf = xmalloc (insize + 1 + page_size - 1);

- /* Why are (OUTSIZE - 1 + INSIZE * 4 + LINE_COUNTER_BUF_LEN)
+ /* Why are
+ (OUTSIZE - 1 + INSIZE * 4 + LINE_COUNTER_BUF_LEN + PAGE_SIZE - 1)
bytes allocated for the output buffer?

A test whether output needs to be written is done when the input
@@ -829,11 +833,17 @@ main (int argc, char **argv)
options) as the first thing in the output buffer. (Done after the
new input is read, but before processing of the input begins.)
A line number requires seldom more than LINE_COUNTER_BUF_LEN
- positions. */
+ positions.
+
+ Align the output buffer to a page size boundary, for efficency on
+ some paging implementations, so add PAGE_SIZE - 1 bytes to the
+ request to make room for the alignment. */

- outbuf = xmalloc (outsize - 1 + insize * 4 + LINE_COUNTER_BUF_LEN);
+ outbuf = xmalloc (outsize - 1 + insize * 4 + LINE_COUNTER_BUF_LEN
+ + page_size - 1);

- cat (inbuf, insize, outbuf, outsize, quote,
+ cat (ptr_align (inbuf, page_size), insize,
+ ptr_align (outbuf, page_size), outsize, quote,
output_tabs, numbers, numbers_at_empty_lines, mark_line_ends,
squeeze_empty_lines);

Index: src/copy.c
===================================================================
RCS file: /home/meyering/coreutils/cu/src/copy.c,v
retrieving revision 1.160
diff -p -u -r1.160 copy.c
--- src/copy.c 12 Apr 2004 09:30:57 -0000 1.160
+++ src/copy.c 13 Apr 2004 21:20:52 -0000
@@ -33,6 +33,7 @@
#include "dirname.h"
#include "error.h"
#include "full-write.h"
+#include "getpagesize.h"
#include "hash.h"
#include "hash-pjw.h"
#include "path-concat.h"
@@ -205,7 +206,8 @@ copy_reg (const char *src_path, const ch
struct stat const *src_sb)
{
char *buf;
- int buf_size;
+ size_t buf_size;
+ size_t buf_alignment;
int dest_desc;
int source_desc;
struct stat sb;
@@ -316,7 +318,9 @@ copy_reg (const char *src_path, const ch

/* Make a buffer with space for a sentinel at the end. */

- buf = alloca (buf_size + sizeof (int));
+ buf_alignment = lcm (getpagesize (), sizeof (int));
+ buf = alloca (buf_size + sizeof (int) + buf_alignment - 1);
+ buf = ptr_align (buf, buf_alignment);

for (;;)
{
Index: src/dd.c
===================================================================
RCS file: /home/meyering/coreutils/cu/src/dd.c,v
retrieving revision 1.160
diff -p -u -r1.160 dd.c
--- src/dd.c 8 Apr 2004 21:36:06 -0000 1.160
+++ src/dd.c 13 Apr 2004 20:52:09 -0000
@@ -53,10 +53,6 @@
# define fdatasync(fd) (errno = ENOSYS, -1)
#endif

-#define ROUND_UP_OFFSET(X, M) ((M) - 1 - (((X) + (M) - 1) % (M)))
-#define PTR_ALIGN(Ptr, M) ((Ptr) \
- + ROUND_UP_OFFSET ((char *)(Ptr) - (char *)0, (M)))
-
#define max(a, b) ((a) > (b) ? (a) : (b))
#define output_char(c) \
do \
@@ -1061,13 +1057,13 @@ dd_copy (void)
ibuf = real_buf;
ibuf += SWAB_ALIGN_OFFSET; /* allow space for swab */

- ibuf = PTR_ALIGN (ibuf, page_size);
+ ibuf = ptr_align (ibuf, page_size);

if (conversions_mask & C_TWOBUFS)
{
/* Page-align the output buffer, too. */
real_obuf = xmalloc (output_blocksize + page_size - 1);
- obuf = PTR_ALIGN (real_obuf, page_size);
+ obuf = ptr_align (real_obuf, page_size);
}
else
{
Index: src/od.c
===================================================================
RCS file: /home/meyering/coreutils/cu/src/od.c,v
retrieving revision 1.146
diff -p -u -r1.146 od.c
--- src/od.c 21 Jan 2004 23:37:54 -0000 1.146
+++ src/od.c 13 Apr 2004 21:21:53 -0000
@@ -371,33 +371,6 @@ implies 32. By default, od uses -A o -t
exit (status);
}

-/* Compute the greatest common denominator of U and V
- using Euclid's algorithm. */
-
-static unsigned int
-gcd (unsigned int u, unsigned int v)
-{
- unsigned int t;
- while (v != 0)
- {
- t = u % v;
- u = v;
- v = t;
- }
- return u;
-}
-
-/* Compute the least common multiple of U and V. */
-
-static unsigned int
-lcm (unsigned int u, unsigned int v)
-{
- unsigned int t = gcd (u, v);
- if (t == 0)
- return 0;
- return u * v / t;
-}
-
static void
print_s_char (size_t n_bytes, const char *block, const char *fmt_string)
{
Index: src/shred.c
===================================================================
RCS file: /home/meyering/coreutils/cu/src/shred.c,v
retrieving revision 1.85
diff -p -u -r1.85 shred.c
--- src/shred.c 8 Apr 2004 21:36:18 -0000 1.85
+++ src/shred.c 13 Apr 2004 21:25:07 -0000
@@ -105,6 +105,7 @@
#include "system.h"
#include "xstrtol.h"
#include "error.h"
+#include "getpagesize.h"
#include "human.h"
#include "inttostr.h"
#include "quotearg.h" /* For quotearg_colon */
@@ -787,11 +788,9 @@ dopass (int fd, char const *qname, off_t
size_t lim; /* Amount of data to try writing */
size_t soff; /* Offset into buffer for next write */
ssize_t ssize; /* Return value from write */
-#if ISAAC_WORDS > 1024
- word32 r[ISAAC_WORDS * 3]; /* Multiple of 4K and of pattern size */
-#else
- word32 r[1024 * 3]; /* Multiple of 4K and of pattern size */
-#endif
+ word32 *r; /* Fill pattern. */
+ size_t rsize = 3 * MAX (ISAAC_WORDS, 1024) * sizeof *r; /* Fill size. */
+ size_t ralign = lcm (getpagesize (), sizeof *r); /* Fill alignment. */
char pass_string[PASS_NAME_SIZE]; /* Name of current pass */

/* Printable previous offset into the file */
@@ -804,10 +803,13 @@ dopass (int fd, char const *qname, off_t
return -1;
}

+ r = alloca (rsize + ralign - 1);
+ r = ptr_align (r, ralign);
+
/* Constant fill patterns need only be set up once. */
if (type >= 0)
{
- lim = sizeof r;
+ lim = rsize;
if ((off_t) lim > size && size != -1)
{
lim = (size_t) size;
@@ -832,7 +834,7 @@ dopass (int fd, char const *qname, off_t
for (;;)
{
/* How much to write this time? */
- lim = sizeof r;
+ lim = rsize;
if ((off_t) lim > size - offset && size != -1)
{
if (size < offset)
@@ -842,7 +844,7 @@ dopass (int fd, char const *qname, off_t
break;
}
if (type < 0)
- fillrand (s, r, sizeof r, lim);
+ fillrand (s, r, rsize, lim);
/* Loop to retry partial writes. */
for (soff = 0; soff < lim; soff += ssize)
{
Index: src/split.c
===================================================================
RCS file: /home/meyering/coreutils/cu/src/split.c,v
retrieving revision 1.97
diff -p -u -r1.97 split.c
--- src/split.c 21 Jan 2004 23:45:13 -0000 1.97
+++ src/split.c 13 Apr 2004 20:57:23 -0000
@@ -30,6 +30,7 @@
#include "system.h"
#include "dirname.h"
#include "error.h"
+#include "getpagesize.h"
#include "full-read.h"
#include "full-write.h"
#include "inttostr.h"
@@ -373,6 +374,7 @@ main (int argc, char **argv)
} split_type = type_undef;
size_t in_blk_size; /* optimal block size of input file device */
char *buf; /* file i/o buffer */
+ size_t page_size = getpagesize ();
uintmax_t n_units;
int c;
int digits_optind = 0;
@@ -555,7 +557,7 @@ main (int argc, char **argv)
error (EXIT_FAILURE, errno, "%s", infile);
in_blk_size = ST_BLKSIZE (stat_buf);

- buf = xmalloc (in_blk_size + 1);
+ buf = ptr_align (xmalloc (in_blk_size + 1 + page_size - 1), page_size);

switch (split_type)
{
Index: src/system.h
===================================================================
RCS file: /home/meyering/coreutils/cu/src/system.h,v
retrieving revision 1.85
diff -p -u -r1.85 system.h
--- src/system.h 9 Apr 2004 12:05:45 -0000 1.85
+++ src/system.h 13 Apr 2004 21:44:17 -0000
@@ -766,3 +766,43 @@ enum
? fseek (s, o, w) \
: (errno = EOVERFLOW, -1))
#endif
+
+/* Compute the greatest common divisor of U and V using Euclid's
+ algorithm. U and V must be nonzero. */
+
+static inline size_t
+gcd (size_t u, size_t v)
+{
+ do
+ {
+ size_t t = u % v;
+ u = v;
+ v = t;
+ }
+ while (v);
+
+ return u;
+}
+
+/* Compute the least common multiple of U and V. U and V must be
+ nonzero. There is no overflow checking, so callers should not
+ specify outlandish sizes. */
+
+static inline size_t
+lcm (size_t u, size_t v)
+{
+ return u * (v / gcd (u, v));
+}
+
+/* Return PTR, aligned upward to the next multiple of ALIGNMENT.
+ ALIGNMENT must be nonzero. The caller must arrange for ((char *)
+ PTR) through ((char *) PTR + ALIGNMENT - 1) to be addressable
+ locations. */
+
+static inline void *
+ptr_align (void *ptr, size_t alignment)
+{
+ char *p0 = ptr;
+ char *p1 = p0 + alignment - 1;
+ return p1 - (uintptr_t) p1 % alignment;
+}


Thread at a glance:

Previous Message by Date:

Re: "readlink -f foo" fails if the target of foo does not exist

Dmitry V. Levin wrote: > I've implemented the changes mentioned above. Thanks! Unfortunately I am unable to test the patch at the moment because I have poor Internet connectivity. I will write again when I am able to test it. > [...] "readlink -f" is now more compatible with prior implementations, > old behaviour moved to "readlink -e", and your proposed relaxed -ff > mode implemented as "-m". I don't insist on these option names though. Nothing is actually using "-ff" yet so please choose the option names you think best. -- Thomas ________________________________________________________________________ Yahoo! Messenger - Communicate instantly..."Ping" your friends today! Download Messenger Now http://uk.messenger.yahoo.com/download/index.html

Next Message by Date:

Re: dd PATCH: add conv=direct

Jim Meyering <jim@xxxxxxxxxxxx> writes: >>> > http://oss.oracle.com/projects/ocfs/dist/files/source/RHAT/RHAS3/coreutils-4.5.3-33.src.rpm I briefly looked at the following patches in that RPM: coreutils-4.5.3-O_DIRECT-NFS.patch coreutils-4.5.3-O_DIRECT-dd.patch coreutils-4.5.3-O_DIRECT-valloc.patch coreutils-4.5.3-o_direct-copy-valloc.patch coreutils-4.5.3-o_direct.patch and I found the following differences between those ideas and what's in coreutils CVS right now: * Coreutils dd simply aligns the I/O buffers to getpagesize() boundaries, 4.5.3-33 has a complicated alignment strategy that I don't fully follow, but which seems to do the same thing. (There may be some differences if I/O errors occur; is that the point?) * 4.5.3-33 aligns buffers to page size boundaries in copy.c. This looks to me like it's worth doing (independently of O_DIRECT), so I'll propose a patch along those lines via separate email to bug-coreutils. * cp, mv, and md5sum have --o_direct options. I'm not convinced that md5sum needs this (why not all the other commands that read files, too, while you're at it? cat, say?) but perhaps cp and mv should have it (what are the application areas here?). Also, option names should not have underscores, so I'd suggest --direct (or perhaps --direct-io) as a better name for this sort of option. * The dd options are spelled differently, e.g.: dd ibs=512 obs=1024 iflags=direct oflags=direct (coreutils CVS) dd --o_direct=512,1024 (4.5.3-33) Here I prefer the coreutils CVS version as it's a bit more orthogonal.

Previous Message by Thread:

Re: dd PATCH: add conv=direct

Wim Coekaerts <wim.coekaerts@xxxxxxxxxx> wrote: > philip copeland did a whole set of patches for coreutils to allow > directio for both read write and mixed sizes even > the rpm is at > http://oss.oracle.com/projects/ocfs/files/source/RHAT/RHAS3/coreutils-4.5.3-33.src.rpm, Thanks for the pointer. FYI, that URL didn't work for me. This one did: http://oss.oracle.com/projects/ocfs/dist/files/source/RHAT/RHAS3/coreutils-4.5.3-33.src.rpm Whoever maintains that code should consider merging their changes with something newer. There are over 500 lines of NEWS entries alone for the coreutils releases that have been made since 4.5.3. Besides, those patches have portability and robustness problems, and they aren't even based on coreutils-4.5.3. Maybe they're based on some vendor's version of coreutils? > I think he took it up with the maintainers but so far had no luck As far as I know, no one ever submitted such O_DIRECT-based patches to me or any of the bug-*@gnu.org mailing lists. The only pending O_DIRECT-based patch is one for shred. Clean, complete, and well-justified patches usually go in pretty quickly.

Next Message by Thread:

Re: coreutils patch to align buffers better

Paul Eggert <eggert@xxxxxxxxxxx> wrote: > Following up on my previous email, here's a proposed performance patch > for coreutils. I can't measure any performance improvements on my > host, but I suspect that aligning I/O buffers can make a real > difference with some device drivers on some hosts, and it shouldn't > hurt on other hosts. Thanks. I've applied that. > 2004-04-13 Paul Eggert <eggert@xxxxxxxxxxx> > > Use page-aligned buffers whenever we bother to do I/O using buffer > sizes that are tailored for the files. > > * src/cat.c: Include getpagesize.h. > * src/copy.c: Likewise. > * src/shred.c: Likewise. > * src/split.c: Likewise. > * src/cat.c (main): Align I/O buffers to page boundaries. > * src/copy.c (copy_reg): Likewise. > * src/shred.c (dopass): Likewise. > * src/split.c (main): Likewise. > * src/dd (ROUND_UP_OFFSET, PTR_ALIGN): Remove. > All uses replaced by ptr_align. > * src/od.c (gcd, lcm): Remove; now in system.h. > * src/system.h (gcd, lcm, ptr_align): New functions.
blog comments powered by Disqus

Home | News | Sitemap | FAQ | advertise | OSDir is an Inevitable website. GBiz is too!