From: Dana Jansens
Date: Sun, 10 Feb 2008 21:49:16 +0000 (-0500)
Subject: use memcpy's to make splitvertical gradient much faster - using log n memcpy's is...
X-Git-Url: https://git.dogcows.com/gitweb?a=commitdiff_plain;h=1d00d9947067da76ac4d8d0a6b9ef2c28e73349e;p=chaz%2Fopenbox

use memcpy's to make splitvertical gradient much faster - using log n memcpy's is much quicker than setting a pointer value n times

Here are some profiling results. splitvertical1 is the original code, splitvertical2 is some slight improvements in locality for it, and splitvertical3 is the new O(log n) memcpy code.

  %   cumulative   self              self     total
 time   seconds   seconds    calls  ms/call  ms/call  name
 49.44      0.88     0.88     1063     0.83     0.83  gradient_splitvertical1
 47.19      1.72     0.84     1063     0.79     0.79  gradient_splitvertical2
  2.81      1.77     0.05     1063     0.05     0.05  gradient_splitvertical3

I also tested this with 'time' to draw 1000 gradients, and the new code used approximately half the user time, and finished 10 seconds quicker. So yeah, it's magical and works well.
--- diff --git a/render/gradient.c b/render/gradient.c index 6439b301..bbd2a5c9 100644 --- a/render/gradient.c +++ b/render/gradient.c @@ -425,8 +425,7 @@ static void gradient_splitvertical(RrAppearance *a, gint w, gint h) { gint x, y1, y2, y3; RrSurface *sf = &a->surface; - RrPixel32 *data = sf->pixel_data; - RrPixel32 current; + RrPixel32 *data, *start; gint y1sz, y2sz, y3sz; VARS(y1); @@ -455,28 +454,69 @@ static void gradient_splitvertical(RrAppearance *a, gint w, gint h) } SETUP(y3, sf->secondary, sf->split_secondary, y3sz); - for (y1 = y1sz; y1 > 0; --y1) { - current = COLOR(y1); - for (x = w - 1; x >= 0; --x) - *(data++) = current; + /* find the color for the first pixel of each row first */ + data = sf->pixel_data; + for (y1 = y1sz-1; y1 > 0; --y1) { + *data = COLOR(y1); + data += w; NEXT(y1); } - - for (y2 = y2sz; y2 > 0; --y2) { - current = COLOR(y2); - for (x = w - 1; x >= 0; --x) - *(data++) = current; - + *data = COLOR(y1); + data += w; + for (y2 = y2sz-1; y2 > 0; --y2) { + *data = COLOR(y2); + data += w; NEXT(y2); } + *data = COLOR(y2); + data += w; + for (y3 = y3sz-1; y3 > 0; --y3) { + *data = COLOR(y3); + data += w; + NEXT(y3); + } + *data = COLOR(y3); - for (y3 = y3sz; y3 > 0; --y3) { - current = COLOR(y3); - for (x = w - 1; x >= 0; --x) - *(data++) = current; + /* copy the first pixels into the whole rows */ - NEXT(y3); + start = sf->pixel_data; + data = start + 1; + + for (y1 = h; y1 > 0; --y1) { + /* for really small things, just copy ourselves */ + if (w < 8) { + for (x = w-1; x > 0; --x) + *(data++) = *start; + } + /* for >= 8, then use O(log n) memcpy's... 
*/ + else { + gint len = 4; + gint lenbytes = 4 * sizeof(RrPixel32); + + /* copy the first 3 * 32 bits (3 words) ourselves - then we have + 3 + the original 1 = 4 words to make copies of at a time + + this is faster than doing memcpy for 1 or 2 words at a time + */ + for (x = 3; x > 0; --x) + *(data++) = *start; + + for (x = w - 4; x > 0;) { + memcpy(data, start, lenbytes); + x -= len; + data += len; + len <<= 1; + lenbytes <<= 1; + if (len > x) { + len = x; + lenbytes = x * sizeof(RrPixel32); + } + } + } + + start += w; + ++data; } } @@ -551,13 +591,13 @@ static void gradient_vertical(RrSurface *sf, gint w, gint h) for (y = h - 1; y > 0; --y) { /* 0 -> h-1 */ current = COLOR(y); - for (x = w - 1; x >= 0; --x) /* 0 -> w */ + for (x = w; x > 0; --x) /* 0 -> w */ *(data++) = current; NEXT(y); } current = COLOR(y); - for (x = w - 1; x >= 0; --x) /* 0 -> w */ + for (x = w; x > 0; --x) /* 0 -> w */ *(data++) = current; }