Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* $OpenBSD: qsort.c,v 1.11 2010/02/08 11:04:07 otto Exp $ */
- /*-
- * Jan.28.2017 Rick C. Hodgin -- See if inlining or blocks are faster.
- * Average results Win7: Native=78, Inline=47, Block=31
- *
- * Copyright (c) 1992, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
- #include <sys/types.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <windows.h>
- typedef unsigned long long u64;
- static __inline void swapfunc(char *, char *, size_t, int);
- static __inline char *med3_native(char *, char *, char *, int (*)(const void *, const void *));
- static __inline char *med3_inline(char *, char *, char *);
- static __inline char *med3_block(char *, char *, char *);
- #define compare_block(l, r) (((*(int*)l < *(int*)r) ? -1 : ((*(int*)l > *(int*)r) ? 1 : 0)))
- //#define compare_block_asm(l, r) (((*(int*)l < *(int*)r) ? -1 : ((*(int*)l > *(int*)r) ? 1 : 0)))
- #define mymin(a, b) (((a) < (b)) ? a : b)
- int compare(const void* left, const void* right)
- {
- int l, r;
- l = *(int*)left;
- r = *(int*)right;
- if (l < r) return(-1); // Left is less
- else if (l > r) return(1); // Left is greater
- else return(0); // Equal
- }
- /*
- * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
- */
- #define swapcode(TYPE, parmi, parmj, n) { \
- size_t i = (n) / sizeof (TYPE); \
- TYPE *pi = (TYPE *) (parmi); \
- TYPE *pj = (TYPE *) (parmj); \
- do { \
- TYPE t = *pi; \
- *pi++ = *pj; \
- *pj++ = t; \
- } while (--i > 0); \
- }
- #define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
- es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
- static __inline void
- swapfunc(char *a, char *b, size_t n, int swaptype)
- {
- if (swaptype <= 1)
- swapcode(long, a, b, n)
- else
- swapcode(char, a, b, n)
- }
- #define swap(a, b) \
- if (swaptype == 0) { \
- long t = *(long *)(a); \
- *(long *)(a) = *(long *)(b); \
- *(long *)(b) = t; \
- } else \
- swapfunc(a, b, es, swaptype)
- #define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype)
- static __inline char *
- med3_native(char *a, char *b, char *c, int (*cmp)(const void *, const void *))
- {
- return cmp(a, b) < 0 ? (cmp(b, c) < 0 ? b
- : (cmp(a, c) < 0 ? c
- : a ))
- : (cmp(b, c) > 0 ? b
- : (cmp(a, c) < 0 ? a
- : c ));
- }
- static __inline char *
- med3_inline(char *a, char *b, char *c)
- {
- return compare(a, b) < 0 ? (compare(b, c) < 0 ? b
- : (compare(a, c) < 0 ? c
- : a ))
- : (compare(b, c) > 0 ? b
- : (compare(a, c) < 0 ? a
- : c ));
- }
- static __inline char *
- med3_block(char *a, char *b, char *c)
- {
- return compare_block(a, b) < 0 ? (compare_block(b, c) < 0 ? b
- : (compare_block(a, c) < 0 ? c
- : a ))
- : (compare_block(b, c) > 0 ? b
- : (compare_block(a, c) < 0 ? a
- : c ));
- }
- void
- myqsort_native(void *aa, size_t n, size_t es, int (*cmp)(const void *, const void *))
- {
- char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
- int cmp_result, swaptype, swap_cnt;
- size_t d, r;
- char *a = (char *)aa;
- loop: SWAPINIT(a, es);
- swap_cnt = 0;
- if (n < 7) {
- for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es)
- for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
- pl -= es)
- swap(pl, pl - es);
- return;
- }
- pm = (char *)a + (n / 2) * es;
- if (n > 7) {
- pl = (char *)a;
- pn = (char *)a + (n - 1) * es;
- if (n > 40) {
- d = (n / 8) * es;
- pl = med3_native(pl, pl + d, pl + 2 * d, cmp);
- pm = med3_native(pm - d, pm, pm + d, cmp);
- pn = med3_native(pn - 2 * d, pn - d, pn, cmp);
- } else {
- pm = med3_native(pl, pm, pn, cmp);
- }
- }
- swap(a, pm);
- pa = pb = (char *)a + es;
- pc = pd = (char *)a + (n - 1) * es;
- for (;;) {
- while (pb <= pc && (cmp_result = cmp(pb, a)) <= 0) {
- if (cmp_result == 0) {
- swap_cnt = 1;
- swap(pa, pb);
- pa += es;
- }
- pb += es;
- }
- while (pb <= pc && (cmp_result = cmp(pc, a)) >= 0) {
- if (cmp_result == 0) {
- swap_cnt = 1;
- swap(pc, pd);
- pd -= es;
- }
- pc -= es;
- }
- if (pb > pc)
- break;
- swap(pb, pc);
- swap_cnt = 1;
- pb += es;
- pc -= es;
- }
- if (swap_cnt == 0) { /* Switch to insertion sort */
- for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
- for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
- pl -= es)
- swap(pl, pl - es);
- return;
- }
- pn = (char *)a + n * es;
- r = mymin(pa - (char *)a, pb - pa);
- vecswap(a, pb - r, r);
- r = mymin(pd - pc, pn - pd - es);
- vecswap(pb, pn - r, r);
- if ((r = pb - pa) > es)
- myqsort_native(a, r / es, es, cmp);
- if ((r = pd - pc) > es) {
- /* Iterate rather than recurse to save stack space */
- a = pn - r;
- n = r / es;
- goto loop;
- }
- /* qsort(pn - r, r / es, es, cmp);*/
- }
- void
- myqsort_inline(void *aa, size_t n, size_t es)
- {
- char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
- int cmp_result, swaptype, swap_cnt;
- size_t d, r;
- char *a = (char *)aa;
- loop: SWAPINIT(a, es);
- swap_cnt = 0;
- if (n < 7) {
- for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es)
- for (pl = pm; pl > (char *) a && compare(pl - es, pl) > 0;
- pl -= es)
- swap(pl, pl - es);
- return;
- }
- pm = (char *)a + (n / 2) * es;
- if (n > 7) {
- pl = (char *)a;
- pn = (char *)a + (n - 1) * es;
- if (n > 40) {
- d = (n / 8) * es;
- pl = med3_inline(pl, pl + d, pl + 2 * d);
- pm = med3_inline(pm - d, pm, pm + d);
- pn = med3_inline(pn - 2 * d, pn - d, pn);
- } else {
- pm = med3_inline(pl, pm, pn);
- }
- }
- swap(a, pm);
- pa = pb = (char *)a + es;
- pc = pd = (char *)a + (n - 1) * es;
- for (;;) {
- while (pb <= pc && (cmp_result = compare(pb, a)) <= 0) {
- if (cmp_result == 0) {
- swap_cnt = 1;
- swap(pa, pb);
- pa += es;
- }
- pb += es;
- }
- while (pb <= pc && (cmp_result = compare(pc, a)) >= 0) {
- if (cmp_result == 0) {
- swap_cnt = 1;
- swap(pc, pd);
- pd -= es;
- }
- pc -= es;
- }
- if (pb > pc)
- break;
- swap(pb, pc);
- swap_cnt = 1;
- pb += es;
- pc -= es;
- }
- if (swap_cnt == 0) { /* Switch to insertion sort */
- for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
- for (pl = pm; pl > (char *) a && compare(pl - es, pl) > 0;
- pl -= es)
- swap(pl, pl - es);
- return;
- }
- pn = (char *)a + n * es;
- r = mymin(pa - (char *)a, pb - pa);
- vecswap(a, pb - r, r);
- r = mymin(pd - pc, pn - pd - es);
- vecswap(pb, pn - r, r);
- if ((r = pb - pa) > es)
- myqsort_inline(a, r / es, es);
- if ((r = pd - pc) > es) {
- /* Iterate rather than recurse to save stack space */
- a = pn - r;
- n = r / es;
- goto loop;
- }
- /* qsort(pn - r, r / es, es, cmp);*/
- }
- void
- myqsort_block(void *aa, size_t n, size_t es)
- {
- char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
- int cmp_result, swaptype, swap_cnt;
- size_t d, r;
- char *a = (char *)aa;
- loop: SWAPINIT(a, es);
- swap_cnt = 0;
- if (n < 7) {
- for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es)
- for (pl = pm; pl > (char *) a && compare(pl - es, pl) > 0;
- pl -= es)
- swap(pl, pl - es);
- return;
- }
- pm = (char *)a + (n / 2) * es;
- if (n > 7) {
- pl = (char *)a;
- pn = (char *)a + (n - 1) * es;
- if (n > 40) {
- d = (n / 8) * es;
- pl = med3_block(pl, pl + d, pl + 2 * d);
- pm = med3_block(pm - d, pm, pm + d);
- pn = med3_block(pn - 2 * d, pn - d, pn);
- } else {
- pm = med3_block(pl, pm, pn);
- }
- }
- swap(a, pm);
- pa = pb = (char *)a + es;
- pc = pd = (char *)a + (n - 1) * es;
- for (;;) {
- while (pb <= pc && (cmp_result = compare_block(pb, a)) <= 0) {
- if (cmp_result == 0) {
- swap_cnt = 1;
- swap(pa, pb);
- pa += es;
- }
- pb += es;
- }
- while (pb <= pc && (cmp_result = compare_block(pc, a)) >= 0) {
- if (cmp_result == 0) {
- swap_cnt = 1;
- swap(pc, pd);
- pd -= es;
- }
- pc -= es;
- }
- if (pb > pc)
- break;
- swap(pb, pc);
- swap_cnt = 1;
- pb += es;
- pc -= es;
- }
- if (swap_cnt == 0) { /* Switch to insertion sort */
- for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
- for (pl = pm; pl > (char *) a && compare(pl - es, pl) > 0;
- pl -= es)
- swap(pl, pl - es);
- return;
- }
- pn = (char *)a + n * es;
- r = mymin(pa - (char *)a, pb - pa);
- vecswap(a, pb - r, r);
- r = mymin(pd - pc, pn - pd - es);
- vecswap(pb, pn - r, r);
- if ((r = pb - pa) > es)
- myqsort_inline(a, r / es, es);
- if ((r = pd - pc) > es) {
- /* Iterate rather than recurse to save stack space */
- a = pn - r;
- n = r / es;
- goto loop;
- }
- /* qsort(pn - r, r / es, es, cmp);*/
- }
- int array[100000];
- int main(int argc, char* argv[])
- {
- int i, j, pass, subpass, iter_size, array_size;
- u64 deltaNative, deltaInline, deltaBlock, deltaNativeBest, deltaInlineBest, deltaBlockBest, deltaNativeAvg, deltaInlineAvg, deltaBlockAvg, startTick;
- char* description;
- // This algorithm breaks down the qsort() algorithm into three forms:
- //
- // Native -- the original qsort algorithm from: https://raw.githubusercontent.com/01org/linux-sgx/master/sdk/tlibc/stdlib/qsort.c
- // Inline -- removing the callback, and directly calling the function with full optimizations at compile-time.
- // Blocks -- replacing the callback with a macro which handles the compare directly.
- //
- // Tests were computed on 100K, 10K, and 1K array[] sizes.
- for (pass = 0; pass < 3; pass++)
- {
- deltaNativeBest = 9999999999999;
- deltaInlineBest = 9999999999999;
- deltaBlockBest = 9999999999999;
- deltaNativeAvg = 0;
- deltaInlineAvg = 0;
- deltaBlockAvg = 0;
- // Determine sort size
- switch (pass)
- {
- case 0:
- array_size = 100000;
- iter_size = 50;
- description = "100K";
- break;
- case 1:
- array_size = 10000;
- iter_size = 500;
- description = " 10K";
- break;
- case 2:
- array_size = 1000;
- iter_size = 5000;
- description = " 1K";
- break;
- }
- for (subpass = 0; subpass < 10; subpass++)
- {
- //////////
- // Native
- //////
- deltaNative = 0;
- srand((pass + 1) * subpass);
- for (i = 0; i < iter_size; i++)
- {
- // Generate all random numbers
- for (j = 0; j < array_size; j++)
- array[j] = rand();
- startTick = GetTickCount64();
- myqsort_native(&array[0], array_size, sizeof(int), compare);
- deltaNative += GetTickCount64() - startTick;
- }
- //////////
- // Inline
- //////
- deltaInline = 0;
- srand((pass + 1) * subpass);
- for (i = 0; i < iter_size; i++)
- {
- // Re-generate
- for (j = 0; j < array_size; j++)
- array[j] = rand();
- startTick = GetTickCount64();
- myqsort_inline(&array[0], array_size, sizeof(int));
- deltaInline += GetTickCount64() - startTick;
- }
- //////////
- // Block
- //////
- deltaBlock = 0;
- srand((pass + 1) * subpass);
- for (i = 0; i < iter_size; i++)
- {
- // Re-generate
- for (j = 0; j < array_size; j++)
- array[j] = rand();
- startTick = GetTickCount64();
- myqsort_block(&array[0], array_size, sizeof(int));
- deltaBlock += GetTickCount64() - startTick;
- }
- // // Report results
- // printf("%s subpass %d Native = %I64d\n", description, subpass, deltaNative);
- // printf("%s subpass %d Inline = %I64d\n", description, subpass, deltaInline);
- // printf("%s subpass %d Block = %I64d\n", description, subpass, deltaBlock);
- // Store best
- deltaNativeBest = min(deltaNativeBest, deltaNative);
- deltaInlineBest = min(deltaInlineBest, deltaInline);
- deltaBlockBest = min(deltaBlockBest, deltaBlock);
- // Store for average
- deltaNativeAvg += deltaNative;
- deltaInlineAvg += deltaInline;
- deltaBlockAvg += deltaBlock;
- }
- printf("%s Native best = %I64d,\taverage = %I64d)\n", description, deltaNativeBest, deltaNativeAvg / subpass);
- printf("%s Inline best = %I64d,\taverage = %I64d)\n", description, deltaInlineBest, deltaInlineAvg / subpass);
- printf("%s Block best = %I64d,\taverage = %I64d)\n\n", description, deltaBlockBest, deltaBlockAvg / subpass);
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment