- added functions to get the index of the first or last bit set in
authorAndrei Pelinescu-Onciul <andrei@iptel.org>
Mon, 25 Jun 2007 17:20:34 +0000 (17:20 +0000)
committerAndrei Pelinescu-Onciul <andrei@iptel.org>
Mon, 25 Jun 2007 17:20:34 +0000 (17:20 +0000)
a 32 bit or 64 bit int: bit_scan_forward32(), bit_scan_forward64(),
 bit_scan_reverse32(), bit_scan_reverse64(), bit_scan_forward(long) and
  bit_scan_reverse(long). All of them are very fast, they use asm
   if available (for now only for __CPU_x86 and __CPU_x86_64), and fall back
  to a de Bruijn based method or binary search (depending on which method
  was faster in my measurements on a particular cpu).
- added test/profile.h - simple measure the cpu cycles between two calls
 functions (for now support for x86, x86_64 and sparc64)

bit_scan.c [new file with mode: 0644]
bit_scan.h [new file with mode: 0644]
test/bit_scan_test.c [new file with mode: 0644]
test/profile.h [new file with mode: 0644]

diff --git a/bit_scan.c b/bit_scan.c
new file mode 100644 (file)
index 0000000..9e51f5f
--- /dev/null
@@ -0,0 +1,36 @@
+/* 
+ * $Id$
+ * 
+ * Copyright (C) 2007 iptelorg GmbH
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ *  bit scan operations, see bit_scan.h.
+ */
+/* 
+ * History:
+ * --------
+ *  2007-06-23  created by andrei
+ */
+
+#include "bit_scan.h"
+
+int _debruijn_hash32[32]={0, 1, 2, 6, 3, 11, 7, 16, 4, 14, 12, 21, 8,
+       23, 17, 26, 31, 5, 10, 15, 13, 20, 22, 25, 30, 9, 19, 24, 29, 18, 28, 27 };
+
+int _debruijn_hash64[64]={0, 1, 2, 7, 3, 13, 8, 19, 4, 25, 14, 28, 9,
+       34, 20, 40, 5, 17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57,
+       63, 6, 12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56, 62, 11, 23,
+       32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58 };
+
diff --git a/bit_scan.h b/bit_scan.h
new file mode 100644 (file)
index 0000000..a9bdfa9
--- /dev/null
@@ -0,0 +1,411 @@
+/* 
+ * $Id$
+ * 
+ * Copyright (C) 2007 iptelorg GmbH
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ *  bit scan operations
+ *  int bit_scan_forward(unsigned long v)   - returns the index of the first
+ *                                          set bit (undefined value if v==0)
+ *  int bit_scan_forward32(unsigned int v)   - returns the index of the first
+ *                                          set bit (undefined value if v==0)
+ *  int bit_scan_forward64(long long v)      - returns the index of the first
+ *                                          set bit (undefined value if v==0)
+ *  int bit_scan_reverse(unsigned long v)   - returns the index of the last
+ *                                          set bit (undefined value if v==0)
+ *  int bit_scan_reverse32(unsigned int v)  - returns the index of the last
+ *                                          set bit (undefined value if v==0)
+ *  int bit_scan_reverse64(long long v)     - returns the index of the last
+ *                                          set bit (undefined value if v==0)
+ *
+ * Config defines:   CC_GCC_LIKE_ASM  - the compiler support gcc style
+ *                     inline asm,
+ *                  __CPU_x86, __CPU_x86_64,
+ *                  ULONG_MAX (limits.h)
+ */
+/* 
+ * History:
+ * --------
+ *  2007-06-23  created by andrei
+ */
+
+#ifndef _bit_scan_h
+#define _bit_scan_h
+
+#include <limits.h>
+
+
+#ifdef CC_GCC_LIKE_ASM
+#if defined __CPU_x86 || defined __CPU_x86_64
+#define BIT_SCAN_ASM
+#endif
+#endif
+
+
+/* set default bitscan versions, depending on the architecture
+ * In general the order is  asm, debruijn, br, slow for bit_scan_forward
+ *  and asm, br, slow, debruijn for bit_scan_reverse. */
+#ifdef BIT_SCAN_ASM
+/* have asm => use it */
+
+#define bit_scan_forward32(i)  bit_scan_forward_asm32(i)
+#define bit_scan_forward64(i)  bit_scan_forward_asm64(i)
+#define bit_scan_reverse32(i)  bit_scan_reverse_asm32(i)
+#define bit_scan_reverse64(i)  bit_scan_reverse_asm64(i)
+
+#elif defined __CPU_x86 || defined __CPU_x86_64
+/* no asm (e.g. no CC_GCC_LIKE_ASM) => debruijn for bit_scan_forward and
+ *  br for bit_scan_reverse */
+/* make sure debruijn an branch version are enabled */
+#ifndef BIT_SCAN_DEBRUIJN
+#define BIT_SCAN_DEBRUIJN
+#endif
+#ifndef BIT_SCAN_BRANCH
+#define BIT_SCAN_BRANCH
+#endif
+
+#define bit_scan_forward32(i)  bit_scan_forward_debruijn32(i)
+#define bit_scan_forward64(i)  bit_scan_forward_debruijn64(i)
+#define bit_scan_reverse32(i)  bit_scan_reverse_br32(i)
+#define bit_scan_reverse64(i)  bit_scan_reverse_br64(i)
+
+#elif defined __CPU_sparc64
+/* no asm yet => use branch for everything in 64 bit mode
+ *               and debruijn + branch in 32 bit mode
+ *  (in 64bit mode the branch method is slightly faster then debruijn,
+ *   however note that in 32 bit mode the roles are reversed for _forward)*/
+#ifndef BIT_SCAN_BRANCH
+#define BIT_SCAN_BRANCH
+#endif
+
+#define bit_scan_reverse32(i)  bit_scan_reverse_br32(i)
+#define bit_scan_reverse64(i)  bit_scan_reverse_br64(i)
+#ifdef LP64
+#define bit_scan_forward32(i)  bit_scan_forward_br32(i)
+#define bit_scan_forward64(i)  bit_scan_forward_br64(i)
+#else /* LP64 */
+
+#ifndef BIT_SCAN_DEBRUIJN
+#define BIT_SCAN_DEBRUIJN
+#endif
+#define bit_scan_forward32(i)  bit_scan_forward_debruijn32(i)
+#define bit_scan_forward64(i)  bit_scan_forward_debruijn64(i)
+#endif /* LP64 */
+
+#else /* __CPU_XXX */
+/* default - like x86 no asm */
+/* make sure debruijn an branch version are enabled */
+#ifndef BIT_SCAN_DEBRUIJN
+#define BIT_SCAN_DEBRUIJN
+#endif
+#ifndef BIT_SCAN_BRANCH
+#define BIT_SCAN_BRANCH
+#endif
+
+#define bit_scan_forward32(i)  bit_scan_forward_debruijn32(i)
+#define bit_scan_forward64(i)  bit_scan_forward_debruijn64(i)
+#define bit_scan_reverse32(i)  bit_scan_reverse_br32(i)
+#define bit_scan_reverse64(i)  bit_scan_reverse_br64(i)
+
+#endif /* __CPU_XXX */
+
+
+/* try to use the right version for bit_scan_forward(unisgned long l)
+ */
+#if (defined (ULONG_MAX) && ULONG_MAX > 4294967295) || defined LP64
+/* long is 64 bits */
+#define bit_scan_forward(l)    bit_scan_forward64((unsigned long long)(l))
+#define bit_scan_reverse(l)    bit_scan_reverse64((unsigned long long)(l))
+
+#else
+/* long is 32 bits */
+#define bit_scan_forward(l)    bit_scan_forward32((l))
+#define bit_scan_reverse(l)    bit_scan_reverse32((l))
+#endif
+
+
+
+
+#ifdef BIT_SCAN_DEBRUIJN
+
+/* use a de Bruijn sequence to get the index of the set bit for a number
+ *  of the form 2^k (DEBRUIJN_HASH32() and DEBRUIJN_HASH64()).
+ *  bit_scan_forward & bit_scan_reverse would need first to convert
+ *  the argument to 2^k (where k is the first set bit or last set bit index)-
+ *  For bit_scan_forward this can be done very fast using x & (-x).
+ *  For more info about this method see:
+ *  http://citeseer.ist.psu.edu/leiserson98using.html
+ *  ("Using de Bruijn Sequences to Index a 1 in a Computer Word")
+ */
+
+extern int _debruijn_hash32[32]; /* see bit_scan.c */
+extern int _debruijn_hash64[64]; /* see bit_scan.c */
+
+#define DEBRUIJN_CT32  0x04653ADFU
+#define DEBRUIJN_CT64  0x0218A392CD3D5DBFULL 
+
+#define DEBRUIJN_HASH32(x)\
+       (((x)*DEBRUIJN_CT32)>>(sizeof(x)*8-5))
+
+#define DEBRUIJN_HASH64(x)\
+       (((x)*DEBRUIJN_CT64)>>(sizeof(x)*8-6))
+
+#define bit_scan_forward_debruijn32(x) \
+       ( _debruijn_hash32[DEBRUIJN_HASH32((x) & (-(x)))])
+
+#define bit_scan_forward_debruijn64(x) \
+       ( _debruijn_hash64[DEBRUIJN_HASH64((x) & (-(x)))])
+
+
+static inline int bit_scan_reverse_debruijn32(unsigned int v)
+{
+       unsigned int last;
+       
+       do{
+               last=v;
+               v=v&(v-1);
+       }while(v); /* => last is 2^k */
+       return _debruijn_hash32[DEBRUIJN_HASH32(last)];
+}
+
+
+static inline int bit_scan_reverse_debruijn64(unsigned long long v)
+{
+       unsigned long long last;
+       
+       do{
+               last=v;
+               v=v&(v-1);
+       }while(v); /* => last is 2^k */
+       return _debruijn_hash64[DEBRUIJN_HASH64(last)];
+}
+
+
+#endif /* BIT_SCAN_DEBRUIJN */
+
+#ifdef BIT_SCAN_SLOW
+/* only for reference purposes (testing the other versions against it) */
+
+static inline int bit_scan_forward_slow32(unsigned int v)
+{
+       int r;
+       for(r=0; r<(sizeof(v)*8); r++, v>>=1)
+               if (v&1) return r;
+       return 0;
+}
+
+
+static inline int bit_scan_reverse_slow32(unsigned int v)
+{
+       int r;
+       for(r=sizeof(v)*8-1; r>0; r--, v<<=1)
+               if (v& (1UL<<(sizeof(v)*8-1))) return r;
+       return 0;
+}
+
+
+static inline int bit_scan_forward_slow64(unsigned long long v)
+{
+       int r;
+       for(r=0; r<(sizeof(v)*8); r++, v>>=1)
+               if (v&1ULL) return r;
+       return 0;
+}
+
+
+static inline int bit_scan_reverse_slow64(unsigned long long v)
+{
+       int r;
+       for(r=sizeof(v)*8-1; r>0; r--, v<<=1)
+               if (v& (1ULL<<(sizeof(v)*8-1))) return r;
+       return 0;
+}
+
+
+#endif /* BIT_SCAN_SLOW */
+
+
+#ifdef BIT_SCAN_BRANCH
+
+static inline int bit_scan_forward_br32(unsigned int v)
+{
+       int b;
+       
+       b=0;
+       if (v&0x01)
+               return 0;
+       if (!(v & 0xffff)){
+               b+=16;
+               v>>=16;
+       }
+       if (!(v&0xff)){
+               b+=8;
+               v>>=8;
+       }
+       if (!(v&0x0f)){
+               b+=4;
+               v>>=4;
+       }
+       if (!(v&0x03)){
+               b+=2;
+               v>>=2;
+       }
+       b+= !(v&0x01);
+       return b;
+}
+
+
+static inline int bit_scan_reverse_br32(unsigned int v)
+{
+       int b;
+       
+       b=0;
+       if (v & 0xffff0000){
+               b+=16;
+               v>>=16;
+       }
+       if (v&0xff00){
+               b+=8;
+               v>>=8;
+       }
+       if (v&0xf0){
+               b+=4;
+               v>>=4;
+       }
+       if (v&0x0c){
+               b+=2;
+               v>>=2;
+       }
+       b+= !!(v&0x02);
+       return b;
+}
+
+
+static inline int bit_scan_forward_br64(unsigned long long v)
+{
+       int b;
+       
+       b=0;
+       if (v&0x01ULL)
+               return 0;
+       if (!(v & 0xffffffffULL)){
+               b+=32;
+               v>>=32;
+       }
+       if (!(v & 0xffffULL)){
+               b+=16;
+               v>>=16;
+       }
+       if (!(v&0xffULL)){
+               b+=8;
+               v>>=8;
+       }
+       if (!(v&0x0fULL)){
+               b+=4;
+               v>>=4;
+       }
+       if (!(v&0x03ULL)){
+               b+=2;
+               v>>=2;
+       }
+       b+= !(v&0x01ULL);
+       return b;
+}
+
+
+static inline int bit_scan_reverse_br64(unsigned long long v)
+{
+       int b;
+       
+       b=0;
+       if (v & 0xffffffff00000000ULL){
+               b+=32;
+               v>>=32;
+       }
+       if (v & 0xffff0000ULL){
+               b+=16;
+               v>>=16;
+       }
+       if (v&0xff00ULL){
+               b+=8;
+               v>>=8;
+       }
+       if (v&0xf0ULL){
+               b+=4;
+               v>>=4;
+       }
+       if (v&0x0cULL){
+               b+=2;
+               v>>=2;
+       }
+       b+= !!(v&0x02ULL);
+       return b;
+}
+#endif  /* BIT_SCAN_BRANCH */
+
+
+
+#ifdef BIT_SCAN_ASM
+#if defined __CPU_x86 || defined __CPU_x86_64
+#define HAS_BIT_SCAN_ASM
+
+static inline int bit_scan_forward_asm32(unsigned int v)
+{
+       int r;
+       asm volatile(" bsfl %1, %0": "=r"(r): "rm"(v) );
+       return r;
+}
+
+static inline int bit_scan_reverse_asm32(unsigned int v)
+{
+       int r;
+       asm volatile(" bsrl %1, %0": "=r"(r): "rm"(v) );
+       return r;
+}
+
+#ifdef __CPU_x86_64
+static inline int bit_scan_forward_asm64(unsigned long long v)
+{
+       long r;
+       asm volatile(" bsfq %1, %0": "=r"(r): "rm"(v) );
+       return r;
+}
+
+static inline int bit_scan_reverse_asm64(unsigned long long v)
+{
+       long r;
+       asm volatile(" bsrq %1, %0": "=r"(r): "rm"(v) );
+       return r;
+}
+#else
+static inline int bit_scan_forward_asm64(unsigned long long v)
+{
+       if ((unsigned int)v)
+               return bit_scan_forward_asm32((unsigned int)v);
+       return 32+bit_scan_forward_asm32(*(((unsigned int*)(void*)&v)+1));
+}
+
+static inline int bit_scan_reverse_asm64(unsigned long long v)
+{
+       if (v & 0xffffffff00000000ULL)
+               return 32+bit_scan_reverse_asm32(*(((unsigned int*)(void*)&v)+1));
+       return bit_scan_reverse_asm32((unsigned int)v);
+}
+#endif /* __CPU_x86_64 */
+
+#endif /* __CPU_x86 || __CPU_x86_64 */
+#endif /* BIT_SCAN_ASM */
+
+#endif
diff --git a/test/bit_scan_test.c b/test/bit_scan_test.c
new file mode 100644 (file)
index 0000000..206a722
--- /dev/null
@@ -0,0 +1,207 @@
+/* $Id$
+ * 
+ * test bit_scan operations from bit_scan.h
+ *  (both for correctness  and speed)
+ * 
+ * Copyright (C) 2007 iptelorg GmbH
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* 
+ * Example gcc command line:
+ *  gcc -O9 -Wall -DCC_GCC_LIKE_ASM  -D__CPU_x86 bit_scan_test.c ../bit_scan.c
+ *      -o bit_scan_test
+ *
+ * History:
+ * --------
+ *  2007-06-23  created by andrei
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+
+#define BIT_SCAN_DEBRUIJN
+#define BIT_SCAN_BRANCH
+#define BIT_SCAN_SLOW
+
+#include "../bit_scan.h"
+#ifdef NO_PROFILE
+#define profile_init(x,y)  do{}while(0)
+#define profile_start(x)  do{}while(0)
+#define profile_end(x)  do{}while(0)
+#define PROFILE_PRINT(x) do{}while(0)
+#else
+#include "profile.h"
+#endif
+
+#define CHECK(txt, v1, val, f, pd) \
+       do{ \
+               unsigned long long ret; \
+               profile_start(pd); \
+               ret=(unsigned long long)f(val); \
+               profile_end(pd); \
+               if ((unsigned long long)v1!=ret){ \
+                       fprintf(stderr, "ERROR:" #f ": %s, expected %llx (%llx), got"\
+                                       " %llx\n", \
+                                       (txt), (unsigned long long)v1, \
+                                       (unsigned long long)val, ret); \
+                       exit(-1); \
+               } \
+       }while(0)
+
+#ifndef PROFILE_PRINT
+#define PROFILE_PRINT(pd) \
+       do{ \
+               printf("profile: %s (%ld/%ld) total %llu max %llu average %llu\n", \
+                               (pd)->name,  (pd)->entries, (pd)->exits, \
+                               (pd)->total_cycles,  (pd)->max_cycles, \
+                               (pd)->entries? \
+                               (pd)->total_cycles/(unsigned long long)(pd)->entries:0ULL ); \
+       }while(0)
+#endif
+
+int main(int argc, char** argv)
+{
+       int r;
+       unsigned int v;
+       unsigned long long ll;
+       int i;
+#ifndef NO_PROFILE
+       struct profile_data pdf1, pdf2, pdf4, pdf5, pdf6, pdf8;
+       struct profile_data pdl1, pdl2, pdl4, pdl5, pdl6, pdl8;
+#ifdef HAS_BIT_SCAN_ASM
+       struct profile_data pdf3, pdf7, pdl3, pdl7;
+#endif
+       struct profile_data pdf_32, pdf_64, pdl_32, pdl_64;
+       struct profile_data pdf_long, pdl_long;
+#endif /* NO_PROFILE */
+       
+       profile_init(&pdf1, "first_debruijn32");
+       profile_init(&pdf2, "first_slow32");
+#ifdef HAS_BIT_SCAN_ASM
+       profile_init(&pdf3, "first_asm32");
+#endif
+       profile_init(&pdf4, "first_br32");
+       profile_init(&pdf5, "first_debruijn64");
+       profile_init(&pdf6, "first_slow64");
+#ifdef HAS_BIT_SCAN_ASM
+       profile_init(&pdf7, "first_asm64");
+#endif
+       profile_init(&pdf8, "first_br64");
+       profile_init(&pdl1, "last_debruijn32");
+       profile_init(&pdl2, "last_slow32");
+#ifdef HAS_BIT_SCAN_ASM
+       profile_init(&pdl3, "last_asm32");
+#endif
+       profile_init(&pdl4, "last_br32");
+       profile_init(&pdl5, "last_debruijn64");
+       profile_init(&pdl6, "last_slow64");
+#ifdef HAS_BIT_SCAN_ASM
+       profile_init(&pdl7, "last_asm64");
+#endif
+       profile_init(&pdl8, "last_br64");
+       
+       profile_init(&pdf_32, "scan_forward32");
+       profile_init(&pdf_64, "scan_forward64");
+       profile_init(&pdl_32, "scan_reverse32");
+       profile_init(&pdl_64, "scan_reverse64");
+       profile_init(&pdf_long, "scan_forward_l");
+       profile_init(&pdl_long, "scan_reverse_l");
+
+
+       for (i=0; i<100; i++){
+       for (r=0; r<32; r++){
+               v=(1U<<r);
+               CHECK("first debruijn 32bit", r, v, bit_scan_forward_debruijn32, &pdf1);
+               CHECK("first slow 32bit", r, v, bit_scan_forward_slow32, &pdf2);
+#ifdef HAS_BIT_SCAN_ASM
+               CHECK("first asm 32bit", r, v, bit_scan_forward_asm32, &pdf3);
+#endif
+               CHECK("first br 32bit", r, v, bit_scan_forward_br32, &pdf4);
+               CHECK("scan_forward32", r, v, bit_scan_forward32, &pdf_32);
+               if (sizeof(long)<=4){
+                       CHECK("scan_forward_l", r, v, bit_scan_forward, &pdf_long);
+               }
+               v+=(v-1);
+               CHECK("last debruijn 32bit", r, v, bit_scan_reverse_debruijn32, &pdl1);
+               CHECK("last slow 32bit", r, v, bit_scan_reverse_slow32, &pdl2);
+#ifdef HAS_BIT_SCAN_ASM
+               CHECK("last asm 32bit", r, v, bit_scan_reverse_asm32, &pdl3);
+#endif
+               CHECK("last br 32bit", r, v, bit_scan_reverse_br32, &pdl4);
+               CHECK("scan_reverse32", r, v, bit_scan_reverse32, &pdl_32);
+               if (sizeof(long)<=4){
+                       CHECK("scan_reverse_l", r, v, bit_scan_reverse, &pdl_long);
+               }
+       }
+       for (r=0; r<64; r++){
+               ll=(1ULL<<r);
+               CHECK("first debruijn 64bit", r, ll, bit_scan_forward_debruijn64, &pdf5);
+               CHECK("first slow 64bit", r, ll, bit_scan_forward_slow64, &pdf6);
+#ifdef HAS_BIT_SCAN_ASM
+               CHECK("first asm 64bit", r, ll, bit_scan_forward_asm64, &pdf7);
+#endif
+               CHECK("first br 64bit", r, ll, bit_scan_forward_br64, &pdf8);
+               CHECK("scan_forward64", r, ll, bit_scan_forward64, &pdf_64);
+               if (sizeof(long)>4){
+                       CHECK("scan_forward_l", r, ll, bit_scan_forward, &pdf_long);
+               }
+               ll+=ll-1;
+               CHECK("last debruijn 64bit", r, ll, bit_scan_reverse_debruijn64, &pdl5);
+               CHECK("last slow 64bit", r, ll, bit_scan_reverse_slow64, &pdl6);
+#ifdef HAS_BIT_SCAN_ASM
+               CHECK("last asm 64bit", r, ll, bit_scan_reverse_asm64, &pdl7);
+#endif
+               CHECK("last br 64bit", r, ll, bit_scan_reverse_br64, &pdl8);
+               CHECK("scan_reverse64", r, ll, bit_scan_reverse64, &pdl_64);
+               if (sizeof(long)>4){
+                       CHECK("scan_reverse_l", r, ll, bit_scan_reverse, &pdl_long);
+               }
+       }
+       }
+
+       PROFILE_PRINT(&pdf1);
+       PROFILE_PRINT(&pdf2);
+#ifdef HAS_BIT_SCAN_ASM
+       PROFILE_PRINT(&pdf3);
+#endif
+       PROFILE_PRINT(&pdf4);
+       PROFILE_PRINT(&pdl1);
+       PROFILE_PRINT(&pdl2);
+#ifdef HAS_BIT_SCAN_ASM
+       PROFILE_PRINT(&pdl3);
+#endif
+       PROFILE_PRINT(&pdl4);
+       PROFILE_PRINT(&pdf5);
+       PROFILE_PRINT(&pdf6);
+#ifdef HAS_BIT_SCAN_ASM
+       PROFILE_PRINT(&pdf7);
+#endif
+       PROFILE_PRINT(&pdf8);
+       PROFILE_PRINT(&pdl5);
+       PROFILE_PRINT(&pdl6);
+#ifdef HAS_BIT_SCAN_ASM
+       PROFILE_PRINT(&pdl7);
+#endif
+       PROFILE_PRINT(&pdl8);
+       
+       PROFILE_PRINT(&pdf_32);
+       PROFILE_PRINT(&pdf_64);
+       PROFILE_PRINT(&pdf_long);
+       PROFILE_PRINT(&pdl_32);
+       PROFILE_PRINT(&pdl_64);
+       PROFILE_PRINT(&pdl_long);
+       return 0;
+}
diff --git a/test/profile.h b/test/profile.h
new file mode 100644 (file)
index 0000000..e63c9ad
--- /dev/null
@@ -0,0 +1,188 @@
+/*
+ * $Id$
+ * 
+ * Copyright (C) 2007 iptelorg GmbH
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Basic profile using the cpu cycle counter
+ *
+ * cycles_t - an unsigned interger type used for storing the cpu cycles
+ *            (unsigned long long for now)
+ *
+ * cycles_t get_cpu_cycles() - returns the current cpu cycles counter
+ *
+ * void     get_cpu_cycles_uint(unsigned* u1, unsigned* u2) 
+ *                            - sets u1 and u2 to the least significant, 
+ *                              respective most significant 32 bit word of
+ *                              the cpu cycles counter
+ * struct profile_data;            - holds all the profile results
+ *                               (last call cycles, max cycles, total cycles,
+ *                                no. of profile_start calls, no. of 
+ *                                profile_end calls, name use in profile_init)
+ * void     profile_init(pd, name) - intialize a profile structure
+ * void     profile_start(pd)      - starts profiling (call before calling
+ *                               the target function)
+ * void     profile_end(pd)        - stops profiling (call after the target
+ *                               function returns)
+ * 
+ */
+ /*
+ * Config defines:   CC_GCC_LIKE_ASM  - the compiler support gcc style
+ *                     inline asm,
+ *                  __CPU_x86, __CPU_x86_64, __CPU_sparc64
+ */
+/* 
+ * History:
+ * --------
+ *  2007-06-23  created by andrei
+ */
+
+
+
+
+#ifndef _profile_h
+#define _profile_h
+
+#include <string.h>
+
+/*
+ * cycles_t - an unsigned interger type used for storing the cpu cycles
+ *            (unsigned long long for now)
+ *
+ * cycles_t get_cpu_cycles() - returns the current cpu cycles counter
+ * void     get_cpu_cycles_uint(unsigned* u1, unsigned* u2) 
+ *                            - sets u1 and u2 to the least significant, 
+ *                              respective most significant 32 bit word of
+ *                              the cpu cycles counter
+ */
+
+#ifdef __CPU_x86
+typedef unsigned long long cycles_t;
+
+inline static cycles_t get_cpu_cycles()
+{
+       cycles_t r;
+       asm volatile( "rdtsc \n\t" : "=A"(r));
+       return r;
+}
+
+#define get_cpu_cycles_uint(u1, u2) \
+       do{ \
+               /* result in edx:eax */ \
+               asm volatile( "rdtsc \n\t" : "=a"(*(u1)), "=d"(*(u2))); \
+       }while(0)
+
+#elif defined __CPU_x86_64
+typedef unsigned long long cycles_t;
+
+inline static cycles_t get_cpu_cycles()
+{
+       unsigned int u1, u2;
+       asm volatile( "rdtsc \n\t" : "=a"(u1), "=d"(u2));
+       return ((cycles_t)u2<<32ULL)|u1;
+}
+
+
+#define get_cpu_cycles_uint(u1, u2) \
+       do{ \
+               /* result in edx:eax */ \
+               asm volatile( "rdtsc \n\t" : "=a"(*(u1)), "=d"(*(u2))); \
+       }while(0)
+
+#elif defined __CPU_sparc64
+
+typedef unsigned long long cycles_t;
+
+inline static cycles_t get_cpu_cycles()
+{
+#if ! defined(_LP64)
+#warning "ilp32 mode "
+       struct uint_64{
+               unsigned int u2;
+               unsigned int u1;
+       };
+       union{
+               cycles_t c;
+               struct uint_64 u;
+       }r;
+       
+       asm volatile("rd %%tick, %0 \n\t"
+                                "srlx %0, 32, %1 \n\t"
+                               : "=r"(r.u.u1), "=r"(r.u.u2));
+       return r.c;
+#else
+       cycles_t r;
+       /* normal 64 bit mode (e.g. gcc -m64) */
+       asm volatile("rd %%tick, %0" : "=r"(r));
+       return r;
+#endif
+}
+inline static void  get_cpu_cycles_uint(unsigned int* u1, unsigned int* u2)
+{
+       cycles_t r;
+       asm volatile("rd %%tick, %0" : "=r"(r));
+       *u1=(unsigned int)r;
+       *u2=(unsigned int)(r>>32);
+}
+
+#else /* __CPU_xxx */
+#error "no get_cycles support for this CPU"
+#endif /* __CPU_xxx */
+
+
+union profile_cycles{
+       cycles_t c;
+       struct{
+               unsigned int u1;
+               unsigned int u2;
+       }uint;
+};
+
+struct profile_data{
+       cycles_t cycles;  /* last call */
+       cycles_t total_cycles;
+       cycles_t max_cycles;
+       unsigned long entries; /* no. profile_start calls */
+       unsigned long exits;   /* no. profile_end calls */
+       char * name;
+       
+       /* private stuff */
+       union profile_cycles init_rdtsc;
+};
+
+inline static void profile_init(struct profile_data* pd, char *name)
+{
+       memset(pd, 0, sizeof(*pd));
+       pd->name=name;
+}
+
+
+inline static void profile_start(struct profile_data* pd)
+{
+       pd->entries++;
+       pd->init_rdtsc.c=get_cpu_cycles();
+}
+
+
+inline static void profile_end(struct profile_data* pd)
+{
+       pd->cycles=get_cpu_cycles()-pd->init_rdtsc.c;
+       if (pd->max_cycles<pd->cycles) pd->max_cycles=pd->cycles;
+       pd->total_cycles+=pd->cycles;
+       pd->exits++;
+}
+
+
+#endif