Skip to content

Commit

Permalink
Merge pull request #33 from Cyan4973/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
Cyan4973 committed Aug 20, 2015
2 parents ee844cf + dcfccd2 commit 44a6297
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 112 deletions.
4 changes: 2 additions & 2 deletions cmake_unofficial/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ cmake_policy(VERSION 2.6)

project(xxhash)

set(XXHASH_LIB_VERSION "0.41.0")
set(XXHASH_LIB_VERSION "0.42.0")
set(XXHASH_LIB_SOVERSION "0")

set(BUILD_STATIC_LIBS ON CACHE BOOL "Set to ON to build static libraries")
Expand All @@ -18,5 +18,5 @@ set_target_properties(xxhash PROPERTIES
VERSION "${XXHASH_LIB_VERSION}"
SOVERSION "${XXHASH_LIB_SOVERSION}")

INSTALL(FILES xxhash.h DESTINATION "include/xxHash")
INSTALL(FILES ../xxhash.h DESTINATION include)
INSTALL(TARGETS xxhash DESTINATION lib)
53 changes: 39 additions & 14 deletions xxhash.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,26 @@ You can contact the author at :
/**************************************
* Tuning parameters
**************************************/
/* XXH_FORCE_DIRECT_UNALIGNED_MEMORY_ACCESS
* Unaligned memory access is automatically enabled for "common" CPU, such as x86/x64.
* For other CPUs, the compiler will be more cautious, and insert extra code to ensure proper working with unaligned memory accesses.
* If you know your target CPU efficiently supports unaligned memory accesses, you can force this option manually.
* If your CPU efficiently supports unaligned memory accesses and the compiler did not automatically detect it, you will witness a large performance improvement.
* You can also enable this switch from compilation command line / Makefile.
/* XXH_FORCE_MEMORY_ACCESS
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
* Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
* The switch below allows selecting a different access method for improved performance.
* Method 0 (default) : use `memcpy()`. Safe and portable.
* Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
* Method 2 : direct access. This method is portable but violates the C standard.
* It can generate buggy code on targets which generate assembly depending on alignment.
* But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
* See http://stackoverflow.com/a/32095106/646947 for details.
* Prefer these methods in priority order (0 > 1 > 2)
*/
#if !defined(XXH_FORCE_DIRECT_MEMORY_ACCESS) && ( defined(__ARM_FEATURE_UNALIGNED) )
# define XXH_FORCE_DIRECT_MEMORY_ACCESS 1
#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define XXH_FORCE_MEMORY_ACCESS 2
# elif defined(__INTEL_COMPILER) || \
(defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
# define XXH_FORCE_MEMORY_ACCESS 1
# endif
#endif

/* XXH_ACCEPT_NULL_INPUT_POINTER :
Expand All @@ -57,18 +68,18 @@ You can contact the author at :
* By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
* Results are therefore identical for little-endian and big-endian CPU.
* This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
* Should endian-independance be of no importance for your application, you may set the #define below to 1.
* It will improve speed for Big-endian CPU.
* Should endian-independence be of no importance for your application, you may set the #define below to 1,
* to improve speed for Big-endian CPU.
* This option has no impact on Little_Endian CPU.
*/
#define XXH_FORCE_NATIVE_FORMAT 0

/* XXH_USELESS_ALIGN_BRANCH :
* This is a minor performance trick, only useful with lots of very small keys.
* It means : don't make a test between aligned/unaligned, because performance will be the same.
* It avoids one initial branch per hash.
* It saves one initial branch per hash.
*/
#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) || defined(XXH_FORCE_DIRECT_MEMORY_ACCESS)
#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
# define XXH_USELESS_ALIGN_BRANCH 1
#endif

Expand Down Expand Up @@ -125,13 +136,27 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
#endif


#if defined(XXH_FORCE_DIRECT_MEMORY_ACCESS)
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))

/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }

#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;

static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }

#else

/* portable and safe solution. Generally efficient.
* see : http://stackoverflow.com/a/32095106/646947
*/

static U32 XXH_read32(const void* memPtr)
{
U32 val;
Expand All @@ -146,7 +171,7 @@ static U64 XXH_read64(const void* memPtr)
return val;
}

#endif // defined
#endif // XXH_FORCE_DIRECT_MEMORY_ACCESS


/******************************************
Expand Down
146 changes: 50 additions & 96 deletions xxhsum.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,13 @@ static unsigned BMK_isLittleEndian(void)
**************************************/
#define PROGRAM_NAME exename
#define PROGRAM_VERSION ""
static const int g_nbBits = (int)(sizeof(void*)*8);
static const char g_lename[] = "little endian";
static const char g_bename[] = "big endian";
#define ENDIAN_NAME (BMK_isLittleEndian() ? g_lename : g_bename)
#define COMPILED __DATE__
static const char author[] = "Yann Collet";
#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", PROGRAM_NAME, (int)(sizeof(void*)*8), PROGRAM_VERSION, author, COMPILED
#define WELCOME_MESSAGE "%s %s (%i-bits %s), by %s (%s) \n", PROGRAM_NAME, PROGRAM_VERSION, g_nbBits, ENDIAN_NAME, author, COMPILED

#define NBLOOPS 3 /* Default number of benchmark iterations */
#define TIMELOOP 2500 /* Minimum timing per iteration */
Expand Down Expand Up @@ -219,118 +223,64 @@ static U64 BMK_GetFileSize(const char* infilename)
return (U64)statbuf.st_size;
}

typedef void (*hashFunction)(const void* buffer, size_t bufferSize);

/* Note : buffer is supposed malloc'ed, hence aligned */
static void BMK_benchMem(const void* buffer, size_t bufferSize)
static void localXXH32(const void* buffer, size_t bufferSize) { XXH32(buffer, bufferSize, 0); }

static void localXXH64(const void* buffer, size_t bufferSize) { XXH64(buffer, bufferSize, 0); }

static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer, size_t bufferSize)
{
static const int nbh_perloop = 100;
int iterationNb;
double fastestH = 100000000.;

/* XXH32 bench */
DISPLAY("\r%79s\r", ""); /* Clean display line */
if (g_nbIterations<1) g_nbIterations=1;
for (iterationNb = 1; iterationNb <= g_nbIterations; iterationNb++)
{
int iterationNb;
double fastestH = 100000000.;
U32 hashResult = 0;

DISPLAY("\r%79s\r", ""); /* Clean display line */
if (g_nbIterations<1) g_nbIterations=1;
for (iterationNb = 1; iterationNb <= g_nbIterations; iterationNb++)
{
int nbHashes = 0;
int milliTime;
int nbHashes = 0;
int milliTime;

DISPLAY("%1i-%-17.17s : %10i ->\r", iterationNb, "XXH32", (int)bufferSize);
DISPLAY("%1i-%-17.17s : %10i ->\r", iterationNb, hName, (int)bufferSize);

/* Timing loop */
milliTime = BMK_GetMilliStart();
while(BMK_GetMilliStart() == milliTime);
milliTime = BMK_GetMilliStart();
while(BMK_GetMilliSpan(milliTime) < TIMELOOP)
/* Timing loop */
milliTime = BMK_GetMilliStart();
while(BMK_GetMilliStart() == milliTime);
milliTime = BMK_GetMilliStart();
while(BMK_GetMilliSpan(milliTime) < TIMELOOP)
{
int i;
for (i=0; i<nbh_perloop; i++)
{
int i;
for (i=0; i<nbh_perloop; i++)
{
hashResult = XXH32(buffer, bufferSize, 0);
}
nbHashes += nbh_perloop;
h(buffer, bufferSize);
}
milliTime = BMK_GetMilliSpan(milliTime);
if ((double)milliTime < fastestH*nbHashes) fastestH = (double)milliTime/nbHashes;
DISPLAY("%1i-%-17.17s : %10i -> %7.1f MB/s\r", iterationNb, "XXH32", (int)bufferSize, (double)bufferSize / fastestH / 1000.);
nbHashes += nbh_perloop;
}
DISPLAY("%-19.19s : %10i -> %7.1f MB/s 0x%08X\n", "XXH32", (int)bufferSize, (double)bufferSize / fastestH / 1000., hashResult);
milliTime = BMK_GetMilliSpan(milliTime);
if ((double)milliTime < fastestH*nbHashes) fastestH = (double)milliTime/nbHashes;
DISPLAY("%1i-%-17.17s : %10i -> %7.1f MB/s\r", iterationNb, hName, (int)bufferSize, (double)bufferSize / fastestH / 1000.);
}
DISPLAY("%-19.19s : %10i -> %7.1f MB/s \n", hName, (int)bufferSize, (double)bufferSize / fastestH / 1000.);
}


/* Note : buffer is supposed malloc'ed, hence aligned */
static void BMK_benchMem(const void* buffer, size_t bufferSize)
{
/* XXH32 bench */
BMK_benchHash(localXXH32, "XXH32", buffer, bufferSize);

/* Bench XXH32 on Unaligned input */
if (bufferSize>1)
{
int iterationNb;
double fastestH = 100000000.;

DISPLAY("\r%79s\r", ""); /* Clean display line */
for (iterationNb = 1; iterationNb <= g_nbIterations; iterationNb++)
{
int nbHashes = 0;
int milliTime;
const char* charPtr = (const char*)buffer;

DISPLAY("%1i-%-17.17s : %10i ->\r", iterationNb, "(unaligned)", (int)(bufferSize-1));
/* timing loop */
milliTime = BMK_GetMilliStart();
while(BMK_GetMilliStart() == milliTime);
milliTime = BMK_GetMilliStart();
while(BMK_GetMilliSpan(milliTime) < TIMELOOP)
{
int i;
for (i=0; i<nbh_perloop; i++)
{
XXH32(charPtr+1, bufferSize-1, 0);
}
nbHashes += nbh_perloop;
}
milliTime = BMK_GetMilliSpan(milliTime);
if ((double)milliTime < fastestH*nbHashes) fastestH = (double)milliTime/nbHashes;
DISPLAY("%1i-%-17.17s : %10i -> %7.1f MB/s\r", iterationNb, "XXH32 (unaligned)", (int)(bufferSize-1), (double)(bufferSize-1) / fastestH / 1000.);
}
DISPLAY("%-19.19s : %10i -> %7.1f MB/s \n", "XXH32 (unaligned)", (int)(bufferSize-1), (double)(bufferSize-1) / fastestH / 1000.);
}
BMK_benchHash(localXXH32, "XXH32 unaligned", ((const char*)buffer)+1, bufferSize-1);

/* Bench XXH64 */
{
int iterationNb;
double fastestH = 100000000.;
unsigned long long h64 = 0;

DISPLAY("\r%79s\r", ""); /* Clean display line */
for (iterationNb = 1; iterationNb <= g_nbIterations; iterationNb++)
{
int nbHashes = 0;
int milliTime;

DISPLAY("%1i-%-17.17s : %10i ->\r", iterationNb, "XXH64", (int)bufferSize);
BMK_benchHash(localXXH64, "XXH64", buffer, bufferSize);

/* Timing loop */
milliTime = BMK_GetMilliStart();
while(BMK_GetMilliStart() == milliTime);
milliTime = BMK_GetMilliStart();
while(BMK_GetMilliSpan(milliTime) < TIMELOOP)
{
int i;
for (i=0; i<nbh_perloop; i++)
{
h64 = XXH64(buffer, bufferSize, 0);
}
nbHashes+=nbh_perloop;
}
milliTime = BMK_GetMilliSpan(milliTime);
if ((double)milliTime < fastestH*nbHashes) fastestH = (double)milliTime/nbHashes;
DISPLAY("%1i-%-17.17s : %10i -> %7.1f MB/s\r", iterationNb, "XXH64", (int)bufferSize, (double)bufferSize / fastestH / 1000.);
}
{
DISPLAY("%-19.19s : %10i -> %7.1f MB/s 0x", "XXH64", (int)bufferSize, (double)bufferSize / fastestH / 1000.);
DISPLAY("%08X%08X", (U32)(h64 >> 32), (U32)h64);
DISPLAY("\n");
}
}
/* Bench XXH64 on Unaligned input */
if (bufferSize>1)
BMK_benchHash(localXXH64, "XXH64 unaligned", ((const char*)buffer)+1, bufferSize-1);
}


Expand Down Expand Up @@ -703,6 +653,10 @@ int main(int argc, const char** argv)
{
switch(*argument)
{
/* Display version */
case 'V':
DISPLAY(WELCOME_MESSAGE); return 0;

/* Display help on usage */
case 'h':
return usage(exename);
Expand Down

0 comments on commit 44a6297

Please sign in to comment.