Skip to content

Commit

Permalink
Merge pull request #174 from Cyan4973/xxh3
Browse files Browse the repository at this point in the history
XXH3
  • Loading branch information
Cyan4973 authored Mar 15, 2019
2 parents 589e517 + 204255b commit 375d401
Show file tree
Hide file tree
Showing 7 changed files with 1,548 additions and 286 deletions.
66 changes: 57 additions & 9 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,58 @@
language: c
compiler: gcc
script: make -B test-all
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq gcc-arm-linux-gnueabi
- sudo apt-get install -qq clang
- sudo apt-get install -qq g++-multilib
- sudo apt-get install -qq gcc-multilib
- sudo apt-get install -qq cppcheck

matrix:
fast_finish: true
include:

- name: General linux tests (Xenial)
dist: xenial
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq clang
- sudo apt-get install -qq g++-multilib
- sudo apt-get install -qq gcc-multilib
- sudo apt-get install -qq cppcheck
script:
- make -B test-all

- name: Check results consistency on x64
script:
- CPPFLAGS=-DXXH_VECTOR=0 make check # Scalar code path
- make clean
- CPPFLAGS=-DXXH_VECTOR=1 make check # SSE2 code path
- make clean
- CPPFLAGS="-mavx2 -DXXH_VECTOR=2" make check # AVX2 code path

- name: ARM + aarch64 compilation and consistency checks
dist: xenial
install:
- sudo apt-get install -qq
qemu-system-arm
qemu-user-static
gcc-arm-linux-gnueabi
libc6-dev-armel-cross
gcc-aarch64-linux-gnu
libc6-dev-arm64-cross
script:
# arm (32-bit)
- CC=arm-linux-gnueabi-gcc CPPFLAGS=-DXXH_VECTOR=0 LDFLAGS=-static RUN_ENV=qemu-arm-static make check # Scalar code path
- make clean
# Note : the following test (ARM 32-bit + NEON) is disabled for the time being.
# I haven't yet found a way to make it link on Travis CI using gcc cross-compilation.
# NEON code path is fortunately validated through `aarch64` below.
# - CC=arm-linux-gnueabi-gcc CPPFLAGS=-DXXH_VECTOR=3 CFLAGS="-O3 -march=armv7-a -mfloat-abi=hard -mfpu=neon" LDFLAGS=-static RUN_ENV=qemu-arm-static make check # NEON code path
- make clean
# aarch64
- CC=aarch64-linux-gnu-gcc CPPFLAGS=-DXXH_VECTOR=0 LDFLAGS=-static RUN_ENV=qemu-aarch64-static make check # Scalar code path
- make clean
- CC=aarch64-linux-gnu-gcc CPPFLAGS=-DXXH_VECTOR=3 LDFLAGS=-static RUN_ENV=qemu-aarch64-static make check # NEON code path
- make clean

- name: PowerPC + PPC64 compilation and consistency checks
install:
- sudo apt-get install -qq qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu
script:
- CC=powerpc-linux-gnu-gcc RUN_ENV=qemu-ppc-static CPPFLAGS=-m32 LDFLAGS=-static make check # Only scalar code path available
- make clean
- CC=powerpc-linux-gnu-gcc RUN_ENV=qemu-ppc64-static CFLAGS="-O3 -m64" LDFLAGS="-static -m64" make check # Only scalar code path available
- make clean
31 changes: 16 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,8 @@ LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
LIBVER := $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)

# SSE4 detection
HAVE_SSE4 := $(shell $(CC) -dM -E - < /dev/null | grep "SSE4" > /dev/null && echo 1 || echo 0)
ifeq ($(HAVE_SSE4), 1)
NOSSE4 := -mno-sse4
else
NOSSE4 :=
endif

CFLAGS ?= -O2 $(NOSSE4) # disables potential auto-vectorization
DEBUGFLAGS+=-Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
CFLAGS ?= -O3
DEBUGFLAGS+=-Wall -Wextra -Wconversion -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
-Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
Expand Down Expand Up @@ -90,6 +82,10 @@ xxhsum32: CFLAGS += -m32
xxhsum32: xxhash.c xxhsum.c
$(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT)

xxhash.o: xxhash.h xxh3.h

xxhsum.o: xxhash.h

.PHONY: xxhsum_and_links
xxhsum_and_links: xxhsum xxh32sum xxh64sum

Expand Down Expand Up @@ -122,18 +118,22 @@ libxxhash : $(LIBXXH)
lib: libxxhash.a libxxhash


# =================================================
# tests
# =================================================

# make check can be run with cross-compiled binaries on emulated environments (qemu user mode)
# by setting $(RUN_ENV) to the target emulation environment
.PHONY: check
check: xxhsum
# stdin
./xxhsum < xxhash.c
$(RUN_ENV) ./xxhsum < xxhash.c
# multiple files
./xxhsum xxhash.* xxhsum.*
$(RUN_ENV) ./xxhsum xxhash.* xxhsum.*
# internal bench
./xxhsum -bi1
$(RUN_ENV) ./xxhsum -bi1
# file bench
./xxhsum -bi1 xxhash.c
$(RUN_ENV) ./xxhsum -bi1 xxhash.c

.PHONY: test-mem
test-mem: xxhsum
Expand Down Expand Up @@ -226,7 +226,8 @@ preview-man: clean-man man

test: all namespaceTest check test-xxhsum-c c90test

test-all: test test32 armtest clangtest cxxtest usan listL120 trailingWhitespace staticAnalyze cppcheck
test-all: CFLAGS += -Werror
test-all: test test32 clangtest cxxtest usan listL120 trailingWhitespace staticAnalyze

.PHONY: listL120
listL120: # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility)
Expand Down
44 changes: 33 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,6 @@ they modify xxhash behavior. They are all disabled by default.
- `XXH_CPU_LITTLE_ENDIAN` : by default, endianness is determined at compile time.
It's possible to skip auto-detection and force format to little-endian, by setting this macro to 1.
Setting it to 0 forces big-endian.
- `XXH_FORCE_NATIVE_FORMAT` : on big-endian systems : use native number representation.
Breaks consistency with little-endian results.
- `XXH_PRIVATE_API` : same impact as `XXH_INLINE_ALL`.
The name underlines that symbols will not be published on the library's public interface.
- `XXH_NAMESPACE` : prefix all symbols with the value of `XXH_NAMESPACE`.
Expand All @@ -100,7 +98,7 @@ they modify xxhash behavior. They are all disabled by default.

Calling the xxhash 64-bit variant from a C program :

```c
```C
#include "xxhash.h"

unsigned long long calcul_hash(const void* buffer, size_t length)
Expand All @@ -112,42 +110,66 @@ unsigned long long calcul_hash(const void* buffer, size_t length)
```
Using the streaming variant is more involved, but makes it possible to provide data in multiple rounds :
```c
```C
#include "stdlib.h" /* abort() */
#include "xxhash.h"
unsigned long long calcul_hash_streaming(someCustomType handler)
{
/* create a hash state */
XXH64_state_t* const state = XXH64_createState();
if (state==NULL) abort();
size_t const bufferSize = SOME_VALUE;
size_t const bufferSize = SOME_SIZE;
void* const buffer = malloc(bufferSize);
if (buffer==NULL) abort();
/* Initialize state with selected seed */
unsigned long long const seed = 0; /* or any other value */
XXH_errorcode const resetResult = XXH64_reset(state, seed);
if (resetResult == XXH_ERROR) abort();
/* Feed the state with input data, any size, any number of times */
(...)
while ( /* any condition */ ) {
size_t const length = get_more_data(buffer, bufferSize, handler); /* undescribed */
XXH_errorcode const addResult = XXH64_update(state, buffer, length);
if (addResult == XXH_ERROR) abort();
size_t const length = get_more_data(buffer, bufferSize, handler);
XXH_errorcode const updateResult = XXH64_update(state, buffer, length);
if (updateResult == XXH_ERROR) abort();
(...)
}
(...)
unsigned long long const hash = XXH64_digest(state);
/* Get the hash */
XXH64_hash_t const hash = XXH64_digest(state);
/* State can then be re-used; in this example, it is simply freed */
free(buffer);
XXH64_freeState(state);
return hash;
return (unsigned long long)hash;
}
```

### New experimental hash algorithm

Starting with `v0.7.0`, the library includes a new algorithm, named `XXH3`,
able to generate 64-bit and 128-bit hashes.

The new algorithm is much faster than its predecessors,
for both long and small inputs,
as can be observed in the following graphs :

![XXH3, bargraph](https://github.com/Cyan4973/xxHash/releases/download/graphs/H_bandwidth_bargraph.png)

![XXH3, latency, random size](https://github.com/Cyan4973/xxHash/releases/download/graphs/H_latency_randomS.png)

The algorithm is currently labelled experimental, as it may change in a future version.
To access it, one needs to unlock its declaration using the macro `XXH_STATIC_LINKING_ONLY`.
It can be used for ephemeral data, and for tests, but avoid storing long-term hash values yet.
`XXH3` will be stabilized in a future version.
This period will be used to collect users' feedback.


### Other programming languages

Expand Down
Loading

0 comments on commit 375d401

Please sign in to comment.