diff --git a/src/Android.mk b/src/Android.mk index cd6be9d..4a54afd 100644 --- a/src/Android.mk +++ b/src/Android.mk @@ -2,17 +2,7 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) LOCAL_MODULE := aEdax # should be renamed to lib..aEdax..so afterwords LOCAL_CFLAGS += -DUNICODE -<<<<<<< HEAD -<<<<<<< HEAD LOCAL_SRC_FILES := all.c board_sse.c.neon eval_sse.c.neon flip_neon_bitscan.c.neon android/cpu-features.c LOCAL_ARM_NEON := false -======= -LOCAL_SRC_FILES := all.c -# LOCAL_ARM_NEON := true ->>>>>>> f2da03e (Refine arm builds adding neon support.) -======= -LOCAL_SRC_FILES := all.c board_sse.c.neon eval_sse.c.neon flip_neon_bitscan.c.neon android/cpu-features.c -LOCAL_ARM_NEON := false ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) # cmd-strip := include $(BUILD_EXECUTABLE) diff --git a/src/Makefile b/src/Makefile index cdedf8f..4f6f51c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,27 +1,7 @@ # # makefile to Compile Edax # -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD # Copyright 1998 - 2024 -======= -# Copyright 1998 - 2018 ->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added) -======= -# Copyright 1998 - 2022 ->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval) -======= -# Copyright 1998 - 2022 ->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval) -======= -# Copyright 1998 - 2023 ->>>>>>> 4087529 (Revise board0 usage; fix unused flips) -======= -# Copyright 1998 - 2024 ->>>>>>> a26ed17 (Add flip-sve-lzcnt.c for arm SVE build) # Richard Delorme # Version 4.5 # @@ -70,11 +50,6 @@ ifeq ($(COMP),gcc) endif ifeq ($(ARCH),x64-modern) -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT endif ifeq ($(ARCH),x64-avx512) @@ -85,30 +60,6 @@ ifeq ($(COMP),gcc) endif ifeq ($(ARCH),x64-k10) CFLAGS += -m64 -march=amdfam10 -DUSE_GAS_X64 -DPOPCOUNT -DMOVE_GENERATOR=MOVE_GENERATOR_BITSCAN -======= - CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT ->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added) -======= - CFLAGS += -m64 -march=core-avx2 -mno-bmi2 -DUSE_GAS_X64 -DPOPCOUNT ->>>>>>> 93110ce (Use computation or optional pdep to unpack A1_A8) -======= - CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT ->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval) -======= - CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT ->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval) -======= - CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT ->>>>>>> 85955bf (lazy high cut version of board_score_sse_1) - endif - ifeq ($(ARCH),x64-avx512) - CFLAGS += -m64 -march=skylake-avx512 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT - endif - ifeq ($(ARCH),x64-popcnt) - CFLAGS += -m64 -mpopcnt -mtune=generic -DUSE_GAS_X64 -DPOPCOUNT - endif - ifeq ($(ARCH),x64-k10) - CFLAGS += -m64 -march=amdfam10 -DUSE_GAS_X64 -DPOPCOUNT -DMOVE_GENERATOR=MOVE_GENERATOR_BITSCAN endif ifeq ($(ARCH),x32-modern) CFLAGS += -mx32 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT @@ -119,39 +70,8 @@ ifeq ($(COMP),gcc) ifeq ($(ARCH),x32) CFLAGS += -mx32 -mtune=generic -DUSE_GAS_X64 endif -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= - ifeq ($(ARCH),x86-modern) - CFLAGS += -m32 -march=core-avx2 -DUSE_GAS_X86 -DUSE_GAS_MMX -DhasSSE2 -DPOPCOUNT -DLASTFLIP_HIGHCUT - ifeq ($(BUILD),optimize) - CFLAGS += -fomit-frame-pointer - endif ->>>>>>> 0b8fa13 (More HBOARD hash functions) -======= ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) ifeq ($(ARCH),x86-sse) CFLAGS += -m32 -march=pentium-m -mfpmath=sse -DUSE_GAS_X86 -DUSE_GAS_MMX -DhasSSE2 -======= - ifeq ($(ARCH),x86-modern) - CFLAGS += -m32 -march=native -mfpmath=sse -DUSE_GAS_X86 -DUSE_GAS_MMX -DhasSSE2 -DPOPCOUNT -======= - ifeq ($(ARCH),x86-sse) - CFLAGS += -m32 -march=pentium-m -mfpmath=sse -DUSE_GAS_X86 -DUSE_GAS_MMX -DhasSSE2 - ifeq ($(BUILD),optimize) - CFLAGS += -fomit-frame-pointer - endif ->>>>>>> 46e3559 (fix gcc x86 build; add x86-sse build to makefile) - endif - ifeq ($(ARCH),x86) -<<<<<<< HEAD - CFLAGS += -m32 -mtune=generic -DUSE_GAS_X86 -DUSE_GAS_MMX ->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added) -======= - CFLAGS += -m32 -march=i386 -mtune=generic -DUSE_GAS_X86 -DUSE_GAS_MMX ->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added) ifeq ($(BUILD),optimize) CFLAGS += -fomit-frame-pointer endif @@ -167,24 +87,14 @@ ifeq ($(COMP),gcc) CFLAGS += -fomit-frame-pointer endif endif -<<<<<<< HEAD -<<<<<<< HEAD ifeq ($(ARCH),armv7) CFLAGS += -march=armv7-a ifeq ($(BUILD),optimize) -<<<<<<< HEAD -<<<<<<< HEAD -======= - ifeq ($(ARCH),armv7) - CFLAGS += -march=armv7-a - ifeq ($(BUILD),optimize) ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) CFLAGS += -fomit-frame-pointer endif endif ifeq ($(ARCH),arm-neon) CFLAGS += -march=armv7-a+simd -mfloat-abi=softfp -<<<<<<< HEAD ifeq ($(BUILD),optimize) CFLAGS += -fomit-frame-pointer endif @@ -193,23 +103,6 @@ ifeq ($(COMP),gcc) CFLAGS += -march=armv8.2-a+sve ifeq ($(BUILD),optimize) CFLAGS += -fomit-frame-pointer -<<<<<<< HEAD -======= - CFLAGS += -fomit-frame-pointer -march=armv7-a -mfpu=neon ->>>>>>> f2da03e (Refine arm builds adding neon support.) -======= - CFLAGS += -fomit-frame-pointer -march=armv7-a -mfloat-abi=softfp -mfpu=neon -DhasNeon ->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested)) -======= - ifeq ($(ARCH),ARMv7) - CFLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=neon -DhasNeon -======= ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) - ifeq ($(BUILD),optimize) - CFLAGS += -fomit-frame-pointer ->>>>>>> 46e3559 (fix gcc x86 build; add x86-sse build to makefile) -======= ->>>>>>> a26ed17 (Add flip-sve-lzcnt.c for arm SVE build) endif endif @@ -221,30 +114,13 @@ ifeq ($(COMP),gcc) ifeq ($(ARCH),arm) CFLAGS += -march=armv8.3-a endif -<<<<<<< HEAD - endif - ifeq ($(OS),android) - CFLAGS += -DANDROID=1 -======= ->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2) endif ifeq ($(OS),android) CFLAGS += -DANDROID=1 endif ifeq ($(OS),windows) CFLAGS += -D__USE_MINGW_ANSI_STDIO -DWINVER=0x0501 -<<<<<<< HEAD -<<<<<<< HEAD ifneq (,$(findstring x86,$(ARCH))) -======= - ifeq ($(ARCH),x86-modern) - CFLAGS += -DUSE_PTHREAD - endif - ifeq ($(ARCH),x86) ->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added) -======= - ifneq (,$(findstring x86,$(ARCH))) ->>>>>>> 11e7bb7 (filp_sse_bitscan.c (experimental) added; Makefile modified.) CFLAGS += -DUSE_PTHREAD endif endif @@ -345,23 +221,7 @@ endif #icc ifeq ($(COMP),icc) -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - CFLAGS = -std=c99 -Wall -Wcheck -wd2259 -D_GNU_SOURCE=1 -DUNICODE -Qoption,cpp,--unicode_source_kind,"UTF-8" -======= - CFLAGS = -std=c99 -Wall -Wcheck -wd2259 -wd913 -D_GNU_SOURCE=1 -DUNICODE ->>>>>>> 4cba71a (Use utf-8 for french/degree/micro chars; consistent capitalize in opening names for string-pooling) -======= - CFLAGS = -std=c99 -Wall -Wcheck -wd2259 -wd913 -DUNICODE ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= - CFLAGS = -std=c99 -Wall -Wcheck -wd2259 -wd913 -D_GNU_SOURCE=1 -DUNICODE ->>>>>>> 6506166 (More SSE optimizations) -======= CFLAGS = -std=c99 -Wall -Wcheck -wd2259 -D_GNU_SOURCE=1 -DUNICODE -Qoption,cpp,--unicode_source_kind,"UTF-8" ->>>>>>> 0b9d604 (Add more AVX512 builds; fix modern compiler's warnings) PGO_GEN = -prof_gen PGO_USE = -prof_use -wd11505 @@ -375,15 +235,7 @@ ifeq ($(COMP),icc) CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT endif ifeq ($(ARCH),x64-avx512) -<<<<<<< HEAD -<<<<<<< HEAD CFLAGS += -m64 -march=skylake-avx512 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT -======= - CFLAGS += -m64 -march=skylake-avx512 -DUSE_GAS_X64 -DPOPCOUNT ->>>>>>> 0b9d604 (Add more AVX512 builds; fix modern compiler's warnings) -======= - CFLAGS += -m64 -march=skylake-avx512 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT ->>>>>>> 85955bf (lazy high cut version of board_score_sse_1) endif ifeq ($(ARCH),x64) CFLAGS += -m64 -DUSE_GAS_X64 @@ -419,68 +271,20 @@ endif #clang ifeq ($(COMP),clang) -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD CFLAGS = -std=c99 -pedantic -W -Wall -D_GNU_SOURCE=1 -DUNICODE -======= - CFLAGS = -std=c99 -pedantic -W -Wall -D_GNU_SOURCE=1 -DUNICODE -Wno-invalid-source-encoding ->>>>>>> 4cba71a (Use utf-8 for french/degree/micro chars; consistent capitalize in opening names for string-pooling) -======= - CFLAGS = -std=c99 -pedantic -W -Wall -DUNICODE -Wno-invalid-source-encoding ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= - CFLAGS = -std=c99 -pedantic -W -Wall -D_GNU_SOURCE=1 -DUNICODE -Wno-invalid-source-encoding ->>>>>>> 6506166 (More SSE optimizations) -======= - CFLAGS = -std=c99 -pedantic -W -Wall -D_GNU_SOURCE=1 -DUNICODE ->>>>>>> 0b9d604 (Add more AVX512 builds; fix modern compiler's warnings) PGO_GEN = -fprofile-instr-generate PGO_USE = -fprofile-instr-use=edax.profdata PGO = llvm-profdata merge -output=edax.profdata $(BIN)/*.profraw ifeq ($(BUILD),optimize) -<<<<<<< HEAD -<<<<<<< HEAD CFLAGS += -O3 -ffast-math -fomit-frame-pointer -DNDEBUG LTOFLAG = -flto -======= - CFLAGS += -O3 -flto -ffast-math -fomit-frame-pointer -DNDEBUG ->>>>>>> ea39994 (Improve clang compatibility) -======= - CFLAGS += -O3 -ffast-math -fomit-frame-pointer -DNDEBUG - LTOFLAG = -flto ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) else CFLAGS += -O0 -g -DDEBUG endif ifeq ($(ARCH),x64-modern) -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT - endif - ifeq ($(ARCH),x64-avx512) - CFLAGS += -m64 -march=skylake-avx512 -DUSE_GAS_X64 -DPOPCOUNT -======= - CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT ->>>>>>> 11e7bb7 (filp_sse_bitscan.c (experimental) added; Makefile modified.) -======= - CFLAGS += -m64 -march=core-avx2 -mno-bmi2 -DUSE_GAS_X64 -DPOPCOUNT ->>>>>>> 93110ce (Use computation or optional pdep to unpack A1_A8) -======= - CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT ->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval) -======= - CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT ->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval) -======= CFLAGS += -m64 -march=core-avx2 -DUSE_GAS_X64 -DPOPCOUNT -DLASTFLIP_HIGHCUT ->>>>>>> 85955bf (lazy high cut version of board_score_sse_1) endif ifeq ($(ARCH),x64-avx512) CFLAGS += -m64 -march=skylake-avx512 -DUSE_GAS_X64 -DPOPCOUNT @@ -514,39 +318,12 @@ endif ifeq ($(OS),windows) EXE = wEdax-$(ARCH).exe LIBS += -lws2_32 -<<<<<<< HEAD -<<<<<<< HEAD ifneq (,$(findstring x86,$(ARCH))) LIBS += -Bstatic -Wl,-Bstatic,-lpthread -<<<<<<< HEAD -======= - ifeq ($(ARCH),x86-modern) - LIBS += -lpthread -======= ->>>>>>> a03a9c9 (Static link to pthread on MSYS2 x86 build) - endif - ifeq ($(ARCH),x86) -======= - ifneq (,$(findstring x86,$(ARCH))) ->>>>>>> 11e7bb7 (filp_sse_bitscan.c (experimental) added; Makefile modified.) - LIBS += -lpthread ->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added) endif endif ifeq ($(OS),osx) -<<<<<<< HEAD -<<<<<<< HEAD EXE = mEdax-$(ARCH) -======= - ifeq ($(ARCH),x64) - EXE = mEdax - else - EXE = mEdax-$(ARCH) - endif ->>>>>>> e558fdb (Some cleanups for clang / android build) -======= - EXE = mEdax-$(ARCH) ->>>>>>> ffdc063 (makefile for macuniversal) LIBS += -lpthread endif @@ -602,12 +379,6 @@ help: build: @echo "building edax..." $(CC) $(CFLAGS) $(LTOFLAG) all.c -s -o $(BIN)/$(EXE) $(LIBS) -<<<<<<< HEAD - -source: - $(CC) $(CFLAGS) -S all.c -======= ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) source: $(CC) $(CFLAGS) -S all.c @@ -638,33 +409,12 @@ release: $(MAKE) pgo-build ARCH=x64 OS=linux COMP=gcc $(MAKE) build ARCH=x64 OS=windows COMP=gcc CC='x86_64-w64-mingw32-gcc' $(MAKE) build ARCH=x86 OS=windows COMP=gcc CC='i686-w64-mingw32-gcc' -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - $(MAKE) build ARCH=armv7 OS=android COMP=gcc CC='arm-linux-androideabi-gcc --sysroot=$SYSROOT' -======= - $(MAKE) build ARCH=ARMv7 OS=android COMP=gcc CC='arm-linux-androideabi-gcc' ->>>>>>> e558fdb (Some cleanups for clang / android build) -======= - $(MAKE) build ARCH=ARMv7 OS=android COMP=gcc CC='arm-linux-androideabi-gcc --sysroot=$SYSROOT' ->>>>>>> f2da03e (Refine arm builds adding neon support.) -======= $(MAKE) build ARCH=armv7 OS=android COMP=gcc CC='arm-linux-androideabi-gcc --sysroot=$SYSROOT' ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) $(MAKE) clean $(MAKE) build ARCH=x64 OS=osx COMP=gcc-old CC=i686-apple-darwin10-gcc android: ndk-build NDK_PROJECT_PATH=. NDK_APPLICATION_MK=./Application.mk # NDK_DEBUG=1 -<<<<<<< HEAD - -macuniversal: - $(MAKE) build ARCH=x86 OS=osx COMP=clang - $(MAKE) build ARCH=x64 OS=osx COMP=clang - lipo -create -arch i686 ../bin/mEdax-x86 -arch x86_64 ../bin/mEdax-x64 -arch arm64 ../bin/mEdax-arm -output ../bin/mEdax - rm -f ../bin/mEdax-x86 ../bin/mEdax-x64 -======= ->>>>>>> f2da03e (Refine arm builds adding neon support.) macuniversal: $(MAKE) build ARCH=x86 OS=osx COMP=clang diff --git a/src/NMakefile b/src/NMakefile index 258d496..ba1a68e 100644 --- a/src/NMakefile +++ b/src/NMakefile @@ -8,8 +8,6 @@ # # Microsoft Visual C++ 2008 or better for Windows (7 or Vista). -<<<<<<< HEAD -<<<<<<< HEAD # VC_FLAGS = /source-charset:.1252 /execution-charset:.1252\ VC_FLAGS = /D UNICODE /utf-8 /D _CRT_SECURE_NO_DEPRECATE /I"..\include" /O2 /fp:fast /GS- /D NDEBUG /MT @@ -103,146 +101,6 @@ vc-pgo-w64: link all.obj ws2_32.lib /out:..\bin\wEdax-w64.exe /ltcg:pgo /VERSION:4.5 del *.pgc ..\bin\*.pgd -icc-pgo-w64-modern: - icl $(VC_FLAGS) /arch:AVX2 /D POPCOUNT /D LASTFLIP_HIGHCUT /Qprof-gen all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /VERSION:4.5 - cd ..\bin - wEdax-w64-modern -l 60 -solve ..\problem\fforum-20-39.obf - wEdax-w64-modern -l 18 -auto-store on -auto-start on -repeat 2 -auto-quit on -mode 2 -book-file book.pgo - del book.pgo book.pgo.store - cd ..\src - icl $(VC_FLAGS) /GL /arch:AVX2 /D POPCOUNT /D LASTFLIP_HIGHCUT /Qprof-use /Qip all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /VERSION:4.5 -======= -VC_FLAGS = /I"..\include" /O2 /Oi /GL /fp:fast /source-charset:.1252 /execution-charset:.1252\ - /D "NDEBUG" /D "inline=__inline" /D "__func__=__FUNCTION__"\ -======= -# VC_FLAGS = /source-charset:.1252 /execution-charset:.1252\ -<<<<<<< HEAD -VC_FLAGS = /D UNICODE /utf-8\ -<<<<<<< HEAD - /I"..\include" /O2 /Oi /GL /GF /fp:fast /D NDEBUG /D inline=__inline /D __func__=__FUNCTION__\ ->>>>>>> 4cba71a (Use utf-8 for french/degree/micro chars; consistent capitalize in opening names for string-pooling) -======= -======= -VC_FLAGS = /D UNICODE /utf-8 /D _CRT_SECURE_NO_DEPRECATE\ -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> 6506166 (More SSE optimizations) - /I"..\include" /O2 /GL /fp:fast /D NDEBUG /D inline=__forceinline /D __func__=__FUNCTION__\ ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= - /I"..\include" /O2 /fp:fast /GS- /D NDEBUG /D inline=__forceinline /D __func__=__FUNCTION__\ ->>>>>>> e832f60 (Inlining move_evaluate; skip movelist_evaluate if empty = 1) - /MT -======= - /I"..\include" /O2 /fp:fast /D NDEBUG /D inline=__forceinline /D __func__=__FUNCTION__\ -<<<<<<< HEAD - /MT /GL ->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan) -======= - /MT ->>>>>>> 59f61a8 (Drop /GL from clang build) - -vc-w64-modern: - cl $(VC_FLAGS) /GL /D HAS_CPU_64 /arch:AVX2 /D POPCOUNT /D LASTFLIP_HIGHCUT all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /VERSION:4.5 - -vc-w64-avx512: - cl $(VC_FLAGS) /GL /D HAS_CPU_64 /arch:AVX512 /D POPCOUNT /D LASTFLIP_HIGHCUT all.c ws2_32.lib /Fe..\bin\wEdax-w64-avx512.exe /link /VERSION:4.5 - -vc-w64-popcnt: - cl $(VC_FLAGS) /GL /D HAS_CPU_64 /D POPCOUNT all.c ws2_32.lib /Fe..\bin\wEdax-w64-popcnt.exe /link /VERSION:4.5 - -vc-w64-k10: - cl $(VC_FLAGS) /GL /D HAS_CPU_64 /D POPCOUNT /D __LZCNT__ /D MOVE_GENERATOR=MOVE_GENERATOR_BITSCAN /favor:AMD64 all.c ws2_32.lib /Fe..\bin\wEdax-w64-k10.exe /link /VERSION:4.5 - -vc-w64: - cl $(VC_FLAGS) /GL /D HAS_CPU_64 all.c ws2_32.lib /Fe..\bin\wEdax-w64.exe /link /VERSION:4.5 - -vc-w32-modern: - cl $(VC_FLAGS) /GL /D hasSSE2 /arch:AVX2 /D POPCOUNT all.c ws2_32.lib /Fe..\bin\wEdax-w32-modern.exe - -vc-w32-sse: - cl $(VC_FLAGS) /GL /D hasSSE2 all.c ws2_32.lib /Fe..\bin\wEdax-w32-sse.exe - -vc-w32-mmx: - cl $(VC_FLAGS) /GL /arch:IA32 /D hasMMX all.c ws2_32.lib /Fe..\bin\wEdax-w32-mmx.exe - -vc-w32: - cl $(VC_FLAGS) /GL /arch:IA32 all.c ws2_32.lib /Fe..\bin\wEdax-w32.exe - -vc-a64: -# vcvarsamd64_arm64.bat - cl $(VC_FLAGS) /GL /D HAS_CPU_64 all.c ws2_32.lib /Fe..\bin\wEdax-a64.exe /link /VERSION:4.5 - -vc-a32: -# vcvarsamd64_arm.bat - cl $(VC_FLAGS) /GL all.c ws2_32.lib /Fe..\bin\wEdax-a32.exe - -icc-w64-modern: - icl $(VC_FLAGS) /GL /D HAS_CPU_64 /arch:AVX2 /D POPCOUNT /D LASTFLIP_HIGHCUT all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /VERSION:4.5 - -icc-w64-avx512: - icl $(VC_FLAGS) /GL /D HAS_CPU_64 /arch:CORE-AVX512 /D POPCOUNT /D LASTFLIP_HIGHCUT all.c ws2_32.lib /Fe..\bin\wEdax-w64-avx512.exe /link /VERSION:4.5 - -icc-w64: - icl $(VC_FLAGS) /GL /D HAS_CPU_64 all.c ws2_32.lib /Fe..\bin\wEdax-w64.exe /link /VERSION:4.5 - -icc-w32: - icl $(VC_FLAGS) /GL /arch:IA32 all.c ws2_32.lib /Fe..\bin\wEdax-w32.exe - -clang-w64-modern: - clang-cl $(VC_FLAGS) /D HAS_CPU_64 /arch:AVX2 /D POPCOUNT all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /VERSION:4.5 - -clang-w64: - clang-cl $(VC_FLAGS) /D HAS_CPU_64 all.c ws2_32.lib /Fe..\bin\wEdax-w64.exe /link /VERSION:4.5 - -clang-w32: - clang-cl $(VC_FLAGS) /arch:IA32 all.c ws2_32.lib /Fe..\bin\wEdax-w32.exe - -vc-pgo-w64-modern: - set VCPROFILE_PATH=..\src - cl $(VC_FLAGS) /GL /arch:AVX2 /D POPCOUNT /D LASTFLIP_HIGHCUT all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /ltcg:pgi /VERSION:4.5 - cd ..\bin - wEdax-w64-modern -l 60 -solve ..\problem\fforum-20-39.obf - wEdax-w64-modern -l 18 -auto-store on -auto-start on -repeat 2 -auto-quit on -mode 2 -book-file book.pgo - del book.pgo book.pgo.store - cd ..\src - link all.obj ws2_32.lib /out:..\bin\wEdax-w64-modern.exe /ltcg:pgo /VERSION:4.5 - del *.pgc ..\bin\*.pgd - -vc-pgo-w64-k10: - set VCPROFILE_PATH=..\src - cl $(VC_FLAGS) /GL /D POPCOUNT /D __LZCNT__ /D MOVE_GENERATOR=MOVE_GENERATOR_BITSCAN all.c ws2_32.lib /Fe..\bin\wEdax-w64-k10.exe /link /ltcg:pgi /VERSION:4.5 - cd ..\bin - wEdax-w64-k10 -l 60 -solve ..\problem\fforum-20-39.obf - wEdax-w64-k10 -l 18 -auto-store on -auto-start on -repeat 2 -auto-quit on -mode 2 -book-file book.pgo - del book.pgo book.pgo.store - cd ..\src - link all.obj ws2_32.lib /out:..\bin\wEdax-w64-k10.exe /ltcg:pgo /VERSION:4.5 - del *.pgc ..\bin\*.pgd - -vc-pgo-w64: - set VCPROFILE_PATH=..\src - cl $(VC_FLAGS) /GL all.c ws2_32.lib /Fe..\bin\wEdax-w64.exe /link /ltcg:pgi /VERSION:4.5 - cd ..\bin - wEdax-w64 -l 60 -solve ..\problem\fforum-20-39.obf - wEdax-w64 -l 18 -auto-store on -auto-start on -repeat 2 -auto-quit on -mode 2 -book-file book.pgo - del book.pgo book.pgo.store - cd ..\src -<<<<<<< HEAD -<<<<<<< HEAD - link all.obj ws2_32.lib /out:..\bin\wEdax-w64.exe /ltcg:pgo /machine:x64 /VERSION:4.4 -<<<<<<< HEAD ->>>>>>> 1dc032e (Improve visual c compatibility) -======= -======= - link all.obj ws2_32.lib /out:..\bin\wEdax-w64.exe /ltcg:pgo /VERSION:4.4 ->>>>>>> f2da03e (Refine arm builds adding neon support.) -======= - link all.obj ws2_32.lib /out:..\bin\wEdax-w64.exe /ltcg:pgo /VERSION:4.5 ->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested)) - del *.pgc ..\bin\*.pgd ->>>>>>> a9ee768 (Change popcnt build to k10 build using flip_bitscan) - icc-pgo-w64-modern: icl $(VC_FLAGS) /arch:AVX2 /D POPCOUNT /D LASTFLIP_HIGHCUT /Qprof-gen all.c ws2_32.lib /Fe..\bin\wEdax-w64-modern.exe /link /VERSION:4.5 cd ..\bin diff --git a/src/base.c b/src/base.c index c6c6528..aef0f86 100644 --- a/src/base.c +++ b/src/base.c @@ -481,15 +481,7 @@ static void wthorgame_get_board(WthorGame *game, const int n_empties, Board *boa if (board_is_pass(board)) { board_pass(board); *player ^= 1; } -<<<<<<< HEAD -<<<<<<< HEAD board_get_move_flip(board, move_from_wthor(game->x[i]), &move); -======= - board_get_move(board, move_from_wthor(game->x[i]), &move); ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= - board_get_move_flip(board, move_from_wthor(game->x[i]), &move); ->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip) if (board_check_move(board, &move)) { board_update(board, &move); *player ^= 1; } else { diff --git a/src/bench.c b/src/bench.c index f9b1a82..7b6fad9 100644 --- a/src/bench.c +++ b/src/bench.c @@ -1,27 +1,9 @@ -<<<<<<< HEAD -<<<<<<< HEAD /** * @file bench.c * -<<<<<<< HEAD * @date 1998 - 2023 -======= - * @date 1998 - 2017 ->>>>>>> b3f048d (copyright changes) * @author Richard Delorme * @version 4.5 -======= -/** - * @file bench.c - * - * @date 1998 - 2023 - * @author Richard Delorme -<<<<<<< HEAD - * @version 4.4 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - * @version 4.5 ->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame) */ #include "bit.h" @@ -38,15 +20,7 @@ * * @return a CPU clock tick. */ -<<<<<<< HEAD -<<<<<<< HEAD -static unsigned long long click(void) -======= -static unsigned long long click() ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= static unsigned long long click(void) ->>>>>>> 0b8fa13 (More HBOARD hash functions) { #if defined(USE_GAS_X64) @@ -60,15 +34,7 @@ static unsigned long long click(void) __asm__ volatile ( "rdtsc" : "=A" (a)); return a; -<<<<<<< HEAD -<<<<<<< HEAD -#elif defined(_WIN32) && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)) -======= -#elif defined(_WIN32) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= #elif defined(_WIN32) && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)) ->>>>>>> f2da03e (Refine arm builds adding neon support.) return __rdtsc(); #else return cpu_clock(); @@ -78,15 +44,7 @@ static unsigned long long click(void) /* * @brief Move generator performance test. */ -<<<<<<< HEAD -<<<<<<< HEAD static void bench_move_generator(void) -======= -static void bench_move_generator() ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -static void bench_move_generator(void) ->>>>>>> 0b8fa13 (More HBOARD hash functions) { const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O"; char m[4]; @@ -124,29 +82,13 @@ static void bench_move_generator(void) c = -click(); for (i = 0; i < N_WARMUP; ++i) { -<<<<<<< HEAD -<<<<<<< HEAD - v += board_get_move_flip(&board, x, &move); -======= - v += board_get_move(&board, x, &move); ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= v += board_get_move_flip(&board, x, &move); ->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip) } c += click(); c = -click(); for (i = 0; i < N_REPEAT; ++i) { -<<<<<<< HEAD -<<<<<<< HEAD v += board_get_move_flip(&board, x, &move); -======= - v += board_get_move(&board, x, &move); ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - v += board_get_move_flip(&board, x, &move); ->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip) } c += click(); @@ -156,44 +98,20 @@ static void bench_move_generator(void) if (t < t_min) t_min = t; if (t > t_max) t_max = t; -<<<<<<< HEAD -<<<<<<< HEAD - if (options.verbosity >= 2) printf("board_get_move_flip: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t); -======= - if (options.verbosity >= 2) printf("board_get_move: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t); ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= if (options.verbosity >= 2) printf("board_get_move_flip: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t); ->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip) } t_mean /= x; t_var = t_var / x - (t_mean * t_mean); -<<<<<<< HEAD -<<<<<<< HEAD - printf("board_get_move_flip: %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max); -======= - printf("board_get_move: %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max); ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= printf("board_get_move_flip: %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max); ->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip) } /* * @brief Last Move performance test. */ -<<<<<<< HEAD -<<<<<<< HEAD static void bench_count_last_flip(void) -======= -static void bench_count_last_flip() ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -static void bench_count_last_flip(void) ->>>>>>> 0b8fa13 (More HBOARD hash functions) { const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O"; char m[4]; @@ -260,15 +178,7 @@ static void bench_count_last_flip(void) /* * @brief Scoring performance test. */ -<<<<<<< HEAD -<<<<<<< HEAD -static void bench_board_score_1(void) -======= -static void bench_board_score_1() ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= static void bench_board_score_1(void) ->>>>>>> 0b8fa13 (More HBOARD hash functions) { const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O"; char m[4]; @@ -307,37 +217,13 @@ static void bench_board_score_1(void) c = -click(); for (i = 0; i < N_WARMUP; ++i) { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD v += board_score_1(board.player, SCORE_MAX - 1, x); -======= - v += board_score_1(&board, SCORE_MAX, x); ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - v += board_score_1(board.player, SCORE_MAX, x); ->>>>>>> 26dad03 (Use player bits only in board_score_1) -======= - v += board_score_1(board.player, SCORE_MAX - 1, x); ->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1) } c += click(); c = -click(); for (i = 0; i < N_REPEAT; ++i) { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - v += board_score_1(board.player, SCORE_MAX - 1, x); -======= - v += board_score_1(&board, SCORE_MAX, x); ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - v += board_score_1(board.player, SCORE_MAX, x); ->>>>>>> 26dad03 (Use player bits only in board_score_1) -======= v += board_score_1(board.player, SCORE_MAX - 1, x); ->>>>>>> 9ec6e5d (Negative score in endgame solve 2/3/4; offset beta in score_1) } c += click(); @@ -360,15 +246,7 @@ static void bench_board_score_1(void) /* * @brief Mobility performance test. */ -<<<<<<< HEAD -<<<<<<< HEAD static void bench_mobility(void) -======= -static void bench_mobility() ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -static void bench_mobility(void) ->>>>>>> 0b8fa13 (More HBOARD hash functions) { const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O"; char m[4]; @@ -446,15 +324,7 @@ static void bench_mobility(void) /* * @brief Stability performance test. */ -<<<<<<< HEAD -<<<<<<< HEAD -static void bench_stability(void) -======= -static void bench_stability() ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= static void bench_stability(void) ->>>>>>> 0b8fa13 (More HBOARD hash functions) { const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O"; char m[4]; @@ -542,422 +412,3 @@ void bench(void) -<<<<<<< HEAD -======= -/** - * @file bench.c - * - * @date 1998 - 2020 - * @author Richard Delorme - * @version 4.4 - */ - -#include "bit.h" -#include "board.h" -#include "move.h" -#include "options.h" -#include "search.h" -#include "util.h" - -#include - -/* - * @brief return a CPU clock tick. - * - * @return a CPU clock tick. - */ -static unsigned long long click() -{ -#if defined(USE_GAS_X64) - - unsigned int a, d; - __asm__ volatile ( - "rdtsc" : "=a" (a), "=d" (d)); - return a | (((unsigned long long)d) << 32); - -#elif defined(USE_GAS_X86) - unsigned long long a; - __asm__ volatile ( - "rdtsc" : "=A" (a)); - return a; -#elif defined(_WIN32) - return __rdtsc(); -#else - return cpu_clock(); -#endif -} - -/* - * @brief Move generator performance test. - */ -static void bench_move_generator() -{ - const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O"; - char m[4]; - Board board; - Move move; - int i, x; - volatile int v; - const int N_WARMUP = 1000; - const int N_REPEAT = 1000000; - unsigned long long c, overhead; - double t, t_mean, t_var, t_min, t_max; - - v = 0; - c = -click(); - for (i = 0; i < N_WARMUP; ++i) { - v += i; - } - c += click(); - - c = -click(); - for (i = 0; i < N_REPEAT; ++i) { - v += i; - } - c += click(); - overhead = c; - - t_mean = t_var = 0.0; - t_max = 0; - t_min = 1e30; - - for (x = A1; x < PASS; ++x) { - board_set(&board, b); - board.player &= ~x_to_bit(x); - board.opponent &= ~x_to_bit(x); - - c = -click(); - for (i = 0; i < N_WARMUP; ++i) { - v += board_get_move(&board, x, &move); - } - c += click(); - - c = -click(); - for (i = 0; i < N_REPEAT; ++i) { - v += board_get_move(&board, x, &move); - } - c += click(); - - t = ((double)(c - overhead)) / N_REPEAT; - t_mean += t; - t_var += t * t; - if (t < t_min) t_min = t; - if (t > t_max) t_max = t; - - if (options.verbosity >= 2) printf("board_get_move: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t); - - } - - t_mean /= x; - t_var = t_var / x - (t_mean * t_mean); - - printf("board_get_move: %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max); -} - -/* - * @brief Last Move performance test. - */ -static void bench_count_last_flip() -{ - const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O"; - char m[4]; - Board board; - int i, x; - volatile int v; - const int N_WARMUP = 1000; - const int N_REPEAT = 1000000; - unsigned long long c, overhead; - double t, t_mean, t_var, t_min, t_max; - - v = 0; - - c = -click(); - for (i = 0; i < N_WARMUP; ++i) { - v += i; - } - c += click(); - - c = -click(); - for (i = 0; i < N_REPEAT; ++i) { - v += i; - } - c += click(); - overhead = c; - - t_mean = t_var = 0.0; - t_max = 0; - t_min = 1e30; - - for (x = A1; x < PASS; ++x) { - board_set(&board, b); - board.player &= ~x_to_bit(x); - // board.opponent &= ~x_to_bit(x); - - c = -click(); - for (i = 0; i < N_WARMUP; ++i) { - v += last_flip(x, board.player & ~i); - } - c += click(); - - c = -click(); - for (i = 0; i < N_REPEAT; ++i) { - v += last_flip(x, board.player& ~i); - } - c += click(); - - t = ((double)(c - overhead)) / N_REPEAT; - t_mean += t; - t_var += t * t; - if (t < t_min) t_min = t; - if (t > t_max) t_max = t; - - if (options.verbosity >= 2) printf("count_last_flip: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t); - - } - - t_mean /= x; - t_var = t_var / x - (t_mean * t_mean); - - printf("count_last_flip: %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max); -} - -/* - * @brief Scoring performance test. - */ -static void bench_board_score_1() -{ - const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O"; - char m[4]; - Board board; - int i, x; - volatile int v; - const int N_WARMUP = 1000; - const int N_REPEAT = 1000000; - unsigned long long c, overhead; - double t, t_mean, t_var, t_min, t_max; - - board_set(&board, b); - v = 0; - - c = -click(); - for (i = 0; i < N_WARMUP; ++i) { - v += i; - } - c += click(); - - c = -click(); - for (i = 0; i < N_REPEAT; ++i) { - v += i; - } - c += click(); - overhead = c; - - t_mean = t_var = 0.0; - t_max = 0; - t_min = 1e30; - - for (x = A1; x < PASS; ++x) { - board_set(&board, b); - board.player &= ~x_to_bit(x); - board.opponent &= ~x_to_bit(x); - - c = -click(); - for (i = 0; i < N_WARMUP; ++i) { - v += board_score_1(&board, SCORE_MAX, x); - } - c += click(); - - c = -click(); - for (i = 0; i < N_REPEAT; ++i) { - v += board_score_1(&board, SCORE_MAX, x); - } - c += click(); - - t = ((double)(c - overhead)) / N_REPEAT; - t_mean += t; - t_var += t * t; - if (t < t_min) t_min = t; - if (t > t_max) t_max = t; - - if (options.verbosity >= 2) printf("board_score_1: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t); - - } - - t_mean /= x; - t_var = t_var / x - (t_mean * t_mean); - - printf("board_score_1: %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max); -} - -/* - * @brief Mobility performance test. - */ -static void bench_mobility() -{ - const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O"; - char m[4]; - Board board; - int i, x; - volatile int v; - const int N_WARMUP = 1000; - const int N_REPEAT = 1000000; - unsigned long long c, overhead; - double t, t_mean, t_var, t_min, t_max; - - board_set(&board, b); - v = 0; - c = -click(); - for (i = 0; i < N_WARMUP; ++i) { - board.player &= ~i; - board.opponent &= ~i; - v += i; - } - c += click(); - - board_set(&board, b); - c = -click(); - for (i = 0; i < N_REPEAT; ++i) { - board.player &= ~i; - board.opponent &= ~i; - v += i; - } - c += click(); - overhead = 0; - - t_mean = t_var = 0.0; - t_max = 0; - t_min = 1e30; - - for (x = A1; x < PASS; ++x) { - board_set(&board, b); - - v = 0; - c = -click(); - for (i = 0; i < N_WARMUP; ++i) { - board.player &= ~i; - board.opponent &= ~i; - v += get_mobility(board.player, board.opponent); - v -= get_mobility(board.opponent, board.player); - } - c += click(); - - board_set(&board, b); - c = -click(); - for (i = 0; i < N_REPEAT; ++i) { - board.player &= ~i; - board.opponent &= ~i; - v += get_mobility(board.player, board.opponent); - v -= get_mobility(board.opponent, board.player); - } - c += click(); - - t = ((double)(c - overhead)) / N_REPEAT / 2; - t_mean += t; - t_var += t * t; - if (t < t_min) t_min = t; - if (t > t_max) t_max = t; - - if (options.verbosity >= 2) printf("v = %d\n", v); - if (options.verbosity >= 2) printf("mobility: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t); - } - - t_mean /= x; - t_var = t_var / x - (t_mean * t_mean); - - printf("mobility: %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max); -} - -/* - * @brief Stability performance test. - */ -static void bench_stability() -{ - const char *b = "OOOOOOOOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOXXXXXXOOOOOOOOO O"; - char m[4]; - Board board; - int i, x; - volatile int v; - const int N_WARMUP = 1000; - const int N_REPEAT = 1000000; - unsigned long long c, overhead; - double t, t_mean, t_var, t_min, t_max; - - board_init(&board); - - v = 0; - x = A1; - c = -click(); - for (i = 0; i < N_WARMUP; ++i) { - board.player &= ~x_to_bit(x); - board.opponent &= ~x_to_bit(x); - } - c += click(); - - board_set(&board, b); - c = -click(); - for (i = 0; i < N_REPEAT; ++i) { - board.player &= ~x_to_bit(x); - board.opponent &= ~x_to_bit(x); - } - c += click(); - overhead = c; - - t_mean = t_var = 0.0; - t_max = 0; - t_min = 1e30; - - for (x = A1; x < PASS; ++x) { - board_set(&board, b); - - v = 0; - c = -click(); - for (i = 0; i < N_WARMUP; ++i) { - board.player &= ~x_to_bit(x); - board.opponent &= ~x_to_bit(x); - v += get_stability(board.player, board.opponent); - } - c += click(); - - board_set(&board, b); - c = -click(); - for (i = 0; i < N_REPEAT; ++i) { - board.player &= ~x_to_bit(x); - board.opponent &= ~x_to_bit(x); - v += get_stability(board.player, board.opponent); - } - c += click(); - - t = ((double)(c - overhead)) / N_REPEAT; - t_mean += t; - t_var += t * t; - if (t < t_min) t_min = t; - if (t > t_max) t_max = t; - - if (options.verbosity >= 2) printf("v = %d\n", v); - if (options.verbosity >= 2) printf("stability: %s %.1f clicks;\n", move_to_string(x, WHITE, m), t); - } - - t_mean /= x; - t_var = t_var / x - (t_mean * t_mean); - - printf("stability: %.2f < %.2f +/- %.2f < %.2f\n", t_min, t_mean, sqrt(t_var), t_max); -} - -/** - * @brief perform various performance tests. - */ -void bench(void) -{ - printf("The unit of the results is CPU cycles\n"); - bench_move_generator(); - bench_count_last_flip(); - bench_board_score_1(); - bench_mobility(); - bench_stability(); -} - - - ->>>>>>> c04475d (Fix microbench not to be optimized out) -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) diff --git a/src/bit.c b/src/bit.c index 218a399..de9f903 100644 --- a/src/bit.c +++ b/src/bit.c @@ -6,31 +6,7 @@ * a macro needs to be defined to chose between different flavors of the * algorithm. * -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD * @date 1998 - 2023 -======= - * @date 1998 - 2017 ->>>>>>> b3f048d (copyright changes) -======= - * @date 1998 - 2018 ->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added) -======= - * @date 1998 - 2020 ->>>>>>> 22be102 (table lookup bit_count for non-POPCOUNT from stockfish) -======= - * @date 1998 - 2021 ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) -======= - * @date 1998 - 2022 ->>>>>>> dc7c79c (Omit unpack from get_edge_stability) -======= - * @date 1998 - 2023 ->>>>>>> a9633d5 (Initial 4.5.2; some reformats) * @author Richard Delorme * @version 4.5 */ @@ -38,22 +14,11 @@ #include "bit.h" #include "util.h" -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) /** Table for a 32-bits-at-a-time software CRC-32C calculation. * This tablehas built into it the pre and post bit inversion of the CRC. */ #ifndef crc32c_u64 static unsigned int crc32c_table[4][256]; #endif -<<<<<<< HEAD -======= -/** coordinate to bit table converter */ -unsigned long long X_TO_BIT[66]; ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) /** coordinate to bit table converter */ unsigned long long X_TO_BIT[66]; @@ -94,28 +59,15 @@ const unsigned long long NEIGHBOUR[] = { * @return the number of bits set. */ -<<<<<<< HEAD -<<<<<<< HEAD -#ifndef POPCOUNT - #if 0 -======= -#if 0 // ndef POPCOUNT ->>>>>>> 22be102 (table lookup bit_count for non-POPCOUNT from stockfish) -======= #ifndef POPCOUNT #if 0 ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) int bit_count(unsigned long long b) { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD int c; b = b - ((b >> 1) & 0x5555555555555555ULL); b = ((b >> 2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); #ifdef HAS_CPU_64 -<<<<<<< HEAD b = (b + (b >> 4)) & 0x0F0F0F0F0F0F0F0FULL; c = (b * 0x0101010101010101ULL) >> 56; #else @@ -125,42 +77,6 @@ int bit_count(unsigned long long b) #endif return c; } -======= - register unsigned long long c; -======= ->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6) - #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) -======= - int c; -<<<<<<< HEAD - #if 0 // defined(USE_GAS_MMX) || defined(USE_MSVC_X86) ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) - static const unsigned long long M55 = 0x5555555555555555ULL; - static const unsigned long long M33 = 0x3333333333333333ULL; - static const unsigned long long M0F = 0x0F0F0F0F0F0F0F0FULL; - #endif - -// MMX does not help much here :-( - #if 0 // def USE_MSVC_X86 - __m64 m; - - if (hasSSE2) { - m = *(__m64 *) &b; - m = _m_psubd(m, _m_pand(_m_psrlqi(m, 1), *(__m64 *) &M55)); - m = _m_paddd(_m_pand(m, *(__m64 *) &M33), _m_pand(_m_psrlqi(m, 2), *(__m64 *) &M33)); - m = _m_pand(_m_paddd(m, _m_psrlqi(m, 4)), *(__m64 *) &M0F); - c = _m_to_int(_m_psadbw(m, _mm_setzero_si64())); - _mm_empty(); - - return c; - } - -<<<<<<< HEAD - #elif defined(USE_GAS_MMX) ->>>>>>> 1dc032e (Improve visual c compatibility) -======= - #elif 0 // defined(USE_GAS_MMX) ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) #else // https://github.com/official-stockfish/Stockfish/pull/620/files @@ -187,7 +103,6 @@ void bit_init(void) #ifndef crc32c_u64 unsigned int k, crc; -<<<<<<< HEAD // http://stackoverflow.com/a/17646775/1821055 // https://github.com/baruch/crcbench // Generate byte-wise table. @@ -211,126 +126,19 @@ void bit_init(void) for (n = 0; n < 66; ++n) { // X_TO_BIT[64] = X_TO_BIT[65] = 0 for passing move & nomove X_TO_BIT[n] = ll; ll <<= 1; -======= - "pxor %%mm2, %%mm2\n\t" - "psadbw %%mm2, %%mm0\n\t" // SSE2 - "movd %%mm0, %0\n\t" - "emms" - : "=a" (c) - : "rm" (b), "m" (M55), "m" (M33), "m" (M0F), "m" (((unsigned int *) &b)[1])); - - return c; ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) } -<<<<<<< HEAD #ifndef POPCOUNT for (n = 0; n < (1 << 16); ++n) PopCnt16[n] = bit_count_32_SWAR(n); -======= - #endif -======= ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) - - b = b - ((b >> 1) & 0x5555555555555555ULL); - b = ((b >> 2) & 0x3333333333333333ULL) + (b & 0x3333333333333333ULL); -#ifdef HAS_CPU_64 -======= ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) - b = (b + (b >> 4)) & 0x0F0F0F0F0F0F0F0FULL; - c = (b * 0x0101010101010101ULL) >> 56; - #else - c = (b >> 32) + b; - c = (c & 0x0F0F0F0F) + ((c >> 4) & 0x0F0F0F0F); - c = (c * 0x01010101) >> 24; - #endif - return c; -} -<<<<<<< HEAD ->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6) #endif -<<<<<<< HEAD #if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86)) && !defined(hasSSE2) init_mmx(); #endif #if defined(ANDROID) && !defined(__ARM_NEON) && !defined(hasSSE2) init_neon(); #endif -======= -#ifndef POPCOUNT -======= - - #else ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) -// https://github.com/official-stockfish/Stockfish/pull/620/files -// 2% faster than SWAR bit_count for 32 & 64 non-POPCOUNT build -unsigned char PopCnt16[1 << 16]; - -static int bit_count_32_SWAR(unsigned int b) -{ - b = b - ((b >> 1) & 0x55555555); - b = ((b >> 2) & 0x333333333) + (b & 0x33333333); - b = ((b >> 4) + b) & 0x0F0F0F0F; - return (b * 0x01010101) >> 24; -} - #endif -#endif - -/** - * @brief initialize PopCnt16 table and check MMX/SSE availability. - */ -void bit_init(void) -{ - unsigned int n; - unsigned long long ll; -#ifndef crc32c_u64 - unsigned int k, crc; - - // http://stackoverflow.com/a/17646775/1821055 - // https://github.com/baruch/crcbench - // Generate byte-wise table. - for (n = 0; n < 256; n++) { - crc = ~n; - for (k = 0; k < 8; k++) - crc = (crc >> 1) ^ (-(int)(crc & 1) & 0x82f63b78); - crc32c_table[0][n] = ~crc; - } - // Use byte-wise table to generate word-wise table. - for (n = 0; n < 256; n++) { - crc = ~crc32c_table[0][n]; - for (k = 1; k < 4; k++) { - crc = crc32c_table[0][crc & 0xff] ^ (crc >> 8); - crc32c_table[k][n] = ~crc; - } - } -#endif - - ll = 1; - for (n = 0; n < 66; ++n) { // X_TO_BIT[64] = X_TO_BIT[65] = 0 for passing move & nomove - X_TO_BIT[n] = ll; - ll <<= 1; - } - -#ifndef POPCOUNT - for (n = 0; n < (1 << 16); ++n) - PopCnt16[n] = bit_count_32_SWAR(n); -#endif - -#if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86)) && !defined(hasSSE2) - init_mmx(); -#endif -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> 22be102 (table lookup bit_count for non-POPCOUNT from stockfish) -======= -#if defined(ANDROID) && !defined(hasNeon) && !defined(hasSSE2) -======= -#if defined(ANDROID) && !defined(__ARM_NEON) && !defined(hasSSE2) ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) - init_neon(); -#endif ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) } /** @@ -343,17 +151,8 @@ void bit_init(void) * @param v 64-bit integer to count bits of. * @return the number of bit set, counting the corners twice. */ -<<<<<<< HEAD -<<<<<<< HEAD -#if !defined(__AVX2__) && defined(hasSSE2) && !defined(POPCOUNT) -__m128i bit_weighted_count_sse(unsigned long long Q0, unsigned long long Q1) -======= -int bit_weighted_count(unsigned long long v) ->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6) -======= #if !defined(__AVX2__) && defined(hasSSE2) && !defined(POPCOUNT) __m128i bit_weighted_count_sse(unsigned long long Q0, unsigned long long Q1) ->>>>>>> e3cea41 (New vectored bit_weighted_count_sse) { static const V2DI mask15 = {{ 0x1555555555555515, 0x1555555555555515 }}; static const V2DI mask01 = {{ 0x0100000000000001, 0x0100000000000001 }}; @@ -367,15 +166,7 @@ __m128i bit_weighted_count_sse(unsigned long long Q0, unsigned long long Q1) return _mm_sad_epu8(v, _mm_setzero_si128()); } -<<<<<<< HEAD -<<<<<<< HEAD #elif defined(__ARM_NEON) -======= -#elif defined(hasNeon) ->>>>>>> e3cea41 (New vectored bit_weighted_count_sse) -======= -#elif defined(__ARM_NEON) ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) uint64x2_t bit_weighted_count_neon(unsigned long long Q0, unsigned long long Q1) { uint64x2_t v = vcombine_u64(vcreate_u64(Q0), vcreate_u64(Q1)); @@ -383,39 +174,13 @@ uint64x2_t bit_weighted_count_neon(unsigned long long Q0, unsigned long long Q1) vcntq_u8(vreinterpretq_u8_u64(vandq_u64(v, vdupq_n_u64(0x8100000000000081)))))))); } -<<<<<<< HEAD -<<<<<<< HEAD #elif 0 // SWAR, for record int bit_weighted_count(unsigned long long v) { -<<<<<<< HEAD -======= -#if defined(POPCOUNT) - unsigned int P2187 = (v >> 48) | (v << 16); // ror 48 - return bit_count(v) + bit_count_32(P2187 & 0x00818100); - -#else ->>>>>>> 867c81c (Omit restore board/parity in search_shallow; tweak NWS_STABILITY) -======= -#else -int bit_weighted_count(unsigned long long v) -{ - #if defined(POPCOUNT) - unsigned int P2187 = (v >> 48) | (v << 16); // ror 48 - return bit_count(v) + bit_count_32(P2187 & 0x00818100); - - #else ->>>>>>> e3cea41 (New vectored bit_weighted_count_sse) -======= -#elif 0 // SWAR, for record -int bit_weighted_count(unsigned long long v) -{ ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) int c; v = v - ((v >> 1) & 0x1555555555555515) + (v & 0x0100000000000001); v = ((v >> 2) & 0x3333333333333333) + (v & 0x3333333333333333); -<<<<<<< HEAD c = (v >> 32) + v; c = (c & 0x0F0F0F0F) + ((c >> 4) & 0x0F0F0F0F); c = (c * 0x01010101) >> 24; @@ -423,8 +188,6 @@ int bit_weighted_count(unsigned long long v) } #else -<<<<<<< HEAD -<<<<<<< HEAD int bit_weighted_count(unsigned long long v) { unsigned int AH18 = ((v >> 56) | (v << 8)) & 0x8181; // ror 56 @@ -433,42 +196,6 @@ int bit_weighted_count(unsigned long long v) #else return bit_count(v) + PopCnt16[AH18]; #endif -======= -======= - int c; - ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) - v = v - ((v >> 1) & 0x1555555555555515ULL) + (v & 0x0100000000000001ULL); - v = ((v >> 2) & 0x3333333333333333ULL) + (v & 0x3333333333333333ULL); - #ifdef HAS_CPU_64 - v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL; - c = (v * 0x0101010101010101ULL) >> 56; - #else -======= ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) - c = (v >> 32) + v; - c = (c & 0x0F0F0F0F) + ((c >> 4) & 0x0F0F0F0F); - c = (c * 0x01010101) >> 24; - return c; -<<<<<<< HEAD -<<<<<<< HEAD -#endif ->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6) -======= -======= -} - -#else -int bit_weighted_count(unsigned long long v) -{ - unsigned int AH18 = ((v >> 56) | (v << 8)) & 0x8181; // ror 56 - #ifdef POPCOUNT - return bit_count(v) + bit_count_32(AH18); - #else - return bit_count(v) + PopCnt16[AH18]; ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) - #endif ->>>>>>> e3cea41 (New vectored bit_weighted_count_sse) } #endif @@ -485,25 +212,6 @@ int bit_weighted_count(unsigned long long v) */ #if !defined(first_bit_32) && !defined(HAS_CPU_64) int first_bit_32(unsigned int b) -<<<<<<< HEAD -{ - #if defined(_MSC_VER) - unsigned long index; - _BitScanForward(&index, b); - return (int) index; - - #elif defined(USE_GAS_X64) || defined(USE_GAS_X86) - __asm__("rep; bsf %1, %0" : "=r" (b) : "rm" (b)); // tzcnt on BMI CPUs, bsf otherwise - return (int) b; - - #elif defined(USE_MSVC_X86) - __asm { - bsf eax, word ptr b - } - - #elif defined(USE_GCC_ARM) - return __builtin_clz(b & -b) ^ 31; -======= { #if defined(_MSC_VER) unsigned long index; @@ -533,41 +241,6 @@ int first_bit_32(unsigned int b) } #endif // first_bit_32 -#ifndef first_bit -int first_bit(unsigned long long b) -{ - #if defined(USE_GAS_X64) - __asm__("rep; bsfq %1, %0" : "=r" (b) : "rm" (b)); // tzcntq on BMI CPUs - return (int) b; - - #elif defined(USE_GAS_X86) - int x; - __asm__ ("bsf %2, %0\n\t" // (ZF differs from tzcnt) - "jnz 1f\n\t" - "bsf %1, %0\n\t" - "addl $32, %0\n" - "1:" : "=&q" (x) : "g" ((int) (b >> 32)), "g" ((int) b)); - return x; ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) - -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - #else - static const unsigned char magic[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 - }; -======= -#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM)) -<<<<<<< HEAD ->>>>>>> 1dc032e (Improve visual c compatibility) - - return magic[((b & (-b)) * 0x077CB531U) >> 27]; - #endif -} -#endif // first_bit_32 - #ifndef first_bit int first_bit(unsigned long long b) { @@ -585,31 +258,11 @@ int first_bit(unsigned long long b) return x; #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) -======= ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= -#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) ->>>>>>> f2da03e (Refine arm builds adding neon support.) -======= - #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) unsigned long index; _BitScanForward64(&index, b); return (int) index; -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - #elif defined(USE_MSVC_X86) -======= -#elif defined(USE_MASM_X86) ->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added) -======= -#elif defined(USE_MSVC_X86) ->>>>>>> 1dc032e (Improve visual c compatibility) -======= #elif defined(USE_MSVC_X86) ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) __asm { bsf eax, dword ptr b jnz l1 @@ -618,15 +271,7 @@ int first_bit(unsigned long long b) l1: } -<<<<<<< HEAD -<<<<<<< HEAD #elif defined(HAS_CPU_64) -======= -#elif defined(HAS_CPU_64) ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= - #elif defined(HAS_CPU_64) ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) static const unsigned char magic[64] = { 63, 0, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54, 33, 42, 3, @@ -640,41 +285,17 @@ int first_bit(unsigned long long b) return magic[((b & (-b)) * 0x07EDD5E59A4E28C2ULL) >> 58]; -<<<<<<< HEAD -<<<<<<< HEAD - #else -======= -#else ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= #else ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) const unsigned int lb = (unsigned int) b; if (lb) { return first_bit_32(lb); } else { return 32 + first_bit_32(b >> 32); } -<<<<<<< HEAD -<<<<<<< HEAD - #endif -======= -#endif ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= #endif ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) } -<<<<<<< HEAD -<<<<<<< HEAD -#endif // first_bit - -======= ->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added) -======= #endif // first_bit ->>>>>>> ea39994 (Improve clang compatibility) #if 0 /** * @brief Search the next bit set. @@ -704,56 +325,16 @@ int next_bit(unsigned long long *b) */ int last_bit(unsigned long long b) { -<<<<<<< HEAD -<<<<<<< HEAD #if defined(USE_GAS_X64) -======= -#if defined(USE_GAS_X64) ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) __asm__("bsrq %1, %0" :"=r" (b) :"rm" (b)); return b; -<<<<<<< HEAD -<<<<<<< HEAD #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) -======= -#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM)) -<<<<<<< HEAD - ->>>>>>> 1dc032e (Improve visual c compatibility) -======= ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= -#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) ->>>>>>> f2da03e (Refine arm builds adding neon support.) -======= - #if defined(USE_GAS_X64) - __asm__("bsrq %1, %0" :"=r" (b) :"rm" (b)); - return b; - - #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) unsigned long index; _BitScanReverse64(&index, b); return (int) index; -<<<<<<< HEAD -<<<<<<< HEAD - #elif defined(USE_GAS_X86) - int x; - __asm__ ("bsr %1, %0\n\t" - "leal 32(%0), %0\n\t" - "jnz 1f\n\t" - "bsr %2, %0\n\t" - "1:" : "=&q" (x) : "g" ((int) (b >> 32)), "g" ((int) b)); - return x; - - #elif 0 // defined(USE_GCC_ARM) -======= -#elif defined(USE_GAS_X86) -======= #elif defined(USE_GAS_X86) ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) int x; __asm__ ("bsr %1, %0\n\t" "leal 32(%0), %0\n\t" @@ -762,16 +343,7 @@ int last_bit(unsigned long long b) "1:" : "=&q" (x) : "g" ((int) (b >> 32)), "g" ((int) b)); return x; -<<<<<<< HEAD -<<<<<<< HEAD -#elif defined(USE_GCC_ARM) ->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added) -======= -#elif 0 // defined(USE_GCC_ARM) ->>>>>>> f2da03e (Refine arm builds adding neon support.) -======= #elif 0 // defined(USE_GCC_ARM) ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) const unsigned int hb = b >> 32; if (hb) { return 63 - __builtin_clz(hb); @@ -779,19 +351,7 @@ int last_bit(unsigned long long b) return 31 - __builtin_clz((int) b); } -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #elif defined(USE_MSVC_X86) -======= - -======= ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -#elif defined(USE_MSVC_X86) ->>>>>>> 1dc032e (Improve visual c compatibility) -======= - #elif defined(USE_MSVC_X86) ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) __asm { bsr eax, dword ptr b+4 lea eax, [eax+32] @@ -800,10 +360,6 @@ int last_bit(unsigned long long b) l1: } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) #elif defined(HAS_CPU_64) // https://www.chessprogramming.org/BitScan#De_Bruijn_Multiplication_2 static const unsigned char magic[64] = { @@ -815,30 +371,6 @@ int last_bit(unsigned long long b) 34, 51, 20, 43, 31, 22, 10, 45, 25, 39, 14, 33, 19, 30, 9, 24, 13, 18, 8, 12, 7, 6, 5, 63 -======= -#elif defined(HAS_CPU_64) - // https://www.chessprogramming.org/BitScan#De_Bruijn_Multiplication_2 - static const unsigned char magic[64] = { -<<<<<<< HEAD - 63, 0, 58, 1, 59, 47, 53, 2, - 60, 39, 48, 27, 54, 33, 42, 3, - 61, 51, 37, 40, 49, 18, 28, 20, - 55, 30, 34, 11, 43, 14, 22, 4, - 62, 57, 46, 52, 38, 26, 32, 41, - 50, 36, 17, 19, 29, 10, 13, 21, - 56, 45, 25, 31, 35, 16, 9, 12, - 44, 24, 15, 8, 23, 7, 6, 5 ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= - 0, 47, 1, 56, 48, 27, 2, 60, - 57, 49, 41, 37, 28, 16, 3, 61, - 54, 58, 35, 52, 50, 42, 21, 44, - 38, 32, 29, 23, 17, 11, 4, 62, - 46, 55, 26, 59, 40, 36, 15, 53, - 34, 51, 20, 43, 31, 22, 10, 45, - 25, 39, 14, 33, 19, 30, 9, 24, - 13, 18, 8, 12, 7, 6, 5, 63 ->>>>>>> 13d6004 (Update last_bit from chessprogramming wiki) }; b |= b >> 1; @@ -850,16 +382,7 @@ int last_bit(unsigned long long b) return magic[(b * 0x03f79d71b4cb0a89) >> 58]; -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - #else -======= -#else ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= #else ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) static const unsigned char clz_table_4bit[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; int n = 63; unsigned int x; @@ -871,39 +394,10 @@ int last_bit(unsigned long long b) if ((x & 0xF0000000) == 0) { n -= 4; x <<= 4; } n -= clz_table_4bit[x >> (32 - 4)]; return n; -<<<<<<< HEAD -<<<<<<< HEAD #endif -======= -======= ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -#endif -======= - #endif ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) -} -#endif // last_bit - -<<<<<<< HEAD -#ifndef bswap_short -/** - * @brief Swap bytes of a short (little <-> big endian). - * @param s An unsigned short. - * @return The mirrored short. - */ -unsigned short bswap_short(unsigned short s) -{ - return (unsigned short) ((s >> 8) & 0x00FF) | ((s & 0x00FF) << 8); ->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added) } -<<<<<<< HEAD #endif // last_bit -======= -#endif ->>>>>>> ea39994 (Improve clang compatibility) -======= ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) #ifndef bswap_int /** * @brief Mirror the unsigned int (little <-> big endian). @@ -924,26 +418,9 @@ unsigned int bswap_int(unsigned int i) */ unsigned long long vertical_mirror(unsigned long long b) { -<<<<<<< HEAD -<<<<<<< HEAD return bswap_int((unsigned int)(b >> 32)) | ((unsigned long long) bswap_int((unsigned int) b) << 32); } #endif // bswap_int -======= - b = ((b >> 8) & 0x00FF00FF00FF00FFULL) | ((b & 0x00FF00FF00FF00FFULL) << 8); - b = ((b >> 16) & 0x0000FFFF0000FFFFULL) | ((b & 0x0000FFFF0000FFFFULL) << 16); - b = (b >> 32) | (b << 32); - return b; -======= - return bswap_int((unsigned int)(b >> 32)) | ((unsigned long long) bswap_int((unsigned int) b) << 32); ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) -} -<<<<<<< HEAD -#endif ->>>>>>> dbeab1c (reduce asm and inline which sometimes breaks debug build) -======= -#endif // bswap_int ->>>>>>> ea39994 (Improve clang compatibility) /** * @brief Mirror the unsigned long long (exchange the line 1 - 8, 2 - 7, 3 - 6 & 4 - 5). @@ -952,43 +429,19 @@ unsigned long long vertical_mirror(unsigned long long b) */ unsigned int horizontal_mirror_32(unsigned int b) { -<<<<<<< HEAD -<<<<<<< HEAD -#ifdef __ARM_ACLE - return __rev(__rbit(b)); -#else -======= ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= #ifdef __ARM_ACLE return __rev(__rbit(b)); #else ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) b = ((b >> 1) & 0x55555555U) + 2 * (b & 0x55555555U); b = ((b >> 2) & 0x33333333U) + 4 * (b & 0x33333333U); b = ((b >> 4) & 0x0F0F0F0FU) + 16 * (b & 0x0F0F0F0FU); return b; -<<<<<<< HEAD -<<<<<<< HEAD #endif -======= ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= -#endif ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) } unsigned long long horizontal_mirror(unsigned long long b) { -<<<<<<< HEAD -<<<<<<< HEAD -#if defined(HAS_CPU_64) && !defined(__ARM_ACLE) -======= -#ifdef HAS_CPU_64 ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= #if defined(HAS_CPU_64) && !defined(__ARM_ACLE) ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) b = ((b >> 1) & 0x5555555555555555ULL) | ((b & 0x5555555555555555ULL) << 1); b = ((b >> 2) & 0x3333333333333333ULL) | ((b & 0x3333333333333333ULL) << 2); b = ((b >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((b & 0x0F0F0F0F0F0F0F0FULL) << 4); @@ -1004,38 +457,10 @@ unsigned long long horizontal_mirror(unsigned long long b) * @param b An unsigned long long * @return The transposed unsigned long long. */ -<<<<<<< HEAD -<<<<<<< HEAD -#ifdef __AVX2__ -<<<<<<< HEAD -unsigned long long transpose(unsigned long long b) -{ - __m256i v = _mm256_sllv_epi64(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(b)), _mm256_set_epi64x(0, 1, 2, 3)); -======= -#include -unsigned long long transpose(unsigned long long b) -{ - static const __v4di s3210 = { 3, 2, 1, 0 }; -<<<<<<< HEAD - __v4di v = _mm256_sllv_epi64(_mm256_broadcastq_epi64(_mm_set_epi64x(0, b)), s3210); ->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added) -======= - __v4di v = _mm256_sllv_epi64(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(b)), s3210); ->>>>>>> dbeab1c (reduce asm and inline which sometimes breaks debug build) -======= -#if defined(__AVX2__) && (defined(__x86_64__) || defined(_M_X64)) -======= #ifdef __AVX2__ ->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6) unsigned long long transpose(unsigned long long b) { -<<<<<<< HEAD - static const V4DI s3210 = {{ 3, 2, 1, 0 }}; - __m256i v = _mm256_sllv_epi64(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(b)), s3210.v4); ->>>>>>> 1dc032e (Improve visual c compatibility) -======= __m256i v = _mm256_sllv_epi64(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(b)), _mm256_set_epi64x(0, 1, 2, 3)); ->>>>>>> 4303b09 (Returns all full lines in full[4]) return ((unsigned long long) _mm256_movemask_epi8(v) << 32) | (unsigned int) _mm256_movemask_epi8(_mm256_slli_epi64(v, 4)); } @@ -1054,51 +479,9 @@ unsigned long long transpose(unsigned long long b) return b; } -<<<<<<< HEAD -<<<<<<< HEAD #endif // __AVX2__ #ifndef crc32c_u64 -<<<<<<< HEAD -/** - * @brief Caliculate crc32c checksum for 8 bytes data - * @param crc Initial crc from previous data. - * @param data Data to accumulate. - * @return Resulting crc. - */ -unsigned int crc32c_u64(unsigned int crc, unsigned long long data) -{ - crc ^= (unsigned int) data; - crc = crc32c_table[3][crc & 0xff] ^ - crc32c_table[2][(crc >> 8) & 0xff] ^ - crc32c_table[1][(crc >> 16) & 0xff] ^ - crc32c_table[0][crc >> 24]; - crc ^= (unsigned int) (data >> 32); - return crc32c_table[3][crc & 0xff] ^ - crc32c_table[2][(crc >> 8) & 0xff] ^ - crc32c_table[1][(crc >> 16) & 0xff] ^ - crc32c_table[0][crc >> 24]; -} - -/** - * @brief Caliculate crc32c checksum for a byte - * @param crc Initial crc from previous data. - * @param data Data to accumulate. - * @return Resulting crc. - */ -unsigned int crc32c_u8(unsigned int crc, unsigned int data) -{ - return crc32c_table[0][(crc ^ data) & 0xff] ^ (crc >> 8); -} -======= ->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added) -#endif -======= -#endif // __AVX2__ ->>>>>>> ea39994 (Improve clang compatibility) - -======= ->>>>>>> f33d573 (Fix 'nboard pass not parsed' bug, crc32c for game hash too) /** * @brief Caliculate crc32c checksum for 8 bytes data * @param crc Initial crc from previous data. diff --git a/src/bit.h b/src/bit.h index c615fd2..c08243e 100644 --- a/src/bit.h +++ b/src/bit.h @@ -3,39 +3,7 @@ * * Bitwise operations header file. * -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD * @date 1998 - 2023 -======= - * @date 1998 - 2017 ->>>>>>> b3f048d (copyright changes) -======= - * @date 1998 - 2018 ->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added) -======= - * @date 1998 - 2020 ->>>>>>> 9ad160e (4.4.7 AVX/shuffle optimization in endgame_sse.c) -======= - * @date 1998 - 2021 ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) -======= - * @date 1998 - 2022 ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= - * @date 1998 - 2020 ->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line) -======= - * @date 1998 - 2022 ->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame) -======= - * @date 1998 - 2023 ->>>>>>> 8566ed0 (vector call version of board_next & get_moves) * @author Richard Delorme * @version 4.5 */ @@ -53,71 +21,14 @@ struct Random; /* declaration */ -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -void bit_init(void); -<<<<<<< HEAD -// int next_bit(unsigned long long*); -void bitboard_write(unsigned long long, FILE*); -======= -int bit_weighted_count(const unsigned long long); -// int next_bit(unsigned long long*); -void bitboard_write(const unsigned long long, FILE*); ->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added) -======= -======= void bit_init(void); ->>>>>>> 22be102 (table lookup bit_count for non-POPCOUNT from stockfish) -int bit_weighted_count(unsigned long long); -======= ->>>>>>> e3cea41 (New vectored bit_weighted_count_sse) // int next_bit(unsigned long long*); void bitboard_write(unsigned long long, FILE*); ->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6) unsigned long long transpose(unsigned long long); -<<<<<<< HEAD -<<<<<<< HEAD -unsigned int horizontal_mirror_32(unsigned int b); -======= ->>>>>>> dbeab1c (reduce asm and inline which sometimes breaks debug build) -======= unsigned int horizontal_mirror_32(unsigned int b); ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) unsigned long long horizontal_mirror(unsigned long long); int get_rand_bit(unsigned long long, struct Random*); -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -#if !defined(__AVX2__) && defined(hasSSE2) && !defined(POPCOUNT) - __m128i bit_weighted_count_sse(unsigned long long, unsigned long long); -#elif defined (__ARM_NEON) - uint64x2_t bit_weighted_count_neon(unsigned long long, unsigned long long); -======= -#ifdef __GNUC__ -#define bswap_short(x) __builtin_bswap16(x) -#define bswap_int(x) __builtin_bswap32(x) -#define vertical_mirror(x) __builtin_bswap64(x) -#elif defined(_MSC_VER) -#define bswap_short(x) _byteswap_ushort(x) -#define bswap_int(x) _byteswap_ulong(x) -#define vertical_mirror(x) _byteswap_uint64(x) -#else -unsigned short bswap_short(unsigned short); -unsigned int bswap_int(unsigned int); -unsigned long long vertical_mirror(unsigned long long); -======= -======= -======= -extern const unsigned long long X_TO_BIT[]; -/** Return a bitboard with bit x set. */ -#define x_to_bit(x) X_TO_BIT[x] -======= -======= #if !defined(__AVX2__) && defined(hasSSE2) && !defined(POPCOUNT) __m128i bit_weighted_count_sse(unsigned long long, unsigned long long); #elif defined (__ARM_NEON) @@ -126,179 +37,6 @@ extern const unsigned long long X_TO_BIT[]; int bit_weighted_count(unsigned long long); #endif ->>>>>>> e3cea41 (New vectored bit_weighted_count_sse) -extern unsigned long long X_TO_BIT[]; -extern const unsigned long long NEIGHBOUR[]; ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) - -/** Return a bitboard with bit x set. */ -// https://eukaryote.hateblo.jp/entry/2020/04/12/054905 -#ifdef HAS_CPU_64 // 1% slower on Sandy Bridge - #define x_to_bit(x) (1ULL << (x)) -#else - #define x_to_bit(x) X_TO_BIT[x] -#endif - -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> b1eae0d (Reduce flip table by rotated outflank; revise lzcnt & rol8 defs) -======= -#ifndef __has_builtin - #define __has_builtin(x) 0 // Compatibility with non-clang compilers. -#endif - -// mirror byte -#if defined(_M_ARM) // || defined(_M_ARM64) // https://developercommunity.visualstudio.com/content/problem/498995/arm64-missing-rbit-intrinsics.html -#define mirror_byte(b) (_arm_rbit(b) >> 24) -#elif defined(__ARM_ACLE) -#include -#define mirror_byte(b) (__rbit(b) >> 24) -#elif defined(HAS_CPU_64) ->>>>>>> f2da03e (Refine arm builds adding neon support.) -// http://graphics.stanford.edu/~seander/bithacks.html -#define mirror_byte(b) (unsigned char)((((b) * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32) -#else -static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x200802) & 0x4422110) + ((b << 7) & 0x880)) * 0x01010101 >> 24); } -#endif - -<<<<<<< HEAD ->>>>>>> 0ee9c1c (mirror_byte added for 1 byte bit reverse) -#ifndef __has_builtin - #define __has_builtin(x) 0 // Compatibility with non-clang compilers. ->>>>>>> ea39994 (Improve clang compatibility) -#endif - -======= -// rotl8 ->>>>>>> f2da03e (Refine arm builds adding neon support.) -#if __has_builtin(__builtin_rotateleft8) - #define rotl8(x,y) __builtin_rotateleft8((x),(y)) -#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) && (defined(__x86_64__) || defined(__i386__)) - #define rotl8(x,y) __builtin_ia32_rolqi((x),(y)) -#elif defined(_MSC_VER) - #define rotl8(x,y) _rotl8((x),(y)) -#else // may not compile into 8-bit rotate - #define rotl8(x,y) ((unsigned char)(((x)<<(y))|((unsigned)(x)>>(8-(y))))) -#endif - -// bswap -#ifdef _MSC_VER - #define bswap_short(x) _byteswap_ushort(x) - #define bswap_int(x) _byteswap_ulong(x) - #define vertical_mirror(x) _byteswap_uint64(x) -#else - #if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) || __has_builtin(__builtin_bswap16) - #define bswap_short(x) __builtin_bswap16(x) - #else - #define bswap_short(x) (((unsigned short) (x) >> 8) | ((unsigned short) (x) << 8)) - #endif - #if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || __has_builtin(__builtin_bswap64) - #define bswap_int(x) __builtin_bswap32(x) - #define vertical_mirror(x) __builtin_bswap64(x) - #else - unsigned int bswap_int(unsigned int); - unsigned long long vertical_mirror(unsigned long long); - #endif -#endif - -// ctz / clz -======= -/** Loop over each bit set. */ -<<<<<<< HEAD ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -#if (defined(__GNUC__) && __GNUC__ >= 4) || __has_builtin(__builtin_ctzll) - #define first_bit(x) __builtin_ctzll(x) - #define last_bit(x) (63 - __builtin_clzll(x)) -#elif defined(tzcnt_u64) -======= -#if defined(tzcnt_u64) ->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99) - #define first_bit(x) tzcnt_u64(x) - #define last_bit(x) (63 - lzcnt_u64(x)) -#elif ((defined(__GNUC__) && (__GNUC__ >= 4)) || __has_builtin(__builtin_ctzll)) && !defined(__INTEL_COMPILER) - #define first_bit(x) __builtin_ctzll(x) - #define last_bit(x) (63 - __builtin_clzll(x)) -#else - int first_bit(unsigned long long); - int last_bit(unsigned long long); -#endif - -#if defined(HAS_CPU_64) || !defined(__STDC_HOSTED__) // __STDC_HOSTED__ (C99) to declare var in for statement - #define foreach_bit(i, b) for (i = first_bit(b); b; i = first_bit(b &= (b - 1))) -#else - #ifdef tzcnt_u32 - #define first_bit_32(x) tzcnt_u32(x) - #else - int first_bit_32(unsigned int); - #endif - #define foreach_bit(i, b) (void) i; for (unsigned int _j = 0; _j < sizeof(b) * CHAR_BIT; _j += sizeof(int) * CHAR_BIT) \ - for (int _r = (b >> _j), i = first_bit_32(_r) + _j; _r; i = first_bit_32(_r &= (_r - 1)) + _j) -#endif - -// popcount -#ifdef hasNeon - #ifdef HAS_CPU_64 - #define bit_count(x) vaddv_u8(vcnt_u8(vcreate_u8(x))) - #define bit_count_32(x) vaddv_u8(vcnt_u8(vcreate_u8((unsigned int) x))) - #else - #define bit_count(x) vget_lane_u32(vreinterpret_u32_u64(vpaddl_u32(vpaddl_u16(vpaddl_u8(vcnt_u8(vcreate_u8(x)))))), 0) - #define bit_count_32(x) vget_lane_u32(vpaddl_u16(vpaddl_u8(vcnt_u8(vcreate_u8(x)))), 0) - #endif - -#elif defined(POPCOUNT) -<<<<<<< HEAD - /* - #if defined (USE_GAS_X64) - static inline int bit_count (unsigned long long x) { - long long y; - __asm__ ( "popcntq %1,%0" : "=r" (y) : "rm" (x)); - return y; - } - #elif defined (USE_GAS_X86) - static inline int bit_count (unsigned long long x) { - unsigned int y0, y1; - __asm__ ( "popcntl %2,%0\n\t" - "popcntl %3,%1" - : "=&r" (y0), "=&r" (y1) - : "rm" ((unsigned int) x), "rm" ((unsigned int) (x >> 32))); - return y0 + y1; - } - */ - #ifdef _MSC_VER - #if defined(_M_ARM) || defined(_M_ARM64) - #define bit_count(x) _CountOneBits64(x) - #define bit_count_32(x) _CountOneBits(x) - #elif defined(_M_X64) - #define bit_count(x) ((int) __popcnt64(x)) - #define bit_count_32(x) __popcnt(x) - #else - #define bit_count(x) (__popcnt((unsigned int) (x)) + __popcnt((unsigned int) ((x) >> 32))) - #define bit_count_32(x) __popcnt(x) - #endif - #else - #define bit_count(x) __builtin_popcountll(x) - #define bit_count_32(x) __builtin_popcount(x) - #endif -<<<<<<< HEAD ->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added) -#else -<<<<<<< HEAD - int bit_weighted_count(unsigned long long); -======= - extern unsigned char PopCnt16[1 << 16]; - static inline int bit_count(unsigned long long b) { - union { unsigned long long bb; unsigned short u[4]; } v = { b }; - return (unsigned char)(PopCnt16[v.u[0]] + PopCnt16[v.u[1]] + PopCnt16[v.u[2]] + PopCnt16[v.u[3]]); - } ->>>>>>> 22be102 (table lookup bit_count for non-POPCOUNT from stockfish) -#endif - -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD extern unsigned long long X_TO_BIT[]; extern const unsigned long long NEIGHBOUR[]; @@ -345,8 +83,6 @@ extern const unsigned long long NEIGHBOUR[]; #endif #elif defined(POPCOUNT) -======= ->>>>>>> 0835dae (Reformat #if's) /* #if defined (USE_GAS_X64) static inline int bit_count (unsigned long long x) { @@ -379,11 +115,6 @@ extern const unsigned long long NEIGHBOUR[]; #define bit_count(x) __builtin_popcountll(x) #define bit_count_32(x) __builtin_popcount(x) #endif -<<<<<<< HEAD -======= ->>>>>>> 4fac39f (get_spreaded_mobility for SSE/32, bit_count_si64 for SSE2) -======= ->>>>>>> 0835dae (Reformat #if's) #define bit_count_si64(x) bit_count(_mm_cvtsi128_si64(x)) #else @@ -396,14 +127,7 @@ extern const unsigned long long NEIGHBOUR[]; union { unsigned int bb; unsigned short u[2]; } v = { b }; return (unsigned char)(PopCnt16[v.u[0]] + PopCnt16[v.u[1]]); } -<<<<<<< HEAD -<<<<<<< HEAD #define bit_count_si64(x) ((unsigned char)(PopCnt16[_mm_extract_epi16((x), 0)] + PopCnt16[_mm_extract_epi16((x), 1)] + PopCnt16[_mm_extract_epi16((x), 2)] + PopCnt16[_mm_extract_epi16((x), 3)])) -======= ->>>>>>> dc7c79c (Omit unpack from get_edge_stability) -======= - #define bit_count_si64(x) ((unsigned char)(PopCnt16[_mm_extract_epi16((x), 0)] + PopCnt16[_mm_extract_epi16((x), 1)] + PopCnt16[_mm_extract_epi16((x), 2)] + PopCnt16[_mm_extract_epi16((x), 3)])) ->>>>>>> 4fac39f (get_spreaded_mobility for SSE/32, bit_count_si64 for SSE2) #endif #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) @@ -413,7 +137,6 @@ extern const unsigned long long NEIGHBOUR[]; #ifndef hasMMX extern bool hasMMX; #endif -<<<<<<< HEAD #endif #if defined(ANDROID) && ((defined(__arm__) && !defined(__ARM_NEON)) || (defined(__i386__) && !defined(hasSSE2))) @@ -434,85 +157,12 @@ typedef union { __m128i v2; __m128d d2; // used in flip_carry_sse_32.c #endif -======= -#if defined(__x86_64__) || defined(_M_X64) -======= -#if defined(__x86_64__) || defined(_M_X64) || defined(__AVX2__) ->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6) -======= -#if defined(__SSE2__) || defined(_M_X64) ->>>>>>> bc93772 (Avoid modern compliler warnings) - #define hasSSE2 1 -#endif - -#ifdef _MSC_VER - #include - #ifdef _M_IX86 - #define USE_MSVC_X86 1 - #endif -#elif defined(hasSSE2) - #include -#endif - -#ifdef hasSSE2 - #define hasMMX 1 -#endif - -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) - #ifndef hasSSE2 -======= -#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(ANDROID) - #if !defined(hasSSE2) && !defined(hasNeon) ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= -#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) - #ifndef hasSSE2 ->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line) - extern bool hasSSE2; - #endif - #ifndef hasMMX - extern bool hasMMX; - #endif -======= ->>>>>>> 0835dae (Reformat #if's) -#endif - -#if defined(ANDROID) && ((defined(__arm__) && !defined(__ARM_NEON)) || (defined(__i386__) && !defined(hasSSE2))) -extern bool hasSSE2; -#endif - -/** Board : board representation */ -typedef struct Board { - unsigned long long player, opponent; /**< bitboard representation */ -} Board; - -typedef union { - unsigned long long ull[2]; - Board board; // for vboard optimization in search - #ifdef __ARM_NEON - uint64x2_t v2; - #elif defined(hasSSE2) || defined(USE_MSVC_X86) - __m128i v2; - __m128d d2; // used in flip_carry_sse_32.c -<<<<<<< HEAD -#endif ->>>>>>> 1dc032e (Improve visual c compatibility) -======= - #endif ->>>>>>> e22b052 (_mm_cvtsi64_si128 x86 sim using loadl, requires lvalue) } #if defined(__GNUC__) && !defined(hasSSE2) __attribute__ ((aligned (16))) #endif V2DI; -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD typedef union { unsigned long long ull[4]; #ifdef __AVX2__ @@ -529,117 +179,6 @@ typedef union { typedef union { unsigned long long ull[8]; #ifdef __AVX512VL__ -<<<<<<< HEAD - __m512i v8; - #endif - #ifdef __AVX2__ - __m256i v4[2]; - #endif -} V8DI; - -/* Define function attributes directive when available */ - -#if (defined(_MSC_VER) || defined(__clang__)) && defined(hasSSE2) - #define vectorcall __vectorcall -#elif defined(__GNUC__) && defined(__i386__) - #define vectorcall __attribute__((sseregparm)) -#elif 0 // defined(__GNUC__) // erroreous result on pgo-build - #define vectorcall __attribute__((sysv_abi)) -#else - #define vectorcall -#endif - -// X64 compatibility sims for X86 -#if !defined(HAS_CPU_64) && (defined(hasSSE2) || defined(USE_MSVC_X86)) - // static inline __m128i _mm_cvtsi64_si128(const unsigned long long x) { - // return _mm_unpacklo_epi32(_mm_cvtsi32_si128(x), _mm_cvtsi32_si128(x >> 32)); - // } - // better code but requires lvalue - #define _mm_cvtsi64_si128(x) _mm_loadl_epi64((__m128i *) &(x)) - static inline unsigned long long vectorcall _mm_cvtsi128_si64(__m128i x) { - return *(unsigned long long *) &x; - } - static inline unsigned long long vectorcall _mm_extract_epi64(__m128i x, int i) { - return ((unsigned long long *) &x)[i]; - } - - #if defined(_MSC_VER) && _MSC_VER<1900 - static inline __m128i _mm_set_epi64x(unsigned long long b, unsigned long long a) { - return _mm_unpacklo_epi64(_mm_cvtsi64_si128(b), _mm_cvtsi64_si128(a)); - } - static inline __m128i _mm_set1_epi64x(unsigned long long x) { - __m128i t = _mm_cvtsi64_si128(x); - return _mm_unpacklo_epi64(t, t); - } - #endif -#endif // !HAS_CPU_64 - -#if __clang_major__ == 3 // undefined reference to `llvm.x86.avx.storeu.dq.256' - #define _mm_storeu_si128(a,b) *(__m128i *)(a) = (b) - #define _mm256_storeu_si256(a,b) *(__m256i *)(a) = (b) -======= -#ifdef __AVX2__ -======= -#ifdef hasSSE2 ->>>>>>> 9ad160e (4.4.7 AVX/shuffle optimization in endgame_sse.c) -typedef union { - unsigned long long ull[4]; -<<<<<<< HEAD - #ifdef __AVX2__ - __m256i v4; - #endif - __m128i v2[2]; -} V4DI; ->>>>>>> 1dc032e (Improve visual c compatibility) -======= -typedef union { - unsigned long long ull[4]; -#ifdef __AVX2__ - __m256i v4; ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -#endif -#ifdef hasSSE2 -======= -#ifdef hasSSE2 -======= ->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested)) -typedef union { - unsigned long long ull[4]; - #ifdef __AVX2__ - __m256i v4; - #endif -<<<<<<< HEAD ->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line) -======= - #ifdef hasSSE2 -<<<<<<< HEAD ->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested)) - __m128i v2[2]; -======= - __m128i v2[2]; - #endif - #ifdef USE_MSVC_X86 - __m64 v1[4]; ->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction) - #endif -======= - #ifdef __AVX2__ - __m256i v4; - #endif - #ifdef hasSSE2 - __m128i v2[2]; - #endif - #ifdef USE_MSVC_X86 - __m64 v1[4]; - #endif ->>>>>>> e22b052 (_mm_cvtsi64_si128 x86 sim using loadl, requires lvalue) -} V4DI; - -typedef union { - unsigned long long ull[8]; - #ifdef __AVX512F__ -======= ->>>>>>> b1cae3c (Rewrite AVX512 LASTFLIP_HIGHCUT not to use kortest) __m512i v8; #endif #ifdef __AVX2__ diff --git a/src/bit_intrinsics.h b/src/bit_intrinsics.h index 6625411..3ddd483 100644 --- a/src/bit_intrinsics.h +++ b/src/bit_intrinsics.h @@ -1,25 +1,9 @@ -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) /** * @file bit_intrinsics.h * * CPU dependent bit operation intrinsics. * -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD * @date 2020 - 2024 -======= - * @date 2020 - 2022 ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= - * @date 2020 - 2023 ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) -======= - * @date 2020 - 2024 ->>>>>>> a09308f (Renew version string and copyright year) * @author Richard Delorme * @author Toshihiko Okuhara * @version 4.5 @@ -28,27 +12,11 @@ #ifndef EDAX_BIT_INTRINSICS_H #define EDAX_BIT_INTRINSICS_H -<<<<<<< HEAD -<<<<<<< HEAD -#if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) - #define HAS_CPU_64 1 -#endif - -#if defined(__SSE2__) || defined(__AVX__) || defined(_M_X64) -======= -#if !defined(HAS_CPU_64) && (defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64)) - #define HAS_CPU_64 1 -#endif - -#if defined(__SSE2__) || defined(_M_X64) ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= #if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) #define HAS_CPU_64 1 #endif #if defined(__SSE2__) || defined(__AVX__) || defined(_M_X64) ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) #define hasSSE2 1 #endif @@ -56,60 +24,24 @@ #define hasMMX 1 #endif -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) #if defined(ANDROID) && defined(__arm__) #if __ANDROID_API__ < 21 #define DISPATCH_NEON 1 #else #define __ARM_NEON 1 -<<<<<<< HEAD - #endif -#elif defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) - #define __ARM_NEON 1 -#endif -#ifdef __ARM_NEON - #include "arm_neon.h" -======= -#if defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) - #define hasNeon 1 - #ifndef __ARM_NEON__ - #define __ARM_NEON__ 1 -======= ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) #endif #elif defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) #define __ARM_NEON 1 #endif -<<<<<<< HEAD -#ifdef __ARM_NEON__ -#include "arm_neon.h" ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= #ifdef __ARM_NEON #include "arm_neon.h" ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) #endif #ifdef _MSC_VER #include -<<<<<<< HEAD -<<<<<<< HEAD #ifdef _M_IX86 #define USE_MSVC_X86 1 #endif -======= - #ifdef _M_IX86 - #define USE_MSVC_X86 1 - #endif ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= - #ifdef _M_IX86 - #define USE_MSVC_X86 1 - #endif ->>>>>>> 0835dae (Reformat #if's) #elif defined(hasSSE2) #include #endif @@ -119,16 +51,7 @@ #endif // mirror byte -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #if defined(_M_ARM) // || (defined(_M_ARM64) && _MSC_VER >= 1922) // https://developercommunity.visualstudio.com/t/ARM64-still-missing-RBIT-intrinsics/10547420 -======= -#if defined(_M_ARM) || (defined(_M_ARM64) && _MSC_VER >= 1922) // https://developercommunity.visualstudio.com/content/problem/498995/arm64-missing-rbit-intrinsics.html ->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2) -======= -#if defined(_M_ARM) // || (defined(_M_ARM64) && _MSC_VER >= 1922) // https://developercommunity.visualstudio.com/t/ARM64-still-missing-RBIT-intrinsics/10547420 ->>>>>>> 66e8cab (MSC ARM64 still missing _arm_rbit) #define mirror_byte(b) (_arm_rbit(b) >> 24) #elif defined(__ARM_ACLE) #include @@ -138,22 +61,6 @@ #define mirror_byte(b) (unsigned char)((((b) * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32) #else static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x200802) & 0x4422110) + ((b << 7) & 0x880)) * 0x01010101 >> 24); } -======= -#if defined(_M_ARM) // || defined(_M_ARM64) // https://developercommunity.visualstudio.com/content/problem/498995/arm64-missing-rbit-intrinsics.html - #define mirror_byte(b) (_arm_rbit(b) >> 24) -#elif defined(__ARM_ACLE) - #include - #define mirror_byte(b) (__rbit(b) >> 24) -#elif defined(HAS_CPU_64) - // http://graphics.stanford.edu/~seander/bithacks.html - #define mirror_byte(b) (unsigned char)((((b) * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32) -#else -<<<<<<< HEAD -static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x200802) & 0x4422110) + ((b << 7) & 0x880)) * 0x01010101 >> 24); } ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= - static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x200802) & 0x4422110) + ((b << 7) & 0x880)) * 0x01010101 >> 24); } ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) #endif // rotl8 @@ -173,10 +80,6 @@ static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x20080 #define bswap_int(x) _byteswap_ulong(x) #define vertical_mirror(x) _byteswap_uint64(x) #else -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 0835dae (Reformat #if's) #if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) || __has_builtin(__builtin_bswap16) #define bswap_short(x) __builtin_bswap16(x) #else @@ -189,23 +92,6 @@ static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x20080 unsigned int bswap_int(unsigned int); unsigned long long vertical_mirror(unsigned long long); #endif -<<<<<<< HEAD -======= - #if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) || __has_builtin(__builtin_bswap16) - #define bswap_short(x) __builtin_bswap16(x) - #else - #define bswap_short(x) (((unsigned short) (x) >> 8) | ((unsigned short) (x) << 8)) - #endif - #if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || __has_builtin(__builtin_bswap64) - #define bswap_int(x) __builtin_bswap32(x) - #define vertical_mirror(x) __builtin_bswap64(x) - #else - unsigned int bswap_int(unsigned int); - unsigned long long vertical_mirror(unsigned long long); - #endif ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= ->>>>>>> 0835dae (Reformat #if's) #endif // lzcnt / tzcnt (0 allowed) @@ -265,23 +151,11 @@ static inline int _tzcnt_u64(unsigned long long x) { #elif defined(_MSC_VER) static inline int lzcnt_u32(unsigned int n) { -<<<<<<< HEAD -<<<<<<< HEAD unsigned long i; -======= - unsigned int i; ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= - unsigned long i; ->>>>>>> 77ab3e9 (Experimental branchless AVX512 lastflip in endgame_sse.c) if (!_BitScanReverse(&i, n)) i = 32 ^ 31; return i ^ 31; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 0835dae (Reformat #if's) #ifdef _M_X64 static inline int lzcnt_u64(unsigned long long n) { unsigned long i; @@ -299,7 +173,6 @@ static inline int _tzcnt_u64(unsigned long long x) { return i ^ 63; } #endif -<<<<<<< HEAD #elif defined(__ARM_FEATURE_CLZ) #if __ARM_ACLE >= 110 @@ -309,45 +182,6 @@ static inline int _tzcnt_u64(unsigned long long x) { #define lzcnt_u32(x) __builtin_clz(x) #define lzcnt_u64(x) __builtin_clzll(x) #endif -======= - #ifdef _M_X64 - static inline int lzcnt_u64(unsigned long long n) { - unsigned long i; - if (!_BitScanReverse64(&i, n)) - i = 64 ^ 63; - return i ^ 63; - } - #else - static inline int lzcnt_u64(unsigned long long n) { - unsigned long i; - if (_BitScanReverse(&i, n >> 32)) - return i ^ 31; - if (!_BitScanReverse(&i, (unsigned int) n)) - i = 64 ^ 63; - return i ^ 63; - } - #endif - -#elif defined(__ARM_FEATURE_CLZ) - #if __ARM_ACLE >= 110 - #define lzcnt_u32(x) __clz(x) - #define lzcnt_u64(x) __clzll(x) - #else // strictly-incorrect patch - #define lzcnt_u32(x) __builtin_clz(x) - #define lzcnt_u64(x) __builtin_clzll(x) - #endif ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= - -#elif defined(__ARM_FEATURE_CLZ) - #if __ARM_ACLE >= 110 - #define lzcnt_u32(x) __clz(x) - #define lzcnt_u64(x) __clzll(x) - #else // strictly-incorrect patch - #define lzcnt_u32(x) __builtin_clz(x) - #define lzcnt_u64(x) __builtin_clzll(x) - #endif ->>>>>>> 0835dae (Reformat #if's) #else static inline int lzcnt_u32(unsigned long x) { return (x ? __builtin_clz(x) : 32); } @@ -359,261 +193,11 @@ static inline int _tzcnt_u64(unsigned long long x) { #define tzcnt_u64(x) _tzcnt_u64(x) #elif defined(__ARM_FEATURE_CLZ) -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 0835dae (Reformat #if's) #ifdef _M_ARM #define tzcnt_u32(x) _arm_clz(_arm_rbit(x)) #elif __has_builtin(__rbit) // (__ARM_ARCH >= 6 && __ARM_ISA_THUMB >= 2) || __ARM_ARCH >= 7 // not for gcc #define tzcnt_u32(x) __clz(__rbit(x)) #endif -<<<<<<< HEAD -#endif - -#if defined(__SSE4_2__) || defined(__AVX__) - #ifdef HAS_CPU_64 - #define crc32c_u64(crc,d) _mm_crc32_u64((crc),(d)) - #else - #define crc32c_u64(crc,d) _mm_crc32_u32(_mm_crc32_u32((crc),(d)),((d)>>32)) - #endif - #define crc32c_u8(crc,d) _mm_crc32_u8((crc),(d)) - -#elif defined(__ARM_FEATURE_CRC32) - #include "arm_acle.h" - #define crc32c_u64(crc,d) __crc32cd((crc),(d)) - #define crc32c_u8(crc,d) __crc32cb((crc),(d)) - -#else - unsigned int crc32c_u64(unsigned int crc, unsigned long long data); - unsigned int crc32c_u8(unsigned int crc, unsigned int data); -#endif - -#endif // EDAX_BIT_INTRINSICS_H -======= -/** - * @file bit_intrinsics.h - * - * CPU dependent bit operation intrinsics. - * - * @date 2020 - 2021 - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.5 - */ - -#ifndef EDAX_BIT_INTRINSICS_H -#define EDAX_BIT_INTRINSICS_H - -#if !defined(HAS_CPU_64) && (defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64)) - #define HAS_CPU_64 1 -#endif - -#if defined(__SSE2__) || defined(_M_X64) - #define hasSSE2 1 -#endif - -#ifdef hasSSE2 - #define hasMMX 1 -#endif - -#if defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) - #define hasNeon 1 - #ifndef __ARM_NEON__ - #define __ARM_NEON__ 1 - #endif -#endif -#ifdef __ARM_NEON__ -#include "arm_neon.h" -#endif - -#ifdef _MSC_VER - #include - #ifdef _M_IX86 - #define USE_MSVC_X86 1 - #endif -#elif defined(hasSSE2) - #include -#endif - -#ifndef __has_builtin - #define __has_builtin(x) 0 // Compatibility with non-clang compilers. -#endif - -// mirror byte -#if defined(_M_ARM) // || defined(_M_ARM64) // https://developercommunity.visualstudio.com/content/problem/498995/arm64-missing-rbit-intrinsics.html -#define mirror_byte(b) (_arm_rbit(b) >> 24) -#elif defined(__ARM_ACLE) -#include -#define mirror_byte(b) (__rbit(b) >> 24) -#elif defined(HAS_CPU_64) -// http://graphics.stanford.edu/~seander/bithacks.html -#define mirror_byte(b) (unsigned char)((((b) * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32) -#else -static inline unsigned char mirror_byte(unsigned int b) { return ((((b * 0x200802) & 0x4422110) + ((b << 7) & 0x880)) * 0x01010101 >> 24); } -#endif - -// rotl8 -#if __has_builtin(__builtin_rotateleft8) - #define rotl8(x,y) __builtin_rotateleft8((x),(y)) -#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) && (defined(__x86_64__) || defined(__i386__)) - #define rotl8(x,y) __builtin_ia32_rolqi((x),(y)) -#elif defined(_MSC_VER) - #define rotl8(x,y) _rotl8((x),(y)) -#else // may not compile into 8-bit rotate - #define rotl8(x,y) ((unsigned char)(((x)<<(y))|((unsigned char)(x)>>(8-(y))))) -#endif - -// bswap -#ifdef _MSC_VER - #define bswap_short(x) _byteswap_ushort(x) - #define bswap_int(x) _byteswap_ulong(x) - #define vertical_mirror(x) _byteswap_uint64(x) -#else - #if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) || __has_builtin(__builtin_bswap16) - #define bswap_short(x) __builtin_bswap16(x) - #else - #define bswap_short(x) (((unsigned short) (x) >> 8) | ((unsigned short) (x) << 8)) - #endif - #if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || __has_builtin(__builtin_bswap64) - #define bswap_int(x) __builtin_bswap32(x) - #define vertical_mirror(x) __builtin_bswap64(x) - #else - unsigned int bswap_int(unsigned int); - unsigned long long vertical_mirror(unsigned long long); - #endif -#endif - -// lzcnt / tzcnt (0 allowed) - -#ifdef USE_GAS_X86 -#ifdef __LZCNT__ -static inline int _lzcnt_u64(unsigned long long x) { - int y; - __asm__ ( - "lzcntl %1, %0\n\t" - "lzcntl %2, %2\n\t" - "leal (%0, %2), %0\n\t" - "cmovnc %2, %0" - : "=&r" (y) : "0" ((unsigned int) x), "r" ((unsigned int) (x >> 32)) ); - return y; -} -#endif -#ifdef __BMI__ -static inline int _tzcnt_u64(unsigned long long x) { - int y; - __asm__ ( - "tzcntl %1, %0\n\t" - "tzcntl %2, %2\n\t" - "leal (%0, %2), %0\n\t" - "cmovnc %2, %0" - : "=&r" (y) : "0" ((unsigned int) (x >> 32)), "r" ((unsigned int) x) ); - return y; -} -#endif -#elif defined(USE_MSVC_X86) && (defined(__AVX2__) || defined(__LZCNT__)) -static inline int _lzcnt_u64(unsigned long long x) { - __asm { - lzcnt eax, dword ptr x - lzcnt edx, dword ptr x+4 - lea eax, [eax+edx] - cmovnc eax, edx - } -} - -static inline int _tzcnt_u64(unsigned long long x) { - __asm { - tzcnt eax, dword ptr x+4 - tzcnt edx, dword ptr x - lea eax, [eax+edx] - cmovnc eax, edx - } -} -#endif - -#if defined(__AVX2__) || defined(__LZCNT__) - #define lzcnt_u32(x) _lzcnt_u32(x) - #define lzcnt_u64(x) _lzcnt_u64(x) - -#elif defined(_M_ARM) || defined(_M_ARM64) - #define lzcnt_u32(x) _CountLeadingZeros(x) - #define lzcnt_u64(x) _CountLeadingZeros64(x) - -#elif defined(_MSC_VER) - static inline int lzcnt_u32(unsigned int n) { - unsigned int i; - if (!_BitScanReverse(&i, n)) - i = 32 ^ 31; - return i ^ 31; - } - #ifdef _M_X64 - static inline int lzcnt_u64(unsigned long long n) { - unsigned long i; - if (!_BitScanReverse64(&i, n)) - i = 64 ^ 63; - return i ^ 63; - } - #else - static inline int lzcnt_u64(unsigned long long n) { - unsigned long i; - if (_BitScanReverse(&i, n >> 32)) - return i ^ 31; - if (!_BitScanReverse(&i, (unsigned int) n)) - i = 64 ^ 63; - return i ^ 63; - } - #endif - -#elif defined(__ARM_FEATURE_CLZ) - #if __ARM_ACLE >= 110 - #define lzcnt_u32(x) __clz(x) - #define lzcnt_u64(x) __clzll(x) - #else // strictly-incorrect patch - #define lzcnt_u32(x) __builtin_clz(x) - #define lzcnt_u64(x) __builtin_clzll(x) - #endif - -#else - static inline int lzcnt_u32(unsigned long x) { return (x ? __builtin_clz(x) : 32); } - static inline int lzcnt_u64(unsigned long x) { return (x ? __builtin_clzll(x) : 64); } -#endif - -#if defined(__BMI__) || defined(__AVX2__) - #define tzcnt_u32(x) _tzcnt_u32(x) - #define tzcnt_u64(x) _tzcnt_u64(x) - -#elif defined(__ARM_FEATURE_CLZ) - #ifdef _M_ARM - #define tzcnt_u32(x) _arm_clz(_arm_rbit(x)) - #elif __ARM_ACLE >= 110 - #define tzcnt_u32(x) __clz(__rbit(x)) - // #elif defined(__GNUC__) - // #define tzcnt_u32(x) __builtin_ctz(x) // '& 0x07' optimized out assuming x != 0 - #endif -#endif - -#if defined(__SSE4_2__) || defined(__AVX__) - #ifdef HAS_CPU_64 - #define crc32c_u64(crc,d) _mm_crc32_u64((crc),(d)) - #else - #define crc32c_u64(crc,d) _mm_crc32_u32(_mm_crc32_u32((crc),(d)),((d)>>32)) - #endif -#elif defined(__ARM_FEATURE_CRC32) - #define crc32c_u64(crc,d) __crc32cd((crc),(d)) -#else - unsigned int crc32c_u64(unsigned int crc, unsigned long long data); -#endif - -#endif // EDAX_BIT_INTRINSICS_H ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -======= - #ifdef _M_ARM - #define tzcnt_u32(x) _arm_clz(_arm_rbit(x)) - #elif __has_builtin(__rbit) // (__ARM_ARCH >= 6 && __ARM_ISA_THUMB >= 2) || __ARM_ARCH >= 7 // not for gcc - #define tzcnt_u32(x) __clz(__rbit(x)) - #endif -======= ->>>>>>> 0835dae (Reformat #if's) #endif #if defined(__SSE4_2__) || defined(__AVX__) @@ -635,4 +219,3 @@ static inline int _tzcnt_u64(unsigned long long x) { #endif #endif // EDAX_BIT_INTRINSICS_H ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) diff --git a/src/board.c b/src/board.c index e017836..e4f31fb 100644 --- a/src/board.c +++ b/src/board.c @@ -11,35 +11,7 @@ * some board properties. Most of the functions are optimized to be as fast as * possible, while remaining readable. * -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD * @date 1998 - 2024 -======= - * @date 1998 - 2017 ->>>>>>> b3f048d (copyright changes) -======= - * @date 1998 - 2018 ->>>>>>> 1dc032e (Improve visual c compatibility) -======= - * @date 1998 - 2020 ->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow) -======= - * @date 1998 - 2021 ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) -======= - * @date 1998 - 2022 ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= - * @date 1998 - 2023 ->>>>>>> 8566ed0 (vector call version of board_next & get_moves) -======= - * @date 1998 - 2024 ->>>>>>> b4fb773 (AVX optimized board_unique) * @author Richard Delorme * @author Toshihiko Okuhara * @version 4.5 @@ -62,27 +34,12 @@ #elif MOVE_GENERATOR == MOVE_GENERATOR_SSE #include "flip_sse.c" #elif MOVE_GENERATOR == MOVE_GENERATOR_BITSCAN -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) #ifdef __ARM_NEON #define flip_neon flip #include "flip_neon_bitscan.c" #else #include "flip_bitscan.c" #endif -<<<<<<< HEAD -======= - #ifdef hasNeon - #define flip_neon flip - #include "flip_neon_bitscan.c" - #else - #include "flip_bitscan.c" - #endif ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) #elif MOVE_GENERATOR == MOVE_GENERATOR_ROXANE #include "flip_roxane.c" #elif MOVE_GENERATOR == MOVE_GENERATOR_32 @@ -91,107 +48,29 @@ #include "flip_sse_bswap.c" #elif MOVE_GENERATOR == MOVE_GENERATOR_AVX #include "flip_avx_ppfill.c" -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #elif MOVE_GENERATOR == MOVE_GENERATOR_AVX512 #include "flip_avx512cd.c" #elif MOVE_GENERATOR == MOVE_GENERATOR_NEON -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) #ifdef __aarch64__ #include "flip_neon_rbit.c" #else #include "flip_neon_lzcnt.c" #endif -<<<<<<< HEAD -<<<<<<< HEAD -#elif MOVE_GENERATOR == MOVE_GENERATOR_SVE - #include "flip_sve_lzcnt.c" -======= ->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added) -======= -======= -#elif MOVE_GENERATOR == MOVE_GENERATOR_AVX512 - #include "flip_avx512cd.c" ->>>>>>> 393b667 (Experimental AVX512VL/CD version of move generator) -#elif MOVE_GENERATOR == MOVE_GENERATOR_NEON - #include "flip_neon_lzcnt.c" ->>>>>>> f2da03e (Refine arm builds adding neon support.) -======= - #ifdef __aarch64__ - #include "flip_neon_rbit.c" - #else - #include "flip_neon_lzcnt.c" - #endif ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) -======= #elif MOVE_GENERATOR == MOVE_GENERATOR_SVE #include "flip_sve_lzcnt.c" ->>>>>>> a26ed17 (Add flip-sve-lzcnt.c for arm SVE build) #else // MOVE_GENERATOR == MOVE_GENERATOR_KINDERGARTEN #include "flip_kindergarten.c" #endif -<<<<<<< HEAD -<<<<<<< HEAD -======= -#if LAST_FLIP_COUNTER == COUNT_LAST_FLIP_CARRY - #include "count_last_flip_carry_64.c" -#elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE - #include "count_last_flip_sse.c" -#elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BITSCAN - #include "count_last_flip_bitscan.c" -#elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_PLAIN - #include "count_last_flip_plain.c" -#elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_32 - #include "count_last_flip_32.c" -#elif LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BMI2 - #include "count_last_flip_bmi2.c" -#else // LAST_FLIP_COUNTER == COUNT_LAST_FLIP_KINDERGARTEN - #include "count_last_flip_kindergarten.c" -#endif - ->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added) -======= ->>>>>>> 6506166 (More SSE optimizations) /** edge stability global data */ unsigned char edge_stability[256 * 256]; -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86)) && !defined(hasSSE2) #include "board_mmx.c" -<<<<<<< HEAD #endif #if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(hasSSE2) || defined(__ARM_NEON)) && !defined(ANDROID) #include "board_sse.c" -======= -/** conversion from an 8-bit line to the A1-A8 line */ -// unsigned long long A1_A8[256]; - -======= ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) -======= -#if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86)) && !defined(hasSSE2) ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) -#include "board_mmx.c" -#endif -#if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(hasSSE2) || defined(hasNeon)) && !defined(ANDROID) -#include "board_sse.c" ->>>>>>> 1dc032e (Improve visual c compatibility) -======= -#endif -#if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(hasSSE2) || defined(__ARM_NEON)) && !defined(ANDROID) - #include "board_sse.c" ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) #endif @@ -360,34 +239,8 @@ bool board_lesser(const Board *b1, const Board *b2) if (b1->player != b2->player) return (b1->player < b2->player); else return (b1->opponent < b2->opponent); -<<<<<<< HEAD -======= -} - -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -/** - * @brief Compare two board for equality - * - * @param b1 first board - * @param b2 second board - * @return true if both board are equal - */ -bool board_equal(const Board *b1, const Board *b2) -{ - return (b1->player == b2->player && b1->opponent == b2->opponent); ->>>>>>> 8a7e354 (Exclude hash init time from count games; other minor size opts) } -======= ->>>>>>> de58f52 (AVX2 board_equal; delayed hash lock code) -#if !defined(hasSSE2) && !defined(hasNeon) // SSE version in board_sse.c -======= -#if !defined(hasSSE2) && !defined(__ARM_NEON) // SSE version in board_sse.c ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) -======= ->>>>>>> 6bc747d (Split board_flip_* from board_symetry) /** * @brief symetric board * @@ -413,21 +266,9 @@ void board_transpose(const Board *board, Board *sym) sym->player = transpose(board->player); sym->opponent = transpose(board->opponent); } -<<<<<<< HEAD -<<<<<<< HEAD - -void board_symetry(const Board *board, const int s, Board *sym) -{ -<<<<<<< HEAD -<<<<<<< HEAD -======= -#endif -======= ->>>>>>> a23c3d4 (SSE optimized board_symetry again) void board_symetry(const Board *board, const int s, Board *sym) { ->>>>>>> 6bc747d (Split board_flip_* from board_symetry) *sym = *board; if (s & 1) board_horizontal_mirror(sym, sym); @@ -435,34 +276,6 @@ void board_symetry(const Board *board, const int s, Board *sym) board_vertical_mirror(sym, sym); if (s & 4) board_transpose(sym, sym); -<<<<<<< HEAD -======= - register unsigned long long player, opponent; -======= - unsigned long long player, opponent; ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) - - player = board->player; - opponent = board->opponent; - - if (s & 1) { - player = horizontal_mirror(player); - opponent = horizontal_mirror(opponent); - } - if (s & 2) { - player = vertical_mirror(player); - opponent = vertical_mirror(opponent); - } - if (s & 4) { - player = transpose(player); - opponent = transpose(opponent); - } - - sym->player = player; - sym->opponent = opponent; ->>>>>>> dbeab1c (reduce asm and inline which sometimes breaks debug build) -======= ->>>>>>> 6bc747d (Split board_flip_* from board_symetry) board_check(sym); } @@ -527,15 +340,7 @@ void board_rand(Board *board, int n_ply, Random *r) break; } } -<<<<<<< HEAD -<<<<<<< HEAD - board_get_move_flip(board, get_rand_bit(moves, r), &move); -======= - board_get_move(board, get_rand_bit(moves, r), &move); ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= board_get_move_flip(board, get_rand_bit(moves, r), &move); ->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip) board_update(board, &move); } } @@ -553,13 +358,6 @@ void board_rand(Board *board, int n_ply, Random *r) */ unsigned long long board_get_move_flip(const Board *board, const int x, Move *move) { -<<<<<<< HEAD -<<<<<<< HEAD -======= - move->flipped = board_flip(board, x); ->>>>>>> 6506166 (More SSE optimizations) -======= ->>>>>>> 542ee82 (Change store order to reduce register saving) move->x = x; move->flipped = board_flip(board, x); return move->flipped; @@ -580,17 +378,6 @@ bool board_check_move(const Board *board, Move *move) else return true; } -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= -#if !(defined(hasMMX) && (defined(USE_GAS_MMX) || defined(USE_MSVC_X86))) ->>>>>>> 1dc032e (Improve visual c compatibility) -======= -#if !(defined(hasMMX) && (defined(USE_GAS_MMX) || defined(USE_MSVC_X86))) // 32bit MMX/SSE version in board_mmx.c ->>>>>>> 6506166 (More SSE optimizations) -======= ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) /** * @brief Update a board. * @@ -602,29 +389,12 @@ bool board_check_move(const Board *board, Move *move) */ void board_update(Board *board, const Move *move) { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #if defined(hasSSE2) && (defined(HAS_CPU_64) || !defined(__3dNOW__)) // 3DNow CPU has fast emms, and possibly slow SSE -======= -#if defined(hasSSE2) && (defined(HAS_CPU_64) || !defined(__3dNOW__)) ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) -======= -#if defined(hasSSE2) && (defined(HAS_CPU_64) || !defined(__3dNOW__)) // 3DNow CPU has fast emms, and possibly slow SSE ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) __m128i OP = _mm_loadu_si128((__m128i *) board); OP = _mm_xor_si128(OP, _mm_or_si128(_mm_set1_epi64x(move->flipped), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x]))); _mm_storeu_si128((__m128i *) board, _mm_shuffle_epi32(OP, 0x4e)); -<<<<<<< HEAD -<<<<<<< HEAD -#elif defined(hasMMX) -======= -#elif defined(hasMMX) // 3DNow CPU has fast emms ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) -======= #elif defined(hasMMX) ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) __m64 F = *(__m64 *) &move->flipped; __m64 P = _m_pxor(*(__m64 *) &board->player, _m_por(F, *(__m64 *) &X_TO_BIT[move->x])); __m64 O = _m_pxor(*(__m64 *) &board->opponent, F); @@ -637,14 +407,6 @@ void board_update(Board *board, const Move *move) board->opponent = board->player ^ (move->flipped | X_TO_BIT[move->x]); board->player = O ^ move->flipped; #endif -<<<<<<< HEAD -======= - unsigned long long O = board->opponent; - board->opponent = board->player ^ (move->flipped | X_TO_BIT[move->x]); - board->player = O ^ move->flipped; ->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow) -======= ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) board_check(board); } @@ -659,10 +421,6 @@ void board_update(Board *board, const Move *move) */ void board_restore(Board *board, const Move *move) { -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) #if defined(hasSSE2) && (defined(HAS_CPU_64) || !defined(__3dNOW__)) __m128i OP = _mm_shuffle_epi32(_mm_loadu_si128((__m128i *) board), 0x4e); OP = _mm_xor_si128(OP, _mm_or_si128(_mm_set1_epi64x(move->flipped), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x]))); @@ -681,22 +439,8 @@ void board_restore(Board *board, const Move *move) board->player = board->opponent ^ (move->flipped | X_TO_BIT[move->x]); board->opponent = P ^ move->flipped; #endif -<<<<<<< HEAD -======= - unsigned long long P = board->player; - board->player = board->opponent ^ (move->flipped | X_TO_BIT[move->x]); - board->opponent = P ^ move->flipped; ->>>>>>> 4b9f204 (minor optimize in search_eval_1/2 and search_shallow) - board_check(board); -} -<<<<<<< HEAD -======= -#endif // hasMMX ->>>>>>> 1dc032e (Improve visual c compatibility) -======= board_check(board); } ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) /** * @brief Passing move @@ -712,19 +456,7 @@ void board_pass(Board *board) board_check(board); } -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #if (MOVE_GENERATOR != MOVE_GENERATOR_AVX) && (MOVE_GENERATOR != MOVE_GENERATOR_AVX512) && (MOVE_GENERATOR != MOVE_GENERATOR_SSE) && (MOVE_GENERATOR != MOVE_GENERATOR_NEON) // SSE version in board_sse.c -======= -#if !(defined(hasSSE2) && ((MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE))) // SSE version in endgame_sse.c ->>>>>>> 6506166 (More SSE optimizations) -======= -#if (MOVE_GENERATOR != MOVE_GENERATOR_AVX) && (MOVE_GENERATOR != MOVE_GENERATOR_SSE) && (MOVE_GENERATOR != MOVE_GENERATOR_NEON) // SSE version in board_sse.c ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= -#if (MOVE_GENERATOR != MOVE_GENERATOR_AVX) && (MOVE_GENERATOR != MOVE_GENERATOR_AVX512) && (MOVE_GENERATOR != MOVE_GENERATOR_SSE) && (MOVE_GENERATOR != MOVE_GENERATOR_NEON) // SSE version in board_sse.c ->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame) /** * @brief Compute a board resulting of a move played on a previous board. * @@ -743,53 +475,9 @@ unsigned long long board_next(const Board *board, const int x, Board *next) return flipped; } -<<<<<<< HEAD #endif -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #if !defined(hasSSE2) && !defined(__ARM_NEON) // SSE version in board_sse.c -======= -/** - * @brief Compute a board resulting of an opponent move played on a previous board. - * - * Compute the board after passing and playing a move. - * - * @param board board to play the move on. - * @param x opponent move to play. - * @param next resulting board. - * @return flipped discs. - */ -unsigned long long board_pass_next(const Board *board, const int x, Board *next) -{ - const unsigned long long flipped = Flip(x, board->opponent, board->player); - - next->opponent = board->opponent ^ (flipped | x_to_bit(x)); - next->player = board->player ^ flipped; - - return flipped; -} -======= ->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c) -#endif - -<<<<<<< HEAD -<<<<<<< HEAD -#if !defined(__x86_64__) && !defined(_M_X64) && !defined(__AVX2__) ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= -#if !defined(__x86_64__) && !defined(_M_X64) && !defined(__AVX2__) // sse version in board_sse.c ->>>>>>> 6506166 (More SSE optimizations) -======= -#if !defined(hasSSE2) && !defined(hasNeon) // sse version in board_sse.c ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= -#if !defined(hasSSE2) && !defined(hasNeon) // SSE version in board_sse.c ->>>>>>> e3cea41 (New vectored bit_weighted_count_sse) -======= -#if !defined(hasSSE2) && !defined(__ARM_NEON) // SSE version in board_sse.c ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) /** * @brief Get a part of the moves. * @@ -869,46 +557,13 @@ static inline unsigned long long get_some_moves(const unsigned long long P, cons * @param O bitboard with opponent's discs. * @return all legal moves in a 64-bit unsigned integer. */ -<<<<<<< HEAD -<<<<<<< HEAD -======= -#if !defined(__x86_64__) && !defined(_M_X64) ->>>>>>> 1dc032e (Improve visual c compatibility) -======= ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) unsigned long long get_moves(const unsigned long long P, const unsigned long long O) { unsigned long long moves, OM; -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(DISPATCH_NEON) -======= - #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) ->>>>>>> 1dc032e (Improve visual c compatibility) if (hasSSE2) return get_moves_sse(P, O); -<<<<<<< HEAD - #endif - #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) - if (hasMMX) -======= -======= - #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(ANDROID) -======= - #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) || defined(DISPATCH_NEON) ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) - if (hasSSE2) - return get_moves_sse(P, O); -<<<<<<< HEAD - #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) - else if (hasMMX) ->>>>>>> 0f2fb39 (Chage 32-bit get_moves_mmx/sse parameters to 64 bits) - return get_moves_mmx(P, O); -======= ->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line) #endif #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) if (hasMMX) @@ -923,15 +578,7 @@ unsigned long long get_moves(const unsigned long long P, const unsigned long lon return moves & ~(P|O); // mask with empties } -<<<<<<< HEAD -<<<<<<< HEAD -#endif // hasSSE2/__ARM_NEON -======= -#endif // hasSSE2/hasNeon ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= #endif // hasSSE2/__ARM_NEON ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) /** * @brief Get legal moves on a 6x6 board. @@ -944,19 +591,9 @@ unsigned long long get_moves(const unsigned long long P, const unsigned long lon */ unsigned long long get_moves_6x6(const unsigned long long P, const unsigned long long O) { -<<<<<<< HEAD -<<<<<<< HEAD - unsigned long long PM = P & 0x007E7E7E7E7E7E00; - unsigned long long OM = O & 0x007E7E7E7E7E7E00; - return get_moves(PM, OM) & 0x007E7E7E7E7E7E00; -======= - return get_moves(P & 0x007E7E7E7E7E7E00, O & 0x007E7E7E7E7E7E00) & 0x007E7E7E7E7E7E00; ->>>>>>> 6506166 (More SSE optimizations) -======= unsigned long long PM = P & 0x007E7E7E7E7E7E00; unsigned long long OM = O & 0x007E7E7E7E7E7E00; return get_moves(PM, OM) & 0x007E7E7E7E7E7E00; ->>>>>>> e22b052 (_mm_cvtsi64_si128 x86 sim using loadl, requires lvalue) } /** @@ -968,23 +605,7 @@ unsigned long long get_moves_6x6(const unsigned long long P, const unsigned long */ bool can_move(const unsigned long long P, const unsigned long long O) { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #if defined(hasMMX) || defined(__ARM_NEON) -======= -#if defined(USE_GAS_MMX) || defined(__x86_64__) || defined(USE_MSVC_X86) ->>>>>>> 1dc032e (Improve visual c compatibility) -======= -#if defined(__x86_64__) || defined(_M_X64) || defined(hasMMX) ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= -#if defined(hasMMX) || defined(hasNeon) ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= -#if defined(hasMMX) || defined(__ARM_NEON) ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) return get_moves(P, O) != 0; #else @@ -1024,27 +645,7 @@ int get_mobility(const unsigned long long P, const unsigned long long O) return bit_count(get_moves(P, O)); } -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -#ifndef __AVX2__ // AVX2 version in board_sse.c -======= -int get_weighted_mobility(const unsigned long long P, const unsigned long long O) -{ - return bit_weighted_count(get_moves(P, O)); -} - -<<<<<<< HEAD -#ifndef __AVX2__ ->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99) -======= ->>>>>>> 6a997c5 (new get_moves_and_potential for AVX2) -======= -#ifndef __AVX2__ ->>>>>>> e3cea41 (New vectored bit_weighted_count_sse) -======= #ifndef __AVX2__ // AVX2 version in board_sse.c ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) /** * @brief Get some potential moves. * @@ -1074,44 +675,7 @@ unsigned long long get_potential_moves(const unsigned long long P, const unsigne | get_some_potential_moves(O & 0x007E7E7E7E7E7E00, 9)) & ~(P|O); // mask with empties } -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #endif // AVX2 -======= -======= -#endif // AVX2 ->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99) -======= ->>>>>>> 6a997c5 (new get_moves_and_potential for AVX2) - - #if !(defined(hasSSE2) && !defined(POPCOUNT)) && !defined(hasNeon) -/** - * @brief Get potential mobility. - * - * Count the list of empty squares in contact of a player square. - * - * @param P bitboard with player's discs. - * @param O bitboard with opponent's discs. - * @return a count of potential moves. - */ -int get_potential_mobility(const unsigned long long P, const unsigned long long O) -{ - #if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) - if (hasMMX) - return get_potential_mobility_mmx(P, O); - #endif - return bit_weighted_count(get_potential_moves(P, O)); -} -<<<<<<< HEAD ->>>>>>> 1dc032e (Improve visual c compatibility) -======= - #endif -======= ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) -#endif // AVX2 ->>>>>>> e3cea41 (New vectored bit_weighted_count_sse) /** * @brief search stable edge patterns. @@ -1134,10 +698,6 @@ static int find_edge_stable(const int old_P, const int old_O, int stable) if (E & X) { // is x an empty square ? O = old_O; P = old_P | X; // player plays on it -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) if (X > 0x02) { // flip left discs (using parallel prefix) F = O & (X >> 1); F |= O & (F >> 1); @@ -1145,7 +705,6 @@ static int find_edge_stable(const int old_P, const int old_O, int stable) F |= O2 & (F >> 2); F |= O2 & (F >> 2); F &= -(P & (F >> 1)); -<<<<<<< HEAD O ^= F; P ^= F; } @@ -1154,45 +713,12 @@ static int find_edge_stable(const int old_P, const int old_O, int stable) F -= (X + X) & -(int)(F != 0); O ^= F; P ^= F; -======= - // if (X > 0x02) { // flip left discs (using parallel prefix) - F = O & (X >> 1); - F |= O & (F >> 1); - Y = O & (O >> 1); - F |= Y & (F >> 2); - F |= Y & (F >> 2); - F &= -(P & (F >> 1)); - O ^= F; - P ^= F; - // } - // if (X < 0x40) { // flip right discs (using carry propagation) - F = (O + X + X) & P; - if (F) { - F -= X + X; - O ^= F; - P ^= F; - } ->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added) -======= - O ^= F; - P ^= F; - } - // if (X < 0x40) { // flip right discs (using carry propagation) - F = (O + X + X) & P; - F -= (X + X) & -(int)(F != 0); - O ^= F; - P ^= F; ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) // } stable = find_edge_stable(P, O, stable); // next move if (!stable) return stable; P = old_P; O = old_O | X; // opponent plays on it -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) if (X > 0x02) { // flip left discs (using parallel prefix) F = P & (X >> 1); F |= P & (F >> 1); @@ -1200,7 +726,6 @@ static int find_edge_stable(const int old_P, const int old_O, int stable) F |= O2 & (F >> 2); F |= O2 & (F >> 2); F &= -(O & (F >> 1)); -<<<<<<< HEAD O ^= F; P ^= F; } @@ -1209,35 +734,6 @@ static int find_edge_stable(const int old_P, const int old_O, int stable) F -= (X + X) & -(int)(F != 0); O ^= F; P ^= F; -======= - // if (X > 0x02) { // flip left discs (using parallel prefix) - F = P & (X >> 1); - F |= P & (F >> 1); - Y = P & (P >> 1); - F |= Y & (F >> 2); - F |= Y & (F >> 2); - F &= -(O & (F >> 1)); - O ^= F; - P ^= F; - // } - // if (X < 0x40) { // flip right discs (using carry propagation) - F = (P + X + X) & O; - if (F) { - F -= X + X; - O ^= F; - P ^= F; - } ->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added) -======= - O ^= F; - P ^= F; - } - // if (X < 0x40) { // flip right discs (using carry propagation) - F = (P + X + X) & O; - F -= (X + X) & -(int)(F != 0); - O ^= F; - P ^= F; ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) // } stable = find_edge_stable(P, O, stable); // next move if (!stable) return stable; @@ -1248,15 +744,7 @@ static int find_edge_stable(const int old_P, const int old_O, int stable) } /** -<<<<<<< HEAD -<<<<<<< HEAD * @brief Initialize the edge stability table. -======= - * @brief Initialize the edge stability and A1_A8 tables. ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= - * @brief Initialize the edge stability table. ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) */ void edge_stability_init(void) { @@ -1271,46 +759,12 @@ void edge_stability_init(void) } else { rPO = horizontal_mirror_32(PO); if (PO > rPO) -<<<<<<< HEAD -<<<<<<< HEAD - edge_stability[PO] = mirror_byte(edge_stability[rPO]); -======= - edge_stability[PO] = horizontal_mirror_32(edge_stability[rPO]); ->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added) -======= edge_stability[PO] = mirror_byte(edge_stability[rPO]); ->>>>>>> 0ee9c1c (mirror_byte added for 1 byte bit reverse) else edge_stability[PO] = find_edge_stable(P, O, P); } } // printf("edge_stability_init: %d\n", (int)(cpu_clock() - t)); -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= - -#if (defined(USE_GAS_MMX) || defined(USE_MSVC_X86)) && !defined(hasSSE2) - init_mmx(); -#endif ->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added) -======= ->>>>>>> cb149ab (Faster flip_avx (ppfill) and variants added) -======= - - /* Q = 0; - for (P = 0; P < 256; ++P) { - A1_A8[P] = Q; - Q = ((Q | ~0x0101010101010101) + 1) & 0x0101010101010101; -<<<<<<< HEAD - } ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= - } */ ->>>>>>> 93110ce (Use computation or optional pdep to unpack A1_A8) -======= ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) } #ifdef HAS_CPU_64 @@ -1321,180 +775,8 @@ void edge_stability_init(void) #define packH1H8(X) (((((unsigned int)((X) >> 32) & 0x80808080) + (((unsigned int)(X) & 0x80808080) >> 4)) * 0x00204081) >> 24) #endif -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -#if !defined(hasSSE2) && !defined(__ARM_NEON) -======= -#if !defined(__x86_64__) && !defined(_M_X64) -======= -#ifndef HAS_CPU_64 -======= -#ifndef __AVX2__ -<<<<<<< HEAD -#if !(defined(__aarch64__) || defined(_M_ARM64) || defined(hasSSE2)) ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= -#if !defined(hasNeon) && !defined(hasSSE2) ->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested)) -======= ->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction) -======= -#if !defined(__AVX2__) && !defined(hasNeon) && !defined(hasSSE2) ->>>>>>> dc7c79c (Omit unpack from get_edge_stability) -======= -#if !defined(__AVX2__) && !defined(__ARM_NEON) && !defined(hasSSE2) ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) -======= #if !defined(hasSSE2) && !defined(__ARM_NEON) ->>>>>>> cae8121 (minimax search_eval_1; feed moves to search_eval_1/2) -/** - * @brief Get stable edge. - * - * Compute the exact stable edges from precomputed tables. - * - * @param P bitboard with player's discs. - * @param O bitboard with opponent's discs. - * @return a bitboard with (some of) player's stable discs. - * - */ -unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O) -{ // compute the exact stable edges (from precomputed tables) - return edge_stability[((unsigned int) P & 0xff) * 256 + ((unsigned int) O & 0xff)] - | (unsigned long long) edge_stability[(unsigned int) (P >> 56) * 256 + (unsigned int) (O >> 56)] << 56 - | unpackA2A7(edge_stability[packA1A8(P) * 256 + packA1A8(O)]) - | unpackH2H7(edge_stability[packH1H8(P) * 256 + packH1H8(O)]); -} - -/** - * @brief Estimate the stability of edges. - * - * Count the number (in fact a lower estimate) of stable discs on the edges. - * - * @param P bitboard with player's discs. - * @param O bitboard with opponent's discs. - * @return the number of stable discs on the edges. - */ -int get_edge_stability(const unsigned long long P, const unsigned long long O) -{ - unsigned int packedstable = edge_stability[((unsigned int) P & 0xff) * 256 + ((unsigned int) O & 0xff)] - | edge_stability[(unsigned int) (P >> 56) * 256 + (unsigned int) (O >> 56)] << 8 - | edge_stability[packA1A8(P) * 256 + packA1A8(O)] << 16 - | edge_stability[packH1H8(P) * 256 + packH1H8(O)] << 24; - return bit_count_32(packedstable & 0xffff7e7e); -} -<<<<<<< HEAD -<<<<<<< HEAD -#endif -<<<<<<< HEAD -======= -#endif ->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction) - -#if !defined(HAS_CPU_64) && !(defined(ANDROID) && (defined(hasNeon) || defined(hasSSE2))) ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= -#if !defined(hasNeon) && !defined(hasSSE2) ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= - ->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested)) -/** - * @brief Get full lines. - * - * @param disc all discs on the board. - * @param full all 1 if full line, otherwise all 0. - */ - -#if !defined(__ARM_NEON) && !defined(hasSSE2) && !defined(hasMMX) - #ifdef HAS_CPU_64 - -static unsigned long long get_full_lines_h(unsigned long long full) -{ - full &= full >> 1; - full &= full >> 2; - full &= full >> 4; - return (full & 0x0101010101010101) * 0xff; -} - -static unsigned long long get_full_lines_v(unsigned long long full) -{ - full &= (full >> 8) | (full << 56); // ror 8 - full &= (full >> 16) | (full << 48); // ror 16 - full &= (full >> 32) | (full << 32); // ror 32 - return full; -} - - #else - -static unsigned int get_full_lines_h_32(unsigned int full) -{ - full &= full >> 1; - full &= full >> 2; - full &= full >> 4; - return (full & 0x01010101) * 0xff; -} - -static unsigned long long get_full_lines_h(unsigned long long full) -{ - return ((unsigned long long) get_full_lines_h_32(full >> 32) << 32) | get_full_lines_h_32(full); -} - -static unsigned long long get_full_lines_v(unsigned long long full) -{ - unsigned int t = (unsigned int) full & (unsigned int)(full >> 32); - t &= (t >> 16) | (t << 16); // ror 16 - t &= (t >> 8) | (t << 24); // ror 8 - return t | ((unsigned long long) t << 32); -} - -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> 1dc032e (Improve visual c compatibility) -======= -unsigned long long get_all_full_lines(const unsigned long long disc, V4DI *full) -======= -======= - #endif - -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) -void get_all_full_lines(const unsigned long long disc, unsigned long long full[5]) ->>>>>>> 4303b09 (Returns all full lines in full[4]) -======= -static void get_full_lines(const unsigned long long disc, unsigned long long full[4]) ->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX) -======= -void get_full_lines(const unsigned long long disc, unsigned long long full[4]) ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) -{ - unsigned long long l7, l9, r7, r9; // full lines - - full[0] = get_full_lines_h(disc); - full[1] = get_full_lines_v(disc); - - l7 = r7 = disc; - l7 &= 0xff01010101010101 | (l7 >> 7); r7 &= 0x80808080808080ff | (r7 << 7); - l7 &= 0xffff030303030303 | (l7 >> 14); r7 &= 0xc0c0c0c0c0c0ffff | (r7 << 14); - l7 &= 0xffffffff0f0f0f0f | (l7 >> 28); r7 &= 0xf0f0f0f0ffffffff | (r7 << 28); - full[3] = l7 & r7; - - l9 = r9 = disc; - l9 &= 0xff80808080808080 | (l9 >> 9); r9 &= 0x01010101010101ff | (r9 << 9); - l9 &= 0xffffc0c0c0c0c0c0 | (l9 >> 18); r9 &= 0x030303030303ffff | (r9 << 18); - full[2] = l9 & r9 & (0x0f0f0f0ff0f0f0f0 | (l9 >> 36) | (r9 << 36)); -} -#endif // __ARM_NEON/hasSSE2/hasMMX - ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) /** -<<<<<<< HEAD * @brief Get stable edge. * * Compute the exact stable edges from precomputed tables. @@ -1504,11 +786,7 @@ void get_full_lines(const unsigned long long disc, unsigned long long full[4]) * @return a bitboard with (some of) player's stable discs. * */ -<<<<<<< HEAD unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O) -======= -static unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O) ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) { // compute the exact stable edges (from precomputed tables) return edge_stability[((unsigned int) P & 0xff) * 256 + ((unsigned int) O & 0xff)] | (unsigned long long) edge_stability[(unsigned int) (P >> 56) * 256 + (unsigned int) (O >> 56)] << 56 @@ -1517,88 +795,6 @@ static unsigned long long get_stable_edge(const unsigned long long P, const unsi } /** -<<<<<<< HEAD -======= -======= ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) - * @brief Estimate the stability. - * - * Count the number (in fact a lower estimate) of stable discs. - * - * @param P bitboard with player's discs. - * @param O bitboard with opponent's discs. - * @return the number of stable discs. - */ -#ifndef __AVX2__ // AVX2 version in board_sse.c - #if !(defined(hasMMX) && !defined(hasSSE2)) // MMX version of get_stability in board_mmx.c - #if !(defined(hasSSE2) && !defined(HAS_CPU_64)) // 32bit SSE version in board_sse.c -// compute the other stable discs (ie discs touching another stable disc in each flipping direction). -int get_spreaded_stability(unsigned long long stable, unsigned long long P_central, unsigned long long full[4]) -{ - unsigned long long stable_h, stable_v, stable_d7, stable_d9, old_stable; - - if (stable == 0) // (2%) - return 0; - - do { - old_stable = stable; - stable_h = ((stable >> 1) | (stable << 1) | full[0]); - stable_v = ((stable >> 8) | (stable << 8) | full[1]); - stable_d9 = ((stable >> 9) | (stable << 9) | full[2]); - stable_d7 = ((stable >> 7) | (stable << 7) | full[3]); - stable |= (stable_h & stable_v & stable_d9 & stable_d7 & P_central); - } while (stable != old_stable); // (44%) - - return bit_count(stable); -} - #endif - -// returns stability count only -int get_stability(const unsigned long long P, const unsigned long long O) -{ - unsigned long long stable = get_stable_edge(P, O); // compute the exact stable edges - unsigned long long P_central = P & 0x007e7e7e7e7e7e00; - unsigned long long full[4]; - - get_full_lines(P | O, full); // add full lines - stable |= (P_central & full[0] & full[1] & full[2] & full[3]); - - return get_spreaded_stability(stable, P_central, full); // compute the other stable discs -} - -// returns all full in full[4] in addition to stability count -int get_stability_fulls(const unsigned long long P, const unsigned long long O, unsigned long long full[5]) -{ - unsigned long long stable = get_stable_edge(P, O); // compute the exact stable edges - unsigned long long P_central = P & 0x007e7e7e7e7e7e00; - - get_full_lines(P | O, full); // add full lines - full[4] = full[0] & full[1] & full[2] & full[3]; - stable |= (P_central & full[4]); - - return get_spreaded_stability(stable, P_central, full); // compute the other stable discs -} - #endif - -/** - * @brief Get intersection of full lines. - * - * Get intersection of full lines. - * - * @param disc bitboard with occupied discs. - * @return the intersection of full lines. - */ -unsigned long long get_all_full_lines(const unsigned long long disc) -{ - unsigned long long full[4]; - get_full_lines(disc, full); - return full[0] & full[1] & full[2] & full[3]; -} -#endif // __AVX2__ - -/** -<<<<<<< HEAD ->>>>>>> 1a7b0ed (flip_bmi2 added; bmi2 version of stability and corner_stability) * @brief Estimate the stability of edges. * * Count the number (in fact a lower estimate) of stable discs on the edges. @@ -1765,8 +961,6 @@ unsigned long long get_all_full_lines(const unsigned long long disc) #endif // __AVX2__ /** -======= ->>>>>>> dc7c79c (Omit unpack from get_edge_stability) * @brief Estimate corner stability. * * Count the number of stable discs around the corner. Limiting the count @@ -1779,32 +973,12 @@ unsigned long long get_all_full_lines(const unsigned long long disc) */ int get_corner_stability(const unsigned long long P) { -<<<<<<< HEAD -<<<<<<< HEAD -#ifdef POPCOUNT - // stable = (((0x0100000000000001 & P) << 1) | ((0x8000000000000080 & P) >> 1) | ((0x0000000000000081 & P) << 8) | ((0x8100000000000000 & P) >> 8) | 0x8100000000000081) & P; - unsigned int P2187 = (P >> 48) | (P << 16); // ror 48 - unsigned int stable = 0x00818100 & P2187; - stable |= ((((stable * 5) >> 1) & 0x00424200) | (stable << 8) | (stable >> 8)) & P2187; // 1-8 alias does not matter since corner is stable anyway - return bit_count_32(stable); -======= -#if 0 - - const unsigned long long stable = ((((0x0100000000000001 & P) << 1) | ((0x8000000000000080 & P) >> 1) | ((0x0000000000000081 & P) << 8) | ((0x8100000000000000 & P) >> 8) | 0x8100000000000081) & P); - return bit_count(stable); ->>>>>>> 6506166 (More SSE optimizations) -======= #ifdef POPCOUNT // stable = (((0x0100000000000001 & P) << 1) | ((0x8000000000000080 & P) >> 1) | ((0x0000000000000081 & P) << 8) | ((0x8100000000000000 & P) >> 8) | 0x8100000000000081) & P; unsigned int P2187 = (P >> 48) | (P << 16); // ror 48 unsigned int stable = 0x00818100 & P2187; stable |= ((((stable * 5) >> 1) & 0x00424200) | (stable << 8) | (stable >> 8)) & P2187; // 1-8 alias does not matter since corner is stable anyway return bit_count_32(stable); -<<<<<<< HEAD - #endif ->>>>>>> 11a54a6 (Revise get_corner_stability and hash_cleanup) -======= ->>>>>>> 9078deb (new get_corner_stability for both 64&32 bit) #else // kindergarten static const char n_stable_h2a2h1g1b1a1[64] = { @@ -1814,20 +988,7 @@ int get_corner_stability(const unsigned long long P) 0, 2, 0, 3, 0, 2, 0, 3, 2, 4, 2, 5, 3, 5, 3, 6 }; -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - #if 0 // defined(__BMI2__) && !defined(__bdver4__) && !defined(__znver1__) && !defined(__znver2__) // BMI2 CPU has POPCOUNT -======= - #if 0 // defined(__BMI2__) && !defined(__bdver4__) && !defined(__znver1__) && !defined(__znver2__) // kindergarten for generic modern build ->>>>>>> 867c81c (Omit restore board/parity in search_shallow; tweak NWS_STABILITY) -======= #if 0 // defined(__BMI2__) && !defined(__bdver4__) && !defined(__znver1__) && !defined(__znver2__) // BMI2 CPU has POPCOUNT ->>>>>>> 9078deb (new get_corner_stability for both 64&32 bit) int cnt = n_stable_h2a2h1g1b1a1[_pext_u32((unsigned int) vertical_mirror(P), 0x000081c3)] + n_stable_h2a2h1g1b1a1[_pext_u32((unsigned int) P, 0x000081c3)]; @@ -1843,51 +1004,6 @@ int get_corner_stability(const unsigned long long P) + n_stable_h2a2h1g1b1a1[(((unsigned int) P & 0x000081c3) * 0x04410000) >> 26]; #endif // assert(cnt == bit_count((((0x0100000000000001 & P) << 1) | ((0x8000000000000080 & P) >> 1) | ((0x0000000000000081 & P) << 8) | ((0x8100000000000000 & P) >> 8) | 0x8100000000000081) & P)); -======= -#if defined(__BMI2__) && defined(__x86_64__) -======= -#if 0 // defined(__BMI2__) && defined(__x86_64__) // pext is slow on AMD -<<<<<<< HEAD ->>>>>>> f24cc06 (avoid BMI2 for AMD; more lzcnt/tzcnt in count_last_flip_bitscan) - int cnt = n_stable_h8g8b8a8h7a7[_pext_u64(P, 0xc381000000000000ULL)] - + n_stable_h2a2h1g1b1a1[_pext_u32((unsigned int) P, 0x000081c3U)]; -======= -======= -#ifdef USEPEXT // defined(__BMI2__) && defined(__x86_64__) && !defined(AMD_BEFORE_ZEN3) // kindergarten for generic modern build ->>>>>>> 6f4eb2e (VPGATHERDD accumlate_eval) -======= -#ifdef USEPEXT // defined(__BMI2__) && defined(__x86_64__) && !defined(AMD_BEFORE_ZEN3) // kindergarten for generic modern build ->>>>>>> bbc1ddf (VPGATHERDD accumlate_eval) - int cnt = n_stable_h8g8b8a8h7a7[_pext_u64(P, 0xc381000000000000)] - + n_stable_h2a2h1g1b1a1[_pext_u32((unsigned int) P, 0x000081c3)]; ->>>>>>> 6506166 (More SSE optimizations) -#else - int cnt = n_stable_h8g8b8a8h7a7[(((unsigned int) (P >> 32) & 0xc3810000) * 0x00000411) >> 26] - + n_stable_h2a2h1g1b1a1[(((unsigned int) P & 0x000081c3) * 0x04410000) >> 26]; -#endif -<<<<<<< HEAD - // assert(cnt == bit_count((((0x0100000000000001ULL & P) << 1) | ((0x8000000000000080ULL & P) >> 1) | ((0x0000000000000081ULL & P) << 8) | ((0x8100000000000000ULL & P) >> 8) | 0x8100000000000081ULL) & P)); ->>>>>>> 1a7b0ed (flip_bmi2 added; bmi2 version of stability and corner_stability) -======= -======= - #if 0 // defined(__BMI2__) && !defined(AMD_BEFORE_ZEN3) // kindergarten for generic modern build - int cnt = n_stable_h2a2h1g1b1a1[_pext_u32((unsigned int) vertical_mirror(P), 0x000081c3)] - + n_stable_h2a2h1g1b1a1[_pext_u32((unsigned int) P, 0x000081c3)]; - - #else - static const char n_stable_h8g8b8a8h7a7[64] = { - 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 0, 0, 2, 3, 2, 3, - 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 0, 0, 2, 3, 2, 3, - 1, 1, 2, 2, 2, 3, 3, 4, 1, 1, 2, 2, 3, 4, 4, 5, - 2, 2, 3, 3, 3, 4, 4, 5, 2, 2, 3, 3, 4, 5, 5, 6 - }; - - int cnt = n_stable_h8g8b8a8h7a7[(((unsigned int) (P >> 32) & 0xc3810000) * 0x00000411) >> 26] - + n_stable_h2a2h1g1b1a1[(((unsigned int) P & 0x000081c3) * 0x04410000) >> 26]; - #endif ->>>>>>> 11a54a6 (Revise get_corner_stability and hash_cleanup) - // assert(cnt == bit_count((((0x0100000000000001 & P) << 1) | ((0x8000000000000080 & P) >> 1) | ((0x0000000000000081 & P) << 8) | ((0x8100000000000000 & P) >> 8) | 0x8100000000000081) & P)); ->>>>>>> 6506166 (More SSE optimizations) return cnt; #endif @@ -1901,49 +1017,8 @@ int get_corner_stability(const unsigned long long P) */ unsigned long long board_get_hash_code(const Board *board) { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - unsigned long long crc = crc32c_u64(0, board->player); - return (crc << 32) | crc32c_u64(crc, board->opponent); -======= - const unsigned char *p = (const unsigned char*)board; -======= - const unsigned char *const p = (const unsigned char*)board; ->>>>>>> 0a166fd (Remove 1 element array coding style) - unsigned long long h1, h2; - -#if defined(USE_GAS_MMX) && defined(__3dNOW__) // Faster on AMD but not suitable for CPU with slow emms - if (hasMMX) - return board_get_hash_code_mmx(p); -#elif defined(USE_GAS_MMX) || defined(USE_MSVC_X86) // || defined(__x86_64__) - if (hasSSE2) - return board_get_hash_code_sse(p); -#endif - - h1 = hash_rank[0][p[0]]; h2 = hash_rank[1][p[1]]; - h1 ^= hash_rank[2][p[2]]; h2 ^= hash_rank[3][p[3]]; - h1 ^= hash_rank[4][p[4]]; h2 ^= hash_rank[5][p[5]]; - h1 ^= hash_rank[6][p[6]]; h2 ^= hash_rank[7][p[7]]; - h1 ^= hash_rank[8][p[8]]; h2 ^= hash_rank[9][p[9]]; - h1 ^= hash_rank[10][p[10]]; h2 ^= hash_rank[11][p[11]]; - h1 ^= hash_rank[12][p[12]]; h2 ^= hash_rank[13][p[13]]; - h1 ^= hash_rank[14][p[14]]; h2 ^= hash_rank[15][p[15]]; - - // assert((h1 ^ h2) == board_get_hash_code_sse(p)); - - return h1 ^ h2; ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= - unsigned long long crc; - - crc = crc32c_u64(0, board->player); -======= unsigned long long crc = crc32c_u64(0, board->player); ->>>>>>> 0b8fa13 (More HBOARD hash functions) return (crc << 32) | crc32c_u64(crc, board->opponent); ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) } /** @@ -2022,28 +1097,8 @@ void board_print(const Board *board, const int player, FILE *f) { int i, j, square; unsigned long long bk, wh; -<<<<<<< HEAD -<<<<<<< HEAD const char color[5] = "?*O-."; unsigned long long moves = board_get_moves(board); -<<<<<<< HEAD - - if (player == BLACK) { - bk = board->player; - wh = board->opponent; - } else { - bk = board->opponent; - wh = board->player; - } -======= - const char *color = "?*O-." + 1; -======= - const char color[5] = "?*O-."; ->>>>>>> bc93772 (Avoid modern compliler warnings) - unsigned long long moves = get_moves(board->player, board->opponent); ->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6) -======= ->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip) if (player == BLACK) { bk = board->player; @@ -2061,15 +1116,7 @@ void board_print(const Board *board, const int player, FILE *f) square = 2 - (wh & 1) - 2 * (bk & 1); if ((square == EMPTY) && (moves & 1)) square = EMPTY + 1; -<<<<<<< HEAD -<<<<<<< HEAD - fputc(color[square + 1], f); -======= - fputc(color[square], f); ->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6) -======= fputc(color[square + 1], f); ->>>>>>> bc93772 (Avoid modern compliler warnings) fputc(' ', f); bk >>= 1; wh >>= 1; diff --git a/src/board.h b/src/board.h index 2b1cf85..2da07db 100644 --- a/src/board.h +++ b/src/board.h @@ -3,23 +3,7 @@ * * Board management header file. * -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD * @date 1998 - 2024 -======= - * @date 1998 - 2021 ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) -======= - * @date 1998 - 2022 ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= - * @date 1998 - 2023 ->>>>>>> 8566ed0 (vector call version of board_next & get_moves) -======= - * @date 1998 - 2024 ->>>>>>> a26ed17 (Add flip-sve-lzcnt.c for arm SVE build) * @author Richard Delorme * @version 4.5 */ @@ -44,45 +28,16 @@ void board_init(Board*); int board_set(Board*, const char*); int board_from_FEN(Board*, const char*); bool board_lesser(const Board*, const Board*); -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD void board_horizontal_mirror(const Board *, Board *); void board_vertical_mirror(const Board *, Board *); void board_transpose(const Board *, Board *); -======= -bool board_equal(const Board*, const Board*); ->>>>>>> 8a7e354 (Exclude hash init time from count games; other minor size opts) -======= ->>>>>>> de58f52 (AVX2 board_equal; delayed hash lock code) -======= -void board_horizontal_mirror(const Board *, Board *); -void board_vertical_mirror(const Board *, Board *); -void board_transpose(const Board *, Board *); ->>>>>>> 6bc747d (Split board_flip_* from board_symetry) void board_symetry(const Board*, const int, Board*); int board_unique(const Board*, Board*); void board_check(const Board*); void board_rand(Board*, int, struct Random*); // Compare two board for equality -<<<<<<< HEAD -<<<<<<< HEAD -#define board_equal(b1,b2) ((b1)->player == (b2)->player && (b1)->opponent == (b2)->opponent) -======= -#ifdef __AVX2__ -inline bool board_equal(const Board *b1, const Board *b2) -{ - __m128i b = _mm_xor_si128(_mm_loadu_si128((__m128i *) b1), _mm_loadu_si128((__m128i *) b2)); - return _mm_testz_si128(b, b); -} -#else #define board_equal(b1,b2) ((b1)->player == (b2)->player && (b1)->opponent == (b2)->opponent) -#endif ->>>>>>> de58f52 (AVX2 board_equal; delayed hash lock code) -======= -#define board_equal(b1,b2) ((b1)->player == (b2)->player && (b1)->opponent == (b2)->opponent) ->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM) int board_count_last_flips(const Board*, const int); unsigned long long board_get_move_flip(const Board*, const int, struct Move*); @@ -91,9 +46,6 @@ void board_swap_players(Board*); void board_update(Board*, const struct Move*); void board_restore(Board*, const struct Move*); void board_pass(Board*); -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD bool can_move(const unsigned long long, const unsigned long long); unsigned long long get_moves_6x6(const unsigned long long, const unsigned long long); @@ -101,63 +53,6 @@ bool can_move_6x6(const unsigned long long, const unsigned long long); int get_mobility(const unsigned long long, const unsigned long long); #ifdef __AVX2__ __m128i vectorcall get_moves_and_potential(__m256i, __m256i); -<<<<<<< HEAD -#else - unsigned long long get_potential_moves(const unsigned long long, const unsigned long long); -#endif - -void edge_stability_init(void); -unsigned long long get_stable_edge(const unsigned long long, const unsigned long long); -#ifndef __AVX2__ // public for android dispatch - void get_full_lines(const unsigned long long, unsigned long long [4]); - #if !(defined(hasMMX) && !defined(hasSSE2)) - int get_spreaded_stability(unsigned long long, unsigned long long, unsigned long long [4]); - #endif -#endif -unsigned long long get_all_full_lines(const unsigned long long); -int get_stability(const unsigned long long, const unsigned long long); -int get_stability_fulls(const unsigned long long, const unsigned long long, unsigned long long [5]); -int get_edge_stability(const unsigned long long, const unsigned long long); -int get_corner_stability(const unsigned long long); -======= -unsigned long long board_next(const Board*, const int, Board*); ->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c) -======= ->>>>>>> 8566ed0 (vector call version of board_next & get_moves) -unsigned long long board_get_hash_code(const Board*); -int board_get_square_color(const Board*, const int); -bool board_is_occupied(const Board*, const int); -void board_print(const Board*, const int, FILE*); -char* board_to_string(const Board*, const int, char *); -void board_print_FEN(const Board*, const int, FILE*); -char* board_to_FEN(const Board*, const int, char*); -bool board_is_pass(const Board*); -bool board_is_game_over(const Board*); -int board_count_empties(const Board *board); -#if defined(USE_GAS_MMX) || defined(USE_MSVC_X86) - void init_mmx (void); - unsigned long long get_moves_mmx(const unsigned long long, const unsigned long long); - unsigned long long get_moves_sse(const unsigned long long, const unsigned long long); -======= ->>>>>>> 0b8fa13 (More HBOARD hash functions) - -<<<<<<< HEAD -<<<<<<< HEAD -#elif defined(ANDROID) && !defined(__ARM_NEON) && !defined(hasSSE2) - void init_neon (void); - unsigned long long get_moves_sse(unsigned long long, unsigned long long); -======= -unsigned long long get_moves(const unsigned long long, const unsigned long long); -======= ->>>>>>> 8566ed0 (vector call version of board_next & get_moves) -bool can_move(const unsigned long long, const unsigned long long); -unsigned long long get_moves_6x6(const unsigned long long, const unsigned long long); -bool can_move_6x6(const unsigned long long, const unsigned long long); -int get_mobility(const unsigned long long, const unsigned long long); -#ifdef __AVX2__ -__m128i vectorcall get_moves_and_potential(__m256i, __m256i); -======= ->>>>>>> 0835dae (Reformat #if's) #else unsigned long long get_potential_moves(const unsigned long long, const unsigned long long); #endif @@ -195,123 +90,28 @@ int board_count_empties(const Board *board); unsigned long long get_moves_sse(unsigned long long, unsigned long long); #endif -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -#if defined(USE_GAS_MMX) && defined(__3dNOW__) -unsigned long long board_get_hash_code_mmx(const unsigned char *p); -#elif defined(USE_GAS_MMX) || defined(USE_MSVC_X86) -unsigned long long board_get_hash_code_sse(const unsigned char *p); ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -#endif - -======= ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) -======= -#ifdef __AVX2__ -__m128i vectorcall get_moves_and_potential(__m256i, __m256i); -#endif - ->>>>>>> 6a997c5 (new get_moves_and_potential for AVX2) -======= ->>>>>>> e3cea41 (New vectored bit_weighted_count_sse) extern unsigned char edge_stability[256 * 256]; -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD // a1/a8/h1/h8 are already stable in horizontal line, so omit them in vertical line to ease kindergarten for CPU_64 #if 0 // defined(__BMI2__) && defined(HAS_CPU_64) && !defined(__bdver4__) && !defined(__znver1__) && !defined(__znver2__) // pdep is slow on AMD before Zen3 #define unpackA2A7(x) _pdep_u64((x), 0x0101010101010101) #define unpackH2H7(x) _pdep_u64((x), 0x8080808080808080) -======= -/* Define function attributes directive when available */ -#if defined(__GNUC__) && !defined(__clang__) && defined(__i386__) - #define REGPARM __attribute__((regparm(2))) ->>>>>>> e558fdb (Some cleanups for clang / android build) #else #define unpackA2A7(x) ((((x) & 0x7e) * 0x0000040810204080) & 0x0001010101010100) #define unpackH2H7(x) ((((x) & 0x7e) * 0x0002040810204000) & 0x0080808080808000) #endif -<<<<<<< HEAD #if (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_CARRY) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_KINDERGARTEN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BITSCAN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_32) extern int (*count_last_flip[BOARD_SIZE + 1])(const unsigned long long); -======= -#if ((LAST_FLIP_COUNTER == COUNT_LAST_FLIP_PLAIN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BMI2)) - extern int last_flip(int pos, unsigned long long P); -#else - #if LAST_FLIP_COUNTER == COUNT_LAST_FLIP_32 - extern int (REGPARM *count_last_flip[BOARD_SIZE + 1])(const unsigned long long); - #else - extern int (*count_last_flip[BOARD_SIZE + 1])(const unsigned long long); - #endif ->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added) -======= -#if ((LAST_FLIP_COUNTER == COUNT_LAST_FLIP_PLAIN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BMI2)) -======= -extern unsigned long long A1_A8[256]; -======= ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) - -// a1/a8/h1/h8 are already stable in horizontal line, so omit them in vertical line to ease kindergarten for CPU_64 -#if 0 // defined(__BMI2__) && defined(HAS_CPU_64) && !defined(__bdver4__) && !defined(__znver1__) && !defined(__znver2__) // pdep is slow on AMD before Zen3 - #define unpackA2A7(x) _pdep_u64((x), 0x0101010101010101) - #define unpackH2H7(x) _pdep_u64((x), 0x8080808080808080) -#else - #define unpackA2A7(x) ((((x) & 0x7e) * 0x0000040810204080) & 0x0001010101010100) - #define unpackH2H7(x) ((((x) & 0x7e) * 0x0002040810204000) & 0x0080808080808000) -#endif - -<<<<<<< HEAD -#if (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_PLAIN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_SSE) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BMI2) ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) - extern int last_flip(int pos, unsigned long long P); -#else -======= -#if (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_CARRY) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_KINDERGARTEN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_BITSCAN) || (LAST_FLIP_COUNTER == COUNT_LAST_FLIP_32) ->>>>>>> 52949e1 (Add build options and files for new count_last_flips) - extern int (*count_last_flip[BOARD_SIZE + 1])(const unsigned long long); ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) #define last_flip(x,P) count_last_flip[x](P) #else extern int last_flip(int pos, unsigned long long P); #endif -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512) -<<<<<<< HEAD -<<<<<<< HEAD - extern const V4DI lmask_v4[66], rmask_v4[66]; -<<<<<<< HEAD extern __m128i vectorcall mm_Flip(const __m128i OP, int pos); -<<<<<<< HEAD inline __m128i vectorcall reduce_vflip(__m128i flip) { return _mm_or_si128(flip, _mm_shuffle_epi32(flip, 0x4e)); } #define Flip(x,P,O) ((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_set_epi64x((O), (P)), (x))))) -======= - extern __m256i vectorcall mm_Flip(const __m128i OP, int pos); - inline __m128i vectorcall reduce_vflip(__m256i flip4) { - __m128i flip2 = _mm_or_si128(_mm256_castsi256_si128(flip4), _mm256_extracti128_si256(flip4, 1)); - return _mm_or_si128(flip2, _mm_shuffle_epi32(flip2, 0x4e)); // SWAP64 - } -======= - extern __m128i vectorcall mm_Flip(const __m128i OP, int pos); - inline __m128i vectorcall reduce_vflip(__m128i flip) { return _mm_or_si128(flip, _mm_shuffle_epi32(flip, 0x4e)); } -<<<<<<< HEAD ->>>>>>> 4b387c1 (Revert AVX Flip results to __m128i, keeping reduce_vflip partially) - #ifdef HAS_CPU_64 - #define Flip(x,P,O) ((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_insert_epi64(_mm_cvtsi64_si128(P), (O), 1), (x))))) - #else - #define Flip(x,P,O) ((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_insert_epi32(_mm_insert_epi32(_mm_insert_epi32(\ - _mm_cvtsi32_si128(P), ((P) >> 32), 1), (O), 2), (O >> 32), 3), (x))))) - #endif ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) -======= - #define Flip(x,P,O) ((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_set_epi64x((O), (P)), (x))))) ->>>>>>> c228033 (Replace mm_flip OP param unpack with _mm_set_epi64x) #define board_flip(board,x) ((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_loadu_si128((__m128i *) (board)), (x))))) #define vboard_flip(board,x) ((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip((board).v2, (x))))) @@ -347,149 +147,6 @@ extern unsigned long long A1_A8[256]; extern void init_flip_sse(void); #endif -======= -#if (MOVE_GENERATOR == MOVE_GENERATOR_SSE_BSWAP) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX) - extern unsigned long long Flip(int, const unsigned long long, const unsigned long long); -======= -#if MOVE_GENERATOR == MOVE_GENERATOR_AVX -======= -#if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512) ->>>>>>> 393b667 (Experimental AVX512VL/CD version of move generator) - extern __m128i vectorcall mm_Flip(const __m128i OP, int pos); - #define Flip(x,P,O) ((unsigned long long) _mm_cvtsi128_si64(mm_Flip(_mm_unpacklo_epi64(_mm_cvtsi64_si128(P), _mm_cvtsi64_si128(O)), (x)))) -======= -======= - extern __m256i vectorcall mm_Flip(const __m128i OP, int pos); - inline __m128i vectorcall reduce_vflip(__m256i flip4) { - __m128i flip2 = _mm_or_si128(_mm256_castsi256_si128(flip4), _mm256_extracti128_si256(flip4, 1)); - return _mm_or_si128(flip2, _mm_shuffle_epi32(flip2, 0x4e)); // SWAP64 - } ->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c) - #ifdef HAS_CPU_64 - #define Flip(x,P,O) ((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_insert_epi64(_mm_cvtsi64_si128(P), (O), 1), (x))))) - #else - #define Flip(x,P,O) ((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_insert_epi32(_mm_insert_epi32(_mm_insert_epi32(\ - _mm_cvtsi32_si128(P), ((P) >> 32), 1), (O), 2), (O >> 32), 3), (x))))) - #endif -<<<<<<< HEAD ->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99) - #define board_flip(board,x) ((unsigned long long) _mm_cvtsi128_si64(mm_Flip(_mm_loadu_si128((__m128i *) (board)), (x)))) - #define vboard_flip(board,x) ((unsigned long long) _mm_cvtsi128_si64(mm_Flip((board), (x)))) -======= - #define board_flip(board,x) ((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip(_mm_loadu_si128((__m128i *) (board)), (x))))) -<<<<<<< HEAD - #define vboard_flip(board,x) ((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip((board), (x))))) ->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c) -======= - #define vboard_flip(board,x) ((unsigned long long) _mm_cvtsi128_si64(reduce_vflip(mm_Flip((board).v2, (x))))) ->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM) - -#elif MOVE_GENERATOR == MOVE_GENERATOR_SSE - extern __m128i (vectorcall *mm_flip[BOARD_SIZE + 2])(const __m128i); - #define Flip(x,P,O) ((unsigned long long) _mm_cvtsi128_si64(mm_flip[x](_mm_unpacklo_epi64(_mm_cvtsi64_si128(P), _mm_cvtsi64_si128(O))))) - #define mm_Flip(OP,x) mm_flip[x](OP) - #define reduce_vflip(x) (x) - #define board_flip(board,x) ((unsigned long long) _mm_cvtsi128_si64(mm_flip[x](_mm_loadu_si128((__m128i *) (board))))) - #define vboard_flip(board,x) ((unsigned long long) _mm_cvtsi128_si64(mm_flip[x]((board).v2))) - -<<<<<<< HEAD -<<<<<<< HEAD -#elif MOVE_GENERATOR == MOVE_GENERATOR_SSE_BSWAP - extern unsigned long long flip(int, const unsigned long long, const unsigned long long); - #define Flip(x,P,O) flip((x), (P), (O)) - #define board_flip(board,x) flip((x), (board)->player, (board)->opponent) - ->>>>>>> 6506166 (More SSE optimizations) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -======= -#elif MOVE_GENERATOR == MOVE_GENERATOR_NEON - extern uint64x2_t mm_Flip(uint64x2_t OP, int pos); - #define Flip(x,P,O) vgetq_lane_u64(mm_Flip(vcombine_u64(vcreate_u64(P), vcreate_u64(O)), (x)), 0) - #define board_flip(board,x) vgetq_lane_u64(mm_Flip(vld1q_u64((uint64_t *) (board)), (x)), 0) - #define vboard_flip(board,x) vgetq_lane_u64(mm_Flip((board).v2, (x)), 0) - ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -#elif MOVE_GENERATOR == MOVE_GENERATOR_32 - extern unsigned long long (*flip[BOARD_SIZE + 2])(unsigned int, unsigned int, unsigned int, unsigned int); - #define Flip(x,P,O) flip[x]((unsigned int)(P), (unsigned int)((P) >> 32), (unsigned int)(O), (unsigned int)((O) >> 32)) - #ifdef __BIG_ENDIAN__ - #define board_flip(board,x) flip[x]((unsigned int)((board)->player), ((unsigned int *) &(board)->player)[0], (unsigned int)((board)->opponent), ((unsigned int *) &(board)->opponent)[0]) - #else - #define board_flip(board,x) flip[x]((unsigned int)((board)->player), ((unsigned int *) &(board)->player)[1], (unsigned int)((board)->opponent), ((unsigned int *) &(board)->opponent)[1]) - #endif -<<<<<<< HEAD - #if defined(USE_GAS_MMX) && !defined(hasSSE2) - extern void init_flip_sse(void); - #endif -<<<<<<< HEAD ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= -======= - #if defined(USE_GAS_MMX) && !defined(hasSSE2) - extern void init_flip_sse(void); - #endif ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) - -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> 6506166 (More SSE optimizations) -======= -#elif MOVE_GENERATOR == MOVE_GENERATOR_NEON - extern unsigned long long Flip(int , unsigned long long, unsigned long long); - ->>>>>>> f2da03e (Refine arm builds adding neon support.) -#else - #if MOVE_GENERATOR == MOVE_GENERATOR_SSE_BSWAP - extern unsigned long long Flip(int, unsigned long long, unsigned long long); - #else - extern unsigned long long (*flip[BOARD_SIZE + 2])(const unsigned long long, const unsigned long long); - #define Flip(x,P,O) flip[x]((P), (O)) -<<<<<<< HEAD - #endif - - #define board_flip(board,x) Flip((x), (board)->player, (board)->opponent) -#endif - -#ifndef vboard_flip - #define vboard_flip(vboard,x) board_flip(&(vboard).board, (x)) -#endif - -// Use backup copy of search->board in a vector register if available (assume *pboard == vboard on entry) -#ifdef hasSSE2 - #define vboard_update(pboard,vboard,move) _mm_storeu_si128((__m128i *) (pboard), _mm_shuffle_epi32(_mm_xor_si128((vboard).v2, _mm_or_si128(_mm_set1_epi64x((move)->flipped), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x]))), 0x4e)) -#else - #define vboard_update(pboard,vboard,move) board_update((pboard), (move)) -#endif - -// Pass Board in a vector register to Flip -#if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE) - unsigned long long vectorcall board_next_sse(__m128i OP, const int x, Board *next); - #define board_next(board,x,next) board_next_sse(_mm_loadu_si128((__m128i *) (board)), (x), (next)) - #define vboard_next(vboard,x,next) board_next_sse((vboard).v2, (x), (next)) -#elif MOVE_GENERATOR == MOVE_GENERATOR_NEON - unsigned long long board_next_neon(uint64x2_t OP, const int x, Board *next); - #define board_next(board,x,next) board_next_neon(vld1q_u64((uint64_t *) (board)), (x), (next)) - #define vboard_next(vboard,x,next) board_next_neon((vboard).v2, (x), (next)) -#else - unsigned long long board_next(const Board *board, const int x, Board *next); - #define vboard_next(vboard,x,next) board_next(&(vboard).board, (x), (next)) -#endif - -// Pass vboard to get_moves if vectorcall available, otherwise board -#if defined(__AVX2__) && (defined(_MSC_VER) || defined(__linux__)) - unsigned long long vectorcall get_moves_avx(__m256i PP, __m256i OO); - #define get_moves(P,O) get_moves_avx(_mm256_set1_epi64x(P), _mm256_set1_epi64x(O)) - #define board_get_moves(board) get_moves_avx(_mm256_set1_epi64x((board)->player), _mm256_set1_epi64x((board)->opponent)) - #define vboard_get_moves(vboard) get_moves_avx(_mm256_broadcastq_epi64((vboard).v2), _mm256_broadcastq_epi64(_mm_unpackhi_epi64((vboard).v2, (vboard).v2))) -#else - unsigned long long get_moves(const unsigned long long, const unsigned long long); - #define board_get_moves(board) get_moves((board)->player, (board)->opponent) - #define vboard_get_moves(vboard) get_moves((vboard).board.player, (vboard).board.opponent) -======= - #define board_flip(board,x) flip[x]((board)->player, (board)->opponent) ->>>>>>> 6506166 (More SSE optimizations) -======= #else #if MOVE_GENERATOR == MOVE_GENERATOR_SSE_BSWAP extern unsigned long long Flip(int, unsigned long long, unsigned long long); @@ -499,7 +156,6 @@ extern unsigned long long A1_A8[256]; #endif #define board_flip(board,x) Flip((x), (board)->player, (board)->opponent) ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) #endif #ifndef vboard_flip diff --git a/src/board_mmx.c b/src/board_mmx.c index e4cccd0..a9dff19 100644 --- a/src/board_mmx.c +++ b/src/board_mmx.c @@ -1,7 +1,3 @@ -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) /** * @file board_mmx.c * @@ -10,28 +6,9 @@ * If both hasMMX and hasSSE2 are undefined, dynamic dispatching code * will be generated. (This setting requires VC or GCC 4.4+) * -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) * @date 2014 - 2023 * @author Toshihiko Okuhara * @version 4.5 -======= - * @date 2014 - 2020 - * @author Toshihiko Okuhara - * @version 4.4 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - * @date 2014 - 2021 -======= - * @date 2014 - 2022 ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) - * @author Toshihiko Okuhara - * @version 4.5 ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) */ #include "bit.h" @@ -39,27 +16,11 @@ #include "board.h" #include "move.h" -<<<<<<< HEAD -<<<<<<< HEAD #ifdef USE_GAS_MMX #ifndef hasMMX #pragma GCC push_options #pragma GCC target ("mmx") #endif -======= -#if !defined(hasSSE2) && defined(USE_GAS_MMX) -#ifndef hasMMX - #pragma GCC push_options - #pragma GCC target ("mmx") -#endif ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -#ifdef USE_GAS_MMX - #ifndef hasMMX - #pragma GCC push_options - #pragma GCC target ("mmx") - #endif ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) #include #endif @@ -70,14 +31,6 @@ static const unsigned long long mask_33 = 0x3333333333333333ULL; static const unsigned long long mask_0F = 0x0f0f0f0f0f0f0f0fULL; #endif -<<<<<<< HEAD -<<<<<<< HEAD -======= -#ifndef hasSSE2 - ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) #ifndef hasMMX bool hasMMX = false; #endif @@ -140,123 +93,6 @@ void init_mmx (void) init_flip_sse(); #endif } -<<<<<<< HEAD -<<<<<<< HEAD -======= -#endif // hasSSE2 - -#ifdef hasMMX -/** - * @brief Update a board. - * - * Update a board by flipping its discs and updating every other data, - * according to the 'move' description. - * - * @param board the board to modify - * @param move A Move structure describing the modification. - */ -#if defined(hasSSE2) && !defined(__3dNOW__) // Faster on CPU with slow emms - -void board_update(Board *board, const Move *move) -{ - __m128i F = _mm_loadl_epi64((__m128i *) &move->flipped); - __m128i OP = _mm_loadu_si128((__m128i *) board); - OP = _mm_xor_si128(OP, _mm_or_si128(_mm_unpacklo_epi64(F, F), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x]))); - _mm_storeu_si128((__m128i *) board, _mm_shuffle_epi32(OP, 0x4e)); - board_check(board); -} - -#elif defined(USE_MSVC_X86) - -void board_update(Board *board, const Move *move) -{ - __m64 F = *(__m64 *) &move->flipped; - __m64 P = _m_pxor(*(__m64 *) &board->player, _m_por(F, *(__m64 *) &X_TO_BIT[move->x])); - __m64 O = _m_pxor(*(__m64 *) &board->opponent, F); - *(__m64 *) &board->player = O; - *(__m64 *) &board->opponent = P; - _mm_empty(); - board_check(board); -} - -#else - -void board_update(Board *board, const Move *move) -{ - __asm__ ( - "movq %2, %%mm1\n\t" - "movq %3, %%mm0\n\t" - "por %%mm1, %%mm0\n\t" - "pxor %0, %%mm0\n\t" - "pxor %1, %%mm1\n\t" - "movq %%mm0, %1\n\t" - "movq %%mm1, %0\n\t" - "emms" - : "=m" (board->player), "=m" (board->opponent) - : "m" (move->flipped), "m" (X_TO_BIT[move->x]) - : "mm0", "mm1"); - board_check(board); -} - -#endif - -/** - * @brief Restore a board. - * - * Restore a board by un-flipping its discs and restoring every other data, - * according to the 'move' description, in order to cancel a board_update_move. - * - * @param board board to restore. - * @param move a Move structure describing the modification. - */ -#if defined(hasSSE2) && !defined(__3dNOW__) - -void board_restore(Board *board, const Move *move) -{ - __m128i F = _mm_loadl_epi64((__m128i *) &move->flipped); - __m128i OP = _mm_shuffle_epi32(_mm_loadu_si128((__m128i *) board), 0x4e); - OP = _mm_xor_si128(OP, _mm_or_si128(_mm_unpacklo_epi64(F, F), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x]))); - _mm_storeu_si128((__m128i *) board, OP); - board_check(board); -} - -#elif defined(USE_MSVC_X86) - -void board_restore(Board *board, const Move *move) -{ - __m64 F = *(__m64 *) &move->flipped; - __m64 P = *(__m64 *) &board->opponent; - __m64 O = *(__m64 *) &board->player; - *(__m64 *) &board->player = _m_pxor(P, _m_por(F, *(__m64 *) &X_TO_BIT[move->x])); - *(__m64 *) &board->opponent = _m_pxor(O, F); - _mm_empty(); - board_check(board); -} - -#else - -void board_restore(Board *board, const Move *move) -{ - __asm__ ( - "movq %2, %%mm1\n\t" - "movq %3, %%mm0\n\t" - "por %%mm1, %%mm0\n\t" - "pxor %1, %%mm0\n\t" - "pxor %0, %%mm1\n\t" - "movq %%mm0, %0\n\t" - "movq %%mm1, %1\n\t" - "emms" - : "=m" (board->player), "=m" (board->opponent) - : "m" (move->flipped), "m" (X_TO_BIT[move->x]) - : "mm0", "mm1"); - board_check(board); -} - -#endif -#endif // hasMMX ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) /** * @brief MMX translation of get_moves @@ -266,31 +102,13 @@ void board_restore(Board *board, const Move *move) */ #ifdef USE_MSVC_X86 -<<<<<<< HEAD -<<<<<<< HEAD unsigned long long get_moves_mmx(const unsigned long long P_, const unsigned long long O_) -======= -unsigned long long get_moves_mmx(unsigned long long P_, unsigned long long O_) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -unsigned long long get_moves_mmx(const unsigned long long P_, const unsigned long long O_) ->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction) { unsigned int movesL, movesH, mO1, flip1, pre1; __m64 P, O, M, mO, flip, pre; -<<<<<<< HEAD -<<<<<<< HEAD - P = _m_punpckldq(_m_from_int(P_), _m_from_int(P_ >> 32)); - O = _m_punpckldq(_m_from_int(O_), _m_from_int(O_ >> 32)); mO1 = (unsigned int) O_ & 0x7e7e7e7e; -======= - P = *(__m64 *) &P_; - O = *(__m64 *) &O_; mO1 = (unsigned int) O_ & 0x7e7e7e7e; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= P = _m_punpckldq(_m_from_int(P_), _m_from_int(P_ >> 32)); O = _m_punpckldq(_m_from_int(O_), _m_from_int(O_ >> 32)); mO1 = (unsigned int) O_ & 0x7e7e7e7e; ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) /* shift = +8 */ /* shift = +1 */ flip = _m_pand(O, _m_psllqi(P, 8)); flip1 = mO1 & ((unsigned int) P_ << 1); flip = _m_por(flip, _m_pand(O, _m_psllqi(flip, 8))); flip1 |= mO1 & (flip1 << 1); @@ -343,15 +161,7 @@ unsigned long long get_moves_mmx(const unsigned long long P_, const unsigned lon #else -<<<<<<< HEAD -<<<<<<< HEAD unsigned long long get_moves_mmx(const unsigned long long P, const unsigned long long O) -======= -unsigned long long get_moves_mmx(unsigned long long P, unsigned long long O) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -unsigned long long get_moves_mmx(const unsigned long long P, const unsigned long long O) ->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction) { unsigned long long moves; __asm__ ( @@ -496,11 +306,6 @@ unsigned long long get_moves_mmx(const unsigned long long P, const unsigned long * x 1.5 faster bench stability on 32-bit x86. * */ -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) #ifdef hasMMX static void get_full_lines(const unsigned long long disc_, unsigned long long full[4]) { @@ -578,174 +383,23 @@ int get_stability_fulls(unsigned long long P, unsigned long long O, unsigned lon stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), ((__m64 *) full)[1]); stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), ((__m64 *) full)[3]); stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), ((__m64 *) full)[2]); -======= -#ifdef USE_MSVC_X86 - -unsigned long long get_all_full_lines_mmx(const unsigned long long disc_, V4DI *full) -======= -#if defined(hasMMX) && !defined(hasSSE2) -<<<<<<< HEAD -<<<<<<< HEAD -unsigned long long get_all_full_lines(const unsigned long long disc_, V4DI *full) ->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction) -======= -void get_all_full_lines(const unsigned long long disc_, unsigned long long full[5]) ->>>>>>> 4303b09 (Returns all full lines in full[4]) -======= -static void get_full_lines(const unsigned long long disc_, unsigned long long full[4]) ->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX) -{ - __m64 disc = *(__m64 *) &disc_; - __m64 full_l, full_r; - unsigned int full_v; - const __m64 kFF = _m_pcmpeqb(disc, disc); - static const unsigned long long e7[] = { 0xff01010101010101, 0x80808080808080ff, 0xffff030303030303, 0xc0c0c0c0c0c0ffff, 0xffffffff0f0f0f0f, 0xf0f0f0f0ffffffff }; - static const unsigned long long e9[] = { 0xff80808080808080, 0x01010101010101ff, 0xffffc0c0c0c0c0c0, 0x030303030303ffff, 0x0f0f0f0ff0f0f0f0 }; - - // get_full_lines_mmx(full_d7, disc, 7, e7); - full_l = _m_pand(disc, _m_por(((__m64 *) e7)[0], _m_psrlqi(disc, 7))); - full_r = _m_pand(disc, _m_por(((__m64 *) e7)[1], _m_psllqi(disc, 7))); - full_l = _m_pand(full_l, _m_por(((__m64 *) e7)[2], _m_psrlqi(full_l, 14))); - full_r = _m_pand(full_r, _m_por(((__m64 *) e7)[3], _m_psllqi(full_r, 14))); - full_l = _m_pand(full_l, _m_por(((__m64 *) e7)[4], _m_psrlqi(full_l, 28))); - full_r = _m_pand(full_r, _m_por(((__m64 *) e7)[5], _m_psllqi(full_r, 28))); - ((__m64 *) full)[3] = _m_pand(full_l, full_r); - - // get_full_lines_mmx(full_d9, disc, 9, e9); - full_l = _m_pand(disc, _m_por(((__m64 *) e9)[0], _m_psrlqi(disc, 9))); - full_r = _m_pand(disc, _m_por(((__m64 *) e9)[1], _m_psllqi(disc, 9))); - full_l = _m_pand(full_l, _m_por(((__m64 *) e9)[2], _m_psrlqi(full_l, 18))); - full_r = _m_pand(full_r, _m_por(((__m64 *) e9)[3], _m_psllqi(full_r, 18))); - ((__m64 *) full)[2] = _m_pand(_m_pand(full_l, full_r), _m_por(((__m64 *) e9)[4], _m_por(_m_psrlqi(full_l, 36), _m_psllqi(full_r, 36)))); - - // get_full_lines_mmx(full_h, disc, 1, e1); - ((__m64 *) full)[0] = _m_pcmpeqb(kFF, disc); - _mm_empty(); - - // get_full_lines_mmx(full_v, disc, 8, e8); - full_v = (unsigned int) disc_ & (unsigned int)(disc_ >> 32); - full_v &= (full_v >> 16) | (full_v << 16); // ror 16 - full_v &= (full_v >> 8) | (full_v << 24); // ror 8 - full[1] = full_v | ((unsigned long long) full_v << 32); -} - -// returns all full in full[4] in addition to stability count -int get_stability_fulls(unsigned long long P, unsigned long long O, unsigned long long full[5]) -{ - __m64 P_central, stable, stable_h, stable_v, stable_d7, stable_d9, old_stable, m; - unsigned int OL, OH, PL, PH, t, a1a8, h1h8, SL, SH; - - get_full_lines(P | O, full); - - OL = (unsigned int) O; OH = (unsigned int)(O >> 32); - PL = (unsigned int) P; PH = (unsigned int)(P >> 32); - SL = PL & 0x7f7f7f00; SH = PH & 0x007f7f7f; - P_central = _m_punpckldq(_m_from_int(SL), _m_from_int(SH)); - - // P_central & allfull - full[4] = full[0] & full[1] & full[2] & full[3]; - SL &= (unsigned int) full[4]; - SH &= (unsigned int)(full[4] >> 32); - - // compute the exact stable edges (from precomputed tables) - a1a8 = edge_stability[((((PL & 0x01010101) + ((PH & 0x01010101) << 4)) * 0x01020408) >> 24) * 256 - + ((((OL & 0x01010101) + ((OH & 0x01010101) << 4)) * 0x01020408) >> 24)]; - h1h8 = edge_stability[((((PH & 0x80808080) + ((PL & 0x80808080) >> 4)) * 0x00204081) >> 24) * 256 - + ((((OH & 0x80808080) + ((OL & 0x80808080) >> 4)) * 0x00204081) >> 24)]; - SL |= edge_stability[(PL & 0xff) * 256 + (OL & 0xff)] - | (((a1a8 & 0x0f) * 0x00204081) & 0x01010101) - | (((h1h8 & 0x0f) * 0x10204080) & 0x80808080); - SH |= (edge_stability[((PH >> 16) & 0xff00) + (OH >> 24)] << 24) - | (((a1a8 >> 4) * 0x00204081) & 0x01010101) - | (((h1h8 >> 4) * 0x10204080) & 0x80808080); - stable = _m_punpckldq(_m_from_int(SL), _m_from_int(SH)); - - // now compute the other stable discs (ie discs touching another stable disc in each flipping direction). - t = SL | SH; - if (t) { - do { - old_stable = stable; -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - stable_h = _m_por(_m_por(_m_psrlqi(stable, 1), _m_psllqi(stable, 1)), full_h); - stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), full_v); - stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), full_d7); - stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), full_d9); ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - stable_h = _m_por(_m_por(_m_psrlqi(stable, 1), _m_psllqi(stable, 1)), full.v1[0]); - stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), full.v1[1]); - stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), full.v1[3]); - stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), full.v1[2]); ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= - stable_h = _m_por(_m_por(_m_psrlqi(stable, 1), _m_psllqi(stable, 1)), full->v1[0]); - stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), full->v1[1]); - stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), full->v1[3]); - stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), full->v1[2]); ->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction) -======= - stable_h = _m_por(_m_por(_m_psrlqi(stable, 1), _m_psllqi(stable, 1)), full[0]); - stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), full[1]); - stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), full[3]); - stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), full[2]); ->>>>>>> 4303b09 (Returns all full lines in full[4]) -======= - stable_h = _m_por(_m_por(_m_psrlqi(stable, 1), _m_psllqi(stable, 1)), ((__m64 *) full)[0]); - stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), ((__m64 *) full)[1]); - stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), ((__m64 *) full)[3]); - stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), ((__m64 *) full)[2]); ->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX) stable = _m_por(stable, _m_pand(_m_pand(_m_pand(_m_pand(stable_h, stable_v), stable_d7), stable_d9), P_central)); m = _m_pxor(stable, old_stable); } while (_m_to_int(_m_packsswb(m, m)) != 0); -<<<<<<< HEAD -<<<<<<< HEAD #ifdef POPCOUNT t = bit_count_32(_m_to_int(stable)) + bit_count_32(_m_to_int(_m_psrlqi(stable, 32))); #else -======= -#ifdef POPCOUNT - #ifdef _MSC_VER - t = __popcnt(_m_to_int(stable)) + __popcnt(_m_to_int(_m_psrlqi(stable, 32))); - #else - t = __builtin_popcount(_m_to_int(stable)) + __builtin_popcount(_m_to_int(_m_psrlqi(stable, 32))); - #endif -#else ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - #ifdef POPCOUNT - t = bit_count_32(_m_to_int(stable)) + bit_count_32(_m_to_int(_m_psrlqi(stable, 32))); - #else ->>>>>>> 30464b5 (add hash_prefetch to NWS_endgame) m = _m_psubd(stable, _m_pand(_m_psrlqi(stable, 1), *(__m64 *) &mask_55)); m = _m_paddd(_m_pand(m, *(__m64 *) &mask_33), _m_pand(_m_psrlqi(m, 2), *(__m64 *) &mask_33)); m = _m_pand(_m_paddd(m, _m_psrlqi(m, 4)), *(__m64 *) &mask_0F); t = ((unsigned int) _m_to_int(_m_paddb(m, _m_psrlqi(m, 32))) * 0x01010101u) >> 24; -<<<<<<< HEAD -<<<<<<< HEAD - #endif -======= -#endif ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= #endif ->>>>>>> 30464b5 (add hash_prefetch to NWS_endgame) } _mm_empty(); return t; } -<<<<<<< HEAD -<<<<<<< HEAD - -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX) // returns stability count only int get_stability(const unsigned long long P, const unsigned long long O) { @@ -754,1235 +408,7 @@ int get_stability(const unsigned long long P, const unsigned long long O) return get_stability_fulls(P, O, full); } #endif // hasMMX -======= -#elif defined(USE_GAS_MMX) && !(defined(__clang__) && (__clang__major__ < 3)) -// LLVM ERROR: Unsupported asm: input constraint with a matching output constraint of incompatible type! -======= -#elif defined(USE_GAS_MMX) ->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line) - -#define get_full_lines_mmx(result,disc,dir,edge) __asm__ (\ - "movq %1, %%mm0\n\t" "movq %1, %%mm1\n\t"\ - "psrlq %2, %%mm0\n\t" "psllq %2, %%mm1\n\t"\ - "por %5, %%mm0\n\t" "por %6, %%mm1\n\t"\ - "pand %1, %%mm0\n\t" "pand %1, %%mm1\n\t"\ - "movq %%mm0, %%mm2\n\t" "movq %%mm1, %%mm3\n\t"\ - "psrlq %3, %%mm0\n\t" "psllq %3, %%mm1\n\t"\ - "por %7, %%mm0\n\t" "por %8, %%mm1\n\t"\ - "pand %%mm2, %%mm0\n\t" "pand %%mm3, %%mm1\n\t"\ - "movq %%mm0, %%mm2\n\t" "pand %%mm1, %%mm0\n\t"\ - "psrlq %4, %%mm2\n\t" "psllq %4, %%mm1\n\t"\ - "por %9, %%mm2\n\t" "por %10, %%mm1\n\t"\ - "pand %%mm2, %%mm0\n\t" "pand %%mm1, %%mm0\n\t"\ - "movq %%mm0, %0"\ - : "=m" (result)\ - : "y" (disc), "i" (dir), "i" (dir * 2), "i" (dir * 4),\ - "m" (edge[0]), "m" (edge[1]), "m" (edge[2]), "m" (edge[3]), "m" (edge[4]), "m" (edge[5])\ - : "mm0", "mm1", "mm2", "mm3"); - -unsigned long long get_all_full_lines_mmx(const unsigned long long disc_, V4DI *full) -{ - __m64 disc; - unsigned int full_v; - static const unsigned long long e7[] = { 0xff01010101010101, 0x80808080808080ff, 0xffff030303030303, 0xc0c0c0c0c0c0ffff, 0xffffffff0f0f0f0f, 0xf0f0f0f0ffffffff }; - static const unsigned long long e9[] = { 0xff80808080808080, 0x01010101010101ff, 0xffffc0c0c0c0c0c0, 0x030303030303ffff, 0xfffffffff0f0f0f0, 0x0f0f0f0fffffffff }; - - __asm__ ( - "movd %1, %0\n\t" - "punpckldq %2, %0\n\t" - : "=&y" (disc) : "m" (disc_), "m" (((unsigned int *)&disc_)[1])); - - get_full_lines_mmx(full->ull[3], disc, 7, e7); - get_full_lines_mmx(full->ull[2], disc, 9, e9); - - // get_full_lines_mmx(full_h, disc, 1, e1); - __asm__ ( - "pcmpeqb %%mm0, %%mm0\n\t" - "pcmpeqb %1, %%mm0\n\t" - "movq %%mm0, %0\n\t" - "emms" - : "=m" (full->ull[0]) : "y" (disc) : "mm0"); - - // get_full_lines_mmx(full_v, disc, 8, e8); - full_v = (unsigned int) disc_ & (unsigned int)(disc_ >> 32); - full_v &= (full_v >> 16) | (full_v << 16); // ror 16 - full_v &= (full_v >> 8) | (full_v << 24); // ror 8 - full->ull[1] = full_v | ((unsigned long long) full_v << 32); - - return full->ull[0] & full->ull[1] & full->ull[2] & full->ull[3]; -} - -int get_stability_mmx(unsigned long long P, unsigned long long O) -{ - V4DI full; - unsigned long long allfull; - __m64 P_central, stable; - unsigned int OL, OH, PL, PH, t, a1a8, h1h8, SL, SH; - - allfull = get_all_full_lines_mmx(P | O, &full); - - // compute the exact stable edges (from precomputed tables) - OL = (unsigned int) O; OH = (unsigned int)(O >> 32); - PL = (unsigned int) P; PH = (unsigned int)(P >> 32); - a1a8 = edge_stability[((((PL & 0x01010101u) + ((PH & 0x01010101u) << 4)) * 0x01020408u) >> 24) * 256 - + ((((OL & 0x01010101u) + ((OH & 0x01010101u) << 4)) * 0x01020408u) >> 24)]; - h1h8 = edge_stability[((((PH & 0x80808080u) + ((PL & 0x80808080u) >> 4)) * 0x00204081u) >> 24) * 256 - + ((((OH & 0x80808080u) + ((OL & 0x80808080u) >> 4)) * 0x00204081u) >> 24)]; - SL = edge_stability[(PL & 0xff) * 256 + (OL & 0xff)] - | (((a1a8 & 0x0f) * 0x00204081) & 0x01010101) - | (((h1h8 & 0x0f) * 0x10204080) & 0x80808080); - SH = (edge_stability[((PH >> 16) & 0xff00) + (OH >> 24)] << 24) - | (((a1a8 >> 4) * 0x00204081) & 0x01010101) - | (((h1h8 >> 4) * 0x10204080) & 0x80808080); - - PL &= 0x7f7f7f00; - PH &= 0x007f7f7f; - SL |= (unsigned int) allfull & PL; - SH |= (unsigned int)(allfull >> 32) & PH; - - __asm__( - "movd %2, %0\n\t" "movd %4, %1\n\t" - "movd %3, %%mm0\n\t" "movd %5, %%mm1\n\t" - "punpckldq %%mm0, %0\n\t" "punpckldq %%mm1, %1\n\t" - : "=y" (P_central), "=y" (stable) : "g" (PL), "g" (PH), "g" (SL), "g" (SH) : "mm0", "mm1" ); - - // now compute the other stable discs (ie discs touching another stable disc in each flipping direction). - t = SL | SH; - if (t) { - do { - __asm__ ( - "movq %1, %%mm3\n\t" - "movq %6, %1\n\t" - "movq %%mm3, %%mm0\n\t" "movq %%mm3, %%mm1\n\t" - "psrlq $1, %%mm0\n\t" "psllq $1, %%mm1\n\t" "movq %%mm3, %%mm2\n\t" - "por %%mm1, %%mm0\n\t" "movq %%mm3, %%mm1\n\t" "psrlq $7, %%mm2\n\t" - "por %2, %%mm0\n\t" "psllq $7, %%mm1\n\t" "por %%mm1, %%mm2\n\t" - "pand %%mm0, %1\n\t" "por %4, %%mm2\n\t" - "movq %%mm3, %%mm0\n\t" "movq %%mm3, %%mm1\n\t" "pand %%mm2, %1\n\t" - "psrlq $8, %%mm0\n\t" "psllq $8, %%mm1\n\t" "movq %%mm3, %%mm2\n\t" - "por %%mm1, %%mm0\n\t" "movq %%mm3, %%mm1\n\t" "psrlq $9, %%mm2\n\t" - "por %3, %%mm0\n\t" "psllq $9, %%mm1\n\t" "por %%mm1, %%mm2\n\t" - "pand %%mm0, %1\n\t" "por %5, %%mm2\n\t" - "pand %%mm2, %1\n\t" - "por %%mm3, %1\n\t" - "pxor %1, %%mm3\n\t" - "packsswb %%mm3, %%mm3\n\t" - "movd %%mm3, %0" - : "=g" (t), "+y" (stable) - : "m" (full.ull[0]), "m" (full.ull[1]), "m" (full.ull[3]), "m" (full.ull[2]), "y" (P_central) - : "mm0", "mm1", "mm2", "mm3"); - } while (t); - - // bit_count(stable) -#ifdef POPCOUNT - __asm__ ( - "movd %1, %0\n\t" - "psrlq $32, %1\n\t" - "movd %1, %%edx\n\t" - "popcntl %0, %0\n\t" - "popcntl %%edx, %%edx\n\t" - "addl %%edx, %0" - : "=&a" (t) : "y" (stable) : "edx"); -#else - __asm__ ( - "movq %1, %%mm0\n\t" - "psrlq $1, %1\n\t" - "pand %2, %1\n\t" - "psubd %1, %%mm0\n\t" - - "movq %%mm0, %%mm1\n\t" - "psrlq $2, %%mm0\n\t" - "pand %3, %%mm1\n\t" - "pand %3, %%mm0\n\t" - "paddd %%mm1, %%mm0\n\t" - - "movq %%mm0, %%mm1\n\t" - "psrlq $4, %%mm0\n\t" - "paddd %%mm1, %%mm0\n\t" - "pand %4, %%mm0\n\t" - #ifdef hasSSE2 - "pxor %%mm1, %%mm1\n\t" - "psadbw %%mm1, %%mm0\n\t" - "movd %%mm0, %0\n\t" - #else - "movq %%mm0, %%mm1\n\t" - "psrlq $32, %%mm0\n\t" - "paddb %%mm1, %%mm0\n\t" - - "movd %%mm0, %0\n\t" - "imull $0x01010101, %0, %0\n\t" - "shrl $24, %0" - #endif - : "=a" (t) : "y" (stable), "m" (mask_55), "my" (mask_33), "m" (mask_0F) : "mm0", "mm1"); -#endif - } - __asm__ ( "emms" ); - return t; -} -#endif // USE_MSVC_X86 -======= -#endif // hasMMX ->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction) - -<<<<<<< HEAD -/** - * @brief MMX translation of get_potential_mobility - * - * @param P bitboard with player's discs. - * @param O bitboard with opponent's discs. - * @return a count of potential moves. - */ -#ifdef USE_MSVC_X86 - -int get_potential_mobility_mmx(unsigned long long P, unsigned long long O) -{ - __m64 m, mO; - int count; - static const unsigned long long mask_v = 0x00ffffffffffff00ULL; - // static const unsigned long long mask_d = 0x007e7e7e7e7e7e00ULL; // = mask_7e & mask_v - #ifdef POPCOUNT - int mh, ml; - #else - static const unsigned long long mask_15 = 0x1555555555555515ULL; - static const unsigned long long mask_01 = 0x0100000000000001ULL; - #endif - - mO = _m_pand(*(__m64 *) &O, *(__m64 *) &mask_7e); - m = _m_por(_m_psllqi(mO, 1), _m_psrlqi(mO, 1)); - mO = _m_pand(*(__m64 *) &O, *(__m64 *) &mask_v); - m = _m_por(m, _m_por(_m_psllqi(mO, 8), _m_psrlqi(mO, 8))); - mO = _m_pand(mO, *(__m64 *) &mask_7e); - m = _m_por(m, _m_por(_m_psllqi(mO, 7), _m_psrlqi(mO, 7))); - m = _m_por(m, _m_por(_m_psllqi(mO, 9), _m_psrlqi(mO, 9))); - m = _m_pandn(_m_por(*(__m64 *) &O, *(__m64 *) &P), m); - - #ifdef POPCOUNT - ml = _m_to_int(m); - mh = _m_to_int(_m_psrlqi(m, 32)); - count = bit_count_32(ml) + bit_count_32(mh) + bit_count_32((ml & 0x00000081) + (mh & 0x81000000)); - #else - m = _m_paddd(_m_psubd(m, _m_pand(_m_psrlqi(m, 1), *(__m64 *) &mask_15)), _m_pand(m, *(__m64 *) &mask_01)); - m = _m_paddd(_m_pand(m, *(__m64 *) &mask_33), _m_pand(_m_psrlqi(m, 2), *(__m64 *) &mask_33)); - m = _m_pand(_m_paddd(m, _m_psrlqi(m, 4)), *(__m64 *) &mask_0F); - count = ((unsigned int) _m_to_int(_m_paddb(m, _m_psrlqi(m, 32))) * 0x01010101u) >> 24; - #endif - _mm_empty(); - return count; -} - -#elif defined(USE_GAS_MMX) - -int get_potential_mobility_mmx(unsigned long long P, unsigned long long O) -{ - int count; - static const unsigned long long mask_v = 0x00ffffffffffff00ULL; - // static const unsigned long long mask_d = 0x007e7e7e7e7e7e00ULL; // = mask_7e & mask_v - #ifndef POPCOUNT - static const unsigned long long mask_15 = 0x1555555555555515ULL; - static const unsigned long long mask_01 = 0x0100000000000001ULL; - #endif - - __asm__ ( - "movq %3, %%mm2\n\t" "movq %4, %%mm5\n\t" - "pand %2, %%mm2\n\t" "pand %2, %%mm5\n\t" "movq %%mm2, %%mm3\n\t" - "movq %%mm2, %%mm4\n\t" "movq %%mm5, %%mm6\n\t" "pand %%mm5, %%mm3\n\t" - "psllq $1, %%mm2\n\t" "psllq $8, %%mm5\n\t" - "psrlq $1, %%mm4\n\t" "psrlq $8, %%mm6\n\t" - "por %%mm4, %%mm2\n\t" "por %%mm6, %%mm5\n\t" - "por %%mm5, %%mm2\n\t" - "movq %%mm3, %%mm5\n\t" - "movq %%mm3, %%mm4\n\t" "movq %%mm5, %%mm6\n\t" - "psllq $7, %%mm3\n\t" "psllq $9, %%mm5\n\t" - "psrlq $7, %%mm4\n\t" "psrlq $9, %%mm6\n\t" - "por %%mm4, %%mm3\n\t" "por %%mm6, %%mm5\n\t" - "por %%mm3, %%mm2\n\t" "por %%mm5, %%mm2\n\t" - "por %1, %2\n\t" - "pandn %%mm2, %2\n\t" - - #ifdef POPCOUNT - "movd %2, %%ecx\n\t" - "popcntl %%ecx, %0\n\t" "andl $0x00000081, %%ecx\n\t" - "psrlq $32, %2\n\t" "popcntl %%ecx, %%ecx\n\t" - "movd %2, %%edx\n\t" "addl %%ecx, %0\n\t" - "popcntl %%edx, %%ecx\n\t" "andl $0x81000000, %%edx\n\t" - "addl %%ecx, %0\n\t" "popcntl %%edx, %%edx\n\t" - "addl %%edx, %0\n\t" - "emms" - : "=g" (count) : "y" (P), "y" (O), "m" (mask_7e), "m" (mask_v) - : "ecx", "edx", "mm2", "mm3", "mm4", "mm5", "mm6"); - - #else - "movq %2, %1\n\t" "movq %2, %%mm2\n\t" - "psrlq $1, %2\n\t" - "pand %5, %2\n\t" "pand %6, %%mm2\n\t" - "psubd %2, %1\n\t" "paddd %%mm2, %1\n\t" - - "movq %1, %2\n\t" - "psrlq $2, %1\n\t" - "pand %7, %2\n\t" - "pand %7, %1\n\t" - "paddd %2, %1\n\t" - - "movq %1, %2\n\t" - "psrlq $4, %1\n\t" - "paddd %2, %1\n\t" - "pand %8, %1\n\t" - #ifdef hasSSE2 - "pxor %2, %2\n\t" - "psadbw %2, %1\n\t" - "movd %1, %0\n\t" - #else - "movq %1, %2\n\t" - "psrlq $32, %1\n\t" - "paddb %2, %1\n\t" - - "movd %1, %0\n\t" - "imull $0x01010101, %0, %0\n\t" - "shrl $24, %0\n\t" - #endif - "emms" - : "=g" (count) - : "y" (P), "y" (O), "m" (mask_7e), "m" (mask_v), - "m" (mask_15), "m" (mask_01), "m" (mask_33), "m" (mask_0F) - : "mm2", "mm3", "mm4", "mm5", "mm6"); - #endif - - return count; -} -#endif - -<<<<<<< HEAD -/** - * @brief MMX translation of board_get_hash_code. - * - * @param p pointer to 16 bytes to hash. - * @return the hash code of the bitboard - */ - -#if defined(USE_GAS_MMX) && defined(__3dNOW__) - -unsigned long long board_get_hash_code_mmx(const unsigned char *p) -{ - unsigned long long h; - - __asm__ volatile ( - "movq %0, %%mm0\n\t" "movq %1, %%mm1" - : : "m" (hash_rank[0][p[0]]), "m" (hash_rank[1][p[1]])); - __asm__ volatile ( - "pxor %0, %%mm0\n\t" "pxor %1, %%mm1" - : : "m" (hash_rank[2][p[2]]), "m" (hash_rank[3][p[3]])); - __asm__ volatile ( - "pxor %0, %%mm0\n\t" "pxor %1, %%mm1" - : : "m" (hash_rank[4][p[4]]), "m" (hash_rank[5][p[5]])); - __asm__ volatile ( - "pxor %0, %%mm0\n\t" "pxor %1, %%mm1" - : : "m" (hash_rank[6][p[6]]), "m" (hash_rank[7][p[7]])); - __asm__ volatile ( - "pxor %0, %%mm0\n\t" "pxor %1, %%mm1" - : : "m" (hash_rank[8][p[8]]), "m" (hash_rank[9][p[9]])); - __asm__ volatile ( - "pxor %0, %%mm0\n\t" "pxor %1, %%mm1" - : : "m" (hash_rank[10][p[10]]), "m" (hash_rank[11][p[11]])); - __asm__ volatile ( - "pxor %0, %%mm0\n\t" "pxor %1, %%mm1" - : : "m" (hash_rank[12][p[12]]), "m" (hash_rank[13][p[13]])); - __asm__ volatile ( - "pxor %1, %%mm0\n\t" "pxor %2, %%mm1\n\t" - "pxor %%mm1, %%mm0\n\t" - "movd %%mm0, %%eax\n\t" - "punpckhdq %%mm0, %%mm0\n\t" - "movd %%mm0, %%edx\n\t" - "emms" - : "=A" (h) - : "m" (hash_rank[14][p[14]]), "m" (hash_rank[15][p[15]]) - : "mm0", "mm1"); - - return h; -} - -#endif // __3dNOW ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) -======= ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) #if !defined(hasMMX) && defined(USE_GAS_MMX) #pragma GCC pop_options #endif -<<<<<<< HEAD -======= -/** - * @file board_mmx.c - * - * MMX translation of some board.c functions for X86-32 - * - * If both hasMMX and hasSSE2 are undefined, dynamic dispatching code - * will be generated. (This setting requires VC or GCC 4.4+) - * - * @date 2014 - 2020 - * @author Toshihiko Okuhara - * @version 4.4 - */ - -#include "bit.h" -#include "hash.h" -#include "board.h" -#include "move.h" - -#if !defined(hasSSE2) && defined(USE_GAS_MMX) -#ifndef hasMMX - #pragma GCC push_options - #pragma GCC target ("mmx") -#endif - #include -#endif - -static const unsigned long long mask_7e = 0x7e7e7e7e7e7e7e7eULL; -#ifndef POPCOUNT -static const unsigned long long mask_55 = 0x5555555555555555ULL; -static const unsigned long long mask_33 = 0x3333333333333333ULL; -static const unsigned long long mask_0F = 0x0f0f0f0f0f0f0f0fULL; -#endif - -#ifndef hasSSE2 - -#ifndef hasMMX -bool hasMMX = false; -#endif -bool hasSSE2 = false; - -void init_mmx (void) -{ - int flg1, flg2, cpuid_edx, cpuid_ecx; -#ifdef USE_MSVC_X86 - int cpuinfo[4]; - - __asm { - pushfd - pop eax - mov flg2, eax - btc eax, 21 - push eax - popfd - pushfd - pop flg1 - } - - if (flg1 == flg2) /* CPUID not supported */ - return; - - __cpuid(cpuinfo, 1); - cpuid_edx = cpuinfo[3]; - cpuid_ecx = cpuinfo[2]; - -#else - __asm__ ( - "pushfl\n\t" - "popl %0\n\t" - "movl %0, %1\n\t" - "btc $21, %0\n\t" /* flip ID bit in EFLAGS */ - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0" - : "=r" (flg1), "=r" (flg2) ); - - if (flg1 == flg2) /* CPUID not supported */ - return; - - __asm__ ( - "movl $1, %%eax\n\t" - "cpuid" - : "=d" (cpuid_edx), "=c" (cpuid_ecx) :: "%eax", "%ebx" ); - -#endif - -#ifndef hasMMX - hasMMX = ((cpuid_edx & 0x00800000u) != 0); -#endif - hasSSE2 = ((cpuid_edx & 0x04000000u) != 0); - // hasPOPCNT = ((cpuid_ecx & 0x00800000u) != 0); - -#if (MOVE_GENERATOR == MOVE_GENERATOR_32) - if (hasSSE2) - init_flip_sse(); -#endif -} -#endif // hasSSE2 - -#ifdef hasMMX -/** - * @brief Update a board. - * - * Update a board by flipping its discs and updating every other data, - * according to the 'move' description. - * - * @param board the board to modify - * @param move A Move structure describing the modification. - */ -#if defined(hasSSE2) && !defined(__3dNOW__) // Faster on CPU with slow emms - -void board_update(Board *board, const Move *move) -{ - __m128i F = _mm_loadl_epi64((__m128i *) &move->flipped); - __m128i OP = _mm_loadu_si128((__m128i *) board); - OP = _mm_xor_si128(OP, _mm_or_si128(_mm_unpacklo_epi64(F, F), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x]))); - _mm_storel_pi((__m64 *) &board->opponent, _mm_castsi128_ps(OP)); - _mm_storeh_pi((__m64 *) &board->player, _mm_castsi128_ps(OP)); - board_check(board); -} - -#elif defined(USE_MSVC_X86) - -void board_update(Board *board, const Move *move) -{ - __m64 F = *(__m64 *) &move->flipped; - __m64 P = _m_pxor(*(__m64 *) &board->player, _m_por(F, *(__m64 *) &X_TO_BIT[move->x])); - __m64 O = _m_pxor(*(__m64 *) &board->opponent, F); - *(__m64 *) &board->player = O; - *(__m64 *) &board->opponent = P; - _mm_empty(); - board_check(board); -} - -#else - -void board_update(Board *board, const Move *move) -{ - __asm__ ( - "movq %2, %%mm1\n\t" - "movq %3, %%mm0\n\t" - "por %%mm1, %%mm0\n\t" - "pxor %0, %%mm0\n\t" - "pxor %1, %%mm1\n\t" - "movq %%mm0, %1\n\t" - "movq %%mm1, %0\n\t" - "emms" - : "=m" (board->player), "=m" (board->opponent) - : "m" (move->flipped), "m" (x_to_bit(move->x)) - : "mm0", "mm1"); - board_check(board); -} - -#endif - -/** - * @brief Restore a board. - * - * Restore a board by un-flipping its discs and restoring every other data, - * according to the 'move' description, in order to cancel a board_update_move. - * - * @param board board to restore. - * @param move a Move structure describing the modification. - */ -#if defined(hasSSE2) && !defined(__3dNOW__) - -void board_restore(Board *board, const Move *move) -{ - __m128i F = _mm_loadl_epi64((__m128i *) &move->flipped); - __m128i OP = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *) &board->opponent), _mm_loadl_epi64((__m128i *) &board->player)); - OP = _mm_xor_si128(OP, _mm_or_si128(_mm_unpacklo_epi64(F, F), _mm_loadl_epi64((__m128i *) &X_TO_BIT[move->x]))); - _mm_storeu_si128((__m128i *) board, OP); - board_check(board); -} - -#elif defined(USE_MSVC_X86) - -void board_restore(Board *board, const Move *move) -{ - __m64 F = *(__m64 *) &move->flipped; - __m64 P = *(__m64 *) &board->opponent; - __m64 O = *(__m64 *) &board->player; - *(__m64 *) &board->player = _m_pxor(P, _m_por(F, *(__m64 *) &X_TO_BIT[move->x])); - *(__m64 *) &board->opponent = _m_pxor(O, F); - _mm_empty(); - board_check(board); -} - -#else - -void board_restore(Board *board, const Move *move) -{ - __asm__ ( - "movq %2, %%mm1\n\t" - "movq %3, %%mm0\n\t" - "por %%mm1, %%mm0\n\t" - "pxor %1, %%mm0\n\t" - "pxor %0, %%mm1\n\t" - "movq %%mm0, %0\n\t" - "movq %%mm1, %1\n\t" - "emms" - : "=m" (board->player), "=m" (board->opponent) - : "m" (move->flipped), "m" (x_to_bit(move->x)) - : "mm0", "mm1"); - board_check(board); -} - -#endif -#endif // hasMMX - -/** - * @brief MMX translation of get_moves - * - * x 2 faster bench mobility on 32-bit x86. - * - */ -#ifdef USE_MSVC_X86 - -unsigned long long get_moves_mmx(unsigned long long P_, unsigned long long O_) -{ - unsigned int movesL, movesH, mO1, flip1, pre1; - __m64 P, O, M, mO, flip, pre; - - P = *(__m64 *) &P_; - O = *(__m64 *) &O_; mO1 = (unsigned int) O_ & 0x7e7e7e7e; - /* shift = +8 */ /* shift = +1 */ - flip = _m_pand(O, _m_psllqi(P, 8)); flip1 = mO1 & ((unsigned int) P_ << 1); - flip = _m_por(flip, _m_pand(O, _m_psllqi(flip, 8))); flip1 |= mO1 & (flip1 << 1); - pre = _m_pand(O, _m_psllqi(O, 8)); pre1 = mO1 & (mO1 << 1); - flip = _m_por(flip, _m_pand(pre, _m_psllqi(flip, 16))); flip1 |= pre1 & (flip1 << 2); - flip = _m_por(flip, _m_pand(pre, _m_psllqi(flip, 16))); flip1 |= pre1 & (flip1 << 2); - M = _m_psllqi(flip, 8); movesL = flip1 << 1; - /* shift = -8 */ /* shift = -1 */ - flip = _m_pand(O, _m_psrlqi(P, 8)); flip1 = mO1 & ((unsigned int) P_ >> 1); - flip = _m_por(flip, _m_pand(O, _m_psrlqi(flip, 8))); flip1 |= mO1 & (flip1 >> 1); - pre = _m_psrlqi(pre, 8); pre1 >>= 1; - flip = _m_por(flip, _m_pand(pre, _m_psrlqi(flip, 16))); flip1 |= pre1 & (flip1 >> 2); - flip = _m_por(flip, _m_pand(pre, _m_psrlqi(flip, 16))); flip1 |= pre1 & (flip1 >> 2); - M = _m_por(M, _m_psrlqi(flip, 8)); movesL |= flip1 >> 1; - /* shift = +7 */ - mO = _m_pand(O, *(__m64 *) &mask_7e); mO1 = (unsigned int)(O_ >> 32) & 0x7e7e7e7e; - flip = _m_pand(mO, _m_psllqi(P, 7)); - flip = _m_por(flip, _m_pand(mO, _m_psllqi(flip, 7))); - pre = _m_pand(mO, _m_psllqi(mO, 7)); - flip = _m_por(flip, _m_pand(pre, _m_psllqi(flip, 14))); - flip = _m_por(flip, _m_pand(pre, _m_psllqi(flip, 14))); - M = _m_por(M, _m_psllqi(flip, 7)); - /* shift = -7 */ /* shift = +1 */ - flip = _m_pand(mO, _m_psrlqi(P, 7)); flip1 = mO1 & ((unsigned int)(P_ >> 32) << 1); - flip = _m_por(flip, _m_pand(mO, _m_psrlqi(flip, 7))); flip1 |= mO1 & (flip1 << 1); - pre = _m_psrlqi(pre, 7); pre1 = mO1 & (mO1 << 1); - flip = _m_por(flip, _m_pand(pre, _m_psrlqi(flip, 14))); flip1 |= pre1 & (flip1 << 2); - flip = _m_por(flip, _m_pand(pre, _m_psrlqi(flip, 14))); flip1 |= pre1 & (flip1 << 2); - M = _m_por(M, _m_psrlqi(flip, 7)); movesH = flip1 << 1; - /* shift = +9 */ /* shift = -1 */ - flip = _m_pand(mO, _m_psllqi(P, 9)); flip1 = mO1 & ((unsigned int)(P_ >> 32) >> 1); - flip = _m_por(flip, _m_pand(mO, _m_psllqi(flip, 9))); flip1 |= mO1 & (flip1 >> 1); - pre = _m_pand(mO, _m_psllqi(mO, 9)); pre1 >>= 1; - flip = _m_por(flip, _m_pand(pre, _m_psllqi(flip, 18))); flip1 |= pre1 & (flip1 >> 2); - flip = _m_por(flip, _m_pand(pre, _m_psllqi(flip, 18))); flip1 |= pre1 & (flip1 >> 2); - M = _m_por(M, _m_psllqi(flip, 9)); movesH |= flip1 >> 1; - /* shift = -9 */ - flip = _m_pand(mO, _m_psrlqi(P, 9)); - flip = _m_por(flip, _m_pand(mO, _m_psrlqi(flip, 9))); - pre = _m_psrlqi(pre, 9); - flip = _m_por(flip, _m_pand(pre, _m_psrlqi(flip, 18))); - flip = _m_por(flip, _m_pand(pre, _m_psrlqi(flip, 18))); - M = _m_por(M, _m_psrlqi(flip, 9)); - - movesL |= _m_to_int(M); - movesH |= _m_to_int(_m_punpckhdq(M, M)); - _mm_empty(); - return (((unsigned long long) movesH << 32) | movesL) & ~(P_|O_); // mask with empties -} - -#else - -unsigned long long get_moves_mmx(unsigned long long P, unsigned long long O) -{ - unsigned long long moves; - __asm__ ( - "movl %1, %%ebx\n\t" "movd %1, %%mm4\n\t" // (movd for store-forwarding) - "movl %3, %%edi\n\t" "movd %3, %%mm5\n\t" - "andl $0x7e7e7e7e, %%edi\n\t" "punpckldq %2, %%mm4\n\t" - "punpckldq %4, %%mm5\n\t" - /* shift=-1 */ /* shift=-8 */ - "movl %%ebx, %%eax\n\t" "movq %%mm4, %%mm0\n\t" - "shrl $1, %%eax\n\t" "psrlq $8, %%mm0\n\t" - "andl %%edi, %%eax\n\t" "pand %%mm5, %%mm0\n\t" // 0 m7&o6 m6&o5 .. m1&o0 - "movl %%eax, %%edx\n\t" "movq %%mm0, %%mm1\n\t" - "shrl $1, %%eax\n\t" "psrlq $8, %%mm0\n\t" - "movl %%edi, %%ecx\n\t" "movq %%mm5, %%mm3\n\t" - "andl %%edi, %%eax\n\t" "pand %%mm5, %%mm0\n\t" // 0 0 m7&o6&o5 .. m2&o1&o0 - "shrl $1, %%ecx\n\t" "psrlq $8, %%mm3\n\t" - "orl %%edx, %%eax\n\t" "por %%mm1, %%mm0\n\t" // 0 m7&o6 (m6&o5)|(m7&o6&o5) .. (m1&o0) - "andl %%edi, %%ecx\n\t" "pand %%mm5, %%mm3\n\t" // 0 o7&o6 o6&o5 o5&o4 o4&o3 .. - "movl %%eax, %%edx\n\t" "movq %%mm0, %%mm2\n\t" - "shrl $2, %%eax\n\t" "psrlq $16, %%mm0\n\t" - "andl %%ecx, %%eax\n\t" "pand %%mm3, %%mm0\n\t" // 0 0 0 m7&o6&o5&o4 (m6&o5&o4&o3)|(m7&o6&o5&o4&o3) .. - "orl %%eax, %%edx\n\t" "por %%mm0, %%mm2\n\t" - "shrl $2, %%eax\n\t" "psrlq $16, %%mm0\n\t" - "andl %%ecx, %%eax\n\t" "pand %%mm3, %%mm0\n\t" // 0 0 0 0 0 m7&o6&..&o2 (m6&o5&..&o1)|(m7&o6&..&o1) .. - "orl %%edx, %%eax\n\t" "por %%mm0, %%mm2\n\t" - "shrl $1, %%eax\n\t" "psrlq $8, %%mm2\n\t" - /* shift=+1 */ /* shift=+8 */ - "movq %%mm4, %%mm0\n\t" - "addl %%ebx, %%ebx\n\t" "psllq $8, %%mm0\n\t" - "andl %%edi, %%ebx\n\t" "pand %%mm5, %%mm0\n\t" - "movl %%ebx, %%edx\n\t" "movq %%mm0, %%mm1\n\t" - "addl %%ebx, %%ebx\n\t" "psllq $8, %%mm0\n\t" - "andl %%edi, %%ebx\n\t" "pand %%mm5, %%mm0\n\t" - "orl %%ebx, %%edx\n\t" "por %%mm1, %%mm0\n\t" - "addl %%ecx, %%ecx\n\t" "psllq $8, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "leal (,%%edx,4), %%ebx\n\t" "psllq $16, %%mm0\n\t" - "andl %%ecx, %%ebx\n\t" "pand %%mm3, %%mm0\n\t" - "orl %%ebx, %%edx\n\t" "por %%mm0, %%mm1\n\t" - "shll $2, %%ebx\n\t" "psllq $16, %%mm0\n\t" - "andl %%ecx, %%ebx\n\t" "pand %%mm3, %%mm0\n\t" - "orl %%edx, %%ebx\n\t" "por %%mm1, %%mm0\n\t" - "addl %%ebx, %%ebx\n\t" "psllq $8, %%mm0\n\t" - "orl %%eax, %%ebx\n\t" "por %%mm0, %%mm2\n\t" - /* shift=-7 */ - "pand %5, %%mm5\n\t" - "movq %%mm4, %%mm0\n\t" - "psrlq $7, %%mm0\n\t" - "pand %%mm5, %%mm0\n\t" - "movq %%mm0, %%mm1\n\t" - "psrlq $7, %%mm0\n\t" - "pand %%mm5, %%mm0\n\t" - "movq %%mm5, %%mm3\n\t" - "por %%mm1, %%mm0\n\t" - "psrlq $7, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "pand %%mm5, %%mm3\n\t" - "psrlq $14, %%mm0\n\t" - "pand %%mm3, %%mm0\n\t" - "movl %2, %%esi\n\t" "por %%mm0, %%mm1\n\t" - "movl %4, %%edi\n\t" "psrlq $14, %%mm0\n\t" - "andl $0x7e7e7e7e,%%edi\n\t" "pand %%mm3, %%mm0\n\t" - "movl %%edi, %%ecx\n\t" "por %%mm1, %%mm0\n\t" - "shrl $1, %%ecx\n\t" "psrlq $7, %%mm0\n\t" - "andl %%edi, %%ecx\n\t" "por %%mm0, %%mm2\n\t" - /* shift=-1 */ /* shift=+7 */ - "movl %%esi, %%eax\n\t" "movq %%mm4, %%mm0\n\t" - "shrl $1, %%eax\n\t" "psllq $7, %%mm0\n\t" - "andl %%edi, %%eax\n\t" "pand %%mm5, %%mm0\n\t" - "movl %%eax, %%edx\n\t" "movq %%mm0, %%mm1\n\t" - "shrl $1, %%eax\n\t" "psllq $7, %%mm0\n\t" - "andl %%edi, %%eax\n\t" "pand %%mm5, %%mm0\n\t" - "orl %%edx, %%eax\n\t" "por %%mm1, %%mm0\n\t" - "psllq $7, %%mm3\n\t" - "movl %%eax, %%edx\n\t" "movq %%mm0, %%mm1\n\t" - "shrl $2, %%eax\n\t" "psllq $14, %%mm0\n\t" - "andl %%ecx, %%eax\n\t" "pand %%mm3, %%mm0\n\t" - "orl %%eax, %%edx\n\t" "por %%mm0, %%mm1\n\t" - "shrl $2, %%eax\n\t" "psllq $14, %%mm0\n\t" - "andl %%ecx, %%eax\n\t" "pand %%mm3, %%mm0\n\t" - "orl %%edx, %%eax\n\t" "por %%mm1, %%mm0\n\t" - "shrl $1, %%eax\n\t" "psllq $7, %%mm0\n\t" - "por %%mm0, %%mm2\n\t" - /* shift=+1 */ /* shift=-9 */ - "movq %%mm4, %%mm0\n\t" - "addl %%esi, %%esi\n\t" "psrlq $9, %%mm0\n\t" - "andl %%edi, %%esi\n\t" "pand %%mm5, %%mm0\n\t" - "movl %%esi, %%edx\n\t" "movq %%mm0, %%mm1\n\t" - "addl %%esi, %%esi\n\t" "psrlq $9, %%mm0\n\t" - "andl %%edi, %%esi\n\t" "pand %%mm5, %%mm0\n\t" - "movq %%mm5, %%mm3\n\t" - "orl %%esi, %%edx\n\t" "por %%mm1, %%mm0\n\t" - "psrlq $9, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "addl %%ecx, %%ecx\n\t" "pand %%mm5, %%mm3\n\t" - "leal (,%%edx,4), %%esi\n\t" "psrlq $18, %%mm0\n\t" - "andl %%ecx, %%esi\n\t" "pand %%mm3, %%mm0\n\t" - "orl %%esi, %%edx\n\t" "por %%mm0, %%mm1\n\t" - "shll $2, %%esi\n\t" "psrlq $18, %%mm0\n\t" - "andl %%ecx, %%esi\n\t" "pand %%mm3, %%mm0\n\t" - "orl %%edx, %%esi\n\t" "por %%mm1, %%mm0\n\t" - "addl %%esi, %%esi\n\t" "psrlq $9, %%mm0\n\t" - "orl %%eax, %%esi\n\t" "por %%mm0, %%mm2\n\t" - /* shift=+9 */ - "movq %%mm4, %%mm0\n\t" - "psllq $9, %%mm0\n\t" - "pand %%mm5, %%mm0\n\t" - "movq %%mm0, %%mm1\n\t" - "psllq $9, %%mm0\n\t" - "pand %%mm5, %%mm0\n\t" - "por %%mm1, %%mm0\n\t" - "psllq $9, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "psllq $18, %%mm0\n\t" - "pand %%mm3, %%mm0\n\t" - "movl %1, %%eax\n\t" "por %%mm0, %%mm1\n\t" - "movl %2, %%edx\n\t" "psllq $18, %%mm0\n\t" - "orl %3, %%eax\n\t" "pand %%mm3, %%mm0\n\t" - "orl %4, %%edx\n\t" "por %%mm1, %%mm0\n\t" - "notl %%eax\n\t" "psllq $9, %%mm0\n\t" - "notl %%edx\n\t" "por %%mm0, %%mm2\n\t" - /* mm2|(esi:ebx) is the pseudo-feasible moves at this point. */ - /* Let edx:eax be the feasible moves, i.e., mm2 restricted to empty squares. */ - "movd %%mm2, %%ecx\n\t" "punpckhdq %%mm2, %%mm2\n\t" - "orl %%ecx, %%ebx\n\t" - "movd %%mm2, %%ecx\n\t" - "orl %%ecx, %%esi\n\t" - "andl %%ebx, %%eax\n\t" - "andl %%esi, %%edx\n\t" - "emms" /* Reset the FP/MMX unit. */ - : "=&A" (moves) - : "m" (P), "m" (((unsigned int *)&P)[1]), "m" (O), "m" (((unsigned int *)&O)[1]), "m" (mask_7e) - : "ebx", "ecx", "esi", "edi", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5" ); - - return moves; -} -#endif - -/** - * @brief MMX translation of get_stability() - * - * x 1.5 faster bench stability on 32-bit x86. - * - */ -#ifdef USE_MSVC_X86 - -int get_stability_mmx(unsigned long long P_, unsigned long long O_) -{ - __m64 P, O, P_central, disc, full_h, full_v, full_d7, full_d9, full_l, full_r, stable; - __m64 stable_h, stable_v, stable_d7, stable_d9, old_stable, m; - unsigned int OL, OH, PL, PH, t, a1a8po, h1h8po; - static const unsigned long long MFF = 0xffffffffffffffff; - static const unsigned long long edge = 0xff818181818181ffULL; - static const unsigned long long e7[] = { 0xffff030303030303, 0xc0c0c0c0c0c0ffff, 0xffffffff0f0f0f0f, 0xf0f0f0f0ffffffff }; - static const unsigned long long e9[] = { 0xffffc0c0c0c0c0c0, 0x030303030303ffff, 0x0f0f0f0ff0f0f0f0 }; - - P = *(__m64 *) &P_; - O = *(__m64 *) &O_; - disc = _m_por(P, O); - P_central = _m_pandn(*(__m64 *) &edge, P); - - // get full lines and set intersection of them to stable - // get_full_lines_mmx(full_h, disc, 1, e1); - full_h = _m_pcmpeqb(*(__m64 *) &MFF, disc); - stable = _m_pand(P_central, full_h); - - // get_full_lines_mmx(full_v, disc, 8, e8); - full_v = _m_pand(_m_punpcklbw(disc, disc), _m_punpckhbw(disc, disc)); // (d,d,c,c,b,b,a,a) & (h,h,g,g,f,f,e,e) - full_v = _m_pand(_m_punpcklwd(full_v, full_v), _m_punpckhwd(full_v, full_v)); // (dh,dh,dh,dh,cg,cg,cg,cg) & (bf,bf,bf,bf,ae,ae,ae,ae) - full_v = _m_pand(_m_punpckldq(full_v, full_v), _m_punpckhdq(full_v, full_v)); // (bdfh*4, bdfh*4) & (aceg*4, aceg*4) - stable = _m_pand(stable, full_v); - - // get_full_lines_mmx(full_d7, disc, 7, e7); - full_l = _m_pand(disc, _m_por(*(__m64 *) &edge, _m_psrlqi(disc, 7))); - full_r = _m_pand(disc, _m_por(*(__m64 *) &edge, _m_psllqi(disc, 7))); - full_l = _m_pand(full_l, _m_por(*(__m64 *) &e7[0], _m_psrlqi(full_l, 14))); - full_r = _m_pand(full_r, _m_por(*(__m64 *) &e7[1], _m_psllqi(full_r, 14))); - full_l = _m_pand(full_l, _m_por(*(__m64 *) &e7[2], _m_psrlqi(full_l, 28))); - full_r = _m_pand(full_r, _m_por(*(__m64 *) &e7[3], _m_psllqi(full_r, 28))); - full_d7 = _m_pand(full_l, full_r); - stable = _m_pand(stable, full_d7); - - // get_full_lines_mmx(full_d9, disc, 9, e9); - - full_l = _m_pand(disc, _m_por(*(__m64 *) &edge, _m_psrlqi(disc, 9))); - full_r = _m_pand(disc, _m_por(*(__m64 *) &edge, _m_psllqi(disc, 9))); - full_l = _m_pand(full_l, _m_por(*(__m64 *) &e9[0], _m_psrlqi(full_l, 18))); - full_r = _m_pand(full_r, _m_por(*(__m64 *) &e9[1], _m_psllqi(full_r, 18))); - full_d9 = _m_pand(_m_pand(full_l, full_r), _m_por(*(__m64 *) &e9[2], _m_por(_m_psrlqi(full_l, 36), _m_psllqi(full_r, 36)))); - stable = _m_pand(stable, full_d9); - - // compute the exact stable edges (from precomputed tables) - OL = (unsigned int) O_; OH = (unsigned int)(O_ >> 32); - PL = (unsigned int) P_; PH = (unsigned int)(P_ >> 32); - a1a8po = ((((PL & 0x01010101u) + ((PH & 0x01010101u) << 4)) * 0x01020408u) >> 24) * 256 - + ((((OL & 0x01010101u) + ((OH & 0x01010101u) << 4)) * 0x01020408u) >> 24); - h1h8po = ((((PH & 0x80808080u) + ((PL & 0x80808080u) >> 4)) * 0x00204081u) >> 24) * 256 - + ((((OH & 0x80808080u) + ((OL & 0x80808080u) >> 4)) * 0x00204081u) >> 24); - stable = _m_por(stable, _m_por(_m_por(*(__m64 *) &A1_A8[edge_stability[a1a8po]], - _m_psllqi(*(__m64 *) &A1_A8[edge_stability[h1h8po]], 7)), - _m_punpckldq(_m_from_int(edge_stability[(PL & 0xff) * 256 + (OL & 0xff)]), - _m_from_int(edge_stability[((PH >> 16) & 0xff00) + (OH >> 24)] << 24)))); - - // now compute the other stable discs (ie discs touching another stable disc in each flipping direction). - t = _m_to_int(_m_packsswb(stable, stable)); - if (t) { - do { - old_stable = stable; - stable_h = _m_por(_m_por(_m_psrlqi(stable, 1), _m_psllqi(stable, 1)), full_h); - stable_v = _m_por(_m_por(_m_psrlqi(stable, 8), _m_psllqi(stable, 8)), full_v); - stable_d7 = _m_por(_m_por(_m_psrlqi(stable, 7), _m_psllqi(stable, 7)), full_d7); - stable_d9 = _m_por(_m_por(_m_psrlqi(stable, 9), _m_psllqi(stable, 9)), full_d9); - stable = _m_por(stable, _m_pand(_m_pand(_m_pand(_m_pand(stable_h, stable_v), stable_d7), stable_d9), P_central)); - m = _m_pxor(stable, old_stable); - } while (_m_to_int(_m_packsswb(m, m)) != 0); - -#ifdef POPCOUNT - t = __popcnt(_m_to_int(stable)) + __popcnt(_m_to_int(_m_psrlqi(stable, 32))); -#else - m = _m_psubd(stable, _m_pand(_m_psrlqi(stable, 1), *(__m64 *) &mask_55)); - m = _m_paddd(_m_pand(m, *(__m64 *) &mask_33), _m_pand(_m_psrlqi(m, 2), *(__m64 *) &mask_33)); - m = _m_pand(_m_paddd(m, _m_psrlqi(m, 4)), *(__m64 *) &mask_0F); - t = ((unsigned int) _m_to_int(_m_paddb(m, _m_psrlqi(m, 32))) * 0x01010101u) >> 24; -#endif - } - _mm_empty(); - return t; -} - -#elif defined(USE_GAS_MMX) && !(defined(__clang__) && (__clang__major__ < 3)) -// LLVM ERROR: Unsupported asm: input constraint with a matching output constraint of incompatible type! - -#define get_full_lines_mmx(result,disc,dir,edge) __asm__ (\ - "movq %2, %%mm0\n\t" "movq %2, %%mm1\n\t"\ - "psrlq %3, %%mm0\n\t" "psllq %3, %%mm1\n\t"\ - "por %6, %%mm0\n\t" "por %6, %%mm1\n\t"\ - "pand %2, %%mm0\n\t" "pand %2, %%mm1\n\t"\ - "movq %%mm0, %%mm2\n\t" "movq %%mm1, %%mm3\n\t"\ - "psrlq %4, %%mm0\n\t" "psllq %4, %%mm1\n\t"\ - "por %7, %%mm0\n\t" "por %8, %%mm1\n\t"\ - "pand %%mm2, %%mm0\n\t" "pand %%mm3, %%mm1\n\t"\ - "movq %%mm0, %%mm2\n\t" "pand %%mm1, %%mm0\n\t"\ - "psrlq %5, %%mm2\n\t" "psllq %5, %%mm1\n\t"\ - "por %9, %%mm2\n\t" "por %10, %%mm1\n\t"\ - "pand %%mm2, %%mm0\n\t" "pand %%mm1, %%mm0\n\t"\ - "movq %%mm0, %0\n\t"\ - "pand %%mm0, %1"\ - : "=m" (result), "+y" (stable)\ - : "y" (disc), "i" (dir), "i" (dir * 2), "i" (dir * 4),\ - "my" (e0), "m" (edge[0]), "m" (edge[1]), "m" (edge[2]), "m" (edge[3])\ - : "mm0", "mm1", "mm2", "mm3"); - -int get_stability_mmx(unsigned long long P_, unsigned long long O_) -{ - __m64 P, O, P_central, disc, full_h, full_v, full_d7, full_d9, stable; -#ifdef hasSSE2 - __v2di PO; -#endif - unsigned int OL, OH, PL, PH, t, a1a8po, h1h8po; - static const unsigned long long e0 = 0xff818181818181ffULL; - static const unsigned long long e7[] = { 0xffff030303030303, 0xc0c0c0c0c0c0ffff, 0xffffffff0f0f0f0f, 0xf0f0f0f0ffffffff }; - static const unsigned long long e9[] = { 0xffffc0c0c0c0c0c0, 0x030303030303ffff, 0xfffffffff0f0f0f0, 0x0f0f0f0fffffffff }; - - __asm__ ( - "movd %2, %0\n\t" "movd %4, %1\n\t" // (movd for store-forwarding) - "punpckldq %3, %0\n\t" "punpckldq %5, %1" - : "=&y" (P), "=&y" (O) : "m" (P_), "m" (((unsigned int *)&P_)[1]), "m" (O_), "m" (((unsigned int *)&O_)[1])); -#ifdef hasSSE2 - PO = _mm_unpacklo_epi64(_mm_movpi64_epi64(O), _mm_movpi64_epi64(P)); -#endif - __asm__ ( - "por %3, %0\n\t" - "pandn %3, %1\n\t" - "movq %1, %2" - : "=y" (disc), "=y" (stable), "=m" (P_central) - : "y" (P), "0" (O), "1" (e0)); - - // get full lines and set intersection of them to stable - // get_full_lines_mmx(full_h, disc, 1, e1); - __asm__ ( - "pcmpeqb %%mm0, %%mm0\n\t" - "pcmpeqb %2, %%mm0\n\t" - "movq %%mm0, %0\n\t" - "pand %%mm0, %1" - : "=m" (full_h), "+y" (stable) : "y" (disc) : "mm0"); - // get_full_lines_mmx(full_v, disc, 8, e8); - __asm__ ( - "movq %2, %%mm0\n\t" "movq %2, %%mm1\n\t" - "punpcklbw %%mm0, %%mm0\n\t" "punpckhbw %%mm1, %%mm1\n\t" - "pand %%mm1, %%mm0\n\t" // (d,d,c,c,b,b,a,a) & (h,h,g,g,f,f,e,e) -#ifdef hasSSE2 - "pshufw $177, %%mm0, %%mm1\n\t" - "pand %%mm1, %%mm0\n\t" // (cg,cg,dh,dh,ae,ae,bf,bf) & (dh,dh,cg,cg,bf,bf,ae,ae) - "pshufw $78, %%mm0, %%mm1\n\t" - "pand %%mm1, %%mm0\n\t" // (abef*4, cdgh*4) & (cdgh*4, abef*4) -#else - "movq %%mm0, %%mm1\n\t" - "punpcklwd %%mm0, %%mm0\n\t" "punpckhwd %%mm1, %%mm1\n\t" - "pand %%mm1, %%mm0\n\t" // (dh,dh,dh,dh,cg,cg,cg,cg) & (bf,bf,bf,bf,ae,ae,ae,ae) - "movq %%mm0, %%mm1\n\t" - "punpckldq %%mm0, %%mm0\n\t" "punpckhdq %%mm1, %%mm1\n\t" - "pand %%mm1, %%mm0\n\t" // (bdfh*4, bdfh*4) & (aceg*4, aceg*4) -#endif - "movq %%mm0, %0\n\t" - "pand %%mm0, %1" - : "=m" (full_v), "+y" (stable) : "y" (disc) : "mm0", "mm1"); - get_full_lines_mmx(full_d7, disc, 7, e7); - get_full_lines_mmx(full_d9, disc, 9, e9); - - // compute the exact stable edges (from precomputed tables) - OL = (unsigned int) O_; OH = (unsigned int)(O_ >> 32); - PL = (unsigned int) P_; PH = (unsigned int)(P_ >> 32); -#ifdef hasSSE2 - a1a8po = _mm_movemask_epi8(_mm_slli_epi64(PO, 7)); - h1h8po = _mm_movemask_epi8(PO); -#else - a1a8po = ((((PL & 0x01010101u) + ((PH & 0x01010101u) << 4)) * 0x01020408u) >> 24) * 256 - + ((((OL & 0x01010101u) + ((OH & 0x01010101u) << 4)) * 0x01020408u) >> 24); - h1h8po = ((((PH & 0x80808080u) + ((PL & 0x80808080u) >> 4)) * 0x00204081u) >> 24) * 256 - + ((((OH & 0x80808080u) + ((OL & 0x80808080u) >> 4)) * 0x00204081u) >> 24); -#endif - __asm__( - "movd %1, %%mm0\n\t" "por %3, %0\n\t" - "movd %2, %%mm1\n\t" - "punpckldq %%mm1, %%mm0\n\t" "movq %4, %%mm1\n\t" - "por %%mm0, %0\n\t" "psllq $7, %%mm1\n\t" - "por %%mm1, %0" - : "+y" (stable) - : "g" ((int) edge_stability[(PL & 0xff) * 256 + (OL & 0xff)]), - "g" (edge_stability[((PH >> 16) & 0xff00) + (OH >> 24)] << 24), - "m" (A1_A8[edge_stability[a1a8po]]), - "m" (A1_A8[edge_stability[h1h8po]]) - : "mm0", "mm1"); - - // now compute the other stable discs (ie discs touching another stable disc in each flipping direction). - __asm__ ( - "movq %1, %%mm0\n\t" - "packsswb %%mm0, %%mm0\n\t" - "movd %%mm0, %0\n\t" - : "=g" (t) : "y" (stable) : "mm0" ); - - if (t) { - do { - __asm__ ( - "movq %1, %%mm3\n\t" - "movq %6, %1\n\t" - "movq %%mm3, %%mm0\n\t" "movq %%mm3, %%mm1\n\t" - "psrlq $1, %%mm0\n\t" "psllq $1, %%mm1\n\t" "movq %%mm3, %%mm2\n\t" - "por %%mm1, %%mm0\n\t" "movq %%mm3, %%mm1\n\t" "psrlq $7, %%mm2\n\t" - "por %2, %%mm0\n\t" "psllq $7, %%mm1\n\t" "por %%mm1, %%mm2\n\t" - "pand %%mm0, %1\n\t" "por %4, %%mm2\n\t" - "movq %%mm3, %%mm0\n\t" "movq %%mm3, %%mm1\n\t" "pand %%mm2, %1\n\t" - "psrlq $8, %%mm0\n\t" "psllq $8, %%mm1\n\t" "movq %%mm3, %%mm2\n\t" - "por %%mm1, %%mm0\n\t" "movq %%mm3, %%mm1\n\t" "psrlq $9, %%mm2\n\t" - "por %3, %%mm0\n\t" "psllq $9, %%mm1\n\t" "por %%mm1, %%mm2\n\t" - "pand %%mm0, %1\n\t" "por %5, %%mm2\n\t" - "pand %%mm2, %1\n\t" - "por %%mm3, %1\n\t" - "pxor %1, %%mm3\n\t" - "packsswb %%mm3, %%mm3\n\t" - "movd %%mm3, %0" - : "=g" (t), "+y" (stable) - : "m" (full_h), "m" (full_v), "m" (full_d7), "m" (full_d9), "m" (P_central) - : "mm0", "mm1", "mm2", "mm3"); - } while (t); - - // bit_count(stable) -#ifdef POPCOUNT - __asm__ ( - "movd %1, %0\n\t" - "psrlq $32, %1\n\t" - "movd %1, %%edx\n\t" - "popcntl %0, %0\n\t" - "popcntl %%edx, %%edx\n\t" - "addl %%edx, %0" - : "=&a" (t) : "y" (stable) : "edx"); -#else - __asm__ ( - "movq %1, %%mm0\n\t" - "psrlq $1, %1\n\t" - "pand %2, %1\n\t" - "psubd %1, %%mm0\n\t" - - "movq %%mm0, %%mm1\n\t" - "psrlq $2, %%mm0\n\t" - "pand %3, %%mm1\n\t" - "pand %3, %%mm0\n\t" - "paddd %%mm1, %%mm0\n\t" - - "movq %%mm0, %%mm1\n\t" - "psrlq $4, %%mm0\n\t" - "paddd %%mm1, %%mm0\n\t" - "pand %4, %%mm0\n\t" - #ifdef hasSSE2 - "pxor %%mm1, %%mm1\n\t" - "psadbw %%mm1, %%mm0\n\t" - "movd %%mm0, %0\n\t" - #else - "movq %%mm0, %%mm1\n\t" - "psrlq $32, %%mm0\n\t" - "paddb %%mm1, %%mm0\n\t" - - "movd %%mm0, %0\n\t" - "imull $0x01010101, %0, %0\n\t" - "shrl $24, %0" - #endif - : "=a" (t) : "y" (stable), "m" (mask_55), "my" (mask_33), "m" (mask_0F) : "mm0", "mm1"); -#endif - } - __asm__ ( "emms" ); - return t; -} -#endif // USE_MSVC_X86 - -/** - * @brief MMX translation of get_potential_mobility - * - * @param P bitboard with player's discs. - * @param O bitboard with opponent's discs. - * @return a count of potential moves. - */ -#ifdef USE_MSVC_X86 - -int get_potential_mobility_mmx(unsigned long long P, unsigned long long O) -{ - __m64 m, mO; - int count; - static const unsigned long long mask_v = 0x00ffffffffffff00ULL; - // static const unsigned long long mask_d = 0x007e7e7e7e7e7e00ULL; // = mask_7e & mask_v -#ifdef POPCOUNT - int mh, ml; -#else - static const unsigned long long mask_15 = 0x1555555555555515ULL; - static const unsigned long long mask_01 = 0x0100000000000001ULL; -#endif - - mO = _m_pand(*(__m64 *) &O, *(__m64 *) &mask_7e); - m = _m_por(_m_psllqi(mO, 1), _m_psrlqi(mO, 1)); - mO = _m_pand(*(__m64 *) &O, *(__m64 *) &mask_v); - m = _m_por(m, _m_por(_m_psllqi(mO, 8), _m_psrlqi(mO, 8))); - mO = _m_pand(mO, *(__m64 *) &mask_7e); - m = _m_por(m, _m_por(_m_psllqi(mO, 7), _m_psrlqi(mO, 7))); - m = _m_por(m, _m_por(_m_psllqi(mO, 9), _m_psrlqi(mO, 9))); - m = _m_pandn(_m_por(*(__m64 *) &O, *(__m64 *) &P), m); - -#ifdef POPCOUNT - ml = _m_to_int(m); - mh = _m_to_int(_m_psrlqi(m, 32)); - count = __popcnt(ml) + __popcnt(mh) + __popcnt((ml & 0x00000081) + (mh & 0x81000000)); -#else - m = _m_paddd(_m_psubd(m, _m_pand(_m_psrlqi(m, 1), *(__m64 *) &mask_15)), _m_pand(m, *(__m64 *) &mask_01)); - m = _m_paddd(_m_pand(m, *(__m64 *) &mask_33), _m_pand(_m_psrlqi(m, 2), *(__m64 *) &mask_33)); - m = _m_pand(_m_paddd(m, _m_psrlqi(m, 4)), *(__m64 *) &mask_0F); - count = ((unsigned int) _m_to_int(_m_paddb(m, _m_psrlqi(m, 32))) * 0x01010101u) >> 24; -#endif - _mm_empty(); - return count; -} - -#elif defined(USE_GAS_MMX) - -int get_potential_mobility_mmx(unsigned long long P, unsigned long long O) -{ - int count; - static const unsigned long long mask_v = 0x00ffffffffffff00ULL; - // static const unsigned long long mask_d = 0x007e7e7e7e7e7e00ULL; // = mask_7e & mask_v -#ifndef POPCOUNT - static const unsigned long long mask_15 = 0x1555555555555515ULL; - static const unsigned long long mask_01 = 0x0100000000000001ULL; -#endif - - __asm__ ( - "movq %3, %%mm2\n\t" "movq %4, %%mm5\n\t" - "pand %2, %%mm2\n\t" "pand %2, %%mm5\n\t" "movq %%mm2, %%mm3\n\t" - "movq %%mm2, %%mm4\n\t" "movq %%mm5, %%mm6\n\t" "pand %%mm5, %%mm3\n\t" - "psllq $1, %%mm2\n\t" "psllq $8, %%mm5\n\t" - "psrlq $1, %%mm4\n\t" "psrlq $8, %%mm6\n\t" - "por %%mm4, %%mm2\n\t" "por %%mm6, %%mm5\n\t" - "por %%mm5, %%mm2\n\t" - "movq %%mm3, %%mm5\n\t" - "movq %%mm3, %%mm4\n\t" "movq %%mm5, %%mm6\n\t" - "psllq $7, %%mm3\n\t" "psllq $9, %%mm5\n\t" - "psrlq $7, %%mm4\n\t" "psrlq $9, %%mm6\n\t" - "por %%mm4, %%mm3\n\t" "por %%mm6, %%mm5\n\t" - "por %%mm3, %%mm2\n\t" "por %%mm5, %%mm2\n\t" - "por %1, %2\n\t" - "pandn %%mm2, %2\n\t" - -#ifdef POPCOUNT - "movd %2, %%ecx\n\t" - "popcntl %%ecx, %0\n\t" "andl $0x00000081, %%ecx\n\t" - "psrlq $32, %2\n\t" "popcntl %%ecx, %%ecx\n\t" - "movd %2, %%edx\n\t" "addl %%ecx, %0\n\t" - "popcntl %%edx, %%ecx\n\t" "andl $0x81000000, %%edx\n\t" - "addl %%ecx, %0\n\t" "popcntl %%edx, %%edx\n\t" - "addl %%edx, %0\n\t" - "emms" - : "=g" (count) : "y" (P), "y" (O), "m" (mask_7e), "m" (mask_v) - : "ecx", "edx", "mm2", "mm3", "mm4", "mm5", "mm6"); - -#else - "movq %2, %1\n\t" "movq %2, %%mm2\n\t" - "psrlq $1, %2\n\t" - "pand %5, %2\n\t" "pand %6, %%mm2\n\t" - "psubd %2, %1\n\t" "paddd %%mm2, %1\n\t" - - "movq %1, %2\n\t" - "psrlq $2, %1\n\t" - "pand %7, %2\n\t" - "pand %7, %1\n\t" - "paddd %2, %1\n\t" - - "movq %1, %2\n\t" - "psrlq $4, %1\n\t" - "paddd %2, %1\n\t" - "pand %8, %1\n\t" - #ifdef hasSSE2 - "pxor %2, %2\n\t" - "psadbw %2, %1\n\t" - "movd %1, %0\n\t" - #else - "movq %1, %2\n\t" - "psrlq $32, %1\n\t" - "paddb %2, %1\n\t" - - "movd %1, %0\n\t" - "imull $0x01010101, %0, %0\n\t" - "shrl $24, %0\n\t" - #endif - "emms" - : "=g" (count) - : "y" (P), "y" (O), "m" (mask_7e), "m" (mask_v), - "m" (mask_15), "m" (mask_01), "m" (mask_33), "m" (mask_0F) - : "mm2", "mm3", "mm4", "mm5", "mm6"); -#endif - - return count; -} -#endif - -/** - * @brief MMX translation of board_get_hash_code. - * - * @param p pointer to 16 bytes to hash. - * @return the hash code of the bitboard - */ - -#if defined(USE_GAS_MMX) && defined(__3dNOW__) - -unsigned long long board_get_hash_code_mmx(const unsigned char *p) -{ - unsigned long long h; - - __asm__ volatile ( - "movq %0, %%mm0\n\t" "movq %1, %%mm1" - : : "m" (hash_rank[0][p[0]]), "m" (hash_rank[1][p[1]])); - __asm__ volatile ( - "pxor %0, %%mm0\n\t" "pxor %1, %%mm1" - : : "m" (hash_rank[2][p[2]]), "m" (hash_rank[3][p[3]])); - __asm__ volatile ( - "pxor %0, %%mm0\n\t" "pxor %1, %%mm1" - : : "m" (hash_rank[4][p[4]]), "m" (hash_rank[5][p[5]])); - __asm__ volatile ( - "pxor %0, %%mm0\n\t" "pxor %1, %%mm1" - : : "m" (hash_rank[6][p[6]]), "m" (hash_rank[7][p[7]])); - __asm__ volatile ( - "pxor %0, %%mm0\n\t" "pxor %1, %%mm1" - : : "m" (hash_rank[8][p[8]]), "m" (hash_rank[9][p[9]])); - __asm__ volatile ( - "pxor %0, %%mm0\n\t" "pxor %1, %%mm1" - : : "m" (hash_rank[10][p[10]]), "m" (hash_rank[11][p[11]])); - __asm__ volatile ( - "pxor %0, %%mm0\n\t" "pxor %1, %%mm1" - : : "m" (hash_rank[12][p[12]]), "m" (hash_rank[13][p[13]])); - __asm__ volatile ( - "pxor %1, %%mm0\n\t" "pxor %2, %%mm1\n\t" - "pxor %%mm1, %%mm0\n\t" - "movd %%mm0, %%eax\n\t" - "punpckhdq %%mm0, %%mm0\n\t" - "movd %%mm0, %%edx\n\t" - "emms" - : "=A" (h) - : "m" (hash_rank[14][p[14]]), "m" (hash_rank[15][p[15]]) - : "mm0", "mm1"); - - return h; -} - -#endif // __3dNOW - -#if !defined(hasMMX) && defined(USE_GAS_MMX) - #pragma GCC pop_options -#endif ->>>>>>> 1dc032e (Improve visual c compatibility) -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) diff --git a/src/board_sse.c b/src/board_sse.c index 23acd3a..9a5ef40 100644 --- a/src/board_sse.c +++ b/src/board_sse.c @@ -1,90 +1,33 @@ -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) /** * @file board_sse.c * * SSE/AVX translation of some board.c functions * -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> b4fb773 (AVX optimized board_unique) * @date 2014 - 2024 * @author Toshihiko Okuhara * @version 4.5 -======= - * @date 2014 - 2020 - * @author Toshihiko Okuhara - * @version 4.4 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - * @date 2014 - 2022 -======= - * @date 2014 - 2023 ->>>>>>> 8566ed0 (vector call version of board_next & get_moves) - * @author Toshihiko Okuhara - * @version 4.5 ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) */ #include "bit.h" #include "hash.h" #include "board.h" -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -#if defined(ANDROID) && !defined(HAS_CPU_64) && !defined(hasSSE2) -======= -#if defined(ANDROID) && !defined(hasNeon) && !defined(hasSSE2) ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= #if defined(ANDROID) && !defined(HAS_CPU_64) && !defined(hasSSE2) ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) #include "android/cpu-features.h" bool hasSSE2 = false; void init_neon (void) { -<<<<<<< HEAD -<<<<<<< HEAD #ifdef __arm__ -======= -#ifdef __arm__ ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= - #ifdef __arm__ ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) if (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) { #if (MOVE_GENERATOR == MOVE_GENERATOR_BITSCAN) extern unsigned long long (*flip_neon[66])(const unsigned long long, const unsigned long long); memcpy(flip, flip_neon, sizeof(flip_neon)); #endif -<<<<<<< HEAD -<<<<<<< HEAD hasSSE2 = true; // for eval_update_sse } #elif defined(__i386__) // android x86 w/o SSE2 - uncommon and not tested -<<<<<<< HEAD -======= - hasSSE2 = true; -======= - hasSSE2 = true; // for eval_update_sse ->>>>>>> e3cea41 (New vectored bit_weighted_count_sse) - } -<<<<<<< HEAD -#else // android x86 w/o SSE2 - uncommon and not tested ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= - #else // android x86 w/o SSE2 - uncommon and not tested ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) -======= ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) int cpuid_edx, cpuid_ecx; __asm__ ( "movl $1, %%eax\n\t" @@ -92,41 +35,18 @@ void init_neon (void) : "=d" (cpuid_edx), "=c" (cpuid_ecx) :: "%eax", "%ebx" ); if ((cpuid_edx & 0x04000000u) != 0) hasSSE2 = true; -<<<<<<< HEAD -<<<<<<< HEAD #endif } #endif -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -#endif -======= - #endif ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) -} -#endif - ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) /** * @brief SSE2 translation of board_symetry * * @param board input board -<<<<<<< HEAD -<<<<<<< HEAD -======= - * @param s symetry ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 6bc747d (Split board_flip_* from board_symetry) * @param sym symetric output board */ #ifdef hasSSE2 -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD static __m128i vectorcall board_horizontal_mirror_sse(__m128i bb) { const __m128i mask0F0F = _mm_set1_epi16(0x0F0F); @@ -315,202 +235,19 @@ int board_unique(const Board *board, Board *unique) } #endif -<<<<<<< HEAD -/** - * @brief Compute a board resulting of a move played on a previous board. - * - * @param OP board to play the move on. -======= -void board_symetry(const Board *board, const int s, Board *sym) -======= -void board_horizontal_mirror(const Board *board, Board *sym) ->>>>>>> 6bc747d (Split board_flip_* from board_symetry) -{ - __m128i bb = _mm_loadu_si128((__m128i *) board); -======= -static __m128i vectorcall board_horizontal_mirror_sse(__m128i bb) -{ ->>>>>>> a23c3d4 (SSE optimized board_symetry again) - const __m128i mask0F0F = _mm_set1_epi16(0x0F0F); - #if defined(__SSSE3__) || defined(__AVX__) // pshufb (cf. http://wm.ite.pl/articles/sse-popcount.html) - const __m128i mbitrev = _mm_set_epi8(15, 7, 11, 3, 13, 5, 9, 1, 14, 6, 10, 2, 12, 4, 8, 0); - bb = _mm_or_si128(_mm_shuffle_epi8(mbitrev, _mm_and_si128(_mm_srli_epi64(bb, 4), mask0F0F)), - _mm_slli_epi64(_mm_shuffle_epi8(mbitrev, _mm_and_si128(bb, mask0F0F)), 4)); - #else - const __m128i mask5555 = _mm_set1_epi16(0x5555); - const __m128i mask3333 = _mm_set1_epi16(0x3333); - bb = _mm_or_si128(_mm_and_si128(_mm_srli_epi64(bb, 1), mask5555), _mm_slli_epi64(_mm_and_si128(bb, mask5555), 1)); - bb = _mm_or_si128(_mm_and_si128(_mm_srli_epi64(bb, 2), mask3333), _mm_slli_epi64(_mm_and_si128(bb, mask3333), 2)); - bb = _mm_or_si128(_mm_and_si128(_mm_srli_epi64(bb, 4), mask0F0F), _mm_slli_epi64(_mm_and_si128(bb, mask0F0F), 4)); - #endif - return bb; -} - -void board_horizontal_mirror(const Board *board, Board *sym) -{ - _mm_storeu_si128((__m128i *) sym, board_horizontal_mirror_sse(_mm_loadu_si128((__m128i *) board))); -} - -static __m128i vectorcall board_vertical_mirror_sse(__m128i bb) -{ - #if defined(__SSSE3__) || defined(__AVX__) // pshufb - return _mm_shuffle_epi8(bb, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); - #else - bb = _mm_or_si128(_mm_srli_epi16(bb, 8), _mm_slli_epi16(bb, 8)); - return _mm_shufflehi_epi16(_mm_shufflelo_epi16(bb, 0x1b), 0x1b); - #endif -} - -void board_vertical_mirror(const Board *board, Board *sym) -{ - #if defined(__SSSE3__) || defined(__AVX__) || !defined(HAS_CPU_64) - _mm_storeu_si128((__m128i *) sym, board_vertical_mirror_sse(_mm_loadu_si128((__m128i *) board))); - #else // use BSWAP64 - sym->player = vertical_mirror(board->player); - sym->opponent = vertical_mirror(board->opponent); - #endif -} - -static __m128i vectorcall board_transpose_sse(__m128i bb) -{ - const __m128i mask00AA = _mm_set1_epi16(0x00AA); - const __m128i maskCCCC = _mm_set1_epi32(0x0000CCCC); - const __m128i mask00F0 = _mm_set1_epi64x(0x00000000F0F0F0F0); - __m128i tt = _mm_and_si128(_mm_xor_si128(bb, _mm_srli_epi64(bb, 7)), mask00AA); - bb = _mm_xor_si128(_mm_xor_si128(bb, tt), _mm_slli_epi64(tt, 7)); - tt = _mm_and_si128(_mm_xor_si128(bb, _mm_srli_epi64(bb, 14)), maskCCCC); - bb = _mm_xor_si128(_mm_xor_si128(bb, tt), _mm_slli_epi64(tt, 14)); - tt = _mm_and_si128(_mm_xor_si128(bb, _mm_srli_epi64(bb, 28)), mask00F0); - bb = _mm_xor_si128(_mm_xor_si128(bb, tt), _mm_slli_epi64(tt, 28)); - return bb; -} - -void board_transpose(const Board *board, Board *sym) -{ - _mm_storeu_si128((__m128i *) sym, board_transpose_sse(_mm_loadu_si128((__m128i *) board))); -} - -void board_symetry(const Board *board, const int s, Board *sym) -{ - __m128i bb = _mm_loadu_si128((__m128i *) board); - if (s & 1) - bb = board_horizontal_mirror_sse(bb); - if (s & 2) - bb = board_vertical_mirror_sse(bb); - if (s & 4) - bb = board_transpose_sse(bb); - - _mm_storeu_si128((__m128i *) sym, bb); - board_check(sym); -} - -#elif defined(__ARM_NEON) && !defined(DISPATCH_NEON) - -static uint64x2_t board_horizontal_mirror_neon(uint64x2_t bb) -{ - #ifdef HAS_CPU_64 - bb = vreinterpretq_u64_u8(vrbitq_u8(vreinterpretq_u8_u64(bb))); - #else - bb = vbslq_u64(vdupq_n_u64(0x5555555555555555), vshrq_n_u64(bb, 1), vshlq_n_u64(bb, 1)); - bb = vbslq_u64(vdupq_n_u64(0x3333333333333333), vshrq_n_u64(bb, 2), vshlq_n_u64(bb, 2)); - bb = vreinterpretq_u64_u8(vsliq_n_u8(vshrq_n_u8(vreinterpretq_u8_u64(bb), 4), vreinterpretq_u8_u64(bb), 4)); - #endif - return bb; -} - -void board_horizontal_mirror(const Board *board, Board *sym) -{ - vst1q_u64((uint64_t *) sym, board_horizontal_mirror_neon(vld1q_u64((uint64_t *) board))); -} - -static uint64x2_t board_vertical_mirror_neon(uint64x2_t bb) -{ - return vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(bb))); -} - -void board_vertical_mirror(const Board *board, Board *sym) -{ - vst1q_u64((uint64_t *) sym, board_vertical_mirror_neon(vld1q_u64((uint64_t *) board))); -} - -static uint64x2_t board_transpose_neon(uint64x2_t bb) -{ - uint64x2_t tt = vandq_u64(veorq_u64(bb, vshrq_n_u64(bb, 7)), vdupq_n_u64(0x00AA00AA00AA00AA)); - bb = veorq_u64(veorq_u64(bb, tt), vshlq_n_u64(tt, 7)); - tt = vandq_u64(veorq_u64(bb, vshrq_n_u64(bb, 14)), vdupq_n_u64(0x0000CCCC0000CCCC)); - bb = veorq_u64(veorq_u64(bb, tt), vshlq_n_u64(tt, 14)); - tt = vandq_u64(veorq_u64(bb, vshrq_n_u64(bb, 28)), vdupq_n_u64(0x00000000F0F0F0F0)); - bb = veorq_u64(veorq_u64(bb, tt), vshlq_n_u64(tt, 28)); - return bb; -} - -void board_transpose(const Board *board, Board *sym) -{ - vst1q_u64((uint64_t *) sym, board_transpose_neon(vld1q_u64((uint64_t *) board))); -} - -void board_symetry(const Board *board, const int s, Board *sym) -{ - uint64x2_t bb = vld1q_u64((uint64_t *) board); - if (s & 1) - bb = board_horizontal_mirror_neon(bb); - if (s & 2) - bb = board_vertical_mirror_neon(bb); - if (s & 4) - bb = board_transpose_neon(bb); - - vst1q_u64((uint64_t *) sym, bb); - board_check(sym); -} - -#endif // hasSSE2/Neon - -======= ->>>>>>> b4fb773 (AVX optimized board_unique) /** * @brief Compute a board resulting of a move played on a previous board. * -<<<<<<< HEAD - * @param board board to play the move on. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= * @param OP board to play the move on. ->>>>>>> 8566ed0 (vector call version of board_next & get_moves) * @param x move to play. * @param next resulting board. * @return flipped discs. */ -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE) -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM) unsigned long long vectorcall board_next_sse(__m128i OP, const int x, Board *next) { __m128i flipped = reduce_vflip(mm_Flip(OP, x)); -======= -======= -#if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE) -======= -#if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_AVX512) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE) ->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame) - ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -unsigned long long board_next(const Board *board, const int x, Board *next) -======= -unsigned long long vectorcall vboard_next(__m128i OP, const int x, Board *next) ->>>>>>> 8566ed0 (vector call version of board_next & get_moves) -{ -<<<<<<< HEAD - __m128i flipped = mm_Flip(OP, x); ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - __m128i flipped = reduce_vflip(mm_Flip(OP, x)); ->>>>>>> a2d40bc (AVX flip reduction after TESTZ in endgame_sse.c) OP = _mm_xor_si128(OP, _mm_or_si128(flipped, _mm_loadl_epi64((__m128i *) &X_TO_BIT[x]))); _mm_storeu_si128((__m128i *) next, _mm_shuffle_epi32(OP, 0x4e)); @@ -518,27 +255,6 @@ unsigned long long vectorcall vboard_next(__m128i OP, const int x, Board *next) return _mm_cvtsi128_si64(flipped); } -<<<<<<< HEAD -<<<<<<< HEAD -#elif MOVE_GENERATOR == MOVE_GENERATOR_NEON - -unsigned long long board_next_neon(uint64x2_t OP, const int x, Board *next) -{ - uint64x2_t flipped = mm_Flip(OP, x); - #if !defined(_MSC_VER) && !defined(__clang__) // MSVC-arm32 does not have vld1q_lane_u64 - // arm64-gcc-13: 21, armv8a-clang-16: 23, msvc-arm64-19: 22, gcc-arm-13: 18, clang-armv7-11: 29 // https://godbolt.org/z/cvhns39rK - OP = veorq_u64(OP, vorrq_u64(flipped, vld1q_lane_u64((uint64_t *) &X_TO_BIT[x], flipped, 0))); - vst1q_u64((uint64_t *) next, vextq_u64(OP, OP, 1)); - #else // arm64-gcc-13: 21, armv8a-clang-16: 22, msvc-arm64-19: 21, gcc-arm-13: 23, clang-armv7-11: 27 - OP = veorq_u64(OP, flipped); - vst1q_u64((uint64_t *) next, vcombine_u64(vget_high_u64(OP), vorr_u64(vget_low_u64(OP), vld1_u64((uint64_t *) &X_TO_BIT[x])))); - #endif - return vgetq_lane_u64(flipped, 0); -} -#endif - -======= -======= #elif MOVE_GENERATOR == MOVE_GENERATOR_NEON unsigned long long board_next_neon(uint64x2_t OP, const int x, Board *next) @@ -556,56 +272,7 @@ unsigned long long board_next_neon(uint64x2_t OP, const int x, Board *next) } #endif ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -/** -<<<<<<< HEAD - * @brief Compute a board resulting of an opponent move played on a previous board. - * - * Compute the board after passing and playing a move. - * - * @param board board to play the move on. - * @param x opponent move to play. - * @param next resulting board. - * @return flipped discs. - */ -#if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE) - -unsigned long long board_pass_next(const Board *board, const int x, Board *next) -{ - __m128i PO = _mm_shuffle_epi32(_mm_loadu_si128((__m128i *) board), 0x4e); - __m128i flipped = mm_Flip(PO, x); - - PO = _mm_xor_si128(PO, _mm_or_si128(flipped, _mm_loadl_epi64((__m128i *) &X_TO_BIT[x]))); - _mm_storeu_si128((__m128i *) next, _mm_shuffle_epi32(PO, 0x4e)); - - return _mm_cvtsi128_si64(flipped); -} - -#elif MOVE_GENERATOR == MOVE_GENERATOR_NEON - -unsigned long long board_pass_next(const Board *board, const int x, Board *next) -{ - uint64x2_t OP = vld1q_u64((uint64_t *) board); - uint64x2_t PO = vextq_u64(OP, OP, 1); - uint64x2_t flipped = mm_Flip(PO, x); - -#ifdef HAS_CPU_64 // vld1q_lane_u64 - PO = veorq_u64(PO, vorrq_u64(flipped, vld1q_lane_u64((uint64_t *) &X_TO_BIT[x], flipped, 0))); - vst1q_u64((uint64_t *) next, vextq_u64(PO, PO, 1)); -#else - PO = veorq_u64(OP, flipped); - vst1_u64(&next->player, vget_high_u64(PO)); - vst1_u64(&next->opponent, vorr_u64(vget_low_u64(PO), vld1_u64(&X_TO_BIT[x]))); -#endif - return vgetq_lane_u64(flipped, 0); -} - -#endif - ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) /** -======= ->>>>>>> 23e04d1 (Backport endgame_sse optimizations into endgame.c) * @brief X64 optimized get_moves * * Diag-7 is converted to diag-9 (v.v.) using vertical mirroring @@ -617,53 +284,10 @@ unsigned long long board_pass_next(const Board *board, const int x, Board *next) */ #ifdef __AVX2__ // 4 AVX -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #if defined(_MSC_VER) || defined(__linux__) // vectorcall and SYSV-ABI passes __m256i in registers -======= -#if (vBoard == __m128i) && (defined(_MSC_VER) || defined(__linux__)) // vectorcall and SYSV-ABI passes __m256i in registers ->>>>>>> 78ce5d7 (more precise rboard/vboard opt; reexamine neon vboard_next) -======= - #if (defined(_MSC_VER) || defined(__linux__)) // vectorcall and SYSV-ABI passes __m256i in registers ->>>>>>> 7bd8076 (vboard opt using union V2DI; MSVC can assign it to XMM) -======= - #if defined(_MSC_VER) || defined(__linux__) // vectorcall and SYSV-ABI passes __m256i in registers ->>>>>>> f6ae8a3 (Drop some excessive 32bit optimizations) -unsigned long long vectorcall get_moves_avx(__m256i PP, __m256i OO) -{ - #else -unsigned long long get_moves(unsigned long long P, unsigned long long O) // minGW -{ - __m256i PP = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(P)); - __m256i OO = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(O)); - #endif - __m256i MM, flip_l, flip_r, pre_l, pre_r, shift2; - __m128i M; - const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1); - __m256i mOO = _mm256_and_si256(OO, _mm256_set_epi64x(0x007E7E7E7E7E7E00, 0x007E7E7E7E7E7E00, 0x00FFFFFFFFFFFF00, 0x7E7E7E7E7E7E7E7E)); - __m128i occupied = _mm_or_si128(_mm256_castsi256_si128(PP), _mm256_castsi256_si128(OO)); -======= -unsigned long long get_moves(const unsigned long long P, const unsigned long long O) -======= -#if defined(_MSC_VER) || defined(__clang__) -======= -#if defined(_MSC_VER) || defined(__linux__) // vectorcall and SYSV-ABI passes __m256i in registers ->>>>>>> 29ed6b6 (Include gcc linux to get_moves_avx with mm256 params) -unsigned long long vectorcall get_moves_avx(__m256i PP, __m256i OO) ->>>>>>> 8566ed0 (vector call version of board_next & get_moves) -{ -#else -======= - #if (vBoard == __m128i) && (defined(_MSC_VER) || defined(__linux__)) // vectorcall and SYSV-ABI passes __m256i in registers unsigned long long vectorcall get_moves_avx(__m256i PP, __m256i OO) { #else ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) unsigned long long get_moves(unsigned long long P, unsigned long long O) // minGW { __m256i PP = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(P)); @@ -672,20 +296,8 @@ unsigned long long get_moves(unsigned long long P, unsigned long long O) // minG __m256i MM, flip_l, flip_r, pre_l, pre_r, shift2; __m128i M; const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1); -<<<<<<< HEAD -<<<<<<< HEAD - const __m256i mflipH = _mm256_set_epi64x(0x7e7e7e7e7e7e7e7e, 0x7e7e7e7e7e7e7e7e, -1, 0x7e7e7e7e7e7e7e7e); - - PP = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(P)); - mOO = _mm256_and_si256(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(O)), mflipH); ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - __m256i mOO = _mm256_and_si256(OO, _mm256_set_epi64x(0x7e7e7e7e7e7e7e7e, 0x7e7e7e7e7e7e7e7e, -1, 0x7e7e7e7e7e7e7e7e)); -======= __m256i mOO = _mm256_and_si256(OO, _mm256_set_epi64x(0x007E7E7E7E7E7E00, 0x007E7E7E7E7E7E00, 0x00FFFFFFFFFFFF00, 0x7E7E7E7E7E7E7E7E)); ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) __m128i occupied = _mm_or_si128(_mm256_castsi256_si128(PP), _mm256_castsi256_si128(OO)); ->>>>>>> 8566ed0 (vector call version of board_next & get_moves) flip_l = _mm256_and_si256(mOO, _mm256_sllv_epi64(PP, shift1897)); flip_r = _mm256_and_si256(mOO, _mm256_srlv_epi64(PP, shift1897)); @@ -698,27 +310,10 @@ unsigned long long get_moves(unsigned long long P, unsigned long long O) // minG flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(pre_r, _mm256_srlv_epi64(flip_r, shift2))); flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(pre_l, _mm256_sllv_epi64(flip_l, shift2))); flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(pre_r, _mm256_srlv_epi64(flip_r, shift2))); -<<<<<<< HEAD -<<<<<<< HEAD - MM = _mm256_or_si256(_mm256_sllv_epi64(flip_l, shift1897), _mm256_srlv_epi64(flip_r, shift1897)); - - M = _mm_or_si128(_mm256_castsi256_si128(MM), _mm256_extracti128_si256(MM, 1)); - return _mm_cvtsi128_si64(_mm_andnot_si128(occupied, _mm_or_si128(M, _mm_unpackhi_epi64(M, M)))); // mask with empties -======= - MM = _mm256_sllv_epi64(flip_l, shift1897); - MM = _mm256_or_si256(MM, _mm256_srlv_epi64(flip_r, shift1897)); -======= MM = _mm256_or_si256(_mm256_sllv_epi64(flip_l, shift1897), _mm256_srlv_epi64(flip_r, shift1897)); ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) M = _mm_or_si128(_mm256_castsi256_si128(MM), _mm256_extracti128_si256(MM, 1)); -<<<<<<< HEAD - M = _mm_or_si128(M, _mm_unpackhi_epi64(M, M)); - return _mm_cvtsi128_si64(M) & ~(P|O); // mask with empties ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= return _mm_cvtsi128_si64(_mm_andnot_si128(occupied, _mm_or_si128(M, _mm_unpackhi_epi64(M, M)))); // mask with empties ->>>>>>> 8566ed0 (vector call version of board_next & get_moves) } #elif defined(__x86_64__) || defined(_M_X64) // 2 SSE, 2 CPU @@ -750,15 +345,7 @@ unsigned long long get_moves(const unsigned long long P, const unsigned long lon return moves & ~(P|O); // mask with empties } -<<<<<<< HEAD -<<<<<<< HEAD -#elif defined(__aarch64__) || defined(_M_ARM64) // 4 CPU -======= -#elif 0 // 4 CPU ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= #elif defined(__aarch64__) || defined(_M_ARM64) // 4 CPU ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) unsigned long long get_moves(const unsigned long long P, const unsigned long long O) { @@ -782,26 +369,9 @@ unsigned long long get_moves(const unsigned long long P, const unsigned long lon return moves & ~(P|O); // mask with empties } -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD #elif defined(__ARM_NEON) // 3 Neon, 1 CPU(32) #ifndef DISPATCH_NEON -<<<<<<< HEAD - #define get_moves_sse get_moves // no dispatch - #endif -======= -#else // __x86_64__ -======= -#elif defined(__ARM_NEON__) // 3 Neon, 1 CPU(32) -======= -#elif defined(__ARM_NEON) // 3 Neon, 1 CPU(32) ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) - - #ifdef hasNeon -======= ->>>>>>> b1cae3c (Rewrite AVX512 LASTFLIP_HIGHCUT not to use kortest) #define get_moves_sse get_moves // no dispatch #endif @@ -847,109 +417,29 @@ unsigned long long get_moves_sse(unsigned long long P, unsigned long long O) } #else // AVX/x86_64/arm ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) /** * @brief SSE optimized get_moves for x86 - 3 SSE, 1 CPU(32) * */ -<<<<<<< HEAD -<<<<<<< HEAD -#if defined(hasSSE2) || defined(USE_MSVC_X86) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -#if defined(hasSSE2) || defined(USE_MSVC_X86) || defined(ANDROID) + #if defined(hasSSE2) || defined(USE_MSVC_X86) || defined(ANDROID) -#ifdef hasSSE2 -#define get_moves_sse get_moves // no dispatch -#endif ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) + #ifdef hasSSE2 + #define get_moves_sse get_moves // no dispatch + #endif unsigned long long get_moves_sse(const unsigned long long P, const unsigned long long O) { unsigned int mO, movesL, movesH, flip1, pre1; -<<<<<<< HEAD - uint64x1_t rP, rO; - uint64x2_t PP, OO, MM, flip, pre; + __m128i OP, rOP, PP, OO, MM, flip, pre; - /* vertical_mirror in PP[1], OO[1] */ mO = (unsigned int) O & 0x7e7e7e7e; - rP = vreinterpret_u64_u8(vrev64_u8(vcreate_u8(P))); flip1 = mO & ((unsigned int) P << 1); - PP = vcombine_u64(vcreate_u64(P), rP); flip1 |= mO & (flip1 << 1); - pre1 = mO & (mO << 1); - rO = vreinterpret_u64_u8(vrev64_u8(vcreate_u8(O))); flip1 |= pre1 & (flip1 << 2); - OO = vcombine_u64(vcreate_u64(O), rO); flip1 |= pre1 & (flip1 << 2); - movesL = flip1 << 1; - - flip = vandq_u64(OO, vshlq_n_u64(PP, 8)); flip1 = mO & ((unsigned int) P >> 1); - flip = vorrq_u64(flip, vandq_u64(OO, vshlq_n_u64(flip, 8))); flip1 |= mO & (flip1 >> 1); - pre = vandq_u64(OO, vshlq_n_u64(OO, 8)); pre1 >>= 1; - flip = vorrq_u64(flip, vandq_u64(pre, vshlq_n_u64(flip, 16))); flip1 |= pre1 & (flip1 >> 2); - flip = vorrq_u64(flip, vandq_u64(pre, vshlq_n_u64(flip, 16))); flip1 |= pre1 & (flip1 >> 2); - MM = vshlq_n_u64(flip, 8); movesL |= flip1 >> 1; - - OO = vandq_u64(OO, vdupq_n_u64(0x7e7e7e7e7e7e7e7e)); mO = (unsigned int) (O >> 32) & 0x7e7e7e7e; - flip = vandq_u64(OO, vshlq_n_u64(PP, 7)); flip1 = mO & ((unsigned int) (P >> 32) << 1); - flip = vorrq_u64(flip, vandq_u64(OO, vshlq_n_u64(flip, 7))); flip1 |= mO & (flip1 << 1); - pre = vandq_u64(OO, vshlq_n_u64(OO, 7)); pre1 = mO & (mO << 1); - flip = vorrq_u64(flip, vandq_u64(pre, vshlq_n_u64(flip, 14))); flip1 |= pre1 & (flip1 << 2); - flip = vorrq_u64(flip, vandq_u64(pre, vshlq_n_u64(flip, 14))); flip1 |= pre1 & (flip1 << 2); - MM = vorrq_u64(MM, vshlq_n_u64(flip, 7)); movesH = flip1 << 1; - - flip = vandq_u64(OO, vshlq_n_u64(PP, 9)); flip1 = mO & ((unsigned int) (P >> 32) >> 1); - flip = vorrq_u64(flip, vandq_u64(OO, vshlq_n_u64(flip, 9))); flip1 |= mO & (flip1 >> 1); - pre = vandq_u64(OO, vshlq_n_u64(OO, 9)); pre1 >>= 1; - flip = vorrq_u64(flip, vandq_u64(pre, vshlq_n_u64(flip, 18))); flip1 |= pre1 & (flip1 >> 2); - flip = vorrq_u64(flip, vandq_u64(pre, vshlq_n_u64(flip, 18))); flip1 |= pre1 & (flip1 >> 2); - MM = vorrq_u64(MM, vshlq_n_u64(flip, 9)); movesH |= flip1 >> 1; - - movesL |= vgetq_lane_u32(vreinterpretq_u32_u64(MM), 0) | bswap_int(vgetq_lane_u32(vreinterpretq_u32_u64(MM), 3)); - movesH |= vgetq_lane_u32(vreinterpretq_u32_u64(MM), 1) | bswap_int(vgetq_lane_u32(vreinterpretq_u32_u64(MM), 2)); - return (movesL | ((unsigned long long) movesH << 32)) & ~(P|O); // mask with empties -} - -#else // AVX/x86_64/arm -/** - * @brief SSE optimized get_moves for x86 - 3 SSE, 1 CPU(32) - * - */ - #if defined(hasSSE2) || defined(USE_MSVC_X86) || defined(ANDROID) - -======= - #if defined(hasSSE2) || defined(USE_MSVC_X86) || defined(ANDROID) - ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) - #ifdef hasSSE2 - #define get_moves_sse get_moves // no dispatch - #endif - -unsigned long long get_moves_sse(const unsigned long long P, const unsigned long long O) -{ - unsigned int mO, movesL, movesH, flip1, pre1; - __m128i OP, rOP, PP, OO, MM, flip, pre; -======= - __m128i OP, rOP, PP, OO, MM, flip, pre; -<<<<<<< HEAD - const __m128i mask7e = _mm_set1_epi8(0x7e); ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) - - // vertical_mirror in PP[1], OO[1] - OP = _mm_unpacklo_epi64(_mm_cvtsi64_si128(P), _mm_cvtsi64_si128(O)); mO = (unsigned int) O & 0x7e7e7e7eU; - rOP = _mm_shufflelo_epi16(OP, 0x1B); flip1 = mO & ((unsigned int) P << 1); - rOP = _mm_shufflehi_epi16(rOP, 0x1B); flip1 |= mO & (flip1 << 1); -<<<<<<< HEAD -<<<<<<< HEAD - rOP = _mm_or_si128(_mm_srli_epi16(rOP, 8), _mm_slli_epi16(rOP, 8)); pre1 = mO & (mO << 1); -======= - pre1 = mO & (mO << 1); - rOP = _mm_or_si128(_mm_srli_epi16(rOP, 8), _mm_slli_epi16(rOP, 8)); ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - rOP = _mm_or_si128(_mm_srli_epi16(rOP, 8), _mm_slli_epi16(rOP, 8)); pre1 = mO & (mO << 1); ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) - flip1 |= pre1 & (flip1 << 2); - PP = _mm_unpacklo_epi64(OP, rOP); flip1 |= pre1 & (flip1 << 2); - OO = _mm_unpackhi_epi64(OP, rOP); movesL = flip1 << 1; + // vertical_mirror in PP[1], OO[1] + OP = _mm_unpacklo_epi64(_mm_cvtsi64_si128(P), _mm_cvtsi64_si128(O)); mO = (unsigned int) O & 0x7e7e7e7eU; + rOP = _mm_shufflelo_epi16(OP, 0x1B); flip1 = mO & ((unsigned int) P << 1); + rOP = _mm_shufflehi_epi16(rOP, 0x1B); flip1 |= mO & (flip1 << 1); + rOP = _mm_or_si128(_mm_srli_epi16(rOP, 8), _mm_slli_epi16(rOP, 8)); pre1 = mO & (mO << 1); + flip1 |= pre1 & (flip1 << 2); + PP = _mm_unpacklo_epi64(OP, rOP); flip1 |= pre1 & (flip1 << 2); + OO = _mm_unpackhi_epi64(OP, rOP); movesL = flip1 << 1; flip = _mm_and_si128(OO, _mm_slli_epi64(PP, 8)); flip1 = mO & ((unsigned int) P >> 1); flip = _mm_or_si128(flip, _mm_and_si128(OO, _mm_slli_epi64(flip, 8))); flip1 |= mO & (flip1 >> 1); @@ -958,15 +448,7 @@ unsigned long long get_moves_sse(const unsigned long long P, const unsigned long flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 16))); flip1 |= pre1 & (flip1 >> 2); MM = _mm_slli_epi64(flip, 8); movesL |= flip1 >> 1; -<<<<<<< HEAD -<<<<<<< HEAD - OO = _mm_and_si128(OO, _mm_set1_epi8(0x7e)); mO = (unsigned int) (O >> 32) & 0x7e7e7e7eU; -======= - OO = _mm_and_si128(OO, mask7e); mO = (unsigned int) (O >> 32) & 0x7e7e7e7eU; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= OO = _mm_and_si128(OO, _mm_set1_epi8(0x7e)); mO = (unsigned int) (O >> 32) & 0x7e7e7e7eU; ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) flip = _mm_and_si128(OO, _mm_slli_epi64(PP, 7)); flip1 = mO & ((unsigned int) (P >> 32) << 1); flip = _mm_or_si128(flip, _mm_and_si128(OO, _mm_slli_epi64(flip, 7))); flip1 |= mO & (flip1 << 1); pre = _mm_and_si128(OO, _mm_slli_epi64(OO, 7)); pre1 = mO & (mO << 1); @@ -988,22 +470,9 @@ unsigned long long get_moves_sse(const unsigned long long P, const unsigned long return (movesL | ((unsigned long long) movesH << 32)) & ~(P|O); // mask with empties } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) #else // non-VEX asm unsigned long long get_moves_sse(const unsigned long long P, const unsigned long long O) -======= -#else // non-VEX asm - -<<<<<<< HEAD -unsigned long long get_moves_sse(unsigned long long P, unsigned long long O) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -unsigned long long get_moves_sse(const unsigned long long P, const unsigned long long O) ->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction) { unsigned long long moves; static const V2DI mask7e = {{ 0x7e7e7e7e7e7e7e7eULL, 0x7e7e7e7e7e7e7e7eULL }}; @@ -1113,50 +582,19 @@ unsigned long long get_moves_sse(const unsigned long long P, const unsigned long return moves; } -<<<<<<< HEAD -<<<<<<< HEAD #endif // hasSSE2 #endif // x86 -<<<<<<< HEAD -<<<<<<< HEAD #if defined(hasSSE2) || (defined(__ARM_NEON) && !defined(DISPATCH_NEON)) /** * @brief SSE/neon optimized get_stable_edge -======= -#endif // hasSSE2 -======= - #endif // hasSSE2 ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) -#endif // x86 - -#if defined(hasSSE2) || defined(hasNeon) // no dispatch - -/** - * @brief SSE optimized get_stable_edge ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -#if defined(hasSSE2) || defined(hasNeon) -======= -#if defined(hasSSE2) || (defined(__ARM_NEON) && !defined(DISPATCH_NEON)) ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) - -/** - * @brief SSE/neon optimized get_stable_edge ->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX) * * @param P bitboard with player's discs. * @param O bitboard with opponent's discs. * @return a bitboard with (some of) player's stable discs. * */ -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) #if defined(__aarch64__) || defined(_M_ARM64) // for vaddvq unsigned long long get_stable_edge(unsigned long long P, unsigned long long O) { // compute the exact stable edges (from precomputed tables) @@ -1168,7 +606,6 @@ unsigned long long get_stable_edge(unsigned long long P, unsigned long long O) return edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 0)] | (unsigned long long) edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 7)] << 56 | unpackA2A7(a1a8) | unpackH2H7(h1h8); -<<<<<<< HEAD } #elif defined(__ARM_NEON) // Neon kindergarten @@ -1195,75 +632,6 @@ unsigned long long get_stable_edge(const unsigned long long P, const unsigned lo { // compute the exact stable edges (from precomputed tables) unsigned int a1a8, h1h8; -======= -static unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O) -======= -#if defined(__aarch64__) || defined(_M_ARM64) -unsigned long long get_stable_edge(unsigned long long P, unsigned long long O) -======= -#if defined(__aarch64__) || defined(_M_ARM64) // for vaddvq -<<<<<<< HEAD -unsigned long long get_stable_edge_sse(unsigned long long P, unsigned long long O) ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= -unsigned long long get_stable_edge(unsigned long long P, unsigned long long O) ->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction) -{ // compute the exact stable edges (from precomputed tables) - // const int16x8_t shiftv = { 0, 1, 2, 3, 4, 5, 6, 7 }; // error on MSVC - const uint64x2_t shiftv = { 0x0003000200010000, 0x0007000600050004 }; - uint8x16_t PO = vzip1q_u8(vreinterpretq_u8_u64(vdupq_n_u64(O)), vreinterpretq_u8_u64(vdupq_n_u64(P))); - unsigned int a1a8 = edge_stability[vaddvq_u16(vshlq_u16(vreinterpretq_u16_u8(vandq_u8(PO, vdupq_n_u8(1))), vreinterpretq_s16_u64(shiftv)))]; - unsigned int h1h8 = edge_stability[vaddvq_u16(vshlq_u16(vreinterpretq_u16_u8(vshrq_n_u8(PO, 7)), vreinterpretq_s16_u64(shiftv)))]; - return edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 0)] - | (unsigned long long) edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 7)] << 56 - | unpackA1A8(a1a8) | unpackH1H8(h1h8); -======= ->>>>>>> 21206f2 (Exclude corners from unpackA2A7/H2H7 to ease CPU_64 kindergarten) -} - - #elif defined(hasNeon) // Neon kindergarten -unsigned long long get_stable_edge(unsigned long long P, unsigned long long O) -{ // compute the exact stable edges (from precomputed tables) - const uint64x2_t kMul = { 0x1020408001020408, 0x1020408001020408 }; - uint64x2_t PP = vcombine_u64(vshl_n_u64(vcreate_u64(P), 7), vcreate_u64(P)); - uint64x2_t OO = vcombine_u64(vshl_n_u64(vcreate_u64(O), 7), vcreate_u64(O)); - uint32x4_t QP = vmulq_u32(vreinterpretq_u32_u64(kMul), vreinterpretq_u32_u8(vshrq_n_u8(vreinterpretq_u8_u64(PP), 7))); - uint32x4_t QO = vmulq_u32(vreinterpretq_u32_u64(kMul), vreinterpretq_u32_u8(vshrq_n_u8(vreinterpretq_u8_u64(OO), 7))); - uint32x2_t DP = vpadd_u32(vget_low_u32(QP), vget_high_u32(QP)); // P_h1h8 * * * P_a1a8 * * * - uint32x2_t DO = vpadd_u32(vget_low_u32(QO), vget_high_u32(QO)); // O_h1h8 * * * O_a1a8 * * * - uint8x8_t DB = vtrn_u8(vreinterpret_u8_u32(DO), vreinterpret_u8_u32(DP)).val[1]; // P_h1h8 O_h1h8 * * P_a1a8 O_a1a8 * * - unsigned int a1a8 = edge_stability[vget_lane_u16(vreinterpret_u16_u8(DB), 1)]; - unsigned int h1h8 = edge_stability[vget_lane_u16(vreinterpret_u16_u8(DB), 3)]; - uint8x16_t PO = vzipq_u8(vreinterpretq_u8_u64(OO), vreinterpretq_u8_u64(PP)).val[1]; - return edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 0)] - | (unsigned long long) edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 7)] << 56 - | unpackA2A7(a1a8) | unpackH2H7(h1h8); -} - -<<<<<<< HEAD -<<<<<<< HEAD -#elif defined(__x86_64__) || defined(_M_X64) -======= - #elif defined(hasSSE2) ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) -unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O) ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= -#elif defined(hasSSE2) || defined(USE_MSVC_X86) -<<<<<<< HEAD -unsigned long long get_stable_edge_sse(const unsigned long long P, const unsigned long long O) ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= -unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O) ->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction) -{ - // compute the exact stable edges (from precomputed tables) -<<<<<<< HEAD - unsigned int a1a8po, h1h8po; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - unsigned int a1a8, h1h8; ->>>>>>> 93110ce (Use computation or optional pdep to unpack A1_A8) unsigned long long stable_edge; __m128i P0 = _mm_cvtsi64_si128(P); @@ -1273,8 +641,6 @@ unsigned long long get_stable_edge(const unsigned long long P, const unsigned lo | ((unsigned long long) edge_stability[_mm_extract_epi16(PO, 7)] << 56); PO = _mm_unpacklo_epi64(O0, P0); -<<<<<<< HEAD -<<<<<<< HEAD a1a8 = edge_stability[_mm_movemask_epi8(_mm_slli_epi64(PO, 7))]; h1h8 = edge_stability[_mm_movemask_epi8(PO)]; stable_edge |= unpackA2A7(a1a8) | unpackH2H7(h1h8); @@ -1282,13 +648,11 @@ unsigned long long get_stable_edge(const unsigned long long P, const unsigned lo return stable_edge; } #endif -<<<<<<< HEAD /** * @brief SSE/neon optimized get_edge_stability * * Compute the exact stable edges from precomputed tables. -<<<<<<< HEAD * * @param P bitboard with player's discs. * @param O bitboard with opponent's discs. @@ -1341,102 +705,11 @@ int get_edge_stability(const unsigned long long P, const unsigned long long O) /** * @brief AVX2/SSE/neon optimized get_full_lines. -======= - a1a8po = _mm_movemask_epi8(_mm_slli_epi64(PO, 7)); - h1h8po = _mm_movemask_epi8(PO); -#if 0 // def __BMI2__ // pdep is slow on AMD - stable_edge |= _pdep_u64(edge_stability[a1a8po], 0x0101010101010101) - | _pdep_u64(edge_stability[h1h8po], 0x8080808080808080); -#else - stable_edge |= A1_A8[edge_stability[a1a8po]] | (A1_A8[edge_stability[h1h8po]] << 7); -#endif -======= - a1a8 = edge_stability[_mm_movemask_epi8(_mm_slli_epi64(PO, 7))]; - h1h8 = edge_stability[_mm_movemask_epi8(PO)]; - stable_edge |= unpackA2A7(a1a8) | unpackH2H7(h1h8); - ->>>>>>> 93110ce (Use computation or optional pdep to unpack A1_A8) - return stable_edge; -} -#endif -======= ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) - -/** -<<<<<<< HEAD -<<<<<<< HEAD - * @brief X64 optimized get_stability ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -======= - * @brief SSE optimized get_edge_stability -======= ->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX) - * - * @param P bitboard with player's discs. - * @param O bitboard with opponent's discs. - * @return the number of stable discs on the edges. - * - */ - #if defined(__aarch64__) || defined(_M_ARM64) // for vaddvq -int get_edge_stability(const unsigned long long P, const unsigned long long O) -{ - const uint64x2_t shiftv = { 0x0003000200010000, 0x0007000600050004 }; - uint8x16_t PO = vzip1q_u8(vreinterpretq_u8_u64(vdupq_n_u64(O)), vreinterpretq_u8_u64(vdupq_n_u64(P))); - uint8x8_t packedstable = vcreate_u8((edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 0)] - | edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 7)] << 8) & 0x7e7e); - packedstable = vset_lane_u8(edge_stability[vaddvq_u16(vshlq_u16(vreinterpretq_u16_u8(vandq_u8(PO, vdupq_n_u8(1))), vreinterpretq_s16_u64(shiftv)))], packedstable, 2); - packedstable = vset_lane_u8(edge_stability[vaddvq_u16(vshlq_u16(vreinterpretq_u16_u8(vshrq_n_u8(PO, 7)), vreinterpretq_s16_u64(shiftv)))], packedstable, 3); - return vaddv_u8(vcnt_u8(packedstable)); -} - - #elif defined(__ARM_NEON) // Neon kindergarten -int get_edge_stability(const unsigned long long P, const unsigned long long O) -{ - const uint64x2_t kMul = { 0x1020408001020408, 0x1020408001020408 }; - uint64x2_t PP = vcombine_u64(vshl_n_u64(vcreate_u64(P), 7), vcreate_u64(P)); - uint64x2_t OO = vcombine_u64(vshl_n_u64(vcreate_u64(O), 7), vcreate_u64(O)); - uint32x4_t QP = vmulq_u32(vreinterpretq_u32_u64(kMul), vreinterpretq_u32_u8(vshrq_n_u8(vreinterpretq_u8_u64(PP), 7))); - uint32x4_t QO = vmulq_u32(vreinterpretq_u32_u64(kMul), vreinterpretq_u32_u8(vshrq_n_u8(vreinterpretq_u8_u64(OO), 7))); - uint32x2_t DP = vpadd_u32(vget_low_u32(QP), vget_high_u32(QP)); // P_h1h8 * * * P_a1a8 * * * - uint32x2_t DO = vpadd_u32(vget_low_u32(QO), vget_high_u32(QO)); // O_h1h8 * * * O_a1a8 * * * - uint8x8_t DB = vtrn_u8(vreinterpret_u8_u32(DO), vreinterpret_u8_u32(DP)).val[1]; // P_h1h8 O_h1h8 * * P_a1a8 O_a1a8 * * - uint8x16_t PO = vzipq_u8(vreinterpretq_u8_u64(OO), vreinterpretq_u8_u64(PP)).val[1]; - uint8x8_t packedstable = vcreate_u8((edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 0)] - | edge_stability[vgetq_lane_u16(vreinterpretq_u16_u8(PO), 7)] << 8) & 0x7e7e); - packedstable = vset_lane_u8(edge_stability[vget_lane_u16(vreinterpret_u16_u8(DB), 1)], packedstable, 2); - packedstable = vset_lane_u8(edge_stability[vget_lane_u16(vreinterpret_u16_u8(DB), 3)], packedstable, 3); - return vget_lane_u32(vpaddl_u16(vpaddl_u8(vcnt_u8(packedstable))), 0); -} - - #elif defined(hasSSE2) -int get_edge_stability(const unsigned long long P, const unsigned long long O) -{ - __m128i P0 = _mm_cvtsi64_si128(P); - __m128i O0 = _mm_cvtsi64_si128(O); - __m128i PO = _mm_unpacklo_epi8(O0, P0); - unsigned int packedstable = edge_stability[_mm_extract_epi16(PO, 0)] | edge_stability[_mm_extract_epi16(PO, 7)] << 8; - PO = _mm_unpacklo_epi64(O0, P0); - packedstable |= edge_stability[_mm_movemask_epi8(_mm_slli_epi64(PO, 7))] << 16 | edge_stability[_mm_movemask_epi8(PO)] << 24; - return bit_count_32(packedstable & 0xffff7e7e); -} - #endif - -/** -<<<<<<< HEAD ->>>>>>> dc7c79c (Omit unpack from get_edge_stability) - * @brief AVX2/SSE optimized get_stability ->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines) -======= - * @brief AVX2/SSE/neon optimized get_full_lines. ->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX) * * SSE pcmpeqb for horizontal get_full_lines. * CPU rotate for vertical get_full_lines. * Diag-7 is converted to diag-9 using vertical mirroring. * -<<<<<<< HEAD -<<<<<<< HEAD * @param disc all discs on the board. * @param full all 1 if full line, otherwise all 0. */ @@ -1446,48 +719,14 @@ static __m256i vectorcall get_full_lines(const unsigned long long disc) { __m128i l81, l79, l8; __m256i v4_disc, lr79; -<<<<<<< HEAD -<<<<<<< HEAD - const __m128i kff = _mm_set1_epi8(-1); -======= - const __m128i kff = _mm_set1_epi8(0xff); ->>>>>>> 593fff4 (use appropriate _mm_set1) -======= const __m128i kff = _mm_set1_epi8(-1); ->>>>>>> 47c2589 (Fix w32-modern build and gcc build) - #if 0 // PCMPEQQ -======= - * @param P bitboard with player's discs. - * @param O bitboard with opponent's discs. - * @return the number of stable discs. -======= - * @param disc all discs on the board. - * @param full all 1 if full line, otherwise all 0. ->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX) - */ - #ifdef __AVX2__ - -static __m256i vectorcall get_full_lines(const unsigned long long disc) -{ - __m128i l81, l79, l8; - __m256i v4_disc, lr79; - const __m128i kff = _mm_set1_epi64x(0xffffffffffffffff); -<<<<<<< HEAD -#if 0 // PCMPEQQ ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= #if 0 // PCMPEQQ ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) static const V4DI m791 = {{ 0x0402010000804020, 0x2040800000010204, 0x0804020180402010, 0x1020408001020408 }}; // V8SI static const V4DI m792 = {{ 0x0000008040201008, 0x0000000102040810, 0x1008040201000000, 0x0810204080000000 }}; static const V4DI m793 = {{ 0x0000804020100804, 0x0000010204081020, 0x2010080402010000, 0x0408102040800000 }}; static const V4DI m794 = {{ 0x0080402010080402, 0x0001020408102040, 0x4020100804020100, 0x0204081020408000 }}; static const V2DI m795 = {{ 0x8040201008040201, 0x0102040810204080 }}; -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines) l81 = _mm_cvtsi64_si128(disc); v4_disc = _mm256_broadcastq_epi64(l81); l81 = _mm_cmpeq_epi8(kff, l81); lr79 = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m792.v4), m792.v4), m792.v4)); @@ -1495,36 +734,14 @@ static __m256i vectorcall get_full_lines(const unsigned long long disc) l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 1)); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m794.v4), m794.v4), m794.v4)); l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 2)); l79 = _mm_and_si128(_mm_cmpeq_epi64(_mm_and_si128(_mm256_castsi256_si128(v4_disc), m795.v2), m795.v2), m795.v2); l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 4)); l79 = _mm_or_si128(l79, _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79))); -<<<<<<< HEAD #elif 0 // PCMPEQD -======= - l81 = _mm_cvtsi64_si128(disc); v4_disc = _mm256_broadcastq_epi64(l81); - l81 = _mm_cmpeq_epi8(kff, l81); lr79 = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4); - lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m792.v4), m792.v4), m792.v4)); - l8 = disc; lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m793.v4), m793.v4), m793.v4)); - l8 &= (l8 >> 8) | (l8 << 56); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m794.v4), m794.v4), m794.v4)); - l8 &= (l8 >> 16) | (l8 << 48); l79 = _mm_and_si128(_mm_cmpeq_epi64(_mm_and_si128(_mm256_castsi256_si128(v4_disc), m795.v2), m795.v2), m795.v2); - l8 &= (l8 >> 32) | (l8 << 32); l79 = _mm_or_si128(l79, _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79))); -======= ->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines) - -<<<<<<< HEAD -#elif 0 // PCMPEQD ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - #elif 0 // PCMPEQD ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) __m256i lm79; static const V4DI m790 = {{ 0x80c0e0f0783c1e0f, 0x0103070f1e3c78f0, 0x70381c0e07030100, 0x0e1c3870e0c08000 }}; static const V4DI m791 = {{ 0x0402010000804020, 0x2040800000010204, 0x0804020180402010, 0x1020408001020408 }}; // V8SI static const V4DI m792 = {{ 0x2010884440201088, 0x0408112202040811, 0x2211080411080402, 0x4488102088102040 }}; // V8SI static const V4DI m793 = {{ 0x8844221110884422, 0x1122448808112244, 0x0000000044221108, 0x0000000022448810 }}; // V8SI -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines) l81 = _mm_cvtsi64_si128(disc); v4_disc = _mm256_broadcastq_epi64(l81); l81 = _mm_cmpeq_epi8(kff, l81); lm79 = _mm256_and_si256(v4_disc, m790.v4); lm79 = _mm256_or_si256(lm79, _mm256_shuffle_epi32(lm79, 0xb1)); @@ -1533,27 +750,8 @@ static __m256i vectorcall get_full_lines(const unsigned long long disc) l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 2)); lr79 = _mm256_and_si256(_mm256_or_si256(lr79, _mm256_shuffle_epi32(lr79, 0xb1)), m790.v4); l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 4)); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4)); l79 = _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79)); -<<<<<<< HEAD #else // Kogge-Stone -======= - l81 = _mm_cvtsi64_si128(disc); v4_disc = _mm256_broadcastq_epi64(l81); - l81 = _mm_cmpeq_epi8(kff, l81); lm79 = _mm256_and_si256(v4_disc, m790.v4); - lm79 = _mm256_or_si256(lm79, _mm256_shuffle_epi32(lm79, 0xb1)); - l8 = disc; lr79 = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(lm79, m792.v4), m792.v4), m792.v4); - l8 &= (l8 >> 8) | (l8 << 56); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(lm79, m793.v4), m793.v4), m793.v4)); - l8 &= (l8 >> 16) | (l8 << 48); lr79 = _mm256_and_si256(_mm256_or_si256(lr79, _mm256_shuffle_epi32(lr79, 0xb1)), m790.v4); - l8 &= (l8 >> 32) | (l8 << 32); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4)); - l79 = _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79)); -======= ->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines) - -<<<<<<< HEAD -#else // Kogge-Stone ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - #else // Kogge-Stone ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) const __m128i mcpyswap = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); const __m128i mbswapll = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7); static const V4DI shiftlr[] = {{{ 9, 7, 7, 9 }}, {{ 18, 14, 14, 18 }}, {{ 36, 28, 28, 36 }}}; @@ -1561,10 +759,6 @@ static __m256i vectorcall get_full_lines(const unsigned long long disc) static const V4DI e791 = {{ 0xffffc0c0c0c0c0c0, 0xffff030303030303, 0xffff030303030303, 0xffffc0c0c0c0c0c0 }}; static const V4DI e792 = {{ 0xfffffffff0f0f0f0, 0xffffffff0f0f0f0f, 0xffffffff0f0f0f0f, 0xfffffffff0f0f0f0 }}; -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines) l81 = _mm_cvtsi64_si128(disc); v4_disc = _mm256_castsi128_si256(_mm_shuffle_epi8(l81, mcpyswap)); l81 = _mm_cmpeq_epi8(kff, l81); v4_disc = _mm256_permute4x64_epi64(v4_disc, 0x50); // disc, disc, rdisc, rdisc lr79 = _mm256_and_si256(v4_disc, _mm256_or_si256(e790.v4, _mm256_srlv_epi64(v4_disc, shiftlr[0].v4))); @@ -1572,16 +766,12 @@ static __m256i vectorcall get_full_lines(const unsigned long long disc) l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 1)); lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e792.v4, _mm256_srlv_epi64(lr79, shiftlr[2].v4))); l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 2)); l79 = _mm_shuffle_epi8(_mm256_extracti128_si256(lr79, 1), mbswapll); l8 = _mm_and_si128(l8, _mm_alignr_epi8(l8, l8, 4)); l79 = _mm_and_si128(l79, _mm256_castsi256_si128(lr79)); -<<<<<<< HEAD -<<<<<<< HEAD #endif l81 = _mm_unpacklo_epi64(l81, l8); return _mm256_insertf128_si256(_mm256_castsi128_si256(l81), l79, 1); -<<<<<<< HEAD } #elif defined(__ARM_NEON) -<<<<<<< HEAD void get_full_lines(const unsigned long long disc, unsigned long long full[4]) { @@ -1631,1062 +821,6 @@ void get_full_lines(const unsigned long long disc, unsigned long long full[4]) #endif #endif // hasSSE2/__ARM_NEON -#ifdef __AVX2__ -/** - * @brief AVX2 optimized get_stability - * - * @param P bitboard with player's discs. - * @param O bitboard with opponent's discs. - * @return the number of stable discs. - */ - -// compute the other stable discs (ie discs touching another stable disc in each flipping direction). -static int vectorcall get_spreaded_stability(unsigned long long stable, unsigned long long P_central, __m256i v4_full) -{ - __m128i v2_stable, v2_old_stable, v2_P_central; - __m256i v4_stable; - const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1); -======= - l81 = _mm_cvtsi64_si128(disc); v4_disc = _mm256_castsi128_si256(_mm_shuffle_epi8(l81, mcpyswap)); - l81 = _mm_cmpeq_epi8(kff, l81); lr79 = _mm256_permute4x64_epi64(v4_disc, 0x50); // disc, disc, rdisc, rdisc - lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e790.v4, _mm256_srlv_epi64(lr79, shiftlr[0].v4))); - l8 = disc; lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e791.v4, _mm256_srlv_epi64(lr79, shiftlr[1].v4))); - l8 &= (l8 >> 8) | (l8 << 56); lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e792.v4, _mm256_srlv_epi64(lr79, shiftlr[2].v4))); - l8 &= (l8 >> 16) | (l8 << 48); l79 = _mm_shuffle_epi8(_mm256_extracti128_si256(lr79, 1), mbswapll); - l8 &= (l8 >> 32) | (l8 << 32); l79 = _mm_and_si128(l79, _mm256_castsi256_si128(lr79)); - -======= ->>>>>>> dd57cbd (add hash_prefetch; revise AVX flip & full_lines) -#endif -======= - #endif ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) - l81 = _mm_unpacklo_epi64(l81, l8); - _mm256_storeu_si256((__m256i *) full, _mm256_insertf128_si256(_mm256_castsi128_si256(l81), l79, 1)); - l81 = _mm_and_si128(l81, l79); - _mm_storel_epi64((__m128i *) &full[4], _mm_and_si128(l81, _mm_shuffle_epi32(l81, 0x4e))); -} - -int get_stability_fulls(const unsigned long long P, const unsigned long long O, unsigned long long full[5]) -{ - unsigned long long stable, P_central; - __m128i v2_stable, v2_old_stable, v2_P_central; - __m256i v4_stable, v4_full; - const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1); - -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - // add full lines - v2_stable = _mm_and_si128(l81, l79); - stable |= _mm_cvtsi128_si64(_mm_and_si128(v2_stable, _mm_unpackhi_epi64(v2_stable, v2_stable))) & P_central; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - // compute the exact stable edges (from precomputed tables) and add full lines -<<<<<<< HEAD - stable = get_stable_edge_sse(P, O) | (get_all_full_lines(disc, &full) & P_central); ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) -======= - stable = get_stable_edge_sse(P, O) | (get_all_full_lines(P | O, &full) & P_central); ->>>>>>> 6c3ed52 (Dogaishi hash reduction by Matsuo & Narazaki; edge-precise get_full_line) -======= - // compute the exact stable edges (from precomputed tables) -======= ->>>>>>> 8566ed0 (vector call version of board_next & get_moves) - get_all_full_lines(P | O, full); - - // compute the exact stable edges (from precomputed tables) - stable = get_stable_edge(P, O); - - // add full lines - P_central = (P & 0x007e7e7e7e7e7e00); -<<<<<<< HEAD - stable |= (allfull & P_central); ->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction) -======= - stable |= (full[4] & P_central); ->>>>>>> 4303b09 (Returns all full lines in full[4]) - - if (stable == 0) - return 0; - -<<<<<<< HEAD -======= - // now compute the other stable discs (ie discs touching another stable disc in each flipping direction). -<<<<<<< HEAD - v4_full = _mm256_insertf128_si256(_mm256_castsi128_si256(l81), l79, 1); ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 9e2bbc5 (split get_all_full_lines from get_stability) - v2_stable = _mm_cvtsi64_si128(stable); - v2_P_central = _mm_cvtsi64_si128(P_central); - v4_full = _mm256_loadu_si256((__m256i *) full); - do { - v2_old_stable = v2_stable; - v4_stable = _mm256_broadcastq_epi64(v2_stable); - v4_stable = _mm256_or_si256(_mm256_or_si256(_mm256_srlv_epi64(v4_stable, shift1897), _mm256_sllv_epi64(v4_stable, shift1897)), v4_full); - v2_stable = _mm_and_si128(_mm256_castsi256_si128(v4_stable), _mm256_extracti128_si256(v4_stable, 1)); - v2_stable = _mm_and_si128(v2_stable, _mm_unpackhi_epi64(v2_stable, v2_stable)); - v2_stable = _mm_or_si128(v2_old_stable, _mm_and_si128(v2_stable, v2_P_central)); - } while (!_mm_testc_si128(v2_old_stable, v2_stable)); - - return bit_count(_mm_cvtsi128_si64(v2_stable)); -======= ->>>>>>> 2969de2 (Refactor get_full_lines; fix get_stability MMX) -} -<<<<<<< HEAD -#elif defined(hasSSE2) && !defined(HAS_CPU_64) -// 32bit SSE optimized get_spreaded_stability -int get_spreaded_stability(unsigned long long stable, unsigned long long P_central, unsigned long long full[4]) -{ - __m128i v_stable, stable_vh, stable_d79, old_stable; - - if (stable == 0) // (2%) - return 0; - - v_stable = _mm_cvtsi64_si128(stable); - do { - old_stable = v_stable; - stable_vh = _mm_loadu_si128((__m128i *) &full[0]); - stable_vh = _mm_or_si128(stable_vh, _mm_unpacklo_epi64(_mm_srli_epi64(v_stable, 1), _mm_srli_epi64(v_stable, 8))); - stable_vh = _mm_or_si128(stable_vh, _mm_unpacklo_epi64(_mm_slli_epi64(v_stable, 1), _mm_slli_epi64(v_stable, 8))); - stable_d79 = _mm_loadu_si128((__m128i *) &full[2]); - stable_d79 = _mm_or_si128(stable_d79, _mm_unpacklo_epi64(_mm_srli_epi64(v_stable, 9), _mm_srli_epi64(v_stable, 7))); - stable_d79 = _mm_or_si128(stable_d79, _mm_unpacklo_epi64(_mm_slli_epi64(v_stable, 9), _mm_slli_epi64(v_stable, 7))); - v_stable = _mm_and_si128(stable_vh, stable_d79); - v_stable = _mm_and_si128(v_stable, _mm_unpackhi_epi64(v_stable, v_stable)); - v_stable = _mm_or_si128(old_stable, _mm_and_si128(v_stable, _mm_loadl_epi64((__m128i *) &P_central))); - } while (_mm_movemask_epi8(_mm_cmpeq_epi8(v_stable, old_stable)) != 0xffff); // (44%) - - return bit_count_si64(v_stable); -} -#endif - -#ifdef __AVX2__ -// returns stability count only -int get_stability(const unsigned long long P, const unsigned long long O) -{ - unsigned long long stable = get_stable_edge(P, O); // compute the exact stable edges - unsigned long long P_central = P & 0x007e7e7e7e7e7e00; - - __m256i v4_full = get_full_lines(P | O); // add full lines - __m128i v2_full = _mm_and_si128(_mm256_castsi256_si128(v4_full), _mm256_extracti128_si256(v4_full, 1)); - stable |= (P_central & _mm_cvtsi128_si64(_mm_and_si128(v2_full, _mm_unpackhi_epi64(v2_full, v2_full)))); - - return get_spreaded_stability(stable, P_central, v4_full); // compute the other stable discs -} - -// returns all full in full[4] in addition to stability count -int get_stability_fulls(const unsigned long long P, const unsigned long long O, unsigned long long full[5]) -{ - unsigned long long stable = get_stable_edge(P, O); // compute the exact stable edges - unsigned long long P_central = P & 0x007e7e7e7e7e7e00; - - __m256i v4_full = get_full_lines(P | O); // add full lines - __m128i v2_full = _mm_and_si128(_mm256_castsi256_si128(v4_full), _mm256_extracti128_si256(v4_full, 1)); - // _mm256_storeu_si256((__m256i *) full, v4_full); - full[4] = _mm_cvtsi128_si64(_mm_and_si128(v2_full, _mm_unpackhi_epi64(v2_full, v2_full))); - stable |= (P_central & full[4]); - - return get_spreaded_stability(stable, P_central, v4_full); // compute the other stable discs -} - -// returns all full lines only -unsigned long long get_all_full_lines(const unsigned long long disc) -{ - __m256i v4_full = get_full_lines(disc); - __m128i v2_full = _mm_and_si128(_mm256_castsi256_si128(v4_full), _mm256_extracti128_si256(v4_full, 1)); - return _mm_cvtsi128_si64(_mm_and_si128(v2_full, _mm_unpackhi_epi64(v2_full, v2_full))); -} - -/** - * @brief AVX2 optimized get_moves + get_potential_moves. - * - * Get the bitboard of empty squares in contact of a player square, as well as real mobility. - * - * @param PP broadcasted bitboard with player's discs. - * @param OO broadcasted bitboard with opponent's discs. - * @return potential moves in a higner 64-bit, real moves in a lower 64-bit. - */ -__m128i vectorcall get_moves_and_potential(__m256i PP, __m256i OO) -{ - __m256i MM, potmob, flip_l, flip_r, pre_l, pre_r, shift2; - const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1); - __m256i mOO = _mm256_and_si256(OO, _mm256_set_epi64x(0x007E7E7E7E7E7E00, 0x007E7E7E7E7E7E00, 0x00FFFFFFFFFFFF00, 0x7E7E7E7E7E7E7E7E)); - __m128i occupied = _mm_or_si128(_mm256_castsi256_si128(PP), _mm256_castsi256_si128(OO)); - - flip_l = _mm256_and_si256(mOO, _mm256_sllv_epi64(PP, shift1897)); - flip_r = _mm256_and_si256(mOO, _mm256_srlv_epi64(PP, shift1897)); - flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(mOO, _mm256_sllv_epi64(flip_l, shift1897))); - flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(mOO, _mm256_srlv_epi64(flip_r, shift1897))); - pre_l = _mm256_sllv_epi64(mOO, shift1897); pre_r = _mm256_srlv_epi64(mOO, shift1897); - potmob = _mm256_or_si256(pre_l, pre_r); - pre_l = _mm256_and_si256(mOO, pre_l); pre_r = _mm256_and_si256(mOO, pre_r); - shift2 = _mm256_add_epi64(shift1897, shift1897); - flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(pre_l, _mm256_sllv_epi64(flip_l, shift2))); - flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(pre_r, _mm256_srlv_epi64(flip_r, shift2))); - flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(pre_l, _mm256_sllv_epi64(flip_l, shift2))); - flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(pre_r, _mm256_srlv_epi64(flip_r, shift2))); - MM = _mm256_or_si256(_mm256_sllv_epi64(flip_l, shift1897), _mm256_srlv_epi64(flip_r, shift1897)); - - MM = _mm256_or_si256(_mm256_unpacklo_epi64(MM, potmob), _mm256_unpackhi_epi64(MM, potmob)); - return _mm_andnot_si128(occupied, _mm_or_si128(_mm256_castsi256_si128(MM), _mm256_extracti128_si256(MM, 1))); // mask with empties -} - -#endif -======= -/** - * @file board_sse.c - * - * SSE/AVX translation of some board.c functions - * - * @date 2014 - 2020 - * @author Toshihiko Okuhara - * @version 4.4 - */ - -#include "bit.h" -#include "hash.h" -#include "board.h" - -/** - * @brief SSE2 translation of board_symetry - * - * @param board input board - * @param s symetry - * @param sym symetric output board - */ -#ifdef hasSSE2 - -void board_symetry(const Board *board, const int s, Board *sym) -{ - __m128i bb = _mm_loadu_si128((__m128i *) board); - __m128i tt; - const __m128i mask0F0F = _mm_set1_epi16(0x0F0F); - const __m128i mask00AA = _mm_set1_epi16(0x00AA); - const __m128i maskCCCC = _mm_set1_epi32(0x0000CCCC); - const __m128i mask00F0 = _mm_set1_epi64x(0x00000000F0F0F0F0); -#if defined(__SSSE3__) || defined(__AVX__) // pshufb - const __m128i mbswapll = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7); - const __m128i mbitrev = _mm_set_epi8(15, 7, 11, 3, 13, 5, 9, 1, 14, 6, 10, 2, 12, 4, 8, 0); - - if (s & 1) { // horizontal_mirror (cf. http://wm.ite.pl/articles/sse-popcount.html) - bb = _mm_or_si128(_mm_shuffle_epi8(mbitrev, _mm_and_si128(_mm_srli_epi64(bb, 4), mask0F0F)), - _mm_slli_epi64(_mm_shuffle_epi8(mbitrev, _mm_and_si128(bb, mask0F0F)), 4)); - } - - if (s & 2) { // vertical_mirror - bb = _mm_shuffle_epi8(bb, mbswapll); - } - -#else - const __m128i mask5555 = _mm_set1_epi16(0x5555); - const __m128i mask3333 = _mm_set1_epi16(0x3333); - - if (s & 1) { // horizontal_mirror - bb = _mm_or_si128(_mm_and_si128(_mm_srli_epi64(bb, 1), mask5555), _mm_slli_epi64(_mm_and_si128(bb, mask5555), 1)); - bb = _mm_or_si128(_mm_and_si128(_mm_srli_epi64(bb, 2), mask3333), _mm_slli_epi64(_mm_and_si128(bb, mask3333), 2)); - bb = _mm_or_si128(_mm_and_si128(_mm_srli_epi64(bb, 4), mask0F0F), _mm_slli_epi64(_mm_and_si128(bb, mask0F0F), 4)); - } - - if (s & 2) { // vertical_mirror - bb = _mm_or_si128(_mm_srli_epi16(bb, 8), _mm_slli_epi16(bb, 8)); - bb = _mm_shufflehi_epi16(_mm_shufflelo_epi16(bb, 0x1b), 0x1b); - } -#endif - - if (s & 4) { // transpose - tt = _mm_and_si128(_mm_xor_si128(bb, _mm_srli_epi64(bb, 7)), mask00AA); - bb = _mm_xor_si128(_mm_xor_si128(bb, tt), _mm_slli_epi64(tt, 7)); - tt = _mm_and_si128(_mm_xor_si128(bb, _mm_srli_epi64(bb, 14)), maskCCCC); - bb = _mm_xor_si128(_mm_xor_si128(bb, tt), _mm_slli_epi64(tt, 14)); - tt = _mm_and_si128(_mm_xor_si128(bb, _mm_srli_epi64(bb, 28)), mask00F0); - bb = _mm_xor_si128(_mm_xor_si128(bb, tt), _mm_slli_epi64(tt, 28)); - } - -#ifdef __clang__ - sym->player = bb[0]; - sym->opponent = bb[1]; -#else // error on clang 3.8 - _mm_storeu_si128((__m128i *) sym, bb); -#endif - - board_check(sym); -} - -#if (MOVE_GENERATOR == MOVE_GENERATOR_AVX) || (MOVE_GENERATOR == MOVE_GENERATOR_SSE) -/** - * @brief Compute a board resulting of a move played on a previous board. - * - * @param board board to play the move on. - * @param x move to play. - * @param next resulting board. - * @return flipped discs. - */ -unsigned long long board_next(const Board *board, const int x, Board *next) -{ - __m128i OP = _mm_loadu_si128((__m128i *) board); - __m128i flipped = mm_Flip(OP, x); - - OP = _mm_xor_si128(OP, _mm_or_si128(flipped, _mm_loadl_epi64((__m128i *) &X_TO_BIT[x]))); - _mm_storeu_si128((__m128i *) next, _mm_shuffle_epi32(OP, 0x4e)); - - return _mm_cvtsi128_si64(flipped); -} - -/** - * @brief Compute a board resulting of an opponent move played on a previous board. - * - * Compute the board after passing and playing a move. - * - * @param board board to play the move on. - * @param x opponent move to play. - * @param next resulting board. - * @return flipped discs. - */ -unsigned long long board_pass_next(const Board *board, const int x, Board *next) -{ - __m128i PO = _mm_shuffle_epi32(_mm_loadu_si128((__m128i *) board), 0x4e); - __m128i flipped = mm_Flip(PO, x); - - PO = _mm_xor_si128(PO, _mm_or_si128(flipped, _mm_loadl_epi64((__m128i *) &X_TO_BIT[x]))); - _mm_storeu_si128((__m128i *) next, _mm_shuffle_epi32(PO, 0x4e)); - - return _mm_cvtsi128_si64(flipped); -} -#endif - -#endif // hasSSE2 - -/** - * @brief X64 optimized get_moves - * - * Diag-7 is converted to diag-9 (v.v.) using vertical mirroring - * in SSE versions. - * - * @param P bitboard with player's discs. - * @param O bitboard with opponent's discs. - * @return all legal moves in a 64-bit unsigned integer. - */ -#ifdef __AVX2__ // 4 AVX - -unsigned long long get_moves(const unsigned long long P, const unsigned long long O) -{ - __m256i PP, mOO, MM, flip_l, flip_r, pre_l, pre_r, shift2; - __m128i M; - const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1); - const __m256i mflipH = _mm256_set_epi64x(0x7e7e7e7e7e7e7e7e, 0x7e7e7e7e7e7e7e7e, -1, 0x7e7e7e7e7e7e7e7e); - - PP = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(P)); - mOO = _mm256_and_si256(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(O)), mflipH); - - flip_l = _mm256_and_si256(mOO, _mm256_sllv_epi64(PP, shift1897)); - flip_r = _mm256_and_si256(mOO, _mm256_srlv_epi64(PP, shift1897)); - flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(mOO, _mm256_sllv_epi64(flip_l, shift1897))); - flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(mOO, _mm256_srlv_epi64(flip_r, shift1897))); - pre_l = _mm256_and_si256(mOO, _mm256_sllv_epi64(mOO, shift1897)); - pre_r = _mm256_srlv_epi64(pre_l, shift1897); - shift2 = _mm256_add_epi64(shift1897, shift1897); - flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(pre_l, _mm256_sllv_epi64(flip_l, shift2))); - flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(pre_r, _mm256_srlv_epi64(flip_r, shift2))); - flip_l = _mm256_or_si256(flip_l, _mm256_and_si256(pre_l, _mm256_sllv_epi64(flip_l, shift2))); - flip_r = _mm256_or_si256(flip_r, _mm256_and_si256(pre_r, _mm256_srlv_epi64(flip_r, shift2))); - MM = _mm256_sllv_epi64(flip_l, shift1897); - MM = _mm256_or_si256(MM, _mm256_srlv_epi64(flip_r, shift1897)); - - M = _mm_or_si128(_mm256_castsi256_si128(MM), _mm256_extracti128_si256(MM, 1)); - M = _mm_or_si128(M, _mm_unpackhi_epi64(M, M)); - return _mm_cvtsi128_si64(M) & ~(P|O); // mask with empties -} - -#elif defined(__x86_64__) || defined(_M_X64) // 2 SSE, 2 CPU - -unsigned long long get_moves(const unsigned long long P, const unsigned long long O) -{ - unsigned long long moves, mO, flip1, pre1, flip8, pre8; - __m128i PP, mOO, MM, flip, pre; - - mO = O & 0x7e7e7e7e7e7e7e7eULL; - PP = _mm_set_epi64x(vertical_mirror(P), P); - mOO = _mm_set_epi64x(vertical_mirror(mO), mO); - /* shift=-9:+7 */ /* shift=+1 */ /* shift = +8 */ - flip = _mm_and_si128(mOO, _mm_slli_epi64(PP, 7)); flip1 = mO & (P << 1); flip8 = O & (P << 8); - flip = _mm_or_si128(flip, _mm_and_si128(mOO, _mm_slli_epi64(flip, 7))); flip1 |= mO & (flip1 << 1); flip8 |= O & (flip8 << 8); - pre = _mm_and_si128(mOO, _mm_slli_epi64(mOO, 7)); pre1 = mO & (mO << 1); pre8 = O & (O << 8); - flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 14))); flip1 |= pre1 & (flip1 << 2); flip8 |= pre8 & (flip8 << 16); - flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 14))); flip1 |= pre1 & (flip1 << 2); flip8 |= pre8 & (flip8 << 16); - MM = _mm_slli_epi64(flip, 7); moves = flip1 << 1; moves |= flip8 << 8; - /* shift=-7:+9 */ /* shift=-1 */ /* shift = -8 */ - flip = _mm_and_si128(mOO, _mm_slli_epi64(PP, 9)); flip1 = mO & (P >> 1); flip8 = O & (P >> 8); - flip = _mm_or_si128(flip, _mm_and_si128(mOO, _mm_slli_epi64(flip, 9))); flip1 |= mO & (flip1 >> 1); flip8 |= O & (flip8 >> 8); - pre = _mm_and_si128(mOO, _mm_slli_epi64(mOO, 9)); pre1 >>= 1; pre8 >>= 8; - flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 18))); flip1 |= pre1 & (flip1 >> 2); flip8 |= pre8 & (flip8 >> 16); - flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 18))); flip1 |= pre1 & (flip1 >> 2); flip8 |= pre8 & (flip8 >> 16); - MM = _mm_or_si128(MM, _mm_slli_epi64(flip, 9)); moves |= flip1 >> 1; moves |= flip8 >> 8; - - moves |= _mm_cvtsi128_si64(MM) | vertical_mirror(_mm_cvtsi128_si64(_mm_unpackhi_epi64(MM, MM))); - return moves & ~(P|O); // mask with empties -} - -#elif 0 // 4 CPU - -unsigned long long get_moves(const unsigned long long P, const unsigned long long O) -{ - unsigned long long moves, mO; - unsigned long long flip1, flip7, flip9, flip8, pre1, pre7, pre9, pre8; - - mO = O & 0x7e7e7e7e7e7e7e7eULL; - flip1 = mO & (P << 1); flip7 = mO & (P << 7); flip9 = mO & (P << 9); flip8 = O & (P << 8); - flip1 |= mO & (flip1 << 1); flip7 |= mO & (flip7 << 7); flip9 |= mO & (flip9 << 9); flip8 |= O & (flip8 << 8); - pre1 = mO & (mO << 1); pre7 = mO & (mO << 7); pre9 = mO & (mO << 9); pre8 = O & (O << 8); - flip1 |= pre1 & (flip1 << 2); flip7 |= pre7 & (flip7 << 14); flip9 |= pre9 & (flip9 << 18); flip8 |= pre8 & (flip8 << 16); - flip1 |= pre1 & (flip1 << 2); flip7 |= pre7 & (flip7 << 14); flip9 |= pre9 & (flip9 << 18); flip8 |= pre8 & (flip8 << 16); - moves = flip1 << 1; moves |= flip7 << 7; moves |= flip9 << 9; moves |= flip8 << 8; - flip1 = mO & (P >> 1); flip7 = mO & (P >> 7); flip9 = mO & (P >> 9); flip8 = O & (P >> 8); - flip1 |= mO & (flip1 >> 1); flip7 |= mO & (flip7 >> 7); flip9 |= mO & (flip9 >> 9); flip8 |= O & (flip8 >> 8); - pre1 >>= 1; pre7 >>= 7; pre9 >>= 9; pre8 >>= 8; - flip1 |= pre1 & (flip1 >> 2); flip7 |= pre7 & (flip7 >> 14); flip9 |= pre9 & (flip9 >> 18); flip8 |= pre8 & (flip8 >> 16); - flip1 |= pre1 & (flip1 >> 2); flip7 |= pre7 & (flip7 >> 14); flip9 |= pre9 & (flip9 >> 18); flip8 |= pre8 & (flip8 >> 16); - moves |= flip1 >> 1; moves |= flip7 >> 7; moves |= flip9 >> 9; moves |= flip8 >> 8; - - return moves & ~(P|O); // mask with empties -} - -#else // __x86_64__ -/** - * @brief SSE optimized get_moves for x86 (3 SSE, 1 CPU) - * - */ -#if defined(hasSSE2) || defined(USE_MSVC_X86) - -unsigned long long get_moves_sse(unsigned long long P, unsigned long long O) -{ - unsigned int mO, movesL, movesH, flip1, pre1; - __m128i OP, rOP, PP, OO, MM, flip, pre; - const __m128i mask7e = _mm_set1_epi8(0x7e); - - // vertical_mirror in PP[1], OO[1] - OP = _mm_unpacklo_epi64(_mm_cvtsi64_si128(P), _mm_cvtsi64_si128(O)); mO = (unsigned int) O & 0x7e7e7e7eU; - rOP = _mm_shufflelo_epi16(OP, 0x1B); flip1 = mO & ((unsigned int) P << 1); - rOP = _mm_shufflehi_epi16(rOP, 0x1B); flip1 |= mO & (flip1 << 1); - pre1 = mO & (mO << 1); - rOP = _mm_or_si128(_mm_srli_epi16(rOP, 8), _mm_slli_epi16(rOP, 8)); - flip1 |= pre1 & (flip1 << 2); - PP = _mm_unpacklo_epi64(OP, rOP); flip1 |= pre1 & (flip1 << 2); - OO = _mm_unpackhi_epi64(OP, rOP); movesL = flip1 << 1; - - flip = _mm_and_si128(OO, _mm_slli_epi64(PP, 8)); flip1 = mO & ((unsigned int) P >> 1); - flip = _mm_or_si128(flip, _mm_and_si128(OO, _mm_slli_epi64(flip, 8))); flip1 |= mO & (flip1 >> 1); - pre = _mm_and_si128(OO, _mm_slli_epi64(OO, 8)); pre1 >>= 1; - flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 16))); flip1 |= pre1 & (flip1 >> 2); - flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 16))); flip1 |= pre1 & (flip1 >> 2); - MM = _mm_slli_epi64(flip, 8); movesL |= flip1 >> 1; - - OO = _mm_and_si128(OO, mask7e); mO = (unsigned int) (O >> 32) & 0x7e7e7e7eU; - flip = _mm_and_si128(OO, _mm_slli_epi64(PP, 7)); flip1 = mO & ((unsigned int) (P >> 32) << 1); - flip = _mm_or_si128(flip, _mm_and_si128(OO, _mm_slli_epi64(flip, 7))); flip1 |= mO & (flip1 << 1); - pre = _mm_and_si128(OO, _mm_slli_epi64(OO, 7)); pre1 = mO & (mO << 1); - flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 14))); flip1 |= pre1 & (flip1 << 2); - flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 14))); flip1 |= pre1 & (flip1 << 2); - MM = _mm_or_si128(MM, _mm_slli_epi64(flip, 7)); movesH = flip1 << 1; - - flip = _mm_and_si128(OO, _mm_slli_epi64(PP, 9)); flip1 = mO & ((unsigned int) (P >> 32) >> 1); - flip = _mm_or_si128(flip, _mm_and_si128(OO, _mm_slli_epi64(flip, 9))); flip1 |= mO & (flip1 >> 1); - pre = _mm_and_si128(OO, _mm_slli_epi64(OO, 9)); pre1 >>= 1; - flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 18))); flip1 |= pre1 & (flip1 >> 2); - flip = _mm_or_si128(flip, _mm_and_si128(pre, _mm_slli_epi64(flip, 18))); flip1 |= pre1 & (flip1 >> 2); - MM = _mm_or_si128(MM, _mm_slli_epi64(flip, 9)); movesH |= flip1 >> 1; - - movesL |= _mm_cvtsi128_si32(MM); MM = _mm_srli_si128(MM, 4); - movesH |= _mm_cvtsi128_si32(MM); MM = _mm_srli_si128(MM, 4); - movesH |= bswap_int(_mm_cvtsi128_si32(MM)); - movesL |= bswap_int(_mm_cvtsi128_si32(_mm_srli_si128(MM, 4))); - return (movesL | ((unsigned long long) movesH << 32)) & ~(P|O); // mask with empties -} - -#else // non-VEX asm - -unsigned long long get_moves_sse(unsigned long long P, unsigned long long O) -{ - unsigned long long moves; - static const V2DI mask7e = {{ 0x7e7e7e7e7e7e7e7eULL, 0x7e7e7e7e7e7e7e7eULL }}; - - __asm__ ( - "movl %1, %%ebx\n\t" - "movl %3, %%edi\n\t" - "andl $0x7e7e7e7e, %%edi\n\t" - /* shift=-1 */ /* vertical mirror in PP[1], OO[1] */ - "movl %%ebx, %%eax\n\t" "movd %1, %%xmm4\n\t" // (movd for store-forwarding) - "shrl $1, %%eax\n\t" "movd %2, %%xmm0\n\t" - "andl %%edi, %%eax\n\t" "movd %3, %%xmm5\n\t" - "movl %%eax, %%edx\n\t" "movd %4, %%xmm1\n\t" - "shrl $1, %%eax\n\t" "punpckldq %%xmm0, %%xmm4\n\t" // P - "movl %%edi, %%ecx\n\t" "punpckldq %%xmm1, %%xmm5\n\t" // O - "andl %%edi, %%eax\n\t" "punpcklqdq %%xmm5, %%xmm4\n\t" // OP - "shrl $1, %%ecx\n\t" "pshuflw $0x1b, %%xmm4, %%xmm0\n\t" - "orl %%edx, %%eax\n\t" "pshufhw $0x1b, %%xmm0, %%xmm0\n\t" - "andl %%edi, %%ecx\n\t" "movdqa %%xmm0, %%xmm1\n\t" - "movl %%eax, %%edx\n\t" "psllw $8, %%xmm0\n\t" - "shrl $2, %%eax\n\t" "psrlw $8, %%xmm1\n\t" - "andl %%ecx, %%eax\n\t" "por %%xmm1, %%xmm0\n\t" // rOP - "orl %%eax, %%edx\n\t" - "shrl $2, %%eax\n\t" "movdqa %%xmm4, %%xmm5\n\t" - "andl %%ecx, %%eax\n\t" "punpcklqdq %%xmm0, %%xmm4\n\t" // PP - "orl %%edx, %%eax\n\t" "punpckhqdq %%xmm0, %%xmm5\n\t" // OO - "shrl $1, %%eax\n\t" - /* shift=+1 */ /* shift=-8:+8 */ - "movdqa %%xmm4, %%xmm0\n\t" - "addl %%ebx, %%ebx\n\t" "psllq $8, %%xmm0\n\t" - "andl %%edi, %%ebx\n\t" "pand %%xmm5, %%xmm0\n\t" // 0 m7&o6 m6&o5 .. m1&o0 - "movl %%ebx, %%edx\n\t" "movdqa %%xmm0, %%xmm1\n\t" - "addl %%ebx, %%ebx\n\t" "psllq $8, %%xmm0\n\t" - "movdqa %%xmm5, %%xmm3\n\t" - "andl %%edi, %%ebx\n\t" "pand %%xmm5, %%xmm0\n\t" // 0 0 m7&o6&o5 .. m2&o1&o0 - "psllq $8, %%xmm3\n\t" - "orl %%ebx, %%edx\n\t" "por %%xmm1, %%xmm0\n\t" // 0 m7&o6 (m6&o5)|(m7&o6&o5) .. (m1&o0) - "addl %%ecx, %%ecx\n\t" "pand %%xmm5, %%xmm3\n\t" // 0 o7&o6 o6&o5 o5&o4 o4&o3 .. - "movdqa %%xmm0, %%xmm2\n\t" - "leal (,%%edx,4), %%ebx\n\t" "psllq $16, %%xmm0\n\t" - "andl %%ecx, %%ebx\n\t" "pand %%xmm3, %%xmm0\n\t" // 0 0 0 m7&o6&o5&o4 (m6&o5&o4&o3)|(m7&o6&o5&o4&o3) .. - "orl %%ebx, %%edx\n\t" "por %%xmm0, %%xmm2\n\t" - "shll $2, %%ebx\n\t" "psllq $16, %%xmm0\n\t" - "andl %%ecx, %%ebx\n\t" "pand %%xmm3, %%xmm0\n\t" // 0 0 0 0 0 m7&o6&..&o2 (m6&o5&..&o1)|(m7&o6&..&o1) .. - "orl %%edx, %%ebx\n\t" "por %%xmm0, %%xmm2\n\t" - "addl %%ebx, %%ebx\n\t" "psllq $8, %%xmm2\n\t" - "orl %%eax, %%ebx\n\t" - - "movl %2, %%esi\n\t" - "movl %4, %%edi\n\t" - /* shift=-1 */ /* shift=-9:+7 */ - "andl $0x7e7e7e7e,%%edi\n\t" "pand %5, %%xmm5\n\t" - "movl %%esi, %%eax\n\t" "movdqa %%xmm4, %%xmm0\n\t" - "shrl $1, %%eax\n\t" "psllq $7, %%xmm0\n\t" - "andl %%edi, %%eax\n\t" "pand %%xmm5, %%xmm0\n\t" - "movl %%eax, %%edx\n\t" "movdqa %%xmm0, %%xmm1\n\t" - "shrl $1, %%eax\n\t" "psllq $7, %%xmm0\n\t" - "andl %%edi, %%eax\n\t" "pand %%xmm5, %%xmm0\n\t" - "movl %%edi, %%ecx\n\t" "movdqa %%xmm5, %%xmm3\n\t" - "orl %%edx, %%eax\n\t" "por %%xmm1, %%xmm0\n\t" - "shrl $1, %%ecx\n\t" "psllq $7, %%xmm3\n\t" - "movl %%eax, %%edx\n\t" "movdqa %%xmm0, %%xmm1\n\t" - "andl %%edi, %%ecx\n\t" "pand %%xmm5, %%xmm3\n\t" - "shrl $2, %%eax\n\t" "psllq $14, %%xmm0\n\t" - "andl %%ecx, %%eax\n\t" "pand %%xmm3, %%xmm0\n\t" - "orl %%eax, %%edx\n\t" "por %%xmm0, %%xmm1\n\t" - "shrl $2, %%eax\n\t" "psllq $14, %%xmm0\n\t" - "andl %%ecx, %%eax\n\t" "pand %%xmm3, %%xmm0\n\t" - "orl %%edx, %%eax\n\t" "por %%xmm1, %%xmm0\n\t" - "shrl $1, %%eax\n\t" "psllq $7, %%xmm0\n\t" - "por %%xmm0, %%xmm2\n\t" - /* shift=+1 */ /* shift=-7:+9 */ - "movdqa %%xmm4, %%xmm0\n\t" - "addl %%esi, %%esi\n\t" "psllq $9, %%xmm0\n\t" - "andl %%edi, %%esi\n\t" "pand %%xmm5, %%xmm0\n\t" - "movl %%esi, %%edx\n\t" "movdqa %%xmm0, %%xmm1\n\t" - "addl %%esi, %%esi\n\t" "psllq $9, %%xmm0\n\t" - "andl %%edi, %%esi\n\t" "pand %%xmm5, %%xmm0\n\t" - "movdqa %%xmm5, %%xmm3\n\t" - "orl %%esi, %%edx\n\t" "por %%xmm1, %%xmm0\n\t" - "psllq $9, %%xmm3\n\t" - "movdqa %%xmm0, %%xmm1\n\t" - "addl %%ecx, %%ecx\n\t" "pand %%xmm5, %%xmm3\n\t" - "leal (,%%edx,4), %%esi\n\t" "psllq $18, %%xmm0\n\t" - "andl %%ecx, %%esi\n\t" "pand %%xmm3, %%xmm0\n\t" - "orl %%esi, %%edx\n\t" "por %%xmm0, %%xmm1\n\t" - "shll $2, %%esi\n\t" "psllq $18, %%xmm0\n\t" - "andl %%ecx, %%esi\n\t" "pand %%xmm3, %%xmm0\n\t" - "orl %%edx, %%esi\n\t" "por %%xmm1, %%xmm0\n\t" - "addl %%esi, %%esi\n\t" "psllq $9, %%xmm0\n\t" - "orl %%eax, %%esi\n\t" "por %%xmm0, %%xmm2\n\t" - - "movl %1, %%eax\n\t" "movhlps %%xmm2, %%xmm3\n\t" - "movl %2, %%edx\n\t" "movd %%xmm3, %%edi\n\t" "movd %%xmm2, %%ecx\n\t" - "psrlq $32, %%xmm3\n\t" "psrlq $32, %%xmm2\n\t" - "bswapl %%edi\n\t" "orl %%ecx, %%ebx\n\t" - "orl %3, %%eax\n\t" "orl %%edi, %%esi\n\t" - "orl %4, %%edx\n\t" "movd %%xmm3, %%edi\n\t" "movd %%xmm2, %%ecx\n\t" - "notl %%eax\n\t" "bswapl %%edi\n\t" - "notl %%edx\n\t" "orl %%edi, %%ebx\n\t" "orl %%ecx, %%esi\n\t" - "andl %%esi, %%edx\n\t" - "andl %%ebx, %%eax" - : "=&A" (moves) - : "m" (P), "m" (((unsigned int *)&P)[1]), "m" (O), "m" (((unsigned int *)&O)[1]), "m" (mask7e) - : "ebx", "ecx", "esi", "edi" ); - - return moves; -} - -#endif // hasSSE2 -#endif // x86 - -#if defined(__x86_64__) || defined(_M_X64) -/** - * @brief SSE optimized get_stable_edge - * - * @param P bitboard with player's discs. - * @param O bitboard with opponent's discs. - * @return a bitboard with (some of) player's stable discs. - * - */ -static unsigned long long get_stable_edge(const unsigned long long P, const unsigned long long O) -{ - // compute the exact stable edges (from precomputed tables) - unsigned int a1a8po, h1h8po; - unsigned long long stable_edge; - - __m128i P0 = _mm_cvtsi64_si128(P); - __m128i O0 = _mm_cvtsi64_si128(O); - __m128i PO = _mm_unpacklo_epi8(O0, P0); - stable_edge = edge_stability[_mm_extract_epi16(PO, 0)] - | ((unsigned long long) edge_stability[_mm_extract_epi16(PO, 7)] << 56); - - PO = _mm_unpacklo_epi64(O0, P0); - a1a8po = _mm_movemask_epi8(_mm_slli_epi64(PO, 7)); - h1h8po = _mm_movemask_epi8(PO); -#if 0 // def __BMI2__ // pdep is slow on AMD - stable_edge |= _pdep_u64(edge_stability[a1a8po], 0x0101010101010101ULL) - | _pdep_u64(edge_stability[h1h8po], 0x8080808080808080ULL); -#else - stable_edge |= A1_A8[edge_stability[a1a8po]] | (A1_A8[edge_stability[h1h8po]] << 7); -#endif - return stable_edge; -} - -/** - * @brief X64 optimized get_stability - * - * SSE pcmpeqb for horizontal get_full_lines. - * CPU rotate for vertical get_full_lines. - * Diag-7 is converted to diag-9 using vertical mirroring. - * - * @param P bitboard with player's discs. - * @param O bitboard with opponent's discs. - * @return the number of stable discs. - */ -#ifdef __AVX2__ - -int get_stability(const unsigned long long P, const unsigned long long O) -{ - unsigned long long disc = (P | O); - unsigned long long P_central = (P & 0x007e7e7e7e7e7e00ULL); - unsigned long long l8, stable; - __m128i l81, l79, v2_stable, v2_old_stable, v2_P_central; - __m256i lr79, v4_disc, v4_stable, v4_full; - const __m128i kff = _mm_set1_epi64x(0xffffffffffffffff); - const __m256i shift1897 = _mm256_set_epi64x(7, 9, 8, 1); -#if 0 // PCMPEQQ - static const V4DI m791 = {{ 0x0402010000804020, 0x2040800000010204, 0x0804020180402010, 0x1020408001020408 }}; // V8SI - static const V4DI m792 = {{ 0x0000008040201008, 0x0000000102040810, 0x1008040201000000, 0x0810204080000000 }}; - static const V4DI m793 = {{ 0x0000804020100804, 0x0000010204081020, 0x2010080402010000, 0x0408102040800000 }}; - static const V4DI m794 = {{ 0x0080402010080402, 0x0001020408102040, 0x4020100804020100, 0x0204081020408000 }}; - static const V2DI m795 = {{ 0x8040201008040201, 0x0102040810204080 }}; - - l81 = _mm_cvtsi64_si128(disc); v4_disc = _mm256_broadcastq_epi64(l81); - l81 = _mm_cmpeq_epi8(kff, l81); lr79 = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4); - lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m792.v4), m792.v4), m792.v4)); - l8 = disc; lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m793.v4), m793.v4), m793.v4)); - l8 &= (l8 >> 8) | (l8 << 56); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi64(_mm256_and_si256(v4_disc, m794.v4), m794.v4), m794.v4)); - l8 &= (l8 >> 16) | (l8 << 48); l79 = _mm_and_si128(_mm_cmpeq_epi64(_mm_and_si128(_mm256_castsi256_si128(v4_disc), m795.v2), m795.v2), m795.v2); - l8 &= (l8 >> 32) | (l8 << 32); l79 = _mm_or_si128(l79, _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79))); - -#elif 0 // PCMPEQD - __m256i lm79; - static const V4DI m790 = {{ 0x80c0e0f0783c1e0f, 0x0103070f1e3c78f0, 0x70381c0e07030100, 0x0e1c3870e0c08000 }}; - static const V4DI m791 = {{ 0x0402010000804020, 0x2040800000010204, 0x0804020180402010, 0x1020408001020408 }}; // V8SI - static const V4DI m792 = {{ 0x2010884440201088, 0x0408112202040811, 0x2211080411080402, 0x4488102088102040 }}; // V8SI - static const V4DI m793 = {{ 0x8844221110884422, 0x1122448808112244, 0x0000000044221108, 0x0000000022448810 }}; // V8SI - - l81 = _mm_cvtsi64_si128(disc); v4_disc = _mm256_broadcastq_epi64(l81); - l81 = _mm_cmpeq_epi8(kff, l81); lm79 = _mm256_and_si256(v4_disc, m790.v4); - lm79 = _mm256_or_si256(lm79, _mm256_shuffle_epi32(lm79, 0xb1)); - l8 = disc; lr79 = _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(lm79, m792.v4), m792.v4), m792.v4); - l8 &= (l8 >> 8) | (l8 << 56); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(lm79, m793.v4), m793.v4), m793.v4)); - l8 &= (l8 >> 16) | (l8 << 48); lr79 = _mm256_and_si256(_mm256_or_si256(lr79, _mm256_shuffle_epi32(lr79, 0xb1)), m790.v4); - l8 &= (l8 >> 32) | (l8 << 32); lr79 = _mm256_or_si256(lr79, _mm256_and_si256(_mm256_cmpeq_epi32(_mm256_and_si256(v4_disc, m791.v4), m791.v4), m791.v4)); - l79 = _mm_or_si128(_mm256_extracti128_si256(lr79, 1), _mm256_castsi256_si128(lr79)); - -#else // Kogge-Stone - const __m128i mcpyswap = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); - const __m128i mbswapll = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7); - static const V4DI shiftlr[] = {{{ 9, 7, 7, 9 }}, {{ 18, 14, 14, 18 }}, {{ 36, 28, 28, 36 }}}; - static const V4DI e790 = {{ 0xff80808080808080, 0xff01010101010101, 0xff01010101010101, 0xff80808080808080 }}; - static const V4DI e791 = {{ 0xffffc0c0c0c0c0c0, 0xffff030303030303, 0xffff030303030303, 0xffffc0c0c0c0c0c0 }}; - static const V4DI e792 = {{ 0xfffffffff0f0f0f0, 0xffffffff0f0f0f0f, 0xffffffff0f0f0f0f, 0xfffffffff0f0f0f0 }}; - - l81 = _mm_cvtsi64_si128(disc); v4_disc = _mm256_castsi128_si256(_mm_shuffle_epi8(l81, mcpyswap)); - l81 = _mm_cmpeq_epi8(kff, l81); lr79 = _mm256_permute4x64_epi64(v4_disc, 0x50); // disc, disc, rdisc, rdisc - lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e790.v4, _mm256_srlv_epi64(lr79, shiftlr[0].v4))); - l8 = disc; lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e791.v4, _mm256_srlv_epi64(lr79, shiftlr[1].v4))); - l8 &= (l8 >> 8) | (l8 << 56); lr79 = _mm256_and_si256(lr79, _mm256_or_si256(e792.v4, _mm256_srlv_epi64(lr79, shiftlr[2].v4))); - l8 &= (l8 >> 16) | (l8 << 48); l79 = _mm_shuffle_epi8(_mm256_extracti128_si256(lr79, 1), mbswapll); - l8 &= (l8 >> 32) | (l8 << 32); l79 = _mm_and_si128(l79, _mm256_castsi256_si128(lr79)); - -#endif - l81 = _mm_insert_epi64(l81, l8, 1); - - // compute the exact stable edges (from precomputed tables) - stable = get_stable_edge(P, O); - - // add full lines - v2_stable = _mm_and_si128(l81, l79); - stable |= _mm_cvtsi128_si64(_mm_and_si128(v2_stable, _mm_unpackhi_epi64(v2_stable, v2_stable))) & P_central; - - if (stable == 0) - return 0; - - // now compute the other stable discs (ie discs touching another stable disc in each flipping direction). - v4_full = _mm256_insertf128_si256(_mm256_castsi128_si256(l81), l79, 1); - v2_stable = _mm_cvtsi64_si128(stable); - v2_P_central = _mm_cvtsi64_si128(P_central); - do { - v2_old_stable = v2_stable; - v4_stable = _mm256_broadcastq_epi64(v2_stable); - v4_stable = _mm256_or_si256(_mm256_or_si256(_mm256_srlv_epi64(v4_stable, shift1897), _mm256_sllv_epi64(v4_stable, shift1897)), v4_full); - v2_stable = _mm_and_si128(_mm256_castsi256_si128(v4_stable), _mm256_extracti128_si256(v4_stable, 1)); - v2_stable = _mm_and_si128(v2_stable, _mm_unpackhi_epi64(v2_stable, v2_stable)); - v2_stable = _mm_or_si128(v2_old_stable, _mm_and_si128(v2_stable, v2_P_central)); - } while (!_mm_testc_si128(v2_old_stable, v2_stable)); - - return bit_count(_mm_cvtsi128_si64(v2_stable)); -} - -#else - -int get_stability(const unsigned long long P, const unsigned long long O) -{ - unsigned long long disc = (P | O); - unsigned long long P_central = (P & 0x007e7e7e7e7e7e00ULL); - unsigned long long l8, full_h, full_v, full_d7, full_d9, stable; - unsigned long long stable_h, stable_v, stable_d7, stable_d9, old_stable; -#if 1 // 1 CPU, 3 SSE - __m128i l01, l79, r79; // full lines - const __m128i kff = _mm_set1_epi64x(0xffffffffffffffff); - const __m128i edge = _mm_set1_epi64x(0xff818181818181ff); - const __m128i e791 = _mm_set1_epi64x(0xffffc0c0c0c0c0c0); - const __m128i e792 = _mm_set1_epi64x(0x030303030303ffff); - const __m128i e793 = _mm_set1_epi64x(0x0f0f0f0ff0f0f0f0); - - l01 = l79 = _mm_cvtsi64_si128(disc); r79 = _mm_cvtsi64_si128(vertical_mirror(disc)); - l01 = _mm_cmpeq_epi8(kff, l01); l79 = r79 = _mm_unpacklo_epi64(l79, r79); - full_h = _mm_cvtsi128_si64(l01); l79 = _mm_and_si128(l79, _mm_or_si128(edge, _mm_srli_epi64(l79, 9))); - r79 = _mm_and_si128(r79, _mm_or_si128(edge, _mm_slli_epi64(r79, 9))); - l8 = disc; l79 = _mm_and_si128(l79, _mm_or_si128(e791, _mm_srli_epi64(l79, 18))); - l8 &= (l8 >> 8) | (l8 << 56); r79 = _mm_and_si128(r79, _mm_or_si128(e792, _mm_slli_epi64(r79, 18))); - l8 &= (l8 >> 16) | (l8 << 48); l79 = _mm_and_si128(_mm_and_si128(l79, r79), _mm_or_si128(e793, _mm_or_si128(_mm_srli_epi64(l79, 36), _mm_slli_epi64(r79, 36)))); - l8 &= (l8 >> 32) | (l8 << 32); full_d9 = _mm_cvtsi128_si64(l79); - full_v = l8; full_d7 = vertical_mirror(_mm_cvtsi128_si64(_mm_unpackhi_epi64(l79, l79))); - -#else // 4 CPU - unsigned long long l1, l7, l9, r7, r9; // full lines - static const unsigned long long edge = 0xff818181818181ffULL; - static const unsigned long long k01 = 0x0101010101010101ULL; - static const unsigned long long e7[] = { 0xffff030303030303, 0xc0c0c0c0c0c0ffff, 0xffffffff0f0f0f0f, 0xf0f0f0f0ffffffff }; - static const unsigned long long e9[] = { 0xffffc0c0c0c0c0c0, 0x030303030303ffff, 0x0f0f0f0ff0f0f0f0 }; - - l1 = l7 = r7 = disc; - l1 &= l1 >> 1; l7 &= edge | (l7 >> 7); r7 &= edge | (r7 << 7); - l1 &= l1 >> 2; l7 &= e7[0] | (l7 >> 14); r7 &= e7[1] | (r7 << 14); - l1 &= l1 >> 4; l7 &= e7[2] | (l7 >> 28); r7 &= e7[3] | (r7 << 28); - full_h = ((l1 & k01) * 0xff); full_d7 = l7 & r7; - - l8 = l9 = r9 = disc; - l8 &= (l8 >> 8) | (l8 << 56); l9 &= edge | (l9 >> 9); r9 &= edge | (r9 << 9); - l8 &= (l8 >> 16) | (l8 << 48); l9 &= e9[0] | (l9 >> 18); r9 &= e9[1] | (r9 << 18); - l8 &= (l8 >> 32) | (l8 << 32); full_d9 = l9 & r9 & (e9[2] | (l9 >> 36) | (r9 << 36)); - full_v = l8; - -#endif - // compute the exact stable edges (from precomputed tables) - stable = get_stable_edge(P, O); - - // add full lines - stable |= (full_h & full_v & full_d7 & full_d9 & P_central); - - if (stable == 0) - return 0; - - // now compute the other stable discs (ie discs touching another stable disc in each flipping direction). - do { - old_stable = stable; - stable_h = ((stable >> 1) | (stable << 1) | full_h); - stable_v = ((stable >> 8) | (stable << 8) | full_v); - stable_d7 = ((stable >> 7) | (stable << 7) | full_d7); - stable_d9 = ((stable >> 9) | (stable << 9) | full_d9); - stable |= (stable_h & stable_v & stable_d7 & stable_d9 & P_central); - } while (stable != old_stable); - - return bit_count(stable); -} - -#endif // __AVX2__ -#endif // __x86_64__ - -/** - * @brief SSE translation of board_get_hash_code. - * - * Too many dependencies, effective only on 32bit build. - * For AMD, MMX version in board_mmx.c is faster. - * - * @param p pointer to 16 bytes to hash. - * @return the hash code of the bitboard - */ -#if (defined(USE_GAS_MMX) && !defined(__3dNOW__)) || defined(USE_MSVC_X86) // || defined(__x86_64__) - -unsigned long long board_get_hash_code_sse(const unsigned char *p) -{ - unsigned long long h; -#if defined(hasSSE2) || defined(USE_MSVC_X86) - __m128 h0, h1, h2, h3; - - h0 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[0][p[0]])), (__m64 *) &hash_rank[4][p[4]]); - h1 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[1][p[1]])), (__m64 *) &hash_rank[5][p[5]]); - h2 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[2][p[2]])), (__m64 *) &hash_rank[6][p[6]]); - h3 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[3][p[3]])), (__m64 *) &hash_rank[7][p[7]]); - h0 = _mm_xor_ps(h0, h2); h1 = _mm_xor_ps(h1, h3); - h2 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[8][p[8]])), (__m64 *) &hash_rank[10][p[10]]); - h3 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[9][p[9]])), (__m64 *) &hash_rank[11][p[11]]); - h0 = _mm_xor_ps(h0, h2); h1 = _mm_xor_ps(h1, h3); - h2 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[12][p[12]])), (__m64 *) &hash_rank[14][p[14]]); - h3 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[13][p[13]])), (__m64 *) &hash_rank[15][p[15]]); - h0 = _mm_xor_ps(h0, h2); h1 = _mm_xor_ps(h1, h3); - h0 = _mm_xor_ps(h0, h1); - h0 = _mm_xor_ps(h0, _mm_movehl_ps(h1, h0)); - h = _mm_cvtsi128_si64(_mm_castps_si128(h0)); - -#else - __asm__ volatile ( - "movq %0, %%xmm0\n\t" "movq %1, %%xmm1" - : : "m" (hash_rank[0][p[0]]), "m" (hash_rank[1][p[1]])); - __asm__ volatile ( - "movq %0, %%xmm2\n\t" "movq %1, %%xmm3" - : : "m" (hash_rank[2][p[2]]), "m" (hash_rank[3][p[3]])); - __asm__ volatile ( - "movhps %0, %%xmm0\n\t" "movhps %1, %%xmm1" - : : "m" (hash_rank[4][p[4]]), "m" (hash_rank[5][p[5]])); - __asm__ volatile ( - "movhps %0, %%xmm2\n\t" "movhps %1, %%xmm3" - : : "m" (hash_rank[6][p[6]]), "m" (hash_rank[7][p[7]])); - __asm__ volatile ( - "xorps %%xmm2, %%xmm0\n\t" "xorps %%xmm3, %%xmm1\n\t" - "movq %0, %%xmm2\n\t" "movq %1, %%xmm3" - : : "m" (hash_rank[8][p[8]]), "m" (hash_rank[9][p[9]])); - __asm__ volatile ( - "movhps %0, %%xmm2\n\t" "movhps %1, %%xmm3" - : : "m" (hash_rank[10][p[10]]), "m" (hash_rank[11][p[11]])); - __asm__ volatile ( - "xorps %%xmm2, %%xmm0\n\t" "xorps %%xmm3, %%xmm1\n\t" - "movq %0, %%xmm2\n\t" "movq %1, %%xmm3" - : : "m" (hash_rank[12][p[12]]), "m" (hash_rank[13][p[13]])); - __asm__ volatile ( - "movhps %1, %%xmm2\n\t" "movhps %2, %%xmm3\n\t" - "xorps %%xmm2, %%xmm0\n\t" "xorps %%xmm3, %%xmm1\n\t" - "xorps %%xmm1, %%xmm0\n\t" - "movhlps %%xmm0, %%xmm1\n\t" - "xorps %%xmm1, %%xmm0\n\t" - "movd %%xmm0, %%eax\n\t" - "punpckhdq %%xmm0, %%xmm0\n\t" - "movd %%xmm0, %%edx" - : "=A" (h) : "m" (hash_rank[14][p[14]]), "m" (hash_rank[15][p[15]])); -#endif - - return h; -} - -#endif // USE_GAS_MMX - -#if 0 // def __AVX2__ // experimental - too many instructions - -unsigned long long board_get_hash_code_avx2(const unsigned char *p) -{ - __m128i ix0, ix8, hh; - __m256i hhh; - static const __v16qi rank = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; - - ix0 = _mm_loadu_si128((__m128i *) p); - ix8 = _mm_unpackhi_epi8(ix0, (__m128i) rank); - ix0 = _mm_unpacklo_epi8(ix0, (__m128i) rank); - - hhh = _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_blend_epi16(_mm_setzero_si128(), ix0, 0x55), 8); - hhh ^= _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_blend_epi16(_mm_setzero_si128(), ix8, 0x55), 8); - hhh ^= _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_srli_epi32(ix0, 16), 8); - hhh ^= _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_srli_epi32(ix8, 16), 8); - - hh = _mm256_castsi256_si128(hhh) ^ _mm256_extracti128_si256(hhh, 1); - hh ^= _mm_shuffle_epi32(hh, 0x4e); - return hh[0]; -} - -#endif ->>>>>>> 1a7b0ed (flip_bmi2 added; bmi2 version of stability and corner_stability) -======= - - #elif defined(hasNeon) -======= ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) - -void get_full_lines(const unsigned long long disc, unsigned long long full[4]) -{ - unsigned long long l8; - uint8x8_t l01; - uint64x2_t l79, r79; - const uint64x2_t e790 = vdupq_n_u64(0x007f7f7f7f7f7f7f); - const uint64x2_t e791 = vdupq_n_u64(0xfefefefefefefe00); - const uint64x2_t e792 = vdupq_n_u64(0x00003f3f3f3f3f3f); - const uint64x2_t e793 = vdupq_n_u64(0x0f0f0f0ff0f0f0f0); - - l01 = vcreate_u8(disc); l79 = r79 = vreinterpretq_u64_u8(vcombine_u8(l01, vrev64_u8(l01))); - l01 = vceq_u8(l01, vdup_n_u8(0xff)); l79 = vandq_u64(l79, vornq_u64(vshrq_n_u64(l79, 9), e790)); - full[0] = vget_lane_u64(vreinterpret_u64_u8(l01), 0); - r79 = vandq_u64(r79, vornq_u64(vshlq_n_u64(r79, 9), e791)); - l8 = disc; l79 = vbicq_u64(l79, vbicq_u64(e792, vshrq_n_u64(l79, 18))); // De Morgan - l8 &= (l8 >> 8) | (l8 << 56); r79 = vbicq_u64(r79, vshlq_n_u64(vbicq_u64(e792, r79), 18)); - l8 &= (l8 >> 16) | (l8 << 48); l79 = vandq_u64(vandq_u64(l79, r79), vorrq_u64(e793, vsliq_n_u64(vshrq_n_u64(l79, 36), r79, 36))); - l8 &= (l8 >> 32) | (l8 << 32); full[2] = vgetq_lane_u64(l79, 0); - full[1] = l8; full[3] = vertical_mirror(vgetq_lane_u64(l79, 1)); -} - - #else // 1 CPU, 3 SSE - -void get_full_lines(const unsigned long long disc, unsigned long long full[4]) -{ - unsigned long long rdisc = vertical_mirror(disc); - unsigned long long l8; - __m128i l01, l79, r79; // full lines - const __m128i kff = _mm_set1_epi8(-1); - const __m128i e790 = _mm_set1_epi64x(0xff80808080808080); - const __m128i e791 = _mm_set1_epi64x(0x01010101010101ff); - const __m128i e792 = _mm_set1_epi64x(0x00003f3f3f3f3f3f); - const __m128i e793 = _mm_set1_epi64x(0x0f0f0f0ff0f0f0f0); - - l01 = l79 = _mm_cvtsi64_si128(disc); l79 = r79 = _mm_unpacklo_epi64(l79, _mm_cvtsi64_si128(rdisc)); - l01 = _mm_cmpeq_epi8(kff, l01); l79 = _mm_and_si128(l79, _mm_or_si128(e790, _mm_srli_epi64(l79, 9))); - _mm_storel_epi64((__m128i*) &full[0], l01); - r79 = _mm_and_si128(r79, _mm_or_si128(e791, _mm_slli_epi64(r79, 9))); - l8 = disc; l79 = _mm_andnot_si128(_mm_andnot_si128(_mm_srli_epi64(l79, 18), e792), l79); // De Morgan - l8 &= (l8 >> 8) | (l8 << 56); r79 = _mm_andnot_si128(_mm_slli_epi64(_mm_andnot_si128(r79, e792), 18), r79); - l8 &= (l8 >> 16) | (l8 << 48); l79 = _mm_and_si128(_mm_and_si128(l79, r79), _mm_or_si128(e793, _mm_or_si128(_mm_srli_epi64(l79, 36), _mm_slli_epi64(r79, 36)))); - l8 &= (l8 >> 32) | (l8 << 32); _mm_storel_epi64((__m128i *) &full[2], l79); - full[1] = l8; full[3] = vertical_mirror(_mm_cvtsi128_si64(_mm_unpackhi_epi64(l79, l79))); -} - -<<<<<<< HEAD -#endif -<<<<<<< HEAD -#endif // HAS_CPU_64/ANDROID -<<<<<<< HEAD - -/** - * @brief SSE translation of board_get_hash_code. - * - * Too many dependencies, effective only on 32bit build. - * For AMD, MMX version in board_mmx.c is faster. - * - * @param p pointer to 16 bytes to hash. - * @return the hash code of the bitboard - */ -#if (defined(USE_GAS_MMX) && !defined(__3dNOW__)) || defined(USE_MSVC_X86) // || defined(__x86_64__) - -unsigned long long board_get_hash_code_sse(const unsigned char *p) -{ - unsigned long long h; -#if defined(hasSSE2) || defined(USE_MSVC_X86) - __m128 h0, h1, h2, h3; - - h0 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[0][p[0]])), (__m64 *) &hash_rank[4][p[4]]); - h1 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[1][p[1]])), (__m64 *) &hash_rank[5][p[5]]); - h2 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[2][p[2]])), (__m64 *) &hash_rank[6][p[6]]); - h3 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[3][p[3]])), (__m64 *) &hash_rank[7][p[7]]); - h0 = _mm_xor_ps(h0, h2); h1 = _mm_xor_ps(h1, h3); - h2 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[8][p[8]])), (__m64 *) &hash_rank[10][p[10]]); - h3 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[9][p[9]])), (__m64 *) &hash_rank[11][p[11]]); - h0 = _mm_xor_ps(h0, h2); h1 = _mm_xor_ps(h1, h3); - h2 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[12][p[12]])), (__m64 *) &hash_rank[14][p[14]]); - h3 = _mm_loadh_pi(_mm_castsi128_ps(_mm_loadl_epi64((__m128i *) &hash_rank[13][p[13]])), (__m64 *) &hash_rank[15][p[15]]); - h0 = _mm_xor_ps(h0, h2); h1 = _mm_xor_ps(h1, h3); - h0 = _mm_xor_ps(h0, h1); - h0 = _mm_xor_ps(h0, _mm_movehl_ps(h1, h0)); - h = _mm_cvtsi128_si64(_mm_castps_si128(h0)); - -#else - __asm__ volatile ( - "movq %0, %%xmm0\n\t" "movq %1, %%xmm1" - : : "m" (hash_rank[0][p[0]]), "m" (hash_rank[1][p[1]])); - __asm__ volatile ( - "movq %0, %%xmm2\n\t" "movq %1, %%xmm3" - : : "m" (hash_rank[2][p[2]]), "m" (hash_rank[3][p[3]])); - __asm__ volatile ( - "movhps %0, %%xmm0\n\t" "movhps %1, %%xmm1" - : : "m" (hash_rank[4][p[4]]), "m" (hash_rank[5][p[5]])); - __asm__ volatile ( - "movhps %0, %%xmm2\n\t" "movhps %1, %%xmm3" - : : "m" (hash_rank[6][p[6]]), "m" (hash_rank[7][p[7]])); - __asm__ volatile ( - "xorps %%xmm2, %%xmm0\n\t" "xorps %%xmm3, %%xmm1\n\t" - "movq %0, %%xmm2\n\t" "movq %1, %%xmm3" - : : "m" (hash_rank[8][p[8]]), "m" (hash_rank[9][p[9]])); - __asm__ volatile ( - "movhps %0, %%xmm2\n\t" "movhps %1, %%xmm3" - : : "m" (hash_rank[10][p[10]]), "m" (hash_rank[11][p[11]])); - __asm__ volatile ( - "xorps %%xmm2, %%xmm0\n\t" "xorps %%xmm3, %%xmm1\n\t" - "movq %0, %%xmm2\n\t" "movq %1, %%xmm3" - : : "m" (hash_rank[12][p[12]]), "m" (hash_rank[13][p[13]])); - __asm__ volatile ( - "movhps %1, %%xmm2\n\t" "movhps %2, %%xmm3\n\t" - "xorps %%xmm2, %%xmm0\n\t" "xorps %%xmm3, %%xmm1\n\t" - "xorps %%xmm1, %%xmm0\n\t" - "movhlps %%xmm0, %%xmm1\n\t" - "xorps %%xmm1, %%xmm0\n\t" - "movd %%xmm0, %%eax\n\t" - "punpckhdq %%xmm0, %%xmm0\n\t" - "movd %%xmm0, %%edx" - : "=A" (h) : "m" (hash_rank[14][p[14]]), "m" (hash_rank[15][p[15]])); -#endif - - return h; -} - -#endif // USE_GAS_MMX - -#if 0 // def __AVX2__ // experimental - too many instructions - -unsigned long long board_get_hash_code_avx2(const unsigned char *p) -{ - __m128i ix0, ix8, hh; - __m256i hhh; - static const __v16qi rank = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; - - ix0 = _mm_loadu_si128((__m128i *) p); - ix8 = _mm_unpackhi_epi8(ix0, (__m128i) rank); - ix0 = _mm_unpacklo_epi8(ix0, (__m128i) rank); - - hhh = _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_blend_epi16(_mm_setzero_si128(), ix0, 0x55), 8); - hhh ^= _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_blend_epi16(_mm_setzero_si128(), ix8, 0x55), 8); - hhh ^= _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_srli_epi32(ix0, 16), 8); - hhh ^= _mm256_i32gather_epi64((long long *) hash_rank[0], _mm_srli_epi32(ix8, 16), 8); - - hh = _mm256_castsi256_si128(hhh) ^ _mm256_extracti128_si256(hhh, 1); - hh ^= _mm_shuffle_epi32(hh, 0x4e); - return hh[0]; -} - -#endif ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) -======= -======= - #endif -<<<<<<< HEAD ->>>>>>> 264e827 (calc solid stone only when stability cutoff tried) -#endif // hasSSE2/hasNeon -<<<<<<< HEAD ->>>>>>> 21f8809 (Share all full lines between get_stability and Dogaishi hash reduction) -======= -======= -#endif // hasSSE2/__ARM_NEON ->>>>>>> 520040b (Use DISPATCH_NEON, not hasNeon, for android arm32 build) - #ifdef __AVX2__ /** * @brief AVX2 optimized get_stability @@ -2818,4 +952,3 @@ __m128i vectorcall get_moves_and_potential(__m256i PP, __m256i OO) } #endif ->>>>>>> be2ba1c (add AVX get_potential_mobility; revise foreach_bit for CPU32/C99) diff --git a/src/book.c b/src/book.c index 343b2e4..b0784d2 100644 --- a/src/book.c +++ b/src/book.c @@ -197,15 +197,7 @@ static bool position_is_ok(const Position *position) } else { if (/*l->move < A1 ||*/ l->move > H8 || board_is_occupied(&board, l->move) -<<<<<<< HEAD -<<<<<<< HEAD || board_get_move_flip(&board, l->move, &move) == 0) { -======= - || board_get_move(&board, l->move, &move) == 0) { ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= - || board_get_move_flip(&board, l->move, &move) == 0) { ->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip) warn("link %s is wrong\n", move_to_string(l->move, WHITE, s)); position_print(position, &position->board, stdout); return false; @@ -230,15 +222,7 @@ static bool position_is_ok(const Position *position) } } else if (/*l->move < A1 ||*/ l->move > H8 || board_is_occupied(&board, l->move) -<<<<<<< HEAD -<<<<<<< HEAD - || board_get_move_flip(&board, l->move, &move) == 0) { -======= - || board_get_move(&board, l->move, &move) == 0) { ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= || board_get_move_flip(&board, l->move, &move) == 0) { ->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip) warn("leaf %s is wrong\n", move_to_string(l->move, WHITE, s)); position_print(position, &position->board, stdout); return false; @@ -718,15 +702,7 @@ static void position_search(Position *position, Book *book) if (position->n_link < n_moves || (position->n_link == 0 && n_moves == 0 && position->score.value == -SCORE_INF)) { search_set_board(search, &position->board, BLACK); -<<<<<<< HEAD -<<<<<<< HEAD - search_set_level(search, position->level, search->eval.n_empties); -======= - search_set_level(search, position->level, search->n_empties); ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= search_set_level(search, position->level, search->eval.n_empties); ->>>>>>> c8248ad (Move n_empties into Eval; tweak eval_open and eval_set) foreach_link (l, position) { movelist_exclude(&search->movelist, l->move); @@ -768,15 +744,7 @@ static void position_search(Position *position, Book *book) static void position_link(Position *position, Book *book) { int x; -<<<<<<< HEAD -<<<<<<< HEAD unsigned long long moves = board_get_moves(&position->board); -======= - unsigned long long moves = get_moves(position->board.player, position->board.opponent); ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= - unsigned long long moves = board_get_moves(&position->board); ->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip) Board next; Link link; Position *child; @@ -1075,42 +1043,17 @@ static void board_feed_hash(Board *board, const Book *book, Search *search, cons const unsigned long long hash_code = board_get_hash_code(board); MoveList movelist; Move *m; -<<<<<<< HEAD -<<<<<<< HEAD - HashStoreData hash_data; -======= - HashStoreData hash_store_data; ->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in) -======= HashStoreData hash_data; ->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame) position = book_probe(book, board); if (position) { const int n_empties = board_count_empties(&position->board); -<<<<<<< HEAD -<<<<<<< HEAD - const int score = position->score.value; - int move = NOMOVE; - - hash_data.data.wl.c.depth = LEVEL[position->level][n_empties].depth; - hash_data.data.wl.c.selectivity = LEVEL[position->level][n_empties].selectivity; - -======= - const int depth = LEVEL[position->level][n_empties].depth; - const int selectivity = LEVEL[position->level][n_empties].selectivity; - const int score = position->score.value; - int move = NOMOVE; - ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= const int score = position->score.value; int move = NOMOVE; hash_data.data.wl.c.depth = LEVEL[position->level][n_empties].depth; hash_data.data.wl.c.selectivity = LEVEL[position->level][n_empties].selectivity; ->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in) position_get_moves(position, board, &movelist); foreach_move(m, movelist) { if (move == NOMOVE) move = m->x; @@ -1118,41 +1061,11 @@ static void board_feed_hash(Board *board, const Book *book, Search *search, cons board_feed_hash(board, book, search, is_pv && m->score == score); board_restore(board, m); } -<<<<<<< HEAD -<<<<<<< HEAD hash_data.data.lower = hash_data.data.upper = score; hash_data.data.move[0] = move; -<<<<<<< HEAD -<<<<<<< HEAD hash_feed(&search->hash_table, board, hash_code, &hash_data); if (is_pv) hash_feed(&search->pv_table, board, hash_code, &hash_data); -======= - hash_feed(&search->hash_table, board, hash_code, depth, selectivity, score, score, move); - if (is_pv) hash_feed(&search->pv_table, board, hash_code, depth, selectivity, score, score, move); ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= - -<<<<<<< HEAD - hash_store_data.data.lower = hash_store_data.data.upper = score; - hash_store_data.data.move[0] = move; - hash_feed(&search->hash_table, board, hash_code, &hash_store_data); - if (is_pv) hash_feed(&search->pv_table, board, hash_code, &hash_store_data); ->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in) -======= - hash_data.data.lower = hash_data.data.upper = score; - hash_data.data.move[0] = move; - hash_feed(&search->hash_table, board, hash_code, &hash_data); - if (is_pv) hash_feed(&search->pv_table, board, hash_code, &hash_data); ->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame) -======= - hash_feed(&search->hash_table, HBOARD_P(board), hash_code, &hash_data); - if (is_pv) hash_feed(&search->pv_table, HBOARD_P(board), hash_code, &hash_data); ->>>>>>> e88638e (add vectorcall interface to hash functions) -======= - hash_feed(&search->hash_table, board, hash_code, &hash_data); - if (is_pv) hash_feed(&search->pv_table, board, hash_code, &hash_data); ->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes) } } @@ -2384,15 +2297,7 @@ void book_add_game(Book *book, const Game *game) stack[n_moves++] = MOVE_PASS; board_pass(&board); } -<<<<<<< HEAD -<<<<<<< HEAD if (!board_is_occupied(&board, game->move[i]) && board_get_move_flip(&board, game->move[i], &stack[n_moves])) { -======= - if (!board_is_occupied(&board, game->move[i]) && board_get_move(&board, game->move[i], &stack[n_moves])) { ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= - if (!board_is_occupied(&board, game->move[i]) && board_get_move_flip(&board, game->move[i], &stack[n_moves])) { ->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip) board_update(&board, stack + n_moves); ++n_moves; } else { @@ -2472,15 +2377,7 @@ void book_check_game(Book *book, MoveHash *hash, const Game *game, BookCheckGame stack[n_moves++] = MOVE_PASS; board_pass(&board); } -<<<<<<< HEAD -<<<<<<< HEAD - if (!board_is_occupied(&board, game->move[i]) && board_get_move_flip(&board, game->move[i], &stack[n_moves])) { -======= - if (!board_is_occupied(&board, game->move[i]) && board_get_move(&board, game->move[i], &stack[n_moves])) { ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= if (!board_is_occupied(&board, game->move[i]) && board_get_move_flip(&board, game->move[i], &stack[n_moves])) { ->>>>>>> 80ca4b1 (board_get_moves for AVX2; rename board_get_move_flip) board_update(&board, stack + n_moves); ++n_moves; } else { diff --git a/src/cassio.c b/src/cassio.c index df9f452..c262b44 100644 --- a/src/cassio.c +++ b/src/cassio.c @@ -13,23 +13,7 @@ * - With "-follow-cassio" Edax will follow more closely Cassio's search request. By default, it * searches with settings that make it better in tournament mode against Roxane, Cassio, etc. * -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD * @date 1998 - 2023 -======= - * @date 1998 - 2018 ->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added) -======= - * @date 1998 - 2020 ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= - * @date 1998 - 2022 ->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame) -======= - * @date 1998 - 2023 ->>>>>>> 4087529 (Revise board0 usage; fix unused flips) * @author Richard Delorme * @version 4.5 */ @@ -254,15 +238,7 @@ static void engine_observer(Result *result) static Search* engine_create_search(void) { Search *search; -<<<<<<< HEAD -<<<<<<< HEAD - -======= - ->>>>>>> 1c68bd5 (SSE / AVX optimized eval feature added) -======= ->>>>>>> 0a166fd (Remove 1 element array coding style) search = (Search*) mm_malloc(sizeof (Search)); if (search == NULL) { engine_send("ERROR: Cannot allocate a new search engine."); @@ -326,23 +302,7 @@ static int engine_open(Search *search, const Board *board, const int player, con if (player != search->player || !board_equal(&search->board, board)) { search_set_board(search, board, player); -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD if (hash_get_from_board(&search->pv_table, board, &hash_data)) { -======= - if (hash_get(&search->pv_table, board, board_get_hash_code(board), &hash_data)) { ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= - if (hash_get_from_board(&search->pv_table, board, &hash_data)) { ->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame) -======= - if (hash_get_from_board(&search->pv_table, HBOARD_P(board), &hash_data)) { ->>>>>>> 0b8fa13 (More HBOARD hash functions) -======= - if (hash_get_from_board(&search->pv_table, board, &hash_data)) { ->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes) if (hash_data.lower == -SCORE_INF && hash_data.upper < SCORE_INF) score = hash_data.upper; else if (hash_data.upper == +SCORE_INF && hash_data.lower > -SCORE_INF) score = hash_data.lower; else score = (hash_data.upper + hash_data.lower) / 2; @@ -454,36 +414,6 @@ void engine_free(void *v) void feed_all_hash_table(Search *search, Board *board, const int depth, const int selectivity, const int lower, const int upper, const int move) { -<<<<<<< HEAD -<<<<<<< HEAD - HashStoreData hash_data; - const unsigned long long hash_code = board_get_hash_code(board); - -<<<<<<< HEAD - hash_data.data.wl.c.depth = depth; - hash_data.data.wl.c.selectivity = selectivity; - hash_data.data.move[0] = move; - hash_data.data.lower = lower; - hash_data.data.upper = upper; - hash_feed(&search->hash_table, board, hash_code, &hash_data); - hash_feed(&search->pv_table, board, hash_code, &hash_data); -======= - hash_feed(&search->hash_table, board, hash_code, depth, selectivity, lower, upper, move); - hash_feed(&search->pv_table, board, hash_code, depth, selectivity, lower, upper, move); ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= - HashStoreData hash_store_data; - const unsigned long long hash_code = board_get_hash_code(board); - - hash_store_data.data.wl.c.depth = depth; - hash_store_data.data.wl.c.selectivity = selectivity; - hash_store_data.data.move[0] = move; - hash_store_data.data.lower = lower; - hash_store_data.data.upper = upper; - hash_feed(&search->hash_table, board, hash_code, &hash_store_data); - hash_feed(&search->pv_table, board, hash_code, &hash_store_data); ->>>>>>> d1c50ef (Structured hash_store parameters; AVXLASTFLIP changed to opt-in) -======= HashStoreData hash_data; const unsigned long long hash_code = board_get_hash_code(board); @@ -492,19 +422,8 @@ void feed_all_hash_table(Search *search, Board *board, const int depth, const in hash_data.data.move[0] = move; hash_data.data.lower = lower; hash_data.data.upper = upper; -<<<<<<< HEAD -<<<<<<< HEAD - hash_feed(&search->hash_table, board, hash_code, &hash_data); - hash_feed(&search->pv_table, board, hash_code, &hash_data); ->>>>>>> dea1c69 (Use same hash_data for R/W; reduce movelist in NWS_endgame) -======= - hash_feed(&search->hash_table, HBOARD_P(board), hash_code, &hash_data); - hash_feed(&search->pv_table, HBOARD_P(board), hash_code, &hash_data); ->>>>>>> e88638e (add vectorcall interface to hash functions) -======= hash_feed(&search->hash_table, board, hash_code, &hash_data); hash_feed(&search->pv_table, board, hash_code, &hash_data); ->>>>>>> e31cd1d (Drop HBOARD opt; little gain and too many changes) } /** @@ -625,28 +544,12 @@ static bool skip_search(Engine *engine, int *old_score) if (alpha < hash_data.lower) alpha = *old_score = hash_data.lower; if (beta > hash_data.upper) beta = *old_score = hash_data.upper; // skip search ? -<<<<<<< HEAD -<<<<<<< HEAD - if (hash_data.wl.c.depth >= search->depth && hash_data.wl.c.selectivity >= search->selectivity && alpha >= beta) { -======= - if (hash_data.depth >= search->depth && hash_data.selectivity >= search->selectivity && alpha >= beta) { ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= if (hash_data.wl.c.depth >= search->depth && hash_data.wl.c.selectivity >= search->selectivity && alpha >= beta) { ->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union) if (hash_data.move[0] != NOMOVE) movelist_sort_bestmove(movelist, hash_data.move[0]); else if (hash_data.lower > SCORE_MIN) return false; bestmove = movelist_first(movelist); bestmove->score = *old_score; -<<<<<<< HEAD -<<<<<<< HEAD - record_best_move(search, bestmove, options.alpha, options.beta, search->depth); -======= - record_best_move(search, &search->board, bestmove, options.alpha, options.beta, search->depth); ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= record_best_move(search, bestmove, options.alpha, options.beta, search->depth); ->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame) bound = search->result->bound + bestmove->x; if (bound->lower != bound->upper || is_pv_ok(search, bestmove->x, search->depth)) { @@ -657,32 +560,14 @@ static bool skip_search(Engine *engine, int *old_score) cassio_debug("Edax does not skip the search : BAD PV!\n"); } } else { -<<<<<<< HEAD -<<<<<<< HEAD if (hash_data.wl.c.depth < search->depth || hash_data.wl.c.selectivity < search->selectivity) { cassio_debug("Edax does not skip the search: Level %d@%d < %d@%d\n", hash_data.wl.c.depth, selectivity_table[hash_data.wl.c.selectivity].percent, search->depth, selectivity_table[search->selectivity].percent); -======= - if (hash_data.depth < search->depth || hash_data.selectivity < search->selectivity) { - cassio_debug("Edax does not skip the search: Level %d@%d < %d@%d\n", hash_data.depth, selectivity_table[hash_data.selectivity].percent, search->depth, selectivity_table[search->selectivity].percent); ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= - if (hash_data.wl.c.depth < search->depth || hash_data.wl.c.selectivity < search->selectivity) { - cassio_debug("Edax does not skip the search: Level %d@%d < %d@%d\n", hash_data.wl.c.depth, selectivity_table[hash_data.wl.c.selectivity].percent, search->depth, selectivity_table[search->selectivity].percent); ->>>>>>> a556e46 (HashData and HashStoreData rearranged, TYPE_PUNING now uses union) } else { cassio_debug("Edax does not skip the search: unsolved score alpha %d < beta %d\n", alpha, beta); } } } else { -<<<<<<< HEAD -<<<<<<< HEAD - cassio_debug("Edax does not skip the search: Position %s (hash=%llx) not found\n", board_to_string(&search->board, search->player, b), hash_code); -======= - cassio_debug("Edax does not skip the search: Position %s (hash=%llx) not found\n", board_to_string(&search->board, search->player, b), board_get_hash_code(&search->board)); ->>>>>>> 0a166fd (Remove 1 element array coding style) -======= cassio_debug("Edax does not skip the search: Position %s (hash=%llx) not found\n", board_to_string(&search->board, search->player, b), hash_code); ->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame) } return false; diff --git a/src/const.h b/src/const.h index 52234db..5b025d3 100644 --- a/src/const.h +++ b/src/const.h @@ -3,23 +3,7 @@ * * Constants as macros, enums, or global consts. * -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD * @date 1998 - 2024 -======= - * @date 1998 - 2020 ->>>>>>> 9ad160e (4.4.7 AVX/shuffle optimization in endgame_sse.c) -======= - * @date 1998 - 2021 ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) -======= - * @date 1998 - 2023 ->>>>>>> d63619f (Change NodeType to char; next node_type TLU to trinary Op) -======= - * @date 1998 - 2024 ->>>>>>> a09308f (Renew version string and copyright year) * @author Richard Delorme * @version 4.5 */ @@ -95,75 +79,12 @@ enum { CUT_NODE, ALL_NODE }; -<<<<<<< HEAD -<<<<<<< HEAD -typedef unsigned char NodeType; -======= -typedef char NodeType; ->>>>>>> d63619f (Change NodeType to char; next node_type TLU to trinary Op) -======= typedef unsigned char NodeType; ->>>>>>> 2ea1e4f (Change NodeType to unsigned char to fix gcc warning) #define VERSION 4 -<<<<<<< HEAD -<<<<<<< HEAD -#define RELEASE 5 -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -#define VERSION_STRING "4.5.3" -#define EDAX_NAME "Edax 4.5.3" -======= -#define RELEASE 4 -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -#define VERSION_STRING "4.4.5" -#define EDAX_NAME "Edax 4.4.5" ->>>>>>> 5124720 (-eval-file options added as documented; minor fix on console output) -======= -#define VERSION_STRING "4.4.6" -#define EDAX_NAME "Edax 4.4.6" ->>>>>>> cd90dbb (Enable 32bit AVX build; optimize loop in board print; set version to 4.4.6) -======= -#define VERSION_STRING "4.4.7" -#define EDAX_NAME "Edax 4.4.7" ->>>>>>> 9ad160e (4.4.7 AVX/shuffle optimization in endgame_sse.c) -======= -#define VERSION_STRING "4.4.8" -#define EDAX_NAME "Edax 4.4.8" ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -======= -======= #define RELEASE 5 ->>>>>>> fdb3c8a (SWAR vector eval update; more restore in search_restore_midgame) -#define VERSION_STRING "4.5.0" -#define EDAX_NAME "Edax 4.5.0" ->>>>>>> 34a2291 (4.5.0: Use CRC32c for board hash) -======= -#define VERSION_STRING "4.5.1" -#define EDAX_NAME "Edax 4.5.1" ->>>>>>> ff1c5db (skip hash access if n_moves <= 1 in NWS_endgame) -======= -#define VERSION_STRING "4.5.2" -#define EDAX_NAME "Edax 4.5.2" ->>>>>>> a9633d5 (Initial 4.5.2; some reformats) -======= -#define VERSION_STRING "4.5.1" -#define EDAX_NAME "Edax 4.5.1" ->>>>>>> 4087529 (Revise board0 usage; fix unused flips) -======= -#define VERSION_STRING "4.5.2" -#define EDAX_NAME "Edax 4.5.2" ->>>>>>> a09308f (Renew version string and copyright year) -======= #define VERSION_STRING "4.5.3" #define EDAX_NAME "Edax 4.5.3" ->>>>>>> d8589d2 (Init 4.5.3: abandon size_reduced_movelist which confuses gcc warn) #define BOOK 0x424f4f4b #define EDAX 0x45444158 #define EVAL 0x4556414c diff --git a/src/count_last_flip_32.c b/src/count_last_flip_32.c index 0c8d63c..492c266 100644 --- a/src/count_last_flip_32.c +++ b/src/count_last_flip_32.c @@ -1,7 +1,3 @@ -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) /** * @file count_last_flip_32.c * @@ -35,14 +31,6 @@ * */ -<<<<<<< HEAD -<<<<<<< HEAD -======= -#include "board.h" - ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) #define LODWORD(l) ((unsigned int)(l)) #define HIDWORD(l) ((unsigned int)((l)>>32)) @@ -119,15 +107,7 @@ static int count_last_flip_A1(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x01010101u) + ((HIDWORD(P) & 0x01010101u) << 4)) * 0x01020408u) >> 25]; -======= - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x01010100u) + ((HIDWORD(P) & 0x01010101u) << 4)) * 0x01020408u) >> 25]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x01010101u) + ((HIDWORD(P) & 0x01010101u) << 4)) * 0x01020408u) >> 25]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 1) & 0x7f]; n_flipped += COUNT_FLIP_R[(((LODWORD(P) & 0x08040200u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 25]; @@ -144,15 +124,7 @@ static int count_last_flip_B1(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x02020202u) + ((HIDWORD(P) & 0x02020202u) << 4)) * 0x00810204u) >> 25]; -======= - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x02020200u) + ((HIDWORD(P) & 0x02020202u) << 4)) * 0x00810204u) >> 25]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x02020202u) + ((HIDWORD(P) & 0x02020202u) << 4)) * 0x00810204u) >> 25]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 2) & 0x3f]; n_flipped += COUNT_FLIP_R[(((LODWORD(P) & 0x10080400u) + (HIDWORD(P) & 0x00804020u)) * 0x01010101u) >> 26]; @@ -169,15 +141,7 @@ static int count_last_flip_C1(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 25]; -======= - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x04040400u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 25]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 25]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += COUNT_FLIP_2[LODWORD(P) & 0xff]; n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x20110A04u) + (HIDWORD(P) & 0x00008040u)) * 0x01010101u) >> 24]; // A3C1H6 @@ -194,15 +158,7 @@ static int count_last_flip_D1(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x08080808u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 25]; -======= - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x08080800u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 25]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x08080808u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 25]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += COUNT_FLIP_3[LODWORD(P) & 0xff]; n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x41221408u) + (HIDWORD(P) & 0x00000080u)) * 0x01010101u) >> 24]; // A4D1H5 @@ -219,15 +175,7 @@ static int count_last_flip_E1(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x10101010u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 25]; -======= - n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x10101000u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 25]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x10101010u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 25]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += COUNT_FLIP_4[LODWORD(P) & 0xff]; n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x82442810u) + (HIDWORD(P) & 0x00000001u)) * 0x01010101u) >> 24]; // A5E1H4 @@ -244,15 +192,7 @@ static int count_last_flip_F1(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x20202020u) >> 4) + (HIDWORD(P) & 0x20202020u)) * 0x00810204u) >> 25]; -======= - n_flipped = COUNT_FLIP_R[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) >> 4) & 0x02020200u)) * 0x00810204u) >> 25]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x20202020u) >> 4) + (HIDWORD(P) & 0x20202020u)) * 0x00810204u) >> 25]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += COUNT_FLIP_5[LODWORD(P) & 0xff]; n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x04885020u) + (HIDWORD(P) & 0x00000102u)) * 0x01010101u) >> 24]; // A6F1H3 @@ -269,15 +209,7 @@ static int count_last_flip_G1(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x40404040u) >> 4) + (HIDWORD(P) & 0x40404040u)) * 0x00408102u) >> 25]; -======= - n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x40404000u) >> 4) + (HIDWORD(P) & 0x40404040u)) * 0x00408102u) >> 25]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x40404040u) >> 4) + (HIDWORD(P) & 0x40404040u)) * 0x00408102u) >> 25]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += COUNT_FLIP_L[(LODWORD(P) << 1) & 0x7e]; n_flipped += COUNT_FLIP_L[(((LODWORD(P) & 0x08102000u) + (HIDWORD(P) & 0x00010204u)) * 0x02020202u) >> 24]; @@ -294,15 +226,7 @@ static int count_last_flip_H1(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x80808080u) >> 4) + (HIDWORD(P) & 0x80808080u)) * 0x00204081u) >> 25]; -======= - n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x80808000u) >> 4) + (HIDWORD(P) & 0x80808080u)) * 0x00204081u) >> 25]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x80808080u) >> 4) + (HIDWORD(P) & 0x80808080u)) * 0x00204081u) >> 25]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += COUNT_FLIP_L[LODWORD(P) & 0x7f]; n_flipped += COUNT_FLIP_L[(((LODWORD(P) & 0x10204000u) + (HIDWORD(P) & 0x01020408u)) * 0x01010101u) >> 24]; @@ -1309,1245 +1233,3 @@ int (*count_last_flip[])(const unsigned long long) = { count_last_flip_E8, count_last_flip_F8, count_last_flip_G8, count_last_flip_H8, count_last_flip_pass, }; -<<<<<<< HEAD -======= -/** - * @file count_last_flip_32.c - * - * - * A function is provided to count the number of fipped disc of the last move - * for each square of the board. These functions are gathered into an array of - * functions, so that a fast access to each function is allowed. The generic - * form of the function take as input the player bitboard and return twice - * the number of flipped disc of the last move. - * - * The basic principle is to read into an array a precomputed result. Doing - * this is easy for a single line ; as we can use arrays of the form: - * - COUNT_FLIP[square where we play][8-bits disc pattern]. -* The problem is thus to convert any line of a 64-bits disc pattern into an - * 8-bits disc pattern. A fast way to do this is to select the right line, - * with a bit-mask, to gather the masked-bits into a continuous set by a simple - * multiplication and to right-shift the result to scale it into a number - * between 0 and 255. - * Once we get our 8-bits disc patterns, we directly get the number of - * flipped discs from the precomputed array, and add them from each flipping - * lines. - * For optimization purpose, the value returned is twice the number of flipped - * disc, to facilitate the computation of disc difference. - * - * With 135 degree merge, instead of Valery ClaudePierre's modification. - * - * @date 1998 - 2017 - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.4 - * - */ - -#include "board.h" - -#define LODWORD(l) ((unsigned int)(l)) -#define HIDWORD(l) ((unsigned int)((l)>>32)) - -/** precomputed count flip array */ -static const char COUNT_FLIP_R[128] = { - 0, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 8, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 10, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 8, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 12, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 8, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 10, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 8, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0 -}; - -static const char COUNT_FLIP_2[256] = { - 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 8, 10, 8, 8, 8, 10, 8, 8, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0 -}; - -static const char COUNT_FLIP_3[256] = { - 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 6, 10, 8, 8, 6, 6, 6, 6, 6, 10, 8, 8, 6, 6, 6, 6, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0 -}; - -static const char COUNT_FLIP_4[256] = { - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static const char COUNT_FLIP_5[256] = { - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static const char COUNT_FLIP_L[128] = { - 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/** - * Count last flipped discs when playing on square A1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A1(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x01010100u) + ((HIDWORD(P) & 0x01010101u) << 4)) * 0x01020408u) >> 25]; - n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 1) & 0x7f]; - n_flipped += COUNT_FLIP_R[(((LODWORD(P) & 0x08040200u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 25]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B1(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x02020200u) + ((HIDWORD(P) & 0x02020202u) << 4)) * 0x00810204u) >> 25]; - n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 2) & 0x3f]; - n_flipped += COUNT_FLIP_R[(((LODWORD(P) & 0x10080400u) + (HIDWORD(P) & 0x00804020u)) * 0x01010101u) >> 26]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C1(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x04040400u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 25]; - n_flipped += COUNT_FLIP_2[LODWORD(P) & 0xff]; - n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x20110A04u) + (HIDWORD(P) & 0x00008040u)) * 0x01010101u) >> 24]; // A3C1H6 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D1(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x08080800u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 25]; - n_flipped += COUNT_FLIP_3[LODWORD(P) & 0xff]; - n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x41221408u) + (HIDWORD(P) & 0x00000080u)) * 0x01010101u) >> 24]; // A4D1H5 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E1(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x10101000u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 25]; - n_flipped += COUNT_FLIP_4[LODWORD(P) & 0xff]; - n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x82442810u) + (HIDWORD(P) & 0x00000001u)) * 0x01010101u) >> 24]; // A5E1H4 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F1(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) >> 4) & 0x02020200u)) * 0x00810204u) >> 25]; - n_flipped += COUNT_FLIP_5[LODWORD(P) & 0xff]; - n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x04885020u) + (HIDWORD(P) & 0x00000102u)) * 0x01010101u) >> 24]; // A6F1H3 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G1(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x40404000u) >> 4) + (HIDWORD(P) & 0x40404040u)) * 0x00408102u) >> 25]; - n_flipped += COUNT_FLIP_L[(LODWORD(P) << 1) & 0x7e]; - n_flipped += COUNT_FLIP_L[(((LODWORD(P) & 0x08102000u) + (HIDWORD(P) & 0x00010204u)) * 0x02020202u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H1(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x80808000u) >> 4) + (HIDWORD(P) & 0x80808080u)) * 0x00204081u) >> 25]; - n_flipped += COUNT_FLIP_L[LODWORD(P) & 0x7f]; - n_flipped += COUNT_FLIP_L[(((LODWORD(P) & 0x10204000u) + (HIDWORD(P) & 0x01020408u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A2(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x01010000u) + ((HIDWORD(P) & 0x01010101u) << 4)) * 0x01020408u) >> 26]; - n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 9) & 0x7f]; - n_flipped += COUNT_FLIP_R[(((LODWORD(P) & 0x04020000u) + (HIDWORD(P) & 0x40201008u)) * 0x01010101u) >> 25]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B2(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x02020000u) + ((HIDWORD(P) & 0x02020202u) << 4)) * 0x00810204u) >> 26]; - n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 10) & 0x3f]; - n_flipped += COUNT_FLIP_R[(((LODWORD(P) & 0x08040000u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 26]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C2(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x04040000u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 26]; - n_flipped += COUNT_FLIP_2[(LODWORD(P) >> 8) & 0xff]; - n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x110A0400u) + (HIDWORD(P) & 0x00804020u)) * 0x01010101u) >> 24]; // A4C2H7 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D2(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[(((LODWORD(P) & 0x08080000u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 26]; - n_flipped += COUNT_FLIP_3[(LODWORD(P) >> 8) & 0xff]; - n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x22140800u) + (HIDWORD(P) & 0x00008041u)) * 0x01010101u) >> 24]; // A5D2H6 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E2(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x10100000u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 26]; - n_flipped += COUNT_FLIP_4[(LODWORD(P) >> 8) & 0xff]; - n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x44281000u) + (HIDWORD(P) & 0x00000182u)) * 0x01010101u) >> 24]; // A6E2H5 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F2(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) & 0x20200000u) >> 4)) * 0x00810204u) >> 26]; - n_flipped += COUNT_FLIP_5[(LODWORD(P) >> 8) & 0xff]; - n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x88502000u) + (HIDWORD(P) & 0x00010204u)) * 0x01010101u) >> 24]; // A7F2H4 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G2(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x40400000u) >> 4) + (HIDWORD(P) & 0x40404040u)) * 0x00408102u) >> 26]; - n_flipped += COUNT_FLIP_L[(LODWORD(P) >> 7) & 0x7e]; - n_flipped += COUNT_FLIP_L[(((LODWORD(P) & 0x10200000u) + (HIDWORD(P) & 0x01020408u)) * 0x02020202u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H2(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_R[((((LODWORD(P) & 0x80800000u) >> 4) + (HIDWORD(P) & 0x80808080u)) * 0x00204081u) >> 26]; - n_flipped += COUNT_FLIP_L[(LODWORD(P) >> 8) & 0x7f]; - n_flipped += COUNT_FLIP_L[(((LODWORD(P) & 0x20400000u) + (HIDWORD(P) & 0x02040810u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((LODWORD(P) & 0x02010101u) * 0x01020404u + (HIDWORD(P) & 0x20100804u) * 0x04040404u) >> 24]; // A1A3F8 - n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 17) & 0x7f]; - n_flipped += COUNT_FLIP_5[((LODWORD(P) & 0x01010204u) * 0x20202010u + (HIDWORD(P) & 0x01010101u) * 0x08040201u) >> 24]; // C1A3A8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((LODWORD(P) & 0x04020202u) * 0x00810202u + (HIDWORD(P) & 0x40201008u) * 0x02020202u) >> 24]; // B1B3G8 - n_flipped += COUNT_FLIP_R[(LODWORD(P) >> 18) & 0x3f]; - n_flipped += COUNT_FLIP_5[((LODWORD(P) & 0x02020408u) * 0x10101008u + ((HIDWORD(P) & 0x02020202u) >> 1) * 0x08040201u) >> 24]; // D1B3B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 24]; - n_flipped += COUNT_FLIP_2[(LODWORD(P) >> 16) & 0xff]; - n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x02040810u) + (HIDWORD(P) & 0x00000001u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x08040201u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[(((LODWORD(P) & 0x08080808u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 24]; - n_flipped += COUNT_FLIP_3[(LODWORD(P) >> 16) & 0xff]; - n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x04081020u) + (HIDWORD(P) & 0x00000102u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x10080402u) + (HIDWORD(P) & 0x00804020u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((((LODWORD(P) & 0x10101010u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 24]; - n_flipped += COUNT_FLIP_4[(LODWORD(P) >> 16) & 0xff]; - n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x08102040u) + (HIDWORD(P) & 0x00010204u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x20100804u) + (HIDWORD(P) & 0x00008040u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) & 0x20202020u) >> 4)) * 0x00810204u) >> 24]; - n_flipped += COUNT_FLIP_5[(LODWORD(P) >> 16) & 0xff]; - n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x10204080u) + (HIDWORD(P) & 0x01020408u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x40201008u) + (HIDWORD(P) & 0x00000080u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[(((LODWORD(P) & 0x40402010u) >> 4) * 0x01010102u + (HIDWORD(P) & 0x40404040u) * 0x00408102u) >> 24]; // E1G3G8 - n_flipped += COUNT_FLIP_L[(LODWORD(P) >> 15) & 0x7e]; - n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x20404040u) >> 1) * 0x04020101u + ((HIDWORD(P) & 0x02040810u) >> 1) * 0x01010101u) >> 24]; // G1G3B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[(((LODWORD(P) & 0x80804020u) >> 4) * 0x00808081u + (HIDWORD(P) & 0x80808080u) * 0x00204081u) >> 24]; // F1H3H8 - n_flipped += COUNT_FLIP_L[(LODWORD(P) >> 16) & 0x7f]; - n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x40808080u) >> 2) * 0x04020101u + ((HIDWORD(P) & 0x04081020u) >> 2) * 0x01010101u) >> 24]; // H1H3C8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((LODWORD(P) & 0x01010101u) * 0x01020408u + (HIDWORD(P) & 0x10080402u) * 0x08080808u) >> 24]; // A1A4E8 - n_flipped += COUNT_FLIP_R[LODWORD(P) >> 25]; - n_flipped += COUNT_FLIP_4[((LODWORD(P) & 0x01020408u) * 0x10101010u + (HIDWORD(P) & 0x01010101u) * 0x08040201u) >> 24]; // D1A4A8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((LODWORD(P) & 0x02020202u) * 0x00810204u + (HIDWORD(P) & 0x20100804u) * 0x04040404u) >> 24]; // B1B4F8 - n_flipped += COUNT_FLIP_R[LODWORD(P) >> 26]; - n_flipped += COUNT_FLIP_4[((LODWORD(P) & 0x02040810u) * 0x08080808u + ((HIDWORD(P) & 0x02020202u) >> 1) * 0x08040201u) >> 24]; // E1B4B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 24]; - n_flipped += COUNT_FLIP_2[LODWORD(P) >> 24]; - n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x04081020u) + (HIDWORD(P) & 0x00000102u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x04020100u) + (HIDWORD(P) & 0x40201008u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[(((LODWORD(P) & 0x08080808u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 24]; - n_flipped += COUNT_FLIP_3[LODWORD(P) >> 24]; - n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x08102040u) + (HIDWORD(P) & 0x00010204u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x08040201u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((((LODWORD(P) & 0x10101010u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 24]; - n_flipped += COUNT_FLIP_4[LODWORD(P) >> 24]; - n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x10204080u) + (HIDWORD(P) & 0x01020408u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x10080402u) + (HIDWORD(P) & 0x00804020u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) & 0x20202020u) >> 4)) * 0x00810204u) >> 24]; - n_flipped += COUNT_FLIP_5[LODWORD(P) >> 24]; - n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x20408000u) + (HIDWORD(P) & 0x02040810u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x20100804u) + (HIDWORD(P) & 0x00008040u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[(((LODWORD(P) & 0x40201008u) >> 3) * 0x01010101u + (HIDWORD(P) & 0x40404040u) * 0x00408102u) >> 24]; // D1G4G8 - n_flipped += COUNT_FLIP_L[(LODWORD(P) >> 23) & 0x7e]; - n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x40404040u) >> 2) * 0x08040201u + ((HIDWORD(P) & 0x04081020u) >> 2) * 0x01010101u) >> 24]; // G1G4C8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[(((LODWORD(P) & 0x80402010u) >> 4) * 0x01010101u + (HIDWORD(P) & 0x80808080u) * 0x00204081u) >> 24]; // E1H4H8 - n_flipped += COUNT_FLIP_L[(LODWORD(P) >> 24) & 0x7f]; - n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x80808080u) >> 3) * 0x08040201u + ((HIDWORD(P) & 0x08102040u) >> 3) * 0x01010101u) >> 24]; // H1H4D8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((LODWORD(P) & 0x01010101u) * 0x01020408u + (HIDWORD(P) & 0x08040201u) * 0x10101010u) >> 24]; // A1A5D8 - n_flipped += COUNT_FLIP_R[(HIDWORD(P) >> 1) & 0x7f]; - n_flipped += COUNT_FLIP_3[((LODWORD(P) & 0x02040810u) * 0x08080808u + (HIDWORD(P) & 0x01010101u) * 0x08040201u) >> 24]; // E1A5A8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((LODWORD(P) & 0x02020202u) * 0x00810204u + (HIDWORD(P) & 0x10080402u) * 0x08080808u) >> 24]; // B1B5E8 - n_flipped += COUNT_FLIP_R[(HIDWORD(P) >> 2) & 0x3f]; - n_flipped += COUNT_FLIP_3[((LODWORD(P) & 0x04081020u) * 0x04040404u + ((HIDWORD(P) & 0x02020202u) >> 1) * 0x08040201u) >> 24]; // F1B5B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 24]; - n_flipped += COUNT_FLIP_2[HIDWORD(P) & 0xff]; - n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x08102040u) + (HIDWORD(P) & 0x00010204u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x02010000u) + (HIDWORD(P) & 0x20100804u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[(((LODWORD(P) & 0x08080808u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 24]; - n_flipped += COUNT_FLIP_3[HIDWORD(P) & 0xff]; - n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x10204080u) + (HIDWORD(P) & 0x01020408u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x04020100u) + (HIDWORD(P) & 0x40201008u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((((LODWORD(P) & 0x10101010u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 24]; - n_flipped += COUNT_FLIP_4[HIDWORD(P) & 0xff]; - n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x20408000u) + (HIDWORD(P) & 0x02040810u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x08040201u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) & 0x20202020u) >> 4)) * 0x00810204u) >> 24]; - n_flipped += COUNT_FLIP_5[HIDWORD(P) & 0xff]; - n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x40800000u) + (HIDWORD(P) & 0x04081020u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x10080402u) + (HIDWORD(P) & 0x00804020u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[(((LODWORD(P) & 0x20100804u) >> 2) * 0x01010101u + (HIDWORD(P) & 0x40404040u) * 0x00408102u) >> 24]; // C1G5G8 - n_flipped += COUNT_FLIP_L[(HIDWORD(P) << 1) & 0x7e]; - n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x40404040u) >> 3) * 0x10080402u + ((HIDWORD(P) & 0x08102040u) >> 3) * 0x01010101u) >> 24]; // G1G5D8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[(((LODWORD(P) & 0x40201008u) >> 3) * 0x01010101u + (HIDWORD(P) & 0x80808080u) * 0x00204081u) >> 24]; // D1H5H8 - n_flipped += COUNT_FLIP_L[HIDWORD(P) & 0x7f]; - n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x80808080u) >> 4) * 0x10080402u + ((HIDWORD(P) & 0x10204080u) >> 4) * 0x01010101u) >> 24]; // H1H5E8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((LODWORD(P) & 0x01010101u) * 0x01020408u + (HIDWORD(P) & 0x04020101u) * 0x10202020u) >> 24]; // A1A6C8 - n_flipped += COUNT_FLIP_R[(HIDWORD(P) >> 9) & 0x7f]; - n_flipped += COUNT_FLIP_2[((LODWORD(P) & 0x04081020u) * 0x04040404u + (HIDWORD(P) & 0x01010102u) * 0x04040201u) >> 24]; // F1A6A8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((LODWORD(P) & 0x02020202u) * 0x00810204u + (HIDWORD(P) & 0x08040202u) * 0x08101010u) >> 24]; // B1B6D8 - n_flipped += COUNT_FLIP_R[(HIDWORD(P) >> 10) & 0x3f]; - n_flipped += COUNT_FLIP_2[((LODWORD(P) & 0x08102040u) * 0x02020202u + ((HIDWORD(P) & 0x02020204u) >> 1) * 0x04040201u) >> 24]; // G1B6B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x04040404u) << 4)) * 0x00408102u) >> 24]; - n_flipped += COUNT_FLIP_2[(HIDWORD(P) >> 8) & 0xff]; - n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x10204080u) + (HIDWORD(P) & 0x01020408u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x01000000u) + (HIDWORD(P) & 0x10080402u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[(((LODWORD(P) & 0x08080808u) + ((HIDWORD(P) & 0x08080808u) << 4)) * 0x00204081u) >> 24]; - n_flipped += COUNT_FLIP_3[(HIDWORD(P) >> 8) & 0xff]; - n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x20408000u) + (HIDWORD(P) & 0x02040810u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_3[(((LODWORD(P) & 0x02010000u) + (HIDWORD(P) & 0x20100804u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((((LODWORD(P) & 0x10101010u) >> 4) + (HIDWORD(P) & 0x10101010u)) * 0x01020408u) >> 24]; - n_flipped += COUNT_FLIP_4[(HIDWORD(P) >> 8) & 0xff]; - n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x40800000u) + (HIDWORD(P) & 0x04081020u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_4[(((LODWORD(P) & 0x04020100u) + (HIDWORD(P) & 0x40201008u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[(((HIDWORD(P) & 0x20202020u) + ((LODWORD(P) & 0x20202020u) >> 4)) * 0x00810204u) >> 24]; - n_flipped += COUNT_FLIP_5[(HIDWORD(P) >> 8) & 0xff]; - n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x80000000u) + (HIDWORD(P) & 0x08102040u)) * 0x01010101u) >> 24]; - n_flipped += COUNT_FLIP_5[(((LODWORD(P) & 0x08040201u) + (HIDWORD(P) & 0x80402010u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[(((LODWORD(P) & 0x10080402u) >> 1) * 0x01010101u + (HIDWORD(P) & 0x40404020u) * 0x00808102u) >> 24]; // B1G6G8 - n_flipped += COUNT_FLIP_L[(HIDWORD(P) >> 7) & 0x7e]; - n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x40404040u) >> 4) * 0x20100804u + ((HIDWORD(P) & 0x10204040u) >> 4 ) * 0x02010101u) >> 24]; // G1G6E8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[(((LODWORD(P) & 0x20100804u) >> 2) * 0x01010101u + (HIDWORD(P) & 0x80808040u) * 0x00404081u) >> 24]; // C1H6H8 - n_flipped += COUNT_FLIP_L[(HIDWORD(P) >> 8) & 0x7f]; - n_flipped += COUNT_FLIP_2[(((LODWORD(P) & 0x80808080u) >> 5) * 0x20100804u + ((HIDWORD(P) & 0x20408080u) >> 5) * 0x02010101u) >> 24]; // H1H6F8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((((HIDWORD(P) & 0x00000101u) << 4) + (LODWORD(P) & 0x01010101u)) * 0x02040810u) >> 24]; - n_flipped += COUNT_FLIP_R[(HIDWORD(P) >> 17) & 0x7f]; - n_flipped += COUNT_FLIP_R[(((HIDWORD(P) & 0x00000204u) + (LODWORD(P) & 0x08102040u)) * 0x01010101u) >> 25]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((((HIDWORD(P) & 0x00000202u) << 4) + (LODWORD(P) & 0x02020202u)) * 0x01020408u) >> 24]; - n_flipped += COUNT_FLIP_R[(HIDWORD(P) >> 18) & 0x3f]; - n_flipped += COUNT_FLIP_R[(((HIDWORD(P) & 0x00000408u) + (LODWORD(P) & 0x10204080u)) * 0x01010101u) >> 26]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x00000404u) << 4)) * 0x00810204u) >> 24]; - n_flipped += COUNT_FLIP_2[(HIDWORD(P) >> 16) & 0xff]; - n_flipped += COUNT_FLIP_2[(((HIDWORD(P) & 0x00040A11u) + (LODWORD(P) & 0x20408000u)) * 0x01010101u) >> 24]; // A5C7H2 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((((HIDWORD(P) & 0x00000808u) << 4) + (LODWORD(P) & 0x08080808u)) * 0x00408102u) >> 24]; - n_flipped += COUNT_FLIP_3[(HIDWORD(P) >> 16) & 0xff]; - n_flipped += COUNT_FLIP_3[(((HIDWORD(P) & 0x00081422u) + (LODWORD(P) & 0x41800000u)) * 0x01010101u) >> 24]; // A4D7H3 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[(((HIDWORD(P) & 0x00001010u) + ((LODWORD(P) & 0x10101010u) >> 4)) * 0x02040810u) >> 24]; - n_flipped += COUNT_FLIP_4[(HIDWORD(P) >> 16) & 0xff]; - n_flipped += COUNT_FLIP_4[(((HIDWORD(P) & 0x00102844u) + (LODWORD(P) & 0x82010000u)) * 0x01010101u) >> 24]; // A3E7H4 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[(((HIDWORD(P) & 0x00002020u) + ((LODWORD(P) & 0x20202020u) >> 4)) * 0x01020408u) >> 24]; - n_flipped += COUNT_FLIP_5[(HIDWORD(P) >> 16) & 0xff]; - n_flipped += COUNT_FLIP_5[(((HIDWORD(P) & 0x00205088u) + (LODWORD(P) & 0x04020100u)) * 0x01010101u) >> 24]; // A2F7H5 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[(((HIDWORD(P) & 0x00004040u) + ((LODWORD(P) & 0x40404040u) >> 4)) * 0x00810204u) >> 24]; - n_flipped += COUNT_FLIP_L[(HIDWORD(P) >> 15) & 0x7e]; - n_flipped += COUNT_FLIP_L[(((HIDWORD(P) & 0x00002010u) + (LODWORD(P) & 0x08040201u)) * 0x02020202u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[(((HIDWORD(P) & 0x00008080u) + ((LODWORD(P) & 0x80808080u) >> 4)) * 0x00408102u) >> 24]; - n_flipped += COUNT_FLIP_L[(HIDWORD(P) >> 16) & 0x7f]; - n_flipped += COUNT_FLIP_L[(((HIDWORD(P) & 0x00004020u) + (LODWORD(P) & 0x10080402u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((((HIDWORD(P) & 0x00010101u) << 4) + (LODWORD(P) & 0x01010101u)) * 0x01020408u) >> 24]; - n_flipped += COUNT_FLIP_R[HIDWORD(P) >> 25]; - n_flipped += COUNT_FLIP_R[(((HIDWORD(P) & 0x00020408u) + (LODWORD(P) & 0x10204080u)) * 0x01010101u) >> 25]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((((HIDWORD(P) & 0x00020202u) << 4) + (LODWORD(P) & 0x02020202u)) * 0x00810204u) >> 24]; - n_flipped += COUNT_FLIP_R[HIDWORD(P) >> 26]; - n_flipped += COUNT_FLIP_R[(((HIDWORD(P) & 0x00040810u) + (LODWORD(P) & 0x20408000u)) * 0x01010101u) >> 26]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[(((LODWORD(P) & 0x04040404u) + ((HIDWORD(P) & 0x00040404u) << 4)) * 0x00408102u) >> 24]; - n_flipped += COUNT_FLIP_2[HIDWORD(P) >> 24]; - n_flipped += COUNT_FLIP_2[(((HIDWORD(P) & 0x040A1120u) + (LODWORD(P) & 0x40800000u)) * 0x01010101u) >> 24]; // A6C8H3 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((((HIDWORD(P) & 0x00080808u) << 4) + (LODWORD(P) & 0x08080808u)) * 0x00204081u) >> 24]; - n_flipped += COUNT_FLIP_3[HIDWORD(P) >> 24]; - n_flipped += COUNT_FLIP_3[(((HIDWORD(P) & 0x08142241u) + (LODWORD(P) & 0x80000000u)) * 0x01010101u) >> 24]; // A5D8H4 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[(((HIDWORD(P) & 0x00101010u) + ((LODWORD(P) & 0x10101010u) >> 4)) * 0x01020408u) >> 24]; - n_flipped += COUNT_FLIP_4[HIDWORD(P) >> 24]; - n_flipped += COUNT_FLIP_4[(((HIDWORD(P) & 0x10284482u) + (LODWORD(P) & 0x01000000u)) * 0x01010101u) >> 24]; // A4E8H5 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[(((HIDWORD(P) & 0x00202020u) + ((LODWORD(P) & 0x20202020u) >> 4)) * 0x00810204u) >> 24]; - n_flipped += COUNT_FLIP_5[HIDWORD(P) >> 24]; - n_flipped += COUNT_FLIP_5[(((HIDWORD(P) & 0x00508804u) + (LODWORD(P) & 0x02010000u)) * 0x01010101u) >> 24]; // A3F8H6 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[(((HIDWORD(P) & 0x00404040u) + ((LODWORD(P) & 0x40404040u) >> 4)) * 0x00408102u) >> 24]; - n_flipped += COUNT_FLIP_L[(HIDWORD(P) >> 23) & 0x7e]; - n_flipped += COUNT_FLIP_L[(((HIDWORD(P) & 0x00201008u) + (LODWORD(P) & 0x04020100u)) * 0x02020202u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[(((HIDWORD(P) & 0x00808080u) + ((LODWORD(P) & 0x80808080) >> 4)) * 0x00204081u) >> 24]; - n_flipped += COUNT_FLIP_L[(HIDWORD(P) >> 24) & 0x7f]; - n_flipped += COUNT_FLIP_L[(((HIDWORD(P) & 0x00402010u) + (LODWORD(P) & 0x08040201u)) * 0x01010101u) >> 24]; - - return n_flipped; -} - -/** - * Count last flipped discs when plassing. - * - * @param P player's disc pattern (unused). - * @return zero. - */ -static int count_last_flip_pass(const unsigned long long P) -{ - (void) P; // useless code to shut-up compiler warning - return 0; -} - -/** Array of functions to count flipped discs of the last move */ -int (*count_last_flip[])(const unsigned long long) = { - count_last_flip_A1, count_last_flip_B1, count_last_flip_C1, count_last_flip_D1, - count_last_flip_E1, count_last_flip_F1, count_last_flip_G1, count_last_flip_H1, - count_last_flip_A2, count_last_flip_B2, count_last_flip_C2, count_last_flip_D2, - count_last_flip_E2, count_last_flip_F2, count_last_flip_G2, count_last_flip_H2, - count_last_flip_A3, count_last_flip_B3, count_last_flip_C3, count_last_flip_D3, - count_last_flip_E3, count_last_flip_F3, count_last_flip_G3, count_last_flip_H3, - count_last_flip_A4, count_last_flip_B4, count_last_flip_C4, count_last_flip_D4, - count_last_flip_E4, count_last_flip_F4, count_last_flip_G4, count_last_flip_H4, - count_last_flip_A5, count_last_flip_B5, count_last_flip_C5, count_last_flip_D5, - count_last_flip_E5, count_last_flip_F5, count_last_flip_G5, count_last_flip_H5, - count_last_flip_A6, count_last_flip_B6, count_last_flip_C6, count_last_flip_D6, - count_last_flip_E6, count_last_flip_F6, count_last_flip_G6, count_last_flip_H6, - count_last_flip_A7, count_last_flip_B7, count_last_flip_C7, count_last_flip_D7, - count_last_flip_E7, count_last_flip_F7, count_last_flip_G7, count_last_flip_H7, - count_last_flip_A8, count_last_flip_B8, count_last_flip_C8, count_last_flip_D8, - count_last_flip_E8, count_last_flip_F8, count_last_flip_G8, count_last_flip_H8, - count_last_flip_pass, -}; ->>>>>>> e558fdb (Some cleanups for clang / android build) -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) diff --git a/src/count_last_flip_avx512cd.c b/src/count_last_flip_avx512cd.c index a407b07..54de932 100644 --- a/src/count_last_flip_avx512cd.c +++ b/src/count_last_flip_avx512cd.c @@ -7,15 +7,7 @@ * For optimization purpose, the value returned is twice the number of flipped * disc, to facilitate the computation of disc difference. * -<<<<<<< HEAD -<<<<<<< HEAD * @date 2023 - 2024 -======= - * @date 2023 ->>>>>>> 52949e1 (Add build options and files for new count_last_flips) -======= - * @date 2023 - 2024 ->>>>>>> ba1be42 (AVX512 last flip with lastflip_highcut) * @author Toshihiko Okuhara * @version 4.5 * @@ -23,15 +15,7 @@ #include "bit.h" -<<<<<<< HEAD -<<<<<<< HEAD -extern const V8DI lrmask[66]; -======= -extern const V4DI lmask_v4[66], rmask_v4[66]; ->>>>>>> 52949e1 (Add build options and files for new count_last_flips) -======= extern const V8DI lrmask[66]; ->>>>>>> ba1be42 (AVX512 last flip with lastflip_highcut) /** * Count last flipped discs when playing on the last empty. @@ -44,8 +28,6 @@ extern const V8DI lrmask[66]; int last_flip(int pos, unsigned long long P) { __m256i PP = _mm256_set1_epi64x(P); -<<<<<<< HEAD -<<<<<<< HEAD __m256i flip, outflank, eraser, rmask, lmask; __m128i flip2; @@ -56,34 +38,6 @@ int last_flip(int pos, unsigned long long P) flip = _mm256_maskz_add_epi64(_mm256_test_epi64_mask(PP, lmask), outflank, _mm256_set1_epi64x(-1)); // flip = _mm256_and_si256(_mm256_andnot_si256(outflank, flip), lmask); flip = _mm256_ternarylogic_epi64(outflank, flip, lmask, 0x08); -<<<<<<< HEAD - - // right: look for player bit with lzcnt - rmask = lrmask[pos].v4[1]; - eraser = _mm256_srlv_epi64(_mm256_set1_epi64x(-1), - _mm256_maskz_lzcnt_epi64(_mm256_test_epi64_mask(PP, rmask), _mm256_and_si256(PP, rmask))); - // flip = _mm256_or_si256(flip, _mm256_andnot_si256(eraser, rmask)); - flip = _mm256_ternarylogic_epi64(flip, eraser, rmask, 0xf2); - - flip2 = _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1)); - return 2 * bit_count(_mm_cvtsi128_si64(_mm_or_si128(flip2, _mm_unpackhi_epi64(flip2, flip2)))); -======= - __m256i flip, outflank, rmask, lmask; -======= - __m256i flip, outflank, eraser, rmask, lmask; ->>>>>>> ba1be42 (AVX512 last flip with lastflip_highcut) - __m128i flip2; - - // left: look for player LS1B - lmask = lrmask[pos].v4[0]; - outflank = _mm256_and_si256(PP, lmask); - // set below LS1B if P is in lmask - // flip = _mm256_andnot_si256(outflank, _mm256_add_epi64(outflank, _mm256_set1_epi64x(-1))); - // flip = _mm256_maskz_and_epi64(_mm256_test_epi64_mask(PP, lmask), flip, lmask); - flip = _mm256_maskz_ternarylogic_epi64(_mm256_test_epi64_mask(PP, lmask), - outflank, _mm256_add_epi64(outflank, _mm256_set1_epi64x(-1)), lmask, 0x08); -======= ->>>>>>> eb84eb8 (Revise avx512 mask usage to ease ternarylogic opt) // right: look for player bit with lzcnt rmask = lrmask[pos].v4[1]; @@ -93,11 +47,5 @@ int last_flip(int pos, unsigned long long P) flip = _mm256_ternarylogic_epi64(flip, eraser, rmask, 0xf2); flip2 = _mm_or_si128(_mm256_castsi256_si128(flip), _mm256_extracti128_si256(flip, 1)); -<<<<<<< HEAD - flip2 = _mm_or_si128(flip2, _mm_shuffle_epi32(flip2, 0x4e)); - return 2 * bit_count(_mm_cvtsi128_si64(flip2)); ->>>>>>> 52949e1 (Add build options and files for new count_last_flips) -======= return 2 * bit_count(_mm_cvtsi128_si64(_mm_or_si128(flip2, _mm_unpackhi_epi64(flip2, flip2)))); ->>>>>>> eb84eb8 (Revise avx512 mask usage to ease ternarylogic opt) } diff --git a/src/count_last_flip_bitscan.c b/src/count_last_flip_bitscan.c index dca921a..6cae24a 100644 --- a/src/count_last_flip_bitscan.c +++ b/src/count_last_flip_bitscan.c @@ -1,7 +1,3 @@ -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) /** * @file count_last_flip_bitscan.c * @@ -83,8 +79,6 @@ static const char COUNT_FLIP_5[256] = { 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -<<<<<<< HEAD -<<<<<<< HEAD #include "bit_intrinsics.h" #ifdef lzcnt_u64 @@ -98,46 +92,15 @@ static inline int count_H_flip_left (unsigned long long P, int pos, int mask) { return (lzcnt_u32((P << (8 - pos)) & (mask << 1)) & 0x07) * 2; else return (lzcnt_u32((P >> (pos - 8)) & (mask << 1)) & 0x07) * 2; -======= -#include "bit.h" -======= -#include "bit_intrinsics.h" ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - -#ifdef lzcnt_u64 - -static inline int count_V_flip_reverse (unsigned long long P, int ofs) { - return (lzcnt_u64(P << ofs) & 0x38) >> 2; -} - -static inline int count_H_flip_left (unsigned long long P, int pos, int mask) { -<<<<<<< HEAD - return (_lzcnt_u32((P >> (pos - 8)) & (mask << 1)) & 0x07) * 2; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - if (pos < 8) - return (lzcnt_u32((P << (8 - pos)) & (mask << 1)) & 0x07) * 2; - else - return (lzcnt_u32((P >> (pos - 8)) & (mask << 1)) & 0x07) * 2; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) } #else -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) // with guardian bit to avoid __builtin_clz(0) // Not used static inline int count_V_flip_reverse (unsigned long long P, int ofs) { return ((__builtin_clzll((P << ofs) | 1) + 1) & 0x38) >> 2; } -<<<<<<< HEAD -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static const char COUNT_FLIP_L[128] = { 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -145,54 +108,15 @@ static const char COUNT_FLIP_L[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -<<<<<<< HEAD -<<<<<<< HEAD -static inline int count_H_flip_left (unsigned long long P, int pos, int mask) { - if (pos < 8) - return COUNT_FLIP_L[(P << (7 - pos)) & mask]; - else - return COUNT_FLIP_L[(P >> (pos - 7)) & mask]; -======= -static inline int count_H1_flip_left (unsigned long long P, int pos, int mask) { - return COUNT_FLIP_L[(P << (7 - pos)) & mask]; -} - -static inline int count_H_flip_left (unsigned long long P, int pos, int mask) { - return COUNT_FLIP_L[(P >> (pos - 7)) & mask]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= static inline int count_H_flip_left (unsigned long long P, int pos, int mask) { if (pos < 8) return COUNT_FLIP_L[(P << (7 - pos)) & mask]; else return COUNT_FLIP_L[(P >> (pos - 7)) & mask]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) } #endif -<<<<<<< HEAD -<<<<<<< HEAD -#ifdef tzcnt_u32 - -static inline int count_H_flip_right (unsigned long long P, int pos) { - if (pos >= 56) - return (tzcnt_u32(P >> (pos + 1)) & 0x07) * 2; - else if ((pos >= 24) && (pos < 32)) - return (tzcnt_u32((unsigned int) P >> (pos + 1)) & 0x07) * 2; - else - return (tzcnt_u32((P >> (pos + 1)) & (0x7f >> (pos & 0x07))) & 0x07) * 2; -======= -#if (defined(__BMI__) || defined(__AVX2__)) && !(defined(__GNUC__) && (__GNUC__ < 6)) // GCC Bug 78037 - -static inline int count_H_flip_right (unsigned long long P, int pos, int mask) { - return (_tzcnt_u32((P >> (pos + 1)) & mask) & 0x07) * 2; -} - -static inline int count_H8_flip_right (unsigned long long P, int pos) { - return (_tzcnt_u32(P >> (pos + 1)) & 0x07) * 2; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= #ifdef tzcnt_u32 static inline int count_H_flip_right (unsigned long long P, int pos) { @@ -202,7 +126,6 @@ static inline int count_H_flip_right (unsigned long long P, int pos) { return (tzcnt_u32((unsigned int) P >> (pos + 1)) & 0x07) * 2; else return (tzcnt_u32((P >> (pos + 1)) & (0x7f >> (pos & 0x07))) & 0x07) * 2; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) } #else @@ -214,10 +137,6 @@ static const char COUNT_FLIP_R[128] = { 10, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, 8, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0 }; -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static inline int count_H_flip_right (unsigned long long P, int pos) { if (pos >= 56) return COUNT_FLIP_R[P >> (pos + 1)]; @@ -225,37 +144,14 @@ static inline int count_H_flip_right (unsigned long long P, int pos) { return COUNT_FLIP_R[(unsigned int) P >> (pos + 1)]; else return COUNT_FLIP_R[(P >> (pos + 1)) & (0x7f >> (pos & 0x07))]; -<<<<<<< HEAD -======= -static inline int count_H_flip_right (unsigned long long P, int pos, int mask) { - return COUNT_FLIP_R[(P >> (pos + 1)) & mask]; -} - -static inline int count_H8_flip_right (unsigned long long P, int pos) { - return COUNT_FLIP_R[P >> (pos + 1)]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) } #endif -<<<<<<< HEAD -<<<<<<< HEAD -#ifndef lzcnt_u64 - -/** - * Count last flipped discs when playing on square A1/A2. -======= -/** - * Count last flipped discs when playing on square A1. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= #ifndef lzcnt_u64 /** * Count last flipped discs when playing on square A1/A2. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. @@ -265,33 +161,15 @@ static int count_last_flip_A1(const unsigned long long P) int n_flipped; unsigned long long P_v, P_d9; -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) P_v = P & 0x0101010101010100; n_flipped = ((P_v & -P_v) * 0x000020406080a0c0) >> 60; n_flipped += count_H_flip_right(P, 0); P_d9 = P & 0x8040201008040200; n_flipped += (((P_d9 & -P_d9) >> 1) * 0x000010100c080503) >> 60; -<<<<<<< HEAD -======= - P_v = P & 0x0101010101010100ULL; - n_flipped = ((P_v & -P_v) * 0x000020406080a0c0ULL) >> 60; - n_flipped += count_H_flip_right(P, 0, 0x7f); - P_d9 = P & 0x8040201008040200ULL; - n_flipped += (((P_d9 & -P_d9) >> 1) * 0x000010100c080503ULL) >> 60; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) return n_flipped; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_A2(const unsigned long long P) { return count_last_flip_A1(P >> 8); } @@ -304,17 +182,8 @@ static int count_last_flip_A7(const unsigned long long P) { return count_last_flip_A1(vertical_mirror(P) >> 8); } -<<<<<<< HEAD /** * Count last flipped discs when playing on square B1/B2. -======= -/** - * Count last flipped discs when playing on square B1. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -/** - * Count last flipped discs when playing on square B1/B2. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. @@ -324,33 +193,15 @@ static int count_last_flip_B1(const unsigned long long P) int n_flipped; unsigned long long P_v, P_d9; -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) P_v = P & 0x0202020202020200; n_flipped = ((P_v & -P_v) * 0x0000102030405060) >> 60; n_flipped += count_H_flip_right(P, 1); P_d9 = P & 0x0080402010080400; n_flipped += ((P_d9 & -P_d9) * 0x0000040403020140) >> 60; -<<<<<<< HEAD -======= - P_v = P & 0x0202020202020200ULL; - n_flipped = ((P_v & -P_v) * 0x0000102030405060ULL) >> 60; - n_flipped += count_H_flip_right(P, 1, 0x3f); - P_d9 = P & 0x0080402010080400ULL; - n_flipped += ((P_d9 & -P_d9) * 0x0000040403020140ULL) >> 60; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) return n_flipped; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_B2(const unsigned long long P) { return count_last_flip_B1(P >> 8); } @@ -363,17 +214,8 @@ static int count_last_flip_B7(const unsigned long long P) { return count_last_flip_B1(vertical_mirror(P) >> 8); } -<<<<<<< HEAD -/** - * Count last flipped discs when playing on square C1/C2. -======= -/** - * Count last flipped discs when playing on square C1. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= /** * Count last flipped discs when playing on square C1/C2. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. @@ -383,32 +225,14 @@ static int count_last_flip_C1(const unsigned long long P) int n_flipped; unsigned long long P_v; -<<<<<<< HEAD -<<<<<<< HEAD - P_v = P & 0x0404040404040400; - n_flipped = ((P_v & -P_v) * 0x0000081018202830) >> 60; - n_flipped += COUNT_FLIP_2[P & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000804020110A04) * 0x0101010101010101) >> 56]; // A3C1H6 -======= - P_v = P & 0x0404040404040400ULL; - n_flipped = ((P_v & -P_v) * 0x0000081018202830ULL) >> 60; - n_flipped += COUNT_FLIP_2[P & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000804020110A04ULL) * 0x0101010101010101ULL) >> 56]; // A3C1H6 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= P_v = P & 0x0404040404040400; n_flipped = ((P_v & -P_v) * 0x0000081018202830) >> 60; n_flipped += COUNT_FLIP_2[P & 0xff]; n_flipped += COUNT_FLIP_2[((P & 0x0000804020110A04) * 0x0101010101010101) >> 56]; // A3C1H6 ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) return n_flipped; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_C2(const unsigned long long P) { return count_last_flip_C1(P >> 8); } @@ -421,17 +245,8 @@ static int count_last_flip_C7(const unsigned long long P) { return count_last_flip_C1(vertical_mirror(P) >> 8); } -<<<<<<< HEAD -/** - * Count last flipped discs when playing on square D1/D2. -======= -/** - * Count last flipped discs when playing on square D1. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= /** * Count last flipped discs when playing on square D1/D2. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. @@ -441,32 +256,14 @@ static int count_last_flip_D1(const unsigned long long P) int n_flipped; unsigned long long P_v; -<<<<<<< HEAD -<<<<<<< HEAD - P_v = P & 0x0808080808080800; - n_flipped = ((P_v & -P_v) * 0x000004080c101418) >> 60; - n_flipped += COUNT_FLIP_3[P & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0000008041221408) * 0x0101010101010101) >> 56]; // A4D1H5 -======= - P_v = P & 0x0808080808080800ULL; - n_flipped = ((P_v & -P_v) * 0x000004080c101418ULL) >> 60; - n_flipped += COUNT_FLIP_3[P & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0000008041221408ULL) * 0x0101010101010101ULL) >> 56]; // A4D1H5 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= P_v = P & 0x0808080808080800; n_flipped = ((P_v & -P_v) * 0x000004080c101418) >> 60; n_flipped += COUNT_FLIP_3[P & 0xff]; n_flipped += COUNT_FLIP_3[((P & 0x0000008041221408) * 0x0101010101010101) >> 56]; // A4D1H5 ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) return n_flipped; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_D2(const unsigned long long P) { return count_last_flip_D1(P >> 8); } @@ -479,17 +276,8 @@ static int count_last_flip_D7(const unsigned long long P) { return count_last_flip_D1(vertical_mirror(P) >> 8); } -<<<<<<< HEAD -/** - * Count last flipped discs when playing on square E1/E2. -======= -/** - * Count last flipped discs when playing on square E1. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= /** * Count last flipped discs when playing on square E1/E2. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. @@ -499,32 +287,14 @@ static int count_last_flip_E1(const unsigned long long P) int n_flipped; unsigned long long P_v; -<<<<<<< HEAD -<<<<<<< HEAD - P_v = P & 0x1010101010101000; - n_flipped = ((P_v & -P_v) * 0x0000020406080a0c) >> 60; - n_flipped += COUNT_FLIP_4[P & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0000000182442810) * 0x0101010101010101) >> 56]; // A5E1H4 -======= - P_v = P & 0x1010101010101000ULL; - n_flipped = ((P_v & -P_v) * 0x0000020406080a0cULL) >> 60; - n_flipped += COUNT_FLIP_4[P & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0000000182442810ULL) * 0x0101010101010101ULL) >> 56]; // A5E1H4 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= P_v = P & 0x1010101010101000; n_flipped = ((P_v & -P_v) * 0x0000020406080a0c) >> 60; n_flipped += COUNT_FLIP_4[P & 0xff]; n_flipped += COUNT_FLIP_4[((P & 0x0000000182442810) * 0x0101010101010101) >> 56]; // A5E1H4 ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) return n_flipped; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_E2(const unsigned long long P) { return count_last_flip_E1(P >> 8); } @@ -537,17 +307,8 @@ static int count_last_flip_E7(const unsigned long long P) { return count_last_flip_E1(vertical_mirror(P) >> 8); } -<<<<<<< HEAD -/** - * Count last flipped discs when playing on square F1/F2. -======= -/** - * Count last flipped discs when playing on square F1. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= /** * Count last flipped discs when playing on square F1/F2. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. @@ -557,32 +318,14 @@ static int count_last_flip_F1(const unsigned long long P) int n_flipped; unsigned long long P_v; -<<<<<<< HEAD -<<<<<<< HEAD - P_v = P & 0x2020202020202000; - n_flipped = ((P_v & -P_v) * 0x0000010203040506) >> 60; - n_flipped += COUNT_FLIP_5[P & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0000010204885020) * 0x0101010101010101) >> 56]; // A6F1H3 -======= - P_v = P & 0x2020202020202000ULL; - n_flipped = ((P_v & -P_v) * 0x0000010203040506ULL) >> 60; - n_flipped += COUNT_FLIP_5[P & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0000010204885020ULL) * 0x0101010101010101ULL) >> 56]; // A6F1H3 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= P_v = P & 0x2020202020202000; n_flipped = ((P_v & -P_v) * 0x0000010203040506) >> 60; n_flipped += COUNT_FLIP_5[P & 0xff]; n_flipped += COUNT_FLIP_5[((P & 0x0000010204885020) * 0x0101010101010101) >> 56]; // A6F1H3 ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) return n_flipped; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_F2(const unsigned long long P) { return count_last_flip_F1(P >> 8); } @@ -595,17 +338,8 @@ static int count_last_flip_F7(const unsigned long long P) { return count_last_flip_F1(vertical_mirror(P) >> 8); } -<<<<<<< HEAD -/** - * Count last flipped discs when playing on square G1/G2. -======= -/** - * Count last flipped discs when playing on square G1. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= /** * Count last flipped discs when playing on square G1/G2. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. @@ -615,33 +349,15 @@ static int count_last_flip_G1(const unsigned long long P) int n_flipped; unsigned long long P_v, P_d7; -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) P_v = P & 0x4040404040404000; n_flipped = ((P_v & -P_v) * 0x0000008101820283) >> 60; n_flipped += count_H_flip_left(P, 6, 0x7e); P_d7 = P & 0x0001020408102000; n_flipped += ((P_d7 & -P_d7) * 0x000002081840a000) >> 60; -<<<<<<< HEAD -======= - P_v = P & 0x4040404040404000ULL; - n_flipped = ((P_v & -P_v) * 0x0000008101820283ULL) >> 60; - n_flipped += count_H1_flip_left(P, 6, 0x7e); - P_d7 = P & 0x0001020408102000ULL; - n_flipped += ((P_d7 & -P_d7) * 0x000002081840a000ULL) >> 60; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) return n_flipped; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_G2(const unsigned long long P) { return count_last_flip_G1(P >> 8); } @@ -654,17 +370,8 @@ static int count_last_flip_G7(const unsigned long long P) { return count_last_flip_G1(vertical_mirror(P) >> 8); } -<<<<<<< HEAD -/** - * Count last flipped discs when playing on square H1/H2. -======= -/** - * Count last flipped discs when playing on square H1. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= /** * Count last flipped discs when playing on square H1/H2. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. @@ -674,31 +381,15 @@ static int count_last_flip_H1(const unsigned long long P) int n_flipped; unsigned long long P_v, P_d7; -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) P_v = P & 0x8080808080808000; n_flipped = (((P_v & -P_v) >> 1) * 0x0000008101820283) >> 60; n_flipped += count_H_flip_left(P, 7, 0x7f); P_d7 = P & 0x0102040810204000; n_flipped += ((P_d7 & -P_d7) * 0x000001040c2050c0) >> 60; -<<<<<<< HEAD -======= - P_v = P & 0x8080808080808000ULL; - n_flipped = (((P_v & -P_v) >> 1) * 0x0000008101820283ULL) >> 60; - n_flipped += count_H1_flip_left(P, 7, 0x7f); - P_d7 = P & 0x0102040810204000ULL; - n_flipped += ((P_d7 & -P_d7) * 0x000001040c2050c0ULL) >> 60; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) return n_flipped; } -<<<<<<< HEAD -<<<<<<< HEAD static int count_last_flip_H2(const unsigned long long P) { return count_last_flip_H1(P >> 8); } @@ -712,690 +403,258 @@ static int count_last_flip_H7(const unsigned long long P) { } #endif // no lzcnt_u64 -======= + /** - * Count last flipped discs when playing on square A2. + * Count last flipped discs when playing on square C3. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_A2(const unsigned long long P) +static int count_last_flip_C3(const unsigned long long P) { int n_flipped; - unsigned long long P_v, P_d9; - P_v = P & 0x0101010101010000ULL; - n_flipped = ((P_v & -P_v) * 0x00000020406080a0ULL) >> 60; - n_flipped += count_H_flip_right(P, 8, 0x7f); - P_d9 = P & 0x4020100804020000ULL; - n_flipped += (((P_d9 & -P_d9) >> 1) * 0x00000010100c0805ULL) >> 60; + n_flipped = COUNT_FLIP_2[((P & 0x0404040404040404) * 0x0040810204081020) >> 56]; + n_flipped += COUNT_FLIP_2[(P >> 16) & 0xff]; + n_flipped += COUNT_FLIP_2[((P & 0x0000000102040810) * 0x0101010101010101) >> 56]; + n_flipped += COUNT_FLIP_2[((P & 0x8040201008040201) * 0x0101010101010101) >> 56]; return n_flipped; -======= -static int count_last_flip_H2(const unsigned long long P) { - return count_last_flip_H1(P >> 8); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -} - -static int count_last_flip_H8(const unsigned long long P) { - return count_last_flip_H1(vertical_mirror(P)); -} - -static int count_last_flip_H7(const unsigned long long P) { - return count_last_flip_H1(vertical_mirror(P) >> 8); } -<<<<<<< HEAD /** - * Count last flipped discs when playing on square D2. + * Count last flipped discs when playing on square D3. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_D2(const unsigned long long P) +static int count_last_flip_D3(const unsigned long long P) { int n_flipped; - unsigned long long P_v; - P_v = P & 0x0808080808080000ULL; - n_flipped = ((P_v & -P_v) * 0x00000004080c1014ULL) >> 60; - n_flipped += COUNT_FLIP_3[(P >> 8) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0000804122140800ULL) * 0x0101010101010101ULL) >> 56]; // A5D2H6 + n_flipped = COUNT_FLIP_2[((P & 0x0808080808080808) * 0x0020408102040810) >> 56]; + n_flipped += COUNT_FLIP_3[(P >> 16) & 0xff]; + n_flipped += COUNT_FLIP_3[((P & 0x0000010204081020) * 0x0101010101010101) >> 56]; + n_flipped += COUNT_FLIP_3[((P & 0x0080402010080402) * 0x0101010101010101) >> 56]; return n_flipped; } /** - * Count last flipped discs when playing on square E2. + * Count last flipped discs when playing on square E3. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_E2(const unsigned long long P) +static int count_last_flip_E3(const unsigned long long P) { int n_flipped; - unsigned long long P_v; - P_v = P & 0x1010101010100000ULL; - n_flipped = ((P_v & -P_v) * 0x000000020406080aULL) >> 60; - n_flipped += COUNT_FLIP_4[(P >> 8) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0000018244281000ULL) * 0x0101010101010101ULL) >> 56]; // A6E2H5 + n_flipped = COUNT_FLIP_2[((P & 0x1010101010101010) * 0x0010204081020408) >> 56]; + n_flipped += COUNT_FLIP_4[(P >> 16) & 0xff]; + n_flipped += COUNT_FLIP_4[((P & 0x0001020408102040) * 0x0101010101010101) >> 56]; + n_flipped += COUNT_FLIP_4[((P & 0x0000804020100804) * 0x0101010101010101) >> 56]; return n_flipped; } /** - * Count last flipped discs when playing on square F2. + * Count last flipped discs when playing on square F3. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_F2(const unsigned long long P) +static int count_last_flip_F3(const unsigned long long P) { int n_flipped; - unsigned long long P_v; - P_v = P & 0x2020202020200000ULL; - n_flipped = ((P_v & -P_v) * 0x0000000102030405ULL) >> 60; - n_flipped += COUNT_FLIP_5[(P >> 8) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0001020488502000ULL) * 0x0101010101010101ULL) >> 56]; // A7F2H4 + n_flipped = COUNT_FLIP_2[((P & 0x2020202020202020) * 0x0008102040810204) >> 56]; + n_flipped += COUNT_FLIP_5[(P >> 16) & 0xff]; + n_flipped += COUNT_FLIP_5[((P & 0x0102040810204080) * 0x0101010101010101) >> 56]; + n_flipped += COUNT_FLIP_5[((P & 0x0000008040201008) * 0x0101010101010101) >> 56]; return n_flipped; } /** - * Count last flipped discs when playing on square G2. + * Count last flipped discs when playing on square A4. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_G2(const unsigned long long P) +static int count_last_flip_A4(const unsigned long long P) { int n_flipped; - unsigned long long P_v, P_d7; - P_v = P & 0x4040404040400000ULL; - n_flipped = (((P_v & -P_v) >> 1) * 0x0000000102030405ULL) >> 60; - n_flipped += count_H_flip_left(P, 14, 0x7e); - P_d7 = P & 0x0102040810200000ULL; - n_flipped += ((P_d7 & -P_d7) * 0x00000002081840a0ULL) >> 60; + n_flipped = COUNT_FLIP_3[((P & 0x1008040201010101) * 0x0102040808080808) >> 56]; // A1A4E8 + n_flipped += count_H_flip_right(P, 24); + n_flipped += COUNT_FLIP_4[((P & 0x0101010101020408) * 0x1010101008040201) >> 56]; // D1A4A8 return n_flipped; } /** - * Count last flipped discs when playing on square H2. + * Count last flipped discs when playing on square B4. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_H2(const unsigned long long P) +static int count_last_flip_B4(const unsigned long long P) { int n_flipped; - unsigned long long P_v, P_d7; - P_v = P & 0x8080808080800000ULL; - n_flipped = (((P_v & -P_v) >> 2) * 0x0000000102030405ULL) >> 60; - n_flipped += count_H_flip_left(P, 15, 0x7f); - P_d7 = P & 0x0204081020400000ULL; - n_flipped += (((P_d7 & -P_d7) >> 2) * 0x0000000410308143ULL) >> 60; + n_flipped = COUNT_FLIP_3[((P & 0x2010080402020202) * 0x0081020404040404) >> 56]; // B1B4F8 + n_flipped += count_H_flip_right(P, 25); + n_flipped += COUNT_FLIP_4[(((P & 0x0202020202040810) >> 1) * 0x1010101008040201) >> 56]; // E1B4B8 return n_flipped; } /** - * Count last flipped discs when playing on square A3. + * Count last flipped discs when playing on square C4. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_A3(const unsigned long long P) +static int count_last_flip_C4(const unsigned long long P) { int n_flipped; - n_flipped = COUNT_FLIP_2[((P & 0x2010080402010101ULL) * 0x0102040404040404ULL) >> 56]; // A1A3F8 - n_flipped += count_H_flip_right(P, 16, 0x7f); - n_flipped += COUNT_FLIP_5[((P & 0x0101010101010204ULL) * 0x2020201008040201ULL) >> 56]; // C1A3A8 + n_flipped = COUNT_FLIP_3[((P & 0x0404040404040404) * 0x0040810204081020) >> 56]; + n_flipped += COUNT_FLIP_2[(P >> 24) & 0xff]; + n_flipped += COUNT_FLIP_2[((P & 0x0000010204081020) * 0x0101010101010101) >> 56]; + n_flipped += COUNT_FLIP_2[((P & 0x4020100804020100) * 0x0101010101010101) >> 56]; return n_flipped; } /** - * Count last flipped discs when playing on square B3. + * Count last flipped discs when playing on square D4. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_B3(const unsigned long long P) +static int count_last_flip_D4(const unsigned long long P) { int n_flipped; - n_flipped = COUNT_FLIP_2[((P & 0x4020100804020202ULL) * 0x0081020202020202ULL) >> 56]; // B1B3G8 - n_flipped += count_H_flip_right(P, 17, 0x3f); - n_flipped += COUNT_FLIP_5[(((P & 0x0202020202020408ULL) >> 1) * 0x2020201008040201ULL) >> 56]; // D1B3B8 + n_flipped = COUNT_FLIP_3[((P & 0x0808080808080808) * 0x0020408102040810) >> 56]; + n_flipped += COUNT_FLIP_3[(P >> 24) & 0xff]; + n_flipped += COUNT_FLIP_3[((P & 0x0001020408102040) * 0x0101010101010101) >> 56]; + n_flipped += COUNT_FLIP_3[((P & 0x8040201008040201) * 0x0101010101010101) >> 56]; return n_flipped; } ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -#endif // no lzcnt_u64 ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) /** - * Count last flipped discs when playing on square C3. + * Count last flipped discs when playing on square E4. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_C3(const unsigned long long P) +static int count_last_flip_E4(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_2[((P & 0x0404040404040404) * 0x0040810204081020) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000000102040810) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x8040201008040201) * 0x0101010101010101) >> 56]; -======= - n_flipped = COUNT_FLIP_2[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000000102040810ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_2[((P & 0x0404040404040404) * 0x0040810204081020) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000000102040810) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x8040201008040201) * 0x0101010101010101) >> 56]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) + n_flipped = COUNT_FLIP_3[((P & 0x1010101010101010) * 0x0010204081020408) >> 56]; + n_flipped += COUNT_FLIP_4[(P >> 24) & 0xff]; + n_flipped += COUNT_FLIP_4[((P & 0x0102040810204080) * 0x0101010101010101) >> 56]; + n_flipped += COUNT_FLIP_4[((P & 0x0080402010080402) * 0x0101010101010101) >> 56]; return n_flipped; } /** - * Count last flipped discs when playing on square D3. + * Count last flipped discs when playing on square F4. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_D3(const unsigned long long P) +static int count_last_flip_F4(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_2[((P & 0x0808080808080808) * 0x0020408102040810) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0000010204081020) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x0080402010080402) * 0x0101010101010101) >> 56]; -======= - n_flipped = COUNT_FLIP_2[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0000010204081020ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_2[((P & 0x0808080808080808) * 0x0020408102040810) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0000010204081020) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x0080402010080402) * 0x0101010101010101) >> 56]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) + n_flipped = COUNT_FLIP_3[((P & 0x2020202020202020) * 0x0008102040810204) >> 56]; + n_flipped += COUNT_FLIP_5[(P >> 24) & 0xff]; + n_flipped += COUNT_FLIP_5[((P & 0x0204081020408000) * 0x0101010101010101) >> 56]; + n_flipped += COUNT_FLIP_5[((P & 0x0000804020100804) * 0x0101010101010101) >> 56]; return n_flipped; } /** - * Count last flipped discs when playing on square E3. + * Count last flipped discs when playing on square G4. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_E3(const unsigned long long P) +static int count_last_flip_G4(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_2[((P & 0x1010101010101010) * 0x0010204081020408) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0001020408102040) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x0000804020100804) * 0x0101010101010101) >> 56]; -======= - n_flipped = COUNT_FLIP_2[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x0000804020100804ULL) * 0x0101010101010101ULL) >> 56]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_2[((P & 0x1010101010101010) * 0x0010204081020408) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0001020408102040) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x0000804020100804) * 0x0101010101010101) >> 56]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) + n_flipped = COUNT_FLIP_3[((P & 0x4040404040201008) * 0x0020202020408102) >> 56]; // D1G4G8 + n_flipped += count_H_flip_left(P, 30, 0x7e); + n_flipped += COUNT_FLIP_4[(((P & 0x0408102040404040) >> 2) * 0x0804020101010101) >> 56]; // G1G4C8 return n_flipped; } /** - * Count last flipped discs when playing on square F3. + * Count last flipped discs when playing on square H4. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_F3(const unsigned long long P) +static int count_last_flip_H4(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_2[((P & 0x2020202020202020) * 0x0008102040810204) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0102040810204080) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0000008040201008) * 0x0101010101010101) >> 56]; -======= - n_flipped = COUNT_FLIP_2[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0000008040201008ULL) * 0x0101010101010101ULL) >> 56]; + n_flipped = COUNT_FLIP_3[((P & 0x8080808080402010) * 0x0010101010204081) >> 56]; // E1H4H8 + n_flipped += count_H_flip_left(P, 31, 0x7f); + n_flipped += COUNT_FLIP_4[(((P & 0x0810204080808080) >> 3) * 0x0804020101010101) >> 56]; // H1H4D8 return n_flipped; } /** - * Count last flipped discs when playing on square G3. + * Count last flipped discs when playing on square A5. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_G3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x4040404040402010ULL) * 0x0010101020408102ULL) >> 56]; // E1G3G8 - n_flipped += count_H_flip_left(P, 22, 0x7e); - n_flipped += COUNT_FLIP_5[(((P & 0x0204081020404040ULL) >> 1) * 0x0402010101010101ULL) >> 56]; // G1G3B8 - - return n_flipped; +static int count_last_flip_A5(const unsigned long long P) { + return count_last_flip_A4(vertical_mirror(P)); } /** - * Count last flipped discs when playing on square H3. + * Count last flipped discs when playing on square B5. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_H3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x8080808080804020ULL) * 0x0008080810204081ULL) >> 56]; // F1H3H8 - n_flipped += count_H_flip_left(P, 23, 0x7f); - n_flipped += COUNT_FLIP_5[(((P & 0x0408102040808080ULL) >> 2) * 0x0402010101010101ULL) >> 56]; // H1H3C8 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_2[((P & 0x2020202020202020) * 0x0008102040810204) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0102040810204080) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0000008040201008) * 0x0101010101010101) >> 56]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - - return n_flipped; +static int count_last_flip_B5(const unsigned long long P) { + return count_last_flip_B4(vertical_mirror(P)); } /** - * Count last flipped discs when playing on square A4. + * Count last flipped discs when playing on square C5. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_A4(const unsigned long long P) -{ - int n_flipped; - -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_3[((P & 0x1008040201010101) * 0x0102040808080808) >> 56]; // A1A4E8 - n_flipped += count_H_flip_right(P, 24); - n_flipped += COUNT_FLIP_4[((P & 0x0101010101020408) * 0x1010101008040201) >> 56]; // D1A4A8 -======= - n_flipped = COUNT_FLIP_3[((P & 0x1008040201010101ULL) * 0x0102040808080808ULL) >> 56]; // A1A4E8 - n_flipped += count_H_flip_right(P, 24, 0x7f); - n_flipped += COUNT_FLIP_4[((P & 0x0101010101020408ULL) * 0x1010101008040201ULL) >> 56]; // D1A4A8 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_3[((P & 0x1008040201010101) * 0x0102040808080808) >> 56]; // A1A4E8 - n_flipped += count_H_flip_right(P, 24); - n_flipped += COUNT_FLIP_4[((P & 0x0101010101020408) * 0x1010101008040201) >> 56]; // D1A4A8 ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - - return n_flipped; +static int count_last_flip_C5(const unsigned long long P) { + return count_last_flip_C4(vertical_mirror(P)); } /** - * Count last flipped discs when playing on square B4. + * Count last flipped discs when playing on square D5. * * @param P player's disc pattern. * @return flipped disc count. */ -static int count_last_flip_B4(const unsigned long long P) -{ - int n_flipped; - -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_3[((P & 0x2010080402020202) * 0x0081020404040404) >> 56]; // B1B4F8 - n_flipped += count_H_flip_right(P, 25); - n_flipped += COUNT_FLIP_4[(((P & 0x0202020202040810) >> 1) * 0x1010101008040201) >> 56]; // E1B4B8 -======= - n_flipped = COUNT_FLIP_3[((P & 0x2010080402020202ULL) * 0x0081020404040404ULL) >> 56]; // B1B4F8 - n_flipped += count_H_flip_right(P, 25, 0x3f); - n_flipped += COUNT_FLIP_4[(((P & 0x0202020202040810ULL) >> 1) * 0x1010101008040201ULL) >> 56]; // E1B4B8 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_3[((P & 0x2010080402020202) * 0x0081020404040404) >> 56]; // B1B4F8 - n_flipped += count_H_flip_right(P, 25); - n_flipped += COUNT_FLIP_4[(((P & 0x0202020202040810) >> 1) * 0x1010101008040201) >> 56]; // E1B4B8 ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C4(const unsigned long long P) -{ - int n_flipped; - -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_3[((P & 0x0404040404040404) * 0x0040810204081020) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000010204081020) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x4020100804020100) * 0x0101010101010101) >> 56]; -======= - n_flipped = COUNT_FLIP_3[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000010204081020ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_3[((P & 0x0404040404040404) * 0x0040810204081020) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000010204081020) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x4020100804020100) * 0x0101010101010101) >> 56]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D4(const unsigned long long P) -{ - int n_flipped; - -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_3[((P & 0x0808080808080808) * 0x0020408102040810) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0001020408102040) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x8040201008040201) * 0x0101010101010101) >> 56]; -======= - n_flipped = COUNT_FLIP_3[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_3[((P & 0x0808080808080808) * 0x0020408102040810) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0001020408102040) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x8040201008040201) * 0x0101010101010101) >> 56]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E4(const unsigned long long P) -{ - int n_flipped; - -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_3[((P & 0x1010101010101010) * 0x0010204081020408) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0102040810204080) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x0080402010080402) * 0x0101010101010101) >> 56]; -======= - n_flipped = COUNT_FLIP_3[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_3[((P & 0x1010101010101010) * 0x0010204081020408) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0102040810204080) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x0080402010080402) * 0x0101010101010101) >> 56]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F4(const unsigned long long P) -{ - int n_flipped; - -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_3[((P & 0x2020202020202020) * 0x0008102040810204) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0204081020408000) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0000804020100804) * 0x0101010101010101) >> 56]; -======= - n_flipped = COUNT_FLIP_3[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0000804020100804ULL) * 0x0101010101010101ULL) >> 56]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_3[((P & 0x2020202020202020) * 0x0008102040810204) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0204081020408000) * 0x0101010101010101) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0000804020100804) * 0x0101010101010101) >> 56]; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G4(const unsigned long long P) -{ - int n_flipped; - -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_3[((P & 0x4040404040201008) * 0x0020202020408102) >> 56]; // D1G4G8 - n_flipped += count_H_flip_left(P, 30, 0x7e); - n_flipped += COUNT_FLIP_4[(((P & 0x0408102040404040) >> 2) * 0x0804020101010101) >> 56]; // G1G4C8 -======= - n_flipped = COUNT_FLIP_3[((P & 0x4040404040201008ULL) * 0x0020202020408102ULL) >> 56]; // D1G4G8 - n_flipped += count_H_flip_left(P, 30, 0x7e); - n_flipped += COUNT_FLIP_4[(((P & 0x0408102040404040ULL) >> 2) * 0x0804020101010101ULL) >> 56]; // G1G4C8 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_3[((P & 0x4040404040201008) * 0x0020202020408102) >> 56]; // D1G4G8 - n_flipped += count_H_flip_left(P, 30, 0x7e); - n_flipped += COUNT_FLIP_4[(((P & 0x0408102040404040) >> 2) * 0x0804020101010101) >> 56]; // G1G4C8 ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H4(const unsigned long long P) -{ - int n_flipped; - -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP_3[((P & 0x8080808080402010) * 0x0010101010204081) >> 56]; // E1H4H8 - n_flipped += count_H_flip_left(P, 31, 0x7f); - n_flipped += COUNT_FLIP_4[(((P & 0x0810204080808080) >> 3) * 0x0804020101010101) >> 56]; // H1H4D8 -======= - n_flipped = COUNT_FLIP_3[((P & 0x8080808080402010ULL) * 0x0010101010204081ULL) >> 56]; // E1H4H8 - n_flipped += count_H_flip_left(P, 31, 0x7f); - n_flipped += COUNT_FLIP_4[(((P & 0x0810204080808080ULL) >> 3) * 0x0804020101010101ULL) >> 56]; // H1H4D8 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped = COUNT_FLIP_3[((P & 0x8080808080402010) * 0x0010101010204081) >> 56]; // E1H4H8 - n_flipped += count_H_flip_left(P, 31, 0x7f); - n_flipped += COUNT_FLIP_4[(((P & 0x0810204080808080) >> 3) * 0x0804020101010101) >> 56]; // H1H4D8 ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -<<<<<<< HEAD -<<<<<<< HEAD -static int count_last_flip_A5(const unsigned long long P) { - return count_last_flip_A4(vertical_mirror(P)); -======= -static int count_last_flip_A5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x0804020101010101ULL) * 0x0102040810101010ULL) >> 56]; // A1A5D8 - n_flipped += count_H_flip_right(P, 32, 0x7f); - n_flipped += COUNT_FLIP_3[((P & 0x0101010102040810ULL) * 0x0808080808040201ULL) >> 56]; // E1A5A8 - - return n_flipped; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -static int count_last_flip_A5(const unsigned long long P) { - return count_last_flip_A4(vertical_mirror(P)); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -} - -/** - * Count last flipped discs when playing on square B5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -<<<<<<< HEAD -<<<<<<< HEAD -static int count_last_flip_B5(const unsigned long long P) { - return count_last_flip_B4(vertical_mirror(P)); -======= -static int count_last_flip_B5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x1008040202020202ULL) * 0x0081020408080808ULL) >> 56]; // B1B5E8 - n_flipped += count_H_flip_right(P, 33, 0x3f); - n_flipped += COUNT_FLIP_3[(((P & 0x0202020204081020ULL) >> 1) * 0x0808080808040201ULL) >> 56]; // F1B5B8 - - return n_flipped; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -static int count_last_flip_B5(const unsigned long long P) { - return count_last_flip_B4(vertical_mirror(P)); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -} - -/** - * Count last flipped discs when playing on square C5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -<<<<<<< HEAD -<<<<<<< HEAD -static int count_last_flip_C5(const unsigned long long P) { - return count_last_flip_C4(vertical_mirror(P)); -======= -static int count_last_flip_C5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 32) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x2010080402010000ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -static int count_last_flip_C5(const unsigned long long P) { - return count_last_flip_C4(vertical_mirror(P)); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -} - -/** - * Count last flipped discs when playing on square D5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -<<<<<<< HEAD -<<<<<<< HEAD -static int count_last_flip_D5(const unsigned long long P) { - return count_last_flip_D4(vertical_mirror(P)); -======= -static int count_last_flip_D5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 32) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -static int count_last_flip_D5(const unsigned long long P) { - return count_last_flip_D4(vertical_mirror(P)); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -} +static int count_last_flip_D5(const unsigned long long P) { + return count_last_flip_D4(vertical_mirror(P)); +} /** * Count last flipped discs when playing on square E5. @@ -1403,26 +662,8 @@ static int count_last_flip_D5(const unsigned long long P) { * @param P player's disc pattern. * @return flipped disc count. */ -<<<<<<< HEAD -<<<<<<< HEAD -static int count_last_flip_E5(const unsigned long long P) { - return count_last_flip_E4(vertical_mirror(P)); -======= -static int count_last_flip_E5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 32) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= static int count_last_flip_E5(const unsigned long long P) { return count_last_flip_E4(vertical_mirror(P)); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) } /** @@ -1431,26 +672,8 @@ static int count_last_flip_E5(const unsigned long long P) { * @param P player's disc pattern. * @return flipped disc count. */ -<<<<<<< HEAD -<<<<<<< HEAD -static int count_last_flip_F5(const unsigned long long P) { - return count_last_flip_F4(vertical_mirror(P)); -======= -static int count_last_flip_F5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 32) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0408102040800000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= static int count_last_flip_F5(const unsigned long long P) { return count_last_flip_F4(vertical_mirror(P)); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) } /** @@ -1459,25 +682,8 @@ static int count_last_flip_F5(const unsigned long long P) { * @param P player's disc pattern. * @return flipped disc count. */ -<<<<<<< HEAD -<<<<<<< HEAD static int count_last_flip_G5(const unsigned long long P) { return count_last_flip_G4(vertical_mirror(P)); -======= -static int count_last_flip_G5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x4040404020100804ULL) * 0x0040404040408102ULL) >> 56]; // C1G5G8 - n_flipped += count_H_flip_left(P, 38, 0x7e); - n_flipped += COUNT_FLIP_3[(((P & 0x0810204040404040ULL) >> 3) * 0x1008040201010101ULL) >> 56]; // G1G5D8 - - return n_flipped; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -static int count_last_flip_G5(const unsigned long long P) { - return count_last_flip_G4(vertical_mirror(P)); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) } /** @@ -1486,37 +692,12 @@ static int count_last_flip_G5(const unsigned long long P) { * @param P player's disc pattern. * @return flipped disc count. */ -<<<<<<< HEAD -<<<<<<< HEAD static int count_last_flip_H5(const unsigned long long P) { return count_last_flip_H4(vertical_mirror(P)); } /** * Count last flipped discs when playing on square A3/A6. -======= -static int count_last_flip_H5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x8080808040201008ULL) * 0x0020202020204081ULL) >> 56]; // D1H5H8 - n_flipped += count_H_flip_left(P, 39, 0x7f); - n_flipped += COUNT_FLIP_3[(((P & 0x1020408080808080ULL) >> 4) * 0x1008040201010101ULL) >> 56]; // H1H5E8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A6. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -static int count_last_flip_H5(const unsigned long long P) { - return count_last_flip_H4(vertical_mirror(P)); -} - -/** - * Count last flipped discs when playing on square A3/A6. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. @@ -1525,10 +706,6 @@ static int count_last_flip_A6(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) #ifdef __ARM_FEATURE_CLZ // shorter on arm n_flipped = count_V_flip_reverse((P & 0x0000000101010101), 31); n_flipped += count_V_flip_reverse((P & 0x0000000204081020), 24); @@ -1538,37 +715,16 @@ static int count_last_flip_A6(const unsigned long long P) n_flipped += COUNT_FLIP_2[((P & 0x0101010204081020) * 0x0404040404040201) >> 56]; // F1A6A8 #endif n_flipped += count_H_flip_right(P, 40); -<<<<<<< HEAD -======= - n_flipped = COUNT_FLIP_5[((P & 0x0402010101010101ULL) * 0x0102040810202020ULL) >> 56]; // A1A6C8 - n_flipped += count_H_flip_right(P, 40, 0x7f); - n_flipped += COUNT_FLIP_2[((P & 0x0101010204081020ULL) * 0x0404040404040201ULL) >> 56]; // F1A6A8 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) return n_flipped; } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_A3(const unsigned long long P) { return count_last_flip_A6(vertical_mirror(P)); } -<<<<<<< HEAD /** * Count last flipped discs when playing on square B3/B6. -======= -/** - * Count last flipped discs when playing on square B6. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -/** - * Count last flipped discs when playing on square B3/B6. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. @@ -1577,10 +733,6 @@ static int count_last_flip_B6(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) #ifdef __ARM_FEATURE_CLZ n_flipped = count_V_flip_reverse((P & 0x0000000202020202), 30); n_flipped += count_V_flip_reverse((P & 0x0000000408102040), 23); @@ -1590,492 +742,123 @@ static int count_last_flip_B6(const unsigned long long P) n_flipped += COUNT_FLIP_2[(((P & 0x0202020408102040) >> 1) * 0x0404040404040201) >> 56]; // G1B6B8 #endif n_flipped += count_H_flip_right(P, 41); -<<<<<<< HEAD -======= - n_flipped = COUNT_FLIP_5[((P & 0x0804020202020202ULL) * 0x0081020408101010ULL) >> 56]; // B1B6D8 - n_flipped += count_H_flip_right(P, 41, 0x3f); - n_flipped += COUNT_FLIP_2[(((P & 0x0202020408102040ULL) >> 1) * 0x0404040404040201ULL) >> 56]; // G1B6B8 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - - return n_flipped; -} - -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -static int count_last_flip_B3(const unsigned long long P) { - return count_last_flip_B6(vertical_mirror(P)); -} - -<<<<<<< HEAD -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -/** - * Count last flipped discs when playing on square C6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -<<<<<<< HEAD -<<<<<<< HEAD -static int count_last_flip_C6(const unsigned long long P) { - return count_last_flip_C3(vertical_mirror(P)); -======= -static int count_last_flip_C6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 40) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x1008040201000000ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -static int count_last_flip_C6(const unsigned long long P) { - return count_last_flip_C3(vertical_mirror(P)); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -} - -/** - * Count last flipped discs when playing on square D6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -<<<<<<< HEAD -<<<<<<< HEAD -static int count_last_flip_D6(const unsigned long long P) { - return count_last_flip_D3(vertical_mirror(P)); -======= -static int count_last_flip_D6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 40) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x2010080402010000ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -static int count_last_flip_D6(const unsigned long long P) { - return count_last_flip_D3(vertical_mirror(P)); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -} - -/** - * Count last flipped discs when playing on square E6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -<<<<<<< HEAD -<<<<<<< HEAD -static int count_last_flip_E6(const unsigned long long P) { - return count_last_flip_E3(vertical_mirror(P)); -======= -static int count_last_flip_E6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 40) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0408102040800000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -static int count_last_flip_E6(const unsigned long long P) { - return count_last_flip_E3(vertical_mirror(P)); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -} - -/** - * Count last flipped discs when playing on square F6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -<<<<<<< HEAD -<<<<<<< HEAD -static int count_last_flip_F6(const unsigned long long P) { - return count_last_flip_F3(vertical_mirror(P)); -} - -/** - * Count last flipped discs when playing on square G3/G6. -======= -static int count_last_flip_F6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 40) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0810204080000000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56]; return n_flipped; } -/** - * Count last flipped discs when playing on square G6. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -static int count_last_flip_F6(const unsigned long long P) { - return count_last_flip_F3(vertical_mirror(P)); -} - -/** - * Count last flipped discs when playing on square G3/G6. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G6(const unsigned long long P) -{ - int n_flipped; - -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -#ifdef __ARM_FEATURE_CLZ - n_flipped = count_V_flip_reverse((P & 0x0000004040404040), 23); - n_flipped += count_V_flip_reverse((P & 0x0000002010080402), 24); - n_flipped += (((P >> 62) & ~(P >> 54) & 1) + ((P >> 60) & ~(P >> 53) & 1)) * 2; -#else - n_flipped = COUNT_FLIP_5[((P & 0x4040402010080402) * 0x0080808080808102) >> 56]; // B1G6G8 - n_flipped += COUNT_FLIP_2[(((P & 0x1020404040404040) >> 4) * 0x2010080402010101) >> 56]; // G1G6E8 -#endif -<<<<<<< HEAD - n_flipped += count_H_flip_left(P, 46, 0x7e); -======= - n_flipped = COUNT_FLIP_5[((P & 0x4040402010080402ULL) * 0x0080808080808102ULL) >> 56]; // B1G6G8 - n_flipped += count_H_flip_left(P, 46, 0x7e); - n_flipped += COUNT_FLIP_2[(((P & 0x1020404040404040ULL) >> 4) * 0x2010080402010101ULL) >> 56]; // G1G6E8 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped += count_H_flip_left(P, 46, 0x7e); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - - return n_flipped; -} - -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -static int count_last_flip_G3(const unsigned long long P) { - return count_last_flip_G6(vertical_mirror(P)); -} - -<<<<<<< HEAD -/** - * Count last flipped discs when playing on square H3/H6. -======= -/** - * Count last flipped discs when playing on square H6. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -/** - * Count last flipped discs when playing on square H3/H6. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H6(const unsigned long long P) -{ - int n_flipped; - -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -#ifdef __ARM_FEATURE_CLZ - n_flipped = count_V_flip_reverse((P & 0x0000008080808080), 24); - n_flipped += count_V_flip_reverse((P & 0x0000004020100804), 25); - n_flipped += (((P >> 63) & ~(P >> 55) & 1) + ((P >> 61) & ~(P >> 54) & 1)) * 2; -#else - n_flipped = COUNT_FLIP_5[((P & 0x8080804020100804) * 0x0040404040404081) >> 56]; // C1H6H8 - n_flipped += COUNT_FLIP_2[(((P & 0x2040808080808080) >> 5) * 0x2010080402010101) >> 56]; // H1H6F8 -#endif -<<<<<<< HEAD - n_flipped += count_H_flip_left(P, 47, 0x7f); -======= - n_flipped = COUNT_FLIP_5[((P & 0x8080804020100804ULL) * 0x0040404040404081ULL) >> 56]; // C1H6H8 - n_flipped += count_H_flip_left(P, 47, 0x7f); - n_flipped += COUNT_FLIP_2[(((P & 0x2040808080808080ULL) >> 5) * 0x2010080402010101ULL) >> 56]; // H1H6F8 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flipped += count_H_flip_left(P, 47, 0x7f); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - - return n_flipped; -} - -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) -static int count_last_flip_H3(const unsigned long long P) { - return count_last_flip_H6(vertical_mirror(P)); -} - -#ifdef lzcnt_u64 - -<<<<<<< HEAD -/** - * Count last flipped discs when playing on square A7/A8. -======= -/** - * Count last flipped discs when playing on square A7. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -/** - * Count last flipped discs when playing on square A7/A8. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -<<<<<<< HEAD -<<<<<<< HEAD -======= -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_A7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000010101010101ULL), 23); - n_flipped += count_H_flip_right(P, 48, 0x7f); - n_flipped += count_V_flip_reverse((P & 0x0000020408102040ULL), 16); - - return n_flipped; -} -#else -static int count_last_flip_A7(const unsigned long long P) { - return count_last_flip_A2(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square B7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_B7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000020202020202ULL), 22); - n_flipped += count_H_flip_right(P, 49, 0x3f); - n_flipped += count_V_flip_reverse((P & 0x0000040810204080ULL), 15); - - return n_flipped; -} -#else -static int count_last_flip_B7(const unsigned long long P) { - return count_last_flip_B2(vertical_mirror(P)); +static int count_last_flip_B3(const unsigned long long P) { + return count_last_flip_B6(vertical_mirror(P)); } -#endif /** - * Count last flipped discs when playing on square C7. + * Count last flipped discs when playing on square C6. * * @param P player's disc pattern. * @return flipped disc count. */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_C7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000040404040404ULL), 21); - n_flipped += COUNT_FLIP_2[(P >> 48) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x00040A1120408000ULL) * 0x0101010101010101ULL) >> 56]; // A5C7H2 - - return n_flipped; -} -#else -static int count_last_flip_C7(const unsigned long long P) { - return count_last_flip_C2(vertical_mirror(P)); +static int count_last_flip_C6(const unsigned long long P) { + return count_last_flip_C3(vertical_mirror(P)); } -#endif /** - * Count last flipped discs when playing on square D7. + * Count last flipped discs when playing on square D6. * * @param P player's disc pattern. * @return flipped disc count. */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_D7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000080808080808ULL), 20); - n_flipped += COUNT_FLIP_3[(P >> 48) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0008142241800000ULL) * 0x0101010101010101ULL) >> 56]; // A4D7H3 - - return n_flipped; -} -#else -static int count_last_flip_D7(const unsigned long long P) { - return count_last_flip_D2(vertical_mirror(P)); +static int count_last_flip_D6(const unsigned long long P) { + return count_last_flip_D3(vertical_mirror(P)); } -#endif /** - * Count last flipped discs when playing on square E7. + * Count last flipped discs when playing on square E6. * * @param P player's disc pattern. * @return flipped disc count. */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_E7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000101010101010ULL), 19); - n_flipped += COUNT_FLIP_4[(P >> 48) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0010284482010000ULL) * 0x0101010101010101ULL) >> 56]; // A3E7H4 - - return n_flipped; -} -#else -static int count_last_flip_E7(const unsigned long long P) { - return count_last_flip_E2(vertical_mirror(P)); +static int count_last_flip_E6(const unsigned long long P) { + return count_last_flip_E3(vertical_mirror(P)); } -#endif /** - * Count last flipped discs when playing on square F7. + * Count last flipped discs when playing on square F6. * * @param P player's disc pattern. * @return flipped disc count. */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_F7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000202020202020ULL), 18); - n_flipped += COUNT_FLIP_5[(P >> 48) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0020508804020100ULL) * 0x0101010101010101ULL) >> 56]; // A2F7H5 - - return n_flipped; -} -#else -static int count_last_flip_F7(const unsigned long long P) { - return count_last_flip_F2(vertical_mirror(P)); +static int count_last_flip_F6(const unsigned long long P) { + return count_last_flip_F3(vertical_mirror(P)); } -#endif /** - * Count last flipped discs when playing on square G7. + * Count last flipped discs when playing on square G3/G6. * * @param P player's disc pattern. * @return flipped disc count. */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_G7(const unsigned long long P) +static int count_last_flip_G6(const unsigned long long P) { int n_flipped; - n_flipped = count_V_flip_reverse((P & 0x0000404040404040ULL), 17); - n_flipped += count_H_flip_left(P, 54, 0x7e); - n_flipped += count_V_flip_reverse((P & 0x0000201008040201ULL), 18); +#ifdef __ARM_FEATURE_CLZ + n_flipped = count_V_flip_reverse((P & 0x0000004040404040), 23); + n_flipped += count_V_flip_reverse((P & 0x0000002010080402), 24); + n_flipped += (((P >> 62) & ~(P >> 54) & 1) + ((P >> 60) & ~(P >> 53) & 1)) * 2; +#else + n_flipped = COUNT_FLIP_5[((P & 0x4040402010080402) * 0x0080808080808102) >> 56]; // B1G6G8 + n_flipped += COUNT_FLIP_2[(((P & 0x1020404040404040) >> 4) * 0x2010080402010101) >> 56]; // G1G6E8 +#endif + n_flipped += count_H_flip_left(P, 46, 0x7e); return n_flipped; } -#else -static int count_last_flip_G7(const unsigned long long P) { - return count_last_flip_G2(vertical_mirror(P)); + +static int count_last_flip_G3(const unsigned long long P) { + return count_last_flip_G6(vertical_mirror(P)); } -#endif /** - * Count last flipped discs when playing on square H7. + * Count last flipped discs when playing on square H3/H6. * * @param P player's disc pattern. * @return flipped disc count. */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_H7(const unsigned long long P) +static int count_last_flip_H6(const unsigned long long P) { int n_flipped; - n_flipped = count_V_flip_reverse((P & 0x0000808080808080ULL), 16); - n_flipped += count_H_flip_left(P, 55, 0x7f); - n_flipped += count_V_flip_reverse((P & 0x0000402010080402ULL), 17); +#ifdef __ARM_FEATURE_CLZ + n_flipped = count_V_flip_reverse((P & 0x0000008080808080), 24); + n_flipped += count_V_flip_reverse((P & 0x0000004020100804), 25); + n_flipped += (((P >> 63) & ~(P >> 55) & 1) + ((P >> 61) & ~(P >> 54) & 1)) * 2; +#else + n_flipped = COUNT_FLIP_5[((P & 0x8080804020100804) * 0x0040404040404081) >> 56]; // C1H6H8 + n_flipped += COUNT_FLIP_2[(((P & 0x2040808080808080) >> 5) * 0x2010080402010101) >> 56]; // H1H6F8 +#endif + n_flipped += count_H_flip_left(P, 47, 0x7f); return n_flipped; } -#else -static int count_last_flip_H7(const unsigned long long P) { - return count_last_flip_H2(vertical_mirror(P)); + +static int count_last_flip_H3(const unsigned long long P) { + return count_last_flip_H6(vertical_mirror(P)); } -#endif + +#ifdef lzcnt_u64 /** - * Count last flipped discs when playing on square A8. + * Count last flipped discs when playing on square A7/A8. * * @param P player's disc pattern. * @return flipped disc count. */ -#if defined(__LZCNT__) || defined(__AVX2__) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_A8(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = count_V_flip_reverse((P & 0x0101010101010101), 15); - n_flipped += count_H_flip_right(P, 56); - n_flipped += count_V_flip_reverse((P & 0x0002040810204080), 8); - - return n_flipped; -} - -static int count_last_flip_A7(const unsigned long long P) { - return count_last_flip_A8(P << 8); -} - -static int count_last_flip_A1(const unsigned long long P) { - return count_last_flip_A8(vertical_mirror(P)); -} - -static int count_last_flip_A2(const unsigned long long P) { - return count_last_flip_A8(vertical_mirror(P) << 8); -} - -/** - * Count last flipped discs when playing on square B7/B8. -======= - n_flipped = count_V_flip_reverse((P & 0x0001010101010101ULL), 15); - n_flipped += count_H8_flip_right(P, 56); - n_flipped += count_V_flip_reverse((P & 0x0002040810204080ULL), 8); -======= n_flipped = count_V_flip_reverse((P & 0x0101010101010101), 15); n_flipped += count_H_flip_right(P, 56); n_flipped += count_V_flip_reverse((P & 0x0002040810204080), 8); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) return n_flipped; } @@ -2093,59 +876,18 @@ static int count_last_flip_A2(const unsigned long long P) { } /** -<<<<<<< HEAD - * Count last flipped discs when playing on square B8. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= * Count last flipped discs when playing on square B7/B8. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. */ -<<<<<<< HEAD -<<<<<<< HEAD -======= -#if defined(__LZCNT__) || defined(__AVX2__) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_B8(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = count_V_flip_reverse((P & 0x0202020202020202), 14); - n_flipped += count_H_flip_right(P, 57); - n_flipped += count_V_flip_reverse((P & 0x0004081020408000), 7); - - return n_flipped; -} - -static int count_last_flip_B7(const unsigned long long P) { - return count_last_flip_B8(P << 8); -} - -static int count_last_flip_B1(const unsigned long long P) { - return count_last_flip_B8(vertical_mirror(P)); -} - -static int count_last_flip_B2(const unsigned long long P) { - return count_last_flip_B8(vertical_mirror(P) << 8); -} - -/** - * Count last flipped discs when playing on square C7/C8. -======= - n_flipped = count_V_flip_reverse((P & 0x0002020202020202ULL), 14); - n_flipped += count_H8_flip_right(P, 57); - n_flipped += count_V_flip_reverse((P & 0x0004081020408000ULL), 7); -======= n_flipped = count_V_flip_reverse((P & 0x0202020202020202), 14); n_flipped += count_H_flip_right(P, 57); n_flipped += count_V_flip_reverse((P & 0x0004081020408000), 7); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) return n_flipped; } @@ -2163,55 +905,16 @@ static int count_last_flip_B2(const unsigned long long P) { } /** -<<<<<<< HEAD - * Count last flipped discs when playing on square C8. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= * Count last flipped discs when playing on square C7/C8. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. */ -<<<<<<< HEAD -<<<<<<< HEAD -======= -#if defined(__LZCNT__) || defined(__AVX2__) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_C8(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = count_V_flip_reverse((P & 0x0404040404040404), 13); - n_flipped += COUNT_FLIP_2[P >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x040A112040800000) * 0x0101010101010101) >> 56]; // A6C8H3 - - return n_flipped; -} - -static int count_last_flip_C7(const unsigned long long P) { - return count_last_flip_C8(P << 8); -} - -static int count_last_flip_C1(const unsigned long long P) { - return count_last_flip_C8(vertical_mirror(P)); -} - -static int count_last_flip_C2(const unsigned long long P) { - return count_last_flip_C8(vertical_mirror(P) << 8); -} - -/** - * Count last flipped discs when playing on square D7/D8. -======= - n_flipped = count_V_flip_reverse((P & 0x0004040404040404ULL), 13); -======= n_flipped = count_V_flip_reverse((P & 0x0404040404040404), 13); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += COUNT_FLIP_2[P >> 56]; n_flipped += COUNT_FLIP_2[((P & 0x040A112040800000) * 0x0101010101010101) >> 56]; // A6C8H3 @@ -2231,55 +934,16 @@ static int count_last_flip_C2(const unsigned long long P) { } /** -<<<<<<< HEAD - * Count last flipped discs when playing on square D8. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= * Count last flipped discs when playing on square D7/D8. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. */ -<<<<<<< HEAD -<<<<<<< HEAD -======= -#if defined(__LZCNT__) || defined(__AVX2__) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_D8(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = count_V_flip_reverse((P & 0x0808080808080808), 12); - n_flipped += COUNT_FLIP_3[P >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x0814224180000000) * 0x0101010101010101) >> 56]; // A5D8H4 - - return n_flipped; -} - -static int count_last_flip_D7(const unsigned long long P) { - return count_last_flip_D8(P << 8); -} - -static int count_last_flip_D1(const unsigned long long P) { - return count_last_flip_D8(vertical_mirror(P)); -} - -static int count_last_flip_D2(const unsigned long long P) { - return count_last_flip_D8(vertical_mirror(P) << 8); -} - -/** - * Count last flipped discs when playing on square E7/E8. -======= - n_flipped = count_V_flip_reverse((P & 0x0008080808080808ULL), 12); -======= n_flipped = count_V_flip_reverse((P & 0x0808080808080808), 12); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += COUNT_FLIP_3[P >> 56]; n_flipped += COUNT_FLIP_3[((P & 0x0814224180000000) * 0x0101010101010101) >> 56]; // A5D8H4 @@ -2299,55 +963,16 @@ static int count_last_flip_D2(const unsigned long long P) { } /** -<<<<<<< HEAD - * Count last flipped discs when playing on square E8. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= * Count last flipped discs when playing on square E7/E8. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. */ -<<<<<<< HEAD -<<<<<<< HEAD -======= -#if defined(__LZCNT__) || defined(__AVX2__) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_E8(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = count_V_flip_reverse((P & 0x1010101010101010), 11); - n_flipped += COUNT_FLIP_4[P >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x1028448201000000) * 0x0101010101010101) >> 56]; // A4E8H5 - - return n_flipped; -} - -static int count_last_flip_E7(const unsigned long long P) { - return count_last_flip_E8(P << 8); -} - -static int count_last_flip_E1(const unsigned long long P) { - return count_last_flip_E8(vertical_mirror(P)); -} - -static int count_last_flip_E2(const unsigned long long P) { - return count_last_flip_E8(vertical_mirror(P) << 8); -} - -/** - * Count last flipped discs when playing on square F7/F8. -======= - n_flipped = count_V_flip_reverse((P & 0x0010101010101010ULL), 11); -======= n_flipped = count_V_flip_reverse((P & 0x1010101010101010), 11); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += COUNT_FLIP_4[P >> 56]; n_flipped += COUNT_FLIP_4[((P & 0x1028448201000000) * 0x0101010101010101) >> 56]; // A4E8H5 @@ -2367,55 +992,16 @@ static int count_last_flip_E2(const unsigned long long P) { } /** -<<<<<<< HEAD - * Count last flipped discs when playing on square F8. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= * Count last flipped discs when playing on square F7/F8. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. */ -<<<<<<< HEAD -<<<<<<< HEAD -======= -#if defined(__LZCNT__) || defined(__AVX2__) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_F8(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = count_V_flip_reverse((P & 0x2020202020202020), 10); - n_flipped += COUNT_FLIP_5[P >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0050880402010000) * 0x0101010101010101) >> 56]; // A3F8H6 - - return n_flipped; -} - -static int count_last_flip_F7(const unsigned long long P) { - return count_last_flip_F8(P << 8); -} - -static int count_last_flip_F1(const unsigned long long P) { - return count_last_flip_F8(vertical_mirror(P)); -} - -static int count_last_flip_F2(const unsigned long long P) { - return count_last_flip_F8(vertical_mirror(P) << 8); -} - -/** - * Count last flipped discs when playing on square G7/G8. -======= - n_flipped = count_V_flip_reverse((P & 0x0020202020202020ULL), 10); -======= n_flipped = count_V_flip_reverse((P & 0x2020202020202020), 10); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += COUNT_FLIP_5[P >> 56]; n_flipped += COUNT_FLIP_5[((P & 0x0050880402010000) * 0x0101010101010101) >> 56]; // A3F8H6 @@ -2435,55 +1021,16 @@ static int count_last_flip_F2(const unsigned long long P) { } /** -<<<<<<< HEAD - * Count last flipped discs when playing on square G8. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= * Count last flipped discs when playing on square G7/G8. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. */ -<<<<<<< HEAD -<<<<<<< HEAD -======= -#if defined(__LZCNT__) || defined(__AVX2__) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_G8(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = count_V_flip_reverse((P & 0x4040404040404040), 9); - n_flipped += count_H_flip_left(P, 62, 0x7e); - n_flipped += count_V_flip_reverse((P & 0x0020100804020100), 10); - - return n_flipped; -} - -static int count_last_flip_G7(const unsigned long long P) { - return count_last_flip_G8(P << 8); -} - -static int count_last_flip_G1(const unsigned long long P) { - return count_last_flip_G8(vertical_mirror(P)); -} - -static int count_last_flip_G2(const unsigned long long P) { - return count_last_flip_G8(vertical_mirror(P) << 8); -} - -/** - * Count last flipped discs when playing on square H7/H8. -======= - n_flipped = count_V_flip_reverse((P & 0x0040404040404040ULL), 9); -======= n_flipped = count_V_flip_reverse((P & 0x4040404040404040), 9); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += count_H_flip_left(P, 62, 0x7e); n_flipped += count_V_flip_reverse((P & 0x0020100804020100), 10); @@ -2503,54 +1050,16 @@ static int count_last_flip_G2(const unsigned long long P) { } /** -<<<<<<< HEAD - * Count last flipped discs when playing on square H8. ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= * Count last flipped discs when playing on square H7/H8. ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) * * @param P player's disc pattern. * @return flipped disc count. */ -<<<<<<< HEAD -<<<<<<< HEAD -======= -#if defined(__LZCNT__) || defined(__AVX2__) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) static int count_last_flip_H8(const unsigned long long P) { int n_flipped; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = count_V_flip_reverse((P & 0x8080808080808080), 8); - n_flipped += count_H_flip_left(P, 63, 0x7f); - n_flipped += count_V_flip_reverse((P & 0x0040201008040201), 9); - - return n_flipped; -} - -static int count_last_flip_H7(const unsigned long long P) { - return count_last_flip_H8(P << 8); -} - -static int count_last_flip_H1(const unsigned long long P) { - return count_last_flip_H8(vertical_mirror(P)); -} - -static int count_last_flip_H2(const unsigned long long P) { - return count_last_flip_H8(vertical_mirror(P) << 8); -} - -#endif // lzcnt_u64 -======= - n_flipped = count_V_flip_reverse((P & 0x0080808080808080ULL), 8); -======= n_flipped = count_V_flip_reverse((P & 0x8080808080808080), 8); ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) n_flipped += count_H_flip_left(P, 63, 0x7f); n_flipped += count_V_flip_reverse((P & 0x0040201008040201), 9); @@ -2560,10 +1069,6 @@ static int count_last_flip_H2(const unsigned long long P) { static int count_last_flip_H7(const unsigned long long P) { return count_last_flip_H8(P << 8); } -<<<<<<< HEAD -#endif ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= static int count_last_flip_H1(const unsigned long long P) { return count_last_flip_H8(vertical_mirror(P)); @@ -2574,7 +1079,6 @@ static int count_last_flip_H2(const unsigned long long P) { } #endif // lzcnt_u64 ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) /** * Count last flipped discs when plassing. @@ -2608,1442 +1112,3 @@ int (*count_last_flip[])(const unsigned long long) = { count_last_flip_E8, count_last_flip_F8, count_last_flip_G8, count_last_flip_H8, count_last_flip_pass, }; -<<<<<<< HEAD -<<<<<<< HEAD -======= -/** - * @file count_last_flip_bitscan.c - * - * - * A function is provided to count the number of fipped disc of the last move - * for each square of the board. These functions are gathered into an array of - * functions, so that a fast access to each function is allowed. The generic - * form of the function take as input the player bitboard and return twice - * the number of flipped disc of the last move. - * - * The basic principle is to read into an array a precomputed result. Doing - * this is easy for a single line ; as we can use arrays of the form: - * - COUNT_FLIP[square where we play][8-bits disc pattern]. - * The problem is thus to convert any line of a 64-bits disc pattern into an - * 8-bits disc pattern. A fast way to do this is to select the right line, - * with a bit-mask, to gather the masked-bits into a continuous set by a simple - * multiplication and to right-shift the result to scale it into a number - * between 0 and 255. - * Once we get our 8-bits disc patterns, we directly get the number of - * flipped discs from the precomputed array, and add them from each flipping - * lines. - * For optimization purpose, the value returned is twice the number of flipped - * disc, to facilitate the computation of disc difference. - * - * With 135 degree merge, instead of Valery ClaudePierre's modification. - * - * For top to bottom flip, LS1B isolation (http://chessprogramming.wikispaces.com/ - * General+Setwise+Operations) is used to get the outflank bit. - * - * @date 1998 - 2018 - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.4 - * - */ - -/** precomputed count flip array */ -static const char COUNT_FLIP_2[256] = { - 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 8, 10, 8, 8, 8, 10, 8, 8, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0 -}; - -static const char COUNT_FLIP_3[256] = { - 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 6, 10, 8, 8, 6, 6, 6, 6, 6, 10, 8, 8, 6, 6, 6, 6, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0 -}; - -static const char COUNT_FLIP_4[256] = { - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static const char COUNT_FLIP_5[256] = { - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -#include "bit.h" - -#if defined(__LZCNT__) || defined(__AVX2__) -static inline int count_V_flip_reverse (unsigned long long P, int ofs) { - return (_lzcnt_u64(P << ofs) >> 2) & 0x0E; -} -#elif defined(_MSC_VER) // Not used -static inline int count_V_flip_reverse (unsigned long long P, int ofs) { - unsigned long i; - return (((_BitScanReverse64(&i, (P << ofs)) ? (int) i : 127) ^ 63) >> 2) & 0x0E; -} -#else -// with guardian bit to avoid __builtin_clz(0) // Not used -static inline int count_V_flip_reverse (unsigned long long P, int ofs) { - return ((__builtin_clzll((P << ofs) | 1) + 1) >> 2) & 0x0E; -} -#endif - -#if defined(__LZCNT__) || defined(__AVX2__) - -static inline int count_H1_flip_left (unsigned long long P, int pos, int mask) { - return (_lzcnt_u32((P << (8 - pos)) & (mask << 1)) & 0x07) * 2; -} - -static inline int count_H_flip_left (unsigned long long P, int pos, int mask) { - return (_lzcnt_u32((P >> (pos - 8)) & (mask << 1)) & 0x07) * 2; -} - -#else - -static const char COUNT_FLIP_L[128] = { - 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static inline int count_H1_flip_left (unsigned long long P, int pos, int mask) { - return COUNT_FLIP_L[(P << (7 - pos)) & mask]; -} - -static inline int count_H_flip_left (unsigned long long P, int pos, int mask) { - return COUNT_FLIP_L[(P >> (pos - 7)) & mask]; -} - -#endif - -#if (defined(__BMI__) || defined(__AVX2__)) && !(defined(__GNUC__) && (__GNUC__ < 6)) // GCC Bug 78037 - -static inline int count_H_flip_right (unsigned long long P, int pos, int mask) { - return (_tzcnt_u32((P >> (pos + 1)) & mask) & 0x07) * 2; -} - -static inline int count_H8_flip_right (unsigned long long P, int pos) { - return (_tzcnt_u32(P >> (pos + 1)) & 0x07) * 2; -} - -#else - -static const char COUNT_FLIP_R[128] = { - 0, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, 8, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 10, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, 8, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 12, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, 8, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 10, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, 8, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0 -}; - -static inline int count_H_flip_right (unsigned long long P, int pos, int mask) { - return COUNT_FLIP_R[(P >> (pos + 1)) & mask]; -} - -static inline int count_H8_flip_right (unsigned long long P, int pos) { - return COUNT_FLIP_R[P >> (pos + 1)]; -} - -#endif - -/** - * Count last flipped discs when playing on square A1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d9; - - P_v = P & 0x0101010101010100ULL; - n_flipped = ((P_v & -P_v) * 0x000020406080a0c0ULL) >> 60; - n_flipped += count_H_flip_right(P, 0, 0x7f); - P_d9 = P & 0x8040201008040200ULL; - n_flipped += (((P_d9 & -P_d9) >> 1) * 0x000010100c080503ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d9; - - P_v = P & 0x0202020202020200ULL; - n_flipped = ((P_v & -P_v) * 0x0000102030405060ULL) >> 60; - n_flipped += count_H_flip_right(P, 1, 0x3f); - P_d9 = P & 0x0080402010080400ULL; - n_flipped += ((P_d9 & -P_d9) * 0x0000040403020140ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x0404040404040400ULL; - n_flipped = ((P_v & -P_v) * 0x0000081018202830ULL) >> 60; - n_flipped += COUNT_FLIP_2[P & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000804020110A04ULL) * 0x0101010101010101ULL) >> 56]; // A3C1H6 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x0808080808080800ULL; - n_flipped = ((P_v & -P_v) * 0x000004080c101418ULL) >> 60; - n_flipped += COUNT_FLIP_3[P & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0000008041221408ULL) * 0x0101010101010101ULL) >> 56]; // A4D1H5 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x1010101010101000ULL; - n_flipped = ((P_v & -P_v) * 0x0000020406080a0cULL) >> 60; - n_flipped += COUNT_FLIP_4[P & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0000000182442810ULL) * 0x0101010101010101ULL) >> 56]; // A5E1H4 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x2020202020202000ULL; - n_flipped = ((P_v & -P_v) * 0x0000010203040506ULL) >> 60; - n_flipped += COUNT_FLIP_5[P & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0000010204885020ULL) * 0x0101010101010101ULL) >> 56]; // A6F1H3 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d7; - - P_v = P & 0x4040404040404000ULL; - n_flipped = ((P_v & -P_v) * 0x0000008101820283ULL) >> 60; - n_flipped += count_H1_flip_left(P, 6, 0x7e); - P_d7 = P & 0x0001020408102000ULL; - n_flipped += ((P_d7 & -P_d7) * 0x000002081840a000ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d7; - - P_v = P & 0x8080808080808000ULL; - n_flipped = (((P_v & -P_v) >> 1) * 0x0000008101820283ULL) >> 60; - n_flipped += count_H1_flip_left(P, 7, 0x7f); - P_d7 = P & 0x0102040810204000ULL; - n_flipped += ((P_d7 & -P_d7) * 0x000001040c2050c0ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d9; - - P_v = P & 0x0101010101010000ULL; - n_flipped = ((P_v & -P_v) * 0x00000020406080a0ULL) >> 60; - n_flipped += count_H_flip_right(P, 8, 0x7f); - P_d9 = P & 0x4020100804020000ULL; - n_flipped += (((P_d9 & -P_d9) >> 1) * 0x00000010100c0805ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d9; - - P_v = P & 0x0202020202020000ULL; - n_flipped = ((P_v & -P_v) * 0x0000001020304050ULL) >> 60; - n_flipped += count_H_flip_right(P, 9, 0x3f); - P_d9 = P & 0x8040201008040000ULL; - n_flipped += (((P_d9 & -P_d9) >> 2) * 0x00000010100c0805ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x0404040404040000ULL; - n_flipped = ((P_v & -P_v) * 0x0000000810182028ULL) >> 60; - n_flipped += COUNT_FLIP_2[(P >> 8) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x00804020110A0400ULL) * 0x0101010101010101ULL) >> 56]; // A4C2H7 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x0808080808080000ULL; - n_flipped = ((P_v & -P_v) * 0x00000004080c1014ULL) >> 60; - n_flipped += COUNT_FLIP_3[(P >> 8) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0000804122140800ULL) * 0x0101010101010101ULL) >> 56]; // A5D2H6 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x1010101010100000ULL; - n_flipped = ((P_v & -P_v) * 0x000000020406080aULL) >> 60; - n_flipped += COUNT_FLIP_4[(P >> 8) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0000018244281000ULL) * 0x0101010101010101ULL) >> 56]; // A6E2H5 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x2020202020200000ULL; - n_flipped = ((P_v & -P_v) * 0x0000000102030405ULL) >> 60; - n_flipped += COUNT_FLIP_5[(P >> 8) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0001020488502000ULL) * 0x0101010101010101ULL) >> 56]; // A7F2H4 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d7; - - P_v = P & 0x4040404040400000ULL; - n_flipped = (((P_v & -P_v) >> 1) * 0x0000000102030405ULL) >> 60; - n_flipped += count_H_flip_left(P, 14, 0x7e); - P_d7 = P & 0x0102040810200000ULL; - n_flipped += ((P_d7 & -P_d7) * 0x00000002081840a0ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d7; - - P_v = P & 0x8080808080800000ULL; - n_flipped = (((P_v & -P_v) >> 2) * 0x0000000102030405ULL) >> 60; - n_flipped += count_H_flip_left(P, 15, 0x7f); - P_d7 = P & 0x0204081020400000ULL; - n_flipped += (((P_d7 & -P_d7) >> 2) * 0x0000000410308143ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x2010080402010101ULL) * 0x0102040404040404ULL) >> 56]; // A1A3F8 - n_flipped += count_H_flip_right(P, 16, 0x7f); - n_flipped += COUNT_FLIP_5[((P & 0x0101010101010204ULL) * 0x2020201008040201ULL) >> 56]; // C1A3A8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x4020100804020202ULL) * 0x0081020202020202ULL) >> 56]; // B1B3G8 - n_flipped += count_H_flip_right(P, 17, 0x3f); - n_flipped += COUNT_FLIP_5[(((P & 0x0202020202020408ULL) >> 1) * 0x2020201008040201ULL) >> 56]; // D1B3B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000000102040810ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0000010204081020ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x0000804020100804ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0000008040201008ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x4040404040402010ULL) * 0x0010101020408102ULL) >> 56]; // E1G3G8 - n_flipped += count_H_flip_left(P, 22, 0x7e); - n_flipped += COUNT_FLIP_5[(((P & 0x0204081020404040ULL) >> 1) * 0x0402010101010101ULL) >> 56]; // G1G3B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x8080808080804020ULL) * 0x0008080810204081ULL) >> 56]; // F1H3H8 - n_flipped += count_H_flip_left(P, 23, 0x7f); - n_flipped += COUNT_FLIP_5[(((P & 0x0408102040808080ULL) >> 2) * 0x0402010101010101ULL) >> 56]; // H1H3C8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x1008040201010101ULL) * 0x0102040808080808ULL) >> 56]; // A1A4E8 - n_flipped += count_H_flip_right(P, 24, 0x7f); - n_flipped += COUNT_FLIP_4[((P & 0x0101010101020408ULL) * 0x1010101008040201ULL) >> 56]; // D1A4A8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x2010080402020202ULL) * 0x0081020404040404ULL) >> 56]; // B1B4F8 - n_flipped += count_H_flip_right(P, 25, 0x3f); - n_flipped += COUNT_FLIP_4[(((P & 0x0202020202040810ULL) >> 1) * 0x1010101008040201ULL) >> 56]; // E1B4B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000010204081020ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0000804020100804ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x4040404040201008ULL) * 0x0020202020408102ULL) >> 56]; // D1G4G8 - n_flipped += count_H_flip_left(P, 30, 0x7e); - n_flipped += COUNT_FLIP_4[(((P & 0x0408102040404040ULL) >> 2) * 0x0804020101010101ULL) >> 56]; // G1G4C8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x8080808080402010ULL) * 0x0010101010204081ULL) >> 56]; // E1H4H8 - n_flipped += count_H_flip_left(P, 31, 0x7f); - n_flipped += COUNT_FLIP_4[(((P & 0x0810204080808080ULL) >> 3) * 0x0804020101010101ULL) >> 56]; // H1H4D8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x0804020101010101ULL) * 0x0102040810101010ULL) >> 56]; // A1A5D8 - n_flipped += count_H_flip_right(P, 32, 0x7f); - n_flipped += COUNT_FLIP_3[((P & 0x0101010102040810ULL) * 0x0808080808040201ULL) >> 56]; // E1A5A8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x1008040202020202ULL) * 0x0081020408080808ULL) >> 56]; // B1B5E8 - n_flipped += count_H_flip_right(P, 33, 0x3f); - n_flipped += COUNT_FLIP_3[(((P & 0x0202020204081020ULL) >> 1) * 0x0808080808040201ULL) >> 56]; // F1B5B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 32) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x2010080402010000ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 32) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 32) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 32) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0408102040800000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x4040404020100804ULL) * 0x0040404040408102ULL) >> 56]; // C1G5G8 - n_flipped += count_H_flip_left(P, 38, 0x7e); - n_flipped += COUNT_FLIP_3[(((P & 0x0810204040404040ULL) >> 3) * 0x1008040201010101ULL) >> 56]; // G1G5D8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x8080808040201008ULL) * 0x0020202020204081ULL) >> 56]; // D1H5H8 - n_flipped += count_H_flip_left(P, 39, 0x7f); - n_flipped += COUNT_FLIP_3[(((P & 0x1020408080808080ULL) >> 4) * 0x1008040201010101ULL) >> 56]; // H1H5E8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x0402010101010101ULL) * 0x0102040810202020ULL) >> 56]; // A1A6C8 - n_flipped += count_H_flip_right(P, 40, 0x7f); - n_flipped += COUNT_FLIP_2[((P & 0x0101010204081020ULL) * 0x0404040404040201ULL) >> 56]; // F1A6A8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x0804020202020202ULL) * 0x0081020408101010ULL) >> 56]; // B1B6D8 - n_flipped += count_H_flip_right(P, 41, 0x3f); - n_flipped += COUNT_FLIP_2[(((P & 0x0202020408102040ULL) >> 1) * 0x0404040404040201ULL) >> 56]; // G1B6B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 40) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x1008040201000000ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 40) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x2010080402010000ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 40) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0408102040800000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 40) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0810204080000000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x4040402010080402ULL) * 0x0080808080808102ULL) >> 56]; // B1G6G8 - n_flipped += count_H_flip_left(P, 46, 0x7e); - n_flipped += COUNT_FLIP_2[(((P & 0x1020404040404040ULL) >> 4) * 0x2010080402010101ULL) >> 56]; // G1G6E8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x8080804020100804ULL) * 0x0040404040404081ULL) >> 56]; // C1H6H8 - n_flipped += count_H_flip_left(P, 47, 0x7f); - n_flipped += COUNT_FLIP_2[(((P & 0x2040808080808080ULL) >> 5) * 0x2010080402010101ULL) >> 56]; // H1H6F8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_A7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000010101010101ULL), 23); - n_flipped += count_H_flip_right(P, 48, 0x7f); - n_flipped += count_V_flip_reverse((P & 0x0000020408102040ULL), 16); - - return n_flipped; -} -#else -static int count_last_flip_A7(const unsigned long long P) { - return count_last_flip_A2(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square B7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_B7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000020202020202ULL), 22); - n_flipped += count_H_flip_right(P, 49, 0x3f); - n_flipped += count_V_flip_reverse((P & 0x0000040810204080ULL), 15); - - return n_flipped; -} -#else -static int count_last_flip_B7(const unsigned long long P) { - return count_last_flip_B2(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square C7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_C7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000040404040404ULL), 21); - n_flipped += COUNT_FLIP_2[(P >> 48) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x00040A1120408000ULL) * 0x0101010101010101ULL) >> 56]; // A5C7H2 - - return n_flipped; -} -#else -static int count_last_flip_C7(const unsigned long long P) { - return count_last_flip_C2(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square D7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_D7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000080808080808ULL), 20); - n_flipped += COUNT_FLIP_3[(P >> 48) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0008142241800000ULL) * 0x0101010101010101ULL) >> 56]; // A4D7H3 - - return n_flipped; -} -#else -static int count_last_flip_D7(const unsigned long long P) { - return count_last_flip_D2(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square E7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_E7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000101010101010ULL), 19); - n_flipped += COUNT_FLIP_4[(P >> 48) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0010284482010000ULL) * 0x0101010101010101ULL) >> 56]; // A3E7H4 - - return n_flipped; -} -#else -static int count_last_flip_E7(const unsigned long long P) { - return count_last_flip_E2(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square F7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_F7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000202020202020ULL), 18); - n_flipped += COUNT_FLIP_5[(P >> 48) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0020508804020100ULL) * 0x0101010101010101ULL) >> 56]; // A2F7H5 - - return n_flipped; -} -#else -static int count_last_flip_F7(const unsigned long long P) { - return count_last_flip_F2(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square G7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_G7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000404040404040ULL), 17); - n_flipped += count_H_flip_left(P, 54, 0x7e); - n_flipped += count_V_flip_reverse((P & 0x0000201008040201ULL), 18); - - return n_flipped; -} -#else -static int count_last_flip_G7(const unsigned long long P) { - return count_last_flip_G2(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square H7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_H7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0000808080808080ULL), 16); - n_flipped += count_H_flip_left(P, 55, 0x7f); - n_flipped += count_V_flip_reverse((P & 0x0000402010080402ULL), 17); - - return n_flipped; -} -#else -static int count_last_flip_H7(const unsigned long long P) { - return count_last_flip_H2(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square A8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_A8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0001010101010101ULL), 15); - n_flipped += count_H8_flip_right(P, 56); - n_flipped += count_V_flip_reverse((P & 0x0002040810204080ULL), 8); - - return n_flipped; -} -#else -static int count_last_flip_A8(const unsigned long long P) { - return count_last_flip_A1(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square B8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_B8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0002020202020202ULL), 14); - n_flipped += count_H8_flip_right(P, 57); - n_flipped += count_V_flip_reverse((P & 0x0004081020408000ULL), 7); - - return n_flipped; -} -#else -static int count_last_flip_B8(const unsigned long long P) { - return count_last_flip_B1(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square C8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_C8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0004040404040404ULL), 13); - n_flipped += COUNT_FLIP_2[P >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x040A112040800000ULL) * 0x0101010101010101ULL) >> 56]; // A6C8H3 - - return n_flipped; -} -#else -static int count_last_flip_C8(const unsigned long long P) { - return count_last_flip_C1(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square D8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_D8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0008080808080808ULL), 12); - n_flipped += COUNT_FLIP_3[P >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x0814224180000000ULL) * 0x0101010101010101ULL) >> 56]; // A5D8H4 - - return n_flipped; -} -#else -static int count_last_flip_D8(const unsigned long long P) { - return count_last_flip_D1(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square E8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_E8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0010101010101010ULL), 11); - n_flipped += COUNT_FLIP_4[P >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x1028448201000000ULL) * 0x0101010101010101ULL) >> 56]; // A4E8H5 - - return n_flipped; -} -#else -static int count_last_flip_E8(const unsigned long long P) { - return count_last_flip_E1(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square F8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_F8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0020202020202020ULL), 10); - n_flipped += COUNT_FLIP_5[P >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0050880402010000ULL) * 0x0101010101010101ULL) >> 56]; // A3F8H6 - - return n_flipped; -} -#else -static int count_last_flip_F8(const unsigned long long P) { - return count_last_flip_F1(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square G8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_G8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0040404040404040ULL), 9); - n_flipped += count_H_flip_left(P, 62, 0x7e); - n_flipped += count_V_flip_reverse((P & 0x0020100804020100ULL), 10); - - return n_flipped; -} -#else -static int count_last_flip_G8(const unsigned long long P) { - return count_last_flip_G1(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when playing on square H8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -#if defined(__LZCNT__) || defined(__AVX2__) -static int count_last_flip_H8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = count_V_flip_reverse((P & 0x0080808080808080ULL), 8); - n_flipped += count_H_flip_left(P, 63, 0x7f); - n_flipped += count_V_flip_reverse((P & 0x0040201008040201ULL), 9); - - return n_flipped; -} -#else -static int count_last_flip_H8(const unsigned long long P) { - return count_last_flip_H1(vertical_mirror(P)); -} -#endif - -/** - * Count last flipped discs when plassing. - * - * @param P player's disc pattern (unused). - * @return zero. - */ -static int count_last_flip_pass(const unsigned long long P) -{ - (void) P; // useless code to shut-up compiler warning - return 0; -} - -/** Array of functions to count flipped discs of the last move */ -int (*count_last_flip[])(const unsigned long long) = { - count_last_flip_A1, count_last_flip_B1, count_last_flip_C1, count_last_flip_D1, - count_last_flip_E1, count_last_flip_F1, count_last_flip_G1, count_last_flip_H1, - count_last_flip_A2, count_last_flip_B2, count_last_flip_C2, count_last_flip_D2, - count_last_flip_E2, count_last_flip_F2, count_last_flip_G2, count_last_flip_H2, - count_last_flip_A3, count_last_flip_B3, count_last_flip_C3, count_last_flip_D3, - count_last_flip_E3, count_last_flip_F3, count_last_flip_G3, count_last_flip_H3, - count_last_flip_A4, count_last_flip_B4, count_last_flip_C4, count_last_flip_D4, - count_last_flip_E4, count_last_flip_F4, count_last_flip_G4, count_last_flip_H4, - count_last_flip_A5, count_last_flip_B5, count_last_flip_C5, count_last_flip_D5, - count_last_flip_E5, count_last_flip_F5, count_last_flip_G5, count_last_flip_H5, - count_last_flip_A6, count_last_flip_B6, count_last_flip_C6, count_last_flip_D6, - count_last_flip_E6, count_last_flip_F6, count_last_flip_G6, count_last_flip_H6, - count_last_flip_A7, count_last_flip_B7, count_last_flip_C7, count_last_flip_D7, - count_last_flip_E7, count_last_flip_F7, count_last_flip_G7, count_last_flip_H7, - count_last_flip_A8, count_last_flip_B8, count_last_flip_C8, count_last_flip_D8, - count_last_flip_E8, count_last_flip_F8, count_last_flip_G8, count_last_flip_H8, - count_last_flip_pass, -}; - ->>>>>>> b3f048d (copyright changes) -======= - ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) diff --git a/src/count_last_flip_bmi.c b/src/count_last_flip_bmi.c index e515227..fc80a5d 100644 --- a/src/count_last_flip_bmi.c +++ b/src/count_last_flip_bmi.c @@ -1,7 +1,3 @@ -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) /** * @file count_last_flip_bmi.c * @@ -174,180 +170,3 @@ int last_flip(int pos, unsigned long long P) return n_flipped; } -<<<<<<< HEAD -======= -/** - * @file count_last_flip_bmi.c - * - * - * A function is provided to count the number of fipped disc of the last move. - * - * This implementation uses BMI1 instructions, lzcnt and tzcnt. - * For optimization purpose, the value returned is twice the number of flipped - * disc, to facilitate the computation of disc difference. - * - * @date 1998 - 2018 - * @author Toshihiko Okuhara - * @version 4.4 - * - */ - -#include "bit.h" - -/** precomputed count flip array */ -static const unsigned char COUNT_FLIP[8][256] = { - { - 0, 0, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 10, 10, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 12, 12, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 10, 10, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 8, 8, 8, 8, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 10, 10, 10, 10, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 8, 8, 8, 8, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - }, - { - 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 8, 10, 8, 8, 8, 10, 8, 8, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - }, - { - 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 6, 10, 8, 8, 6, 6, 6, 6, 6, 10, 8, 8, 6, 6, 6, 6, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - }, - { - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, -}; - -/* bit masks for diagonal lines */ -static const unsigned long long mask_d[2][64] = { - { - 0x0000000000000001ULL, 0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL, - 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, - 0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL, - 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, - 0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL, - 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, - 0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, - 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, - 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, - 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, - 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, - 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL, - 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, - 0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL, - 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, - 0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL, 0x8000000000000000ULL - }, - { - 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, - 0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL, 0x0000000000000080ULL, - 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, - 0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL, - 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, - 0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL, - 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, - 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL, - 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, - 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, - 0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, - 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, - 0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL, - 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, - 0x0100000000000000ULL, 0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL, - 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL - } -}; - -/** - * Count last flipped discs when playing on the last empty. - * - * @param pos the last empty square. - * @param P player's disc pattern. - * @return flipped disc count. - */ - -int last_flip(int pos, unsigned long long P) -{ - unsigned long long P8, P7, P9; - int n_flipped; - int x = pos & 7; - int y = pos & 0x38; - int ry = y ^ 0x38; - - n_flipped = COUNT_FLIP[x][(unsigned char) (P >> y)]; - - P8 = P & (0x0101010101010101ULL << x); - P7 = P & mask_d[0][pos]; - P9 = P & mask_d[1][pos]; - - n_flipped += ((((int) __tzcnt_u64((P8 >> y) >> 8) + (int) __lzcnt64((P8 << ry) << 8)) & 0x38) - + ((int) __tzcnt_u64((P7 >> y) >> 8) & 0x38) - + ((int) __tzcnt_u64((P9 >> y) >> 8) & 0x38) - + ((int) __lzcnt64((P7 << ry) << 8) & 0x38) - + ((int) __lzcnt64((P9 << ry) << 8) & 0x38)) >> 2; - - return n_flipped; -} ->>>>>>> f24cc06 (avoid BMI2 for AMD; more lzcnt/tzcnt in count_last_flip_bitscan) -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) diff --git a/src/count_last_flip_bmi2.c b/src/count_last_flip_bmi2.c index 2ff3573..f85c0e4 100644 --- a/src/count_last_flip_bmi2.c +++ b/src/count_last_flip_bmi2.c @@ -1,7 +1,3 @@ -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) /** * @file count_last_flip_bmi2.c * @@ -21,45 +17,18 @@ * For optimization purpose, the value returned is twice the number of flipped * disc, to facilitate the computation of disc difference. * -<<<<<<< HEAD -<<<<<<< HEAD * @date 1998 - 2023 * @author Richard Delorme * @author Toshihiko Okuhara * @version 4.5 -======= - * @date 1998 - 2018 - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.4 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - * @date 1998 - 2023 - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.5 ->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask) * */ #include "bit.h" -<<<<<<< HEAD -<<<<<<< HEAD -#include - -/** precomputed count flip array */ -const uint8_t COUNT_FLIP[8][256] = { -======= - -/** precomputed count flip array */ -const unsigned char COUNT_FLIP[8][256] = { ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= #include /** precomputed count flip array */ const uint8_t COUNT_FLIP[8][256] = { ->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask) { 0, 0, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, @@ -220,240 +189,16 @@ const unsigned long long mask_x[64][4] = { inline int last_flip(int pos, unsigned long long P) { -<<<<<<< HEAD -<<<<<<< HEAD - uint_fast8_t n_flipped; -======= - unsigned char n_flipped; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= uint_fast8_t n_flipped; ->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask) int x = pos & 7; int y = pos >> 3; P &= mask_x[pos][3]; // mask out unrelated bits to make dummy 0 bits for outside // n_flipped = COUNT_FLIP[x][_bextr_u64(P, pos & 0x38, 8)]; -<<<<<<< HEAD -<<<<<<< HEAD - n_flipped = COUNT_FLIP[x][(P >> (pos & 0x38)) & 0xFF]; -======= - n_flipped = COUNT_FLIP[x][(unsigned char) (P >> (pos & 0x38))]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= n_flipped = COUNT_FLIP[x][(P >> (pos & 0x38)) & 0xFF]; ->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask) n_flipped += COUNT_FLIP[y][_pext_u64(P, mask_x[pos][0])]; n_flipped += COUNT_FLIP[y][_pext_u64(P, mask_x[pos][1])]; n_flipped += COUNT_FLIP[y][_pext_u64(P, mask_x[pos][2])]; return n_flipped; } -<<<<<<< HEAD -======= -/** - * @file count_last_flip_bmi2.c - * - * - * A function is provided to count the number of fipped disc of the last move. - * - * The basic principle is to read into an array a precomputed result. Doing - * this is easy for a single line ; as we can use arrays of the form: - * - COUNT_FLIP[square where we play][8-bits disc pattern]. - * The problem is thus to convert any line of a 64-bits disc pattern into an - * 8-bits disc pattern. A fast way to do this is to select the right line, - * with a bit-mask, to gather the masked-bits into a continuous set by the - * BMI2 PEXT instruction. - * Once we get our 8-bits disc patterns, we directly get the number of - * flipped discs from the precomputed array, and add them from each flipping - * lines. - * For optimization purpose, the value returned is twice the number of flipped - * disc, to facilitate the computation of disc difference. - * - * @date 1998 - 2018 - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.4 - * - */ - -#include "bit.h" - -/** precomputed count flip array */ -const unsigned char COUNT_FLIP[8][256] = { - { - 0, 0, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 10, 10, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 12, 12, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 10, 10, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 8, 8, 8, 8, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 10, 10, 10, 10, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 8, 8, 8, 8, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - }, - { - 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 8, 10, 8, 8, 8, 10, 8, 8, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - }, - { - 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 6, 10, 8, 8, 6, 6, 6, 6, 6, 10, 8, 8, 6, 6, 6, 6, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - }, - { - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, -}; - -/* bit masks for diagonal/vertical/all lines */ -const unsigned long long mask_x[64][4] = { - { 0x0000000000000001ULL, 0x8040201008040201ULL, 0x0101010101010101ULL, 0x81412111090503ffULL }, - { 0x0000000000000102ULL, 0x0080402010080402ULL, 0x0202020202020202ULL, 0x02824222120a07ffULL }, - { 0x0000000000010204ULL, 0x0000804020100804ULL, 0x0404040404040404ULL, 0x0404844424150effULL }, - { 0x0000000001020408ULL, 0x0000008040201008ULL, 0x0808080808080808ULL, 0x08080888492a1cffULL }, - { 0x0000000102040810ULL, 0x0000000080402010ULL, 0x1010101010101010ULL, 0x10101011925438ffULL }, - { 0x0000010204081020ULL, 0x0000000000804020ULL, 0x2020202020202020ULL, 0x2020212224a870ffULL }, - { 0x0001020408102040ULL, 0x0000000000008040ULL, 0x4040404040404040ULL, 0x404142444850e0ffULL }, - { 0x0102040810204080ULL, 0x0000000000000080ULL, 0x8080808080808080ULL, 0x8182848890a0c0ffULL }, - { 0x0000000000000102ULL, 0x4020100804020104ULL, 0x0101010101010101ULL, 0x412111090503ff03ULL }, - { 0x0000000000010204ULL, 0x8040201008040201ULL, 0x0202020202020202ULL, 0x824222120a07ff07ULL }, - { 0x0000000001020408ULL, 0x0080402010080402ULL, 0x0404040404040404ULL, 0x04844424150eff0eULL }, - { 0x0000000102040810ULL, 0x0000804020100804ULL, 0x0808080808080808ULL, 0x080888492a1cff1cULL }, - { 0x0000010204081020ULL, 0x0000008040201008ULL, 0x1010101010101010ULL, 0x101011925438ff38ULL }, - { 0x0001020408102040ULL, 0x0000000080402010ULL, 0x2020202020202020ULL, 0x20212224a870ff70ULL }, - { 0x0102040810204080ULL, 0x0000000000804020ULL, 0x4040404040404040ULL, 0x4142444850e0ffe0ULL }, - { 0x0204081020408001ULL, 0x0000000000008040ULL, 0x8080808080808080ULL, 0x82848890a0c0ffc0ULL }, - { 0x0000000000010204ULL, 0x201008040201000aULL, 0x0101010101010101ULL, 0x2111090503ff0305ULL }, - { 0x0000000001020408ULL, 0x4020100804020101ULL, 0x0202020202020202ULL, 0x4222120a07ff070aULL }, - { 0x0000000102040810ULL, 0x8040201008040201ULL, 0x0404040404040404ULL, 0x844424150eff0e15ULL }, - { 0x0000010204081020ULL, 0x0080402010080402ULL, 0x0808080808080808ULL, 0x0888492a1cff1c2aULL }, - { 0x0001020408102040ULL, 0x0000804020100804ULL, 0x1010101010101010ULL, 0x1011925438ff3854ULL }, - { 0x0102040810204080ULL, 0x0000008040201008ULL, 0x2020202020202020ULL, 0x212224a870ff70a8ULL }, - { 0x0204081020408001ULL, 0x0000000080402010ULL, 0x4040404040404040ULL, 0x42444850e0ffe050ULL }, - { 0x0408102040800003ULL, 0x0000000000804020ULL, 0x8080808080808080ULL, 0x848890a0c0ffc0a0ULL }, - { 0x0000000001020408ULL, 0x1008040201000016ULL, 0x0101010101010101ULL, 0x11090503ff030509ULL }, - { 0x0000000102040810ULL, 0x2010080402010005ULL, 0x0202020202020202ULL, 0x22120a07ff070a12ULL }, - { 0x0000010204081020ULL, 0x4020100804020101ULL, 0x0404040404040404ULL, 0x4424150eff0e1524ULL }, - { 0x0001020408102040ULL, 0x8040201008040201ULL, 0x0808080808080808ULL, 0x88492a1cff1c2a49ULL }, - { 0x0102040810204080ULL, 0x0080402010080402ULL, 0x1010101010101010ULL, 0x11925438ff385492ULL }, - { 0x0204081020408001ULL, 0x0000804020100804ULL, 0x2020202020202020ULL, 0x2224a870ff70a824ULL }, - { 0x0408102040800003ULL, 0x0000008040201008ULL, 0x4040404040404040ULL, 0x444850e0ffe05048ULL }, - { 0x0810204080000007ULL, 0x0000000080402010ULL, 0x8080808080808080ULL, 0x8890a0c0ffc0a090ULL }, - { 0x0000000102040810ULL, 0x080402010000002eULL, 0x0101010101010101ULL, 0x090503ff03050911ULL }, - { 0x0000010204081020ULL, 0x100804020100000dULL, 0x0202020202020202ULL, 0x120a07ff070a1222ULL }, - { 0x0001020408102040ULL, 0x2010080402010003ULL, 0x0404040404040404ULL, 0x24150eff0e152444ULL }, - { 0x0102040810204080ULL, 0x4020100804020101ULL, 0x0808080808080808ULL, 0x492a1cff1c2a4988ULL }, - { 0x0204081020408002ULL, 0x8040201008040201ULL, 0x1010101010101010ULL, 0x925438ff38549211ULL }, - { 0x0408102040800005ULL, 0x0080402010080402ULL, 0x2020202020202020ULL, 0x24a870ff70a82422ULL }, - { 0x081020408000000bULL, 0x0000804020100804ULL, 0x4040404040404040ULL, 0x4850e0ffe0504844ULL }, - { 0x1020408000000017ULL, 0x0000008040201008ULL, 0x8080808080808080ULL, 0x90a0c0ffc0a09088ULL }, - { 0x0000010204081020ULL, 0x040201000000005eULL, 0x0101010101010101ULL, 0x0503ff0305091121ULL }, - { 0x0001020408102040ULL, 0x080402010000001dULL, 0x0202020202020202ULL, 0x0a07ff070a122242ULL }, - { 0x0102040810204080ULL, 0x100804020100000bULL, 0x0404040404040404ULL, 0x150eff0e15244484ULL }, - { 0x0204081020408001ULL, 0x2010080402010003ULL, 0x0808080808080808ULL, 0x2a1cff1c2a498808ULL }, - { 0x0408102040800003ULL, 0x4020100804020101ULL, 0x1010101010101010ULL, 0x5438ff3854921110ULL }, - { 0x081020408000000eULL, 0x8040201008040201ULL, 0x2020202020202020ULL, 0xa870ff70a8242221ULL }, - { 0x102040800000001dULL, 0x0080402010080402ULL, 0x4040404040404040ULL, 0x50e0ffe050484442ULL }, - { 0x204080000000003bULL, 0x0000804020100804ULL, 0x8080808080808080ULL, 0xa0c0ffc0a0908884ULL }, - { 0x0001020408102040ULL, 0x02010000000000beULL, 0x0101010101010101ULL, 0x03ff030509112141ULL }, - { 0x0102040810204080ULL, 0x040201000000003dULL, 0x0202020202020202ULL, 0x07ff070a12224282ULL }, - { 0x0204081020408001ULL, 0x080402010000001bULL, 0x0404040404040404ULL, 0x0eff0e1524448404ULL }, - { 0x0408102040800003ULL, 0x1008040201000007ULL, 0x0808080808080808ULL, 0x1cff1c2a49880808ULL }, - { 0x0810204080000007ULL, 0x2010080402010003ULL, 0x1010101010101010ULL, 0x38ff385492111010ULL }, - { 0x102040800000000fULL, 0x4020100804020101ULL, 0x2020202020202020ULL, 0x70ff70a824222120ULL }, - { 0x204080000000003eULL, 0x8040201008040201ULL, 0x4040404040404040ULL, 0xe0ffe05048444241ULL }, - { 0x408000000000007dULL, 0x0080402010080402ULL, 0x8080808080808080ULL, 0xc0ffc0a090888482ULL }, - { 0x0102040810204080ULL, 0x010000000000027eULL, 0x0101010101010101ULL, 0xff03050911214181ULL }, - { 0x0204081020408001ULL, 0x020100000000007dULL, 0x0202020202020202ULL, 0xff070a1222428202ULL }, - { 0x0408102040800003ULL, 0x040201000000003bULL, 0x0404040404040404ULL, 0xff0e152444840404ULL }, - { 0x0810204080000007ULL, 0x0804020100000017ULL, 0x0808080808080808ULL, 0xff1c2a4988080808ULL }, - { 0x102040800000000fULL, 0x1008040201000007ULL, 0x1010101010101010ULL, 0xff38549211101010ULL }, - { 0x204080000000001fULL, 0x2010080402010003ULL, 0x2020202020202020ULL, 0xff70a82422212020ULL }, - { 0x408000000000003fULL, 0x4020100804020101ULL, 0x4040404040404040ULL, 0xffe0504844424140ULL }, - { 0x800000000000017eULL, 0x8040201008040201ULL, 0x8080808080808080ULL, 0xffc0a09088848281ULL } -}; - -/** - * Count last flipped discs when playing on the last empty. - * - * @param pos the last empty square. - * @param P player's disc pattern. - * @return flipped disc count. - */ - -inline int last_flip(int pos, unsigned long long P) -{ - unsigned char n_flipped; - int x = pos & 7; - int y = pos >> 3; - - P &= mask_x[pos][3]; // mask out unrelated bits to make dummy 0 bits for outside - // n_flipped = COUNT_FLIP[x][_bextr_u64(P, pos & 0x38, 8)]; - n_flipped = COUNT_FLIP[x][(unsigned char) (P >> (pos & 0x38))]; - n_flipped += COUNT_FLIP[y][_pext_u64(P, mask_x[pos][0])]; - n_flipped += COUNT_FLIP[y][_pext_u64(P, mask_x[pos][1])]; - n_flipped += COUNT_FLIP[y][_pext_u64(P, mask_x[pos][2])]; - - return n_flipped; -} ->>>>>>> feb7fa7 (count_last_flip_bmi2 and transpose_avx2 added) -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) diff --git a/src/count_last_flip_carry_64.c b/src/count_last_flip_carry_64.c index 969bcf8..9d409a2 100644 --- a/src/count_last_flip_carry_64.c +++ b/src/count_last_flip_carry_64.c @@ -1,7 +1,3 @@ -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) /** * @file count_last_flip_carry_64.c * @@ -1278,1284 +1274,3 @@ int (*count_last_flip[])(const unsigned long long) = { count_last_flip_pass, }; -<<<<<<< HEAD -======= -/** - * @file count_last_flip_carry_64.c - * - * - * A function is provided to count the number of fipped disc of the last move - * for each square of the board. These functions are gathered into an array of - * functions, so that a fast access to each function is allowed. The generic - * form of the function take as input the player bitboard and return twice - * the number of flipped disc of the last move. - * - * The basic principle is to read into an array a precomputed result. Doing - * this is easy for a single line ; as we can use arrays of the form: - * - COUNT_FLIP[square where we play][8-bits disc pattern]. - * The problem is thus to convert any line of a 64-bits disc pattern into an - * 8-bits disc pattern. A fast way to do this is to select the right line, - * with a bit-mask, to gather the masked-bits into a continuous set by a simple - * multiplication and to right-shift the result to scale it into a number - * between 0 and 255. - * Once we get our 8-bits disc patterns, we directly get the number of - * flipped discs from the precomputed array, and add them from each flipping - * lines. - * For optimization purpose, the value returned is twice the number of flipped - * disc, to facilitate the computation of disc difference. - * - * With 135 degree merge, instead of Valery ClaudePierre's modification. - * - * For top to bottom flip, LS1B isolation (http://chessprogramming.wikispaces.com/ - * General+Setwise+Operations) is used to get the outflank bit. - * - * @date 1998 - 2018 - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.4 - * - */ - -/** precomputed count flip array */ -static const char COUNT_FLIP_R[128] = { - 0, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 8, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 10, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 8, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 12, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 8, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 10, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0, - 8, 0, 2, 0, 4, 0, 2, 0, 6, 0, 2, 0, 4, 0, 2, 0 -}; - -static const char COUNT_FLIP_2[256] = { - 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 8, 10, 8, 8, 8, 10, 8, 8, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0 -}; - -static const char COUNT_FLIP_3[256] = { - 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 6, 10, 8, 8, 6, 6, 6, 6, 6, 10, 8, 8, 6, 6, 6, 6, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0 -}; - -static const char COUNT_FLIP_4[256] = { - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static const char COUNT_FLIP_5[256] = { - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static const char COUNT_FLIP_L[128] = { - 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/** - * Count last flipped discs when playing on square A1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d9; - - P_v = P & 0x0101010101010100ULL; - n_flipped = ((P_v & -P_v) * 0x000020406080a0c0ULL) >> 60; - n_flipped += COUNT_FLIP_R[(P >> 1) & 0x7f]; - P_d9 = P & 0x8040201008040200ULL; - n_flipped += (((P_d9 & -P_d9) >> 1) * 0x000010100c080503ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d9; - - P_v = P & 0x0202020202020200ULL; - n_flipped = ((P_v & -P_v) * 0x0000102030405060ULL) >> 60; - n_flipped += COUNT_FLIP_R[(P >> 2) & 0x3f]; - P_d9 = P & 0x0080402010080400ULL; - n_flipped += ((P_d9 & -P_d9) * 0x0000040403020140ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x0404040404040400ULL; - n_flipped = ((P_v & -P_v) * 0x0000081018202830ULL) >> 60; - n_flipped += COUNT_FLIP_2[P & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000804020110A04ULL) * 0x0101010101010101ULL) >> 56]; // A3C1H6 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x0808080808080800ULL; - n_flipped = ((P_v & -P_v) * 0x000004080c101418ULL) >> 60; - n_flipped += COUNT_FLIP_3[P & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0000008041221408ULL) * 0x0101010101010101ULL) >> 56]; // A4D1H5 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x1010101010101000ULL; - n_flipped = ((P_v & -P_v) * 0x0000020406080a0cULL) >> 60; - n_flipped += COUNT_FLIP_4[P & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0000000182442810ULL) * 0x0101010101010101ULL) >> 56]; // A5E1H4 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x2020202020202000ULL; - n_flipped = ((P_v & -P_v) * 0x0000010203040506ULL) >> 60; - n_flipped += COUNT_FLIP_5[P & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0000010204885020ULL) * 0x0101010101010101ULL) >> 56]; // A6F1H3 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d7; - - P_v = P & 0x4040404040404000ULL; - n_flipped = ((P_v & -P_v) * 0x0000008101820283ULL) >> 60; - n_flipped += COUNT_FLIP_L[(P << 1) & 0x7e]; - P_d7 = P & 0x0001020408102000ULL; - n_flipped += ((P_d7 & -P_d7) * 0x000002081840a000ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H1. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H1(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d7; - - P_v = P & 0x8080808080808000ULL; - n_flipped = (((P_v & -P_v) >> 1) * 0x0000008101820283ULL) >> 60; - n_flipped += COUNT_FLIP_L[P & 0x7f]; - P_d7 = P & 0x0102040810204000ULL; - n_flipped += ((P_d7 & -P_d7) * 0x000001040c2050c0ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d9; - - P_v = P & 0x0101010101010000ULL; - n_flipped = ((P_v & -P_v) * 0x00000020406080a0ULL) >> 60; - n_flipped += COUNT_FLIP_R[(P >> 9) & 0x7f]; - P_d9 = P & 0x4020100804020000ULL; - n_flipped += (((P_d9 & -P_d9) >> 1) * 0x00000010100c0805ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d9; - - P_v = P & 0x0202020202020000ULL; - n_flipped = ((P_v & -P_v) * 0x0000001020304050ULL) >> 60; - n_flipped += COUNT_FLIP_R[(P >> 10) & 0x3f]; - P_d9 = P & 0x8040201008040000ULL; - n_flipped += (((P_d9 & -P_d9) >> 2) * 0x00000010100c0805ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x0404040404040000ULL; - n_flipped = ((P_v & -P_v) * 0x0000000810182028ULL) >> 60; - n_flipped += COUNT_FLIP_2[(P >> 8) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x00804020110A0400ULL) * 0x0101010101010101ULL) >> 56]; // A4C2H7 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x0808080808080000ULL; - n_flipped = ((P_v & -P_v) * 0x00000004080c1014ULL) >> 60; - n_flipped += COUNT_FLIP_3[(P >> 8) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0000804122140800ULL) * 0x0101010101010101ULL) >> 56]; // A5D2H6 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x1010101010100000ULL; - n_flipped = ((P_v & -P_v) * 0x000000020406080aULL) >> 60; - n_flipped += COUNT_FLIP_4[(P >> 8) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0000018244281000ULL) * 0x0101010101010101ULL) >> 56]; // A6E2H5 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v; - - P_v = P & 0x2020202020200000ULL; - n_flipped = ((P_v & -P_v) * 0x0000000102030405ULL) >> 60; - n_flipped += COUNT_FLIP_5[(P >> 8) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0001020488502000ULL) * 0x0101010101010101ULL) >> 56]; // A7F2H4 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d7; - - P_v = P & 0x4040404040400000ULL; - n_flipped = (((P_v & -P_v) >> 1) * 0x0000000102030405ULL) >> 60; - n_flipped += COUNT_FLIP_L[(P >> 7) & 0x7e]; - P_d7 = P & 0x0102040810200000ULL; - n_flipped += ((P_d7 & -P_d7) * 0x00000002081840a0ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H2. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H2(const unsigned long long P) -{ - int n_flipped; - unsigned long long P_v, P_d7; - - P_v = P & 0x8080808080800000ULL; - n_flipped = (((P_v & -P_v) >> 2) * 0x0000000102030405ULL) >> 60; - n_flipped += COUNT_FLIP_L[(P >> 8) & 0x7f]; - P_d7 = P & 0x0204081020400000ULL; - n_flipped += (((P_d7 & -P_d7) >> 2) * 0x0000000410308143ULL) >> 60; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x2010080402010101ULL) * 0x0102040404040404ULL) >> 56]; // A1A3F8 - n_flipped += COUNT_FLIP_R[(P >> 17) & 0x7f]; - n_flipped += COUNT_FLIP_5[((P & 0x0101010101010204ULL) * 0x2020201008040201ULL) >> 56]; // C1A3A8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x4020100804020202ULL) * 0x0081020202020202ULL) >> 56]; // B1B3G8 - n_flipped += COUNT_FLIP_R[(P >> 18) & 0x3f]; - n_flipped += COUNT_FLIP_5[(((P & 0x0202020202020408ULL) >> 1) * 0x2020201008040201ULL) >> 56]; // D1B3B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000000102040810ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0000010204081020ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x0000804020100804ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 16) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0000008040201008ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x4040404040402010ULL) * 0x0010101020408102ULL) >> 56]; // E1G3G8 - n_flipped += COUNT_FLIP_L[(P >> 15) & 0x7e]; - n_flipped += COUNT_FLIP_5[(((P & 0x0204081020404040ULL) >> 1) * 0x0402010101010101ULL) >> 56]; // G1G3B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H3. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H3(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_2[((P & 0x8080808080804020ULL) * 0x0008080810204081ULL) >> 56]; // F1H3H8 - n_flipped += COUNT_FLIP_L[(P >> 16) & 0x7f]; - n_flipped += COUNT_FLIP_5[(((P & 0x0408102040808080ULL) >> 2) * 0x0402010101010101ULL) >> 56]; // H1H3C8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x1008040201010101ULL) * 0x0102040808080808ULL) >> 56]; // A1A4E8 - n_flipped += COUNT_FLIP_R[(P >> 25) & 0x7f]; - n_flipped += COUNT_FLIP_4[((P & 0x0101010101020408ULL) * 0x1010101008040201ULL) >> 56]; // D1A4A8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x2010080402020202ULL) * 0x0081020404040404ULL) >> 56]; // B1B4F8 - n_flipped += COUNT_FLIP_R[(P >> 26) & 0x3f]; - n_flipped += COUNT_FLIP_4[(((P & 0x0202020202040810ULL) >> 1) * 0x1010101008040201ULL) >> 56]; // E1B4B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0000010204081020ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 24) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0000804020100804ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x4040404040201008ULL) * 0x0020202020408102ULL) >> 56]; // D1G4G8 - n_flipped += COUNT_FLIP_L[(P >> 23) & 0x7e]; - n_flipped += COUNT_FLIP_4[(((P & 0x0408102040404040ULL) >> 2) * 0x0804020101010101ULL) >> 56]; // G1G4C8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H4. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H4(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_3[((P & 0x8080808080402010ULL) * 0x0010101010204081ULL) >> 56]; // E1H4H8 - n_flipped += COUNT_FLIP_L[(P >> 24) & 0x7f]; - n_flipped += COUNT_FLIP_4[(((P & 0x0810204080808080ULL) >> 3) * 0x0804020101010101ULL) >> 56]; // H1H4D8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x0804020101010101ULL) * 0x0102040810101010ULL) >> 56]; // A1A5D8 - n_flipped += COUNT_FLIP_R[(P >> 33) & 0x7f]; - n_flipped += COUNT_FLIP_3[((P & 0x0101010102040810ULL) * 0x0808080808040201ULL) >> 56]; // E1A5A8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x1008040202020202ULL) * 0x0081020408080808ULL) >> 56]; // B1B5E8 - n_flipped += COUNT_FLIP_R[(P >> 34) & 0x3f]; - n_flipped += COUNT_FLIP_3[(((P & 0x0202020204081020ULL) >> 1) * 0x0808080808040201ULL) >> 56]; // F1B5B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 32) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0001020408102040ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x2010080402010000ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 32) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 32) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 32) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0408102040800000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0080402010080402ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x4040404020100804ULL) * 0x0040404040408102ULL) >> 56]; // C1G5G8 - n_flipped += COUNT_FLIP_L[(P >> 31) & 0x7e]; - n_flipped += COUNT_FLIP_3[(((P & 0x0810204040404040ULL) >> 3) * 0x1008040201010101ULL) >> 56]; // G1G5D8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H5. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H5(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_4[((P & 0x8080808040201008ULL) * 0x0020202020204081ULL) >> 56]; // D1H5H8 - n_flipped += COUNT_FLIP_L[(P >> 32) & 0x7f]; - n_flipped += COUNT_FLIP_3[(((P & 0x1020408080808080ULL) >> 4) * 0x1008040201010101ULL) >> 56]; // H1H5E8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x0402010101010101ULL) * 0x0102040810202020ULL) >> 56]; // A1A6C8 - n_flipped += COUNT_FLIP_R[(P >> 41) & 0x7f]; - n_flipped += COUNT_FLIP_2[((P & 0x0101010204081020ULL) * 0x0404040404040201ULL) >> 56]; // F1A6A8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x0804020202020202ULL) * 0x0081020408101010ULL) >> 56]; // B1B6D8 - n_flipped += COUNT_FLIP_R[(P >> 42) & 0x3f]; - n_flipped += COUNT_FLIP_2[(((P & 0x0202020408102040ULL) >> 1) * 0x0404040404040201ULL) >> 56]; // G1B6B8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x0404040404040404ULL) * 0x0040810204081020ULL) >> 56]; - n_flipped += COUNT_FLIP_2[(P >> 40) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x0102040810204080ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x1008040201000000ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x0808080808080808ULL) * 0x0020408102040810ULL) >> 56]; - n_flipped += COUNT_FLIP_3[(P >> 40) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0204081020408000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x2010080402010000ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x1010101010101010ULL) * 0x0010204081020408ULL) >> 56]; - n_flipped += COUNT_FLIP_4[(P >> 40) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0408102040800000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x4020100804020100ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x2020202020202020ULL) * 0x0008102040810204ULL) >> 56]; - n_flipped += COUNT_FLIP_5[(P >> 40) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0810204080000000ULL) * 0x0101010101010101ULL) >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x8040201008040201ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x4040402010080402ULL) * 0x0080808080808102ULL) >> 56]; // B1G6G8 - n_flipped += COUNT_FLIP_L[(P >> 39) & 0x7e]; - n_flipped += COUNT_FLIP_2[(((P & 0x1020404040404040ULL) >> 4) * 0x2010080402010101ULL) >> 56]; // G1G6E8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H6. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H6(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_5[((P & 0x8080804020100804ULL) * 0x0040404040404081ULL) >> 56]; // C1H6H8 - n_flipped += COUNT_FLIP_L[(P >> 40) & 0x7f]; - n_flipped += COUNT_FLIP_2[(((P & 0x2040808080808080ULL) >> 5) * 0x2010080402010101ULL) >> 56]; // H1H6F8 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0000010101010101ULL) * 0x0102040810204080ULL) >> 55]; - n_flipped += COUNT_FLIP_R[(P >> 49) & 0x7f]; - n_flipped += COUNT_FLIP_R[((P & 0x0000020408102040ULL) * 0x0101010101010101ULL) >> 57]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0000020202020202ULL) * 0x0081020408102040ULL) >> 55]; - n_flipped += COUNT_FLIP_R[(P >> 50) & 0x3f]; - n_flipped += COUNT_FLIP_R[((P & 0x0000040810204080ULL) * 0x0101010101010101ULL) >> 58]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0000040404040404ULL) * 0x0040810204081020ULL) >> 55]; - n_flipped += COUNT_FLIP_2[(P >> 48) & 0xff]; - n_flipped += COUNT_FLIP_2[((P & 0x00040A1120408000ULL) * 0x0101010101010101ULL) >> 56]; // A5C7H2 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0000080808080808ULL) * 0x0020408102040810ULL) >> 55]; - n_flipped += COUNT_FLIP_3[(P >> 48) & 0xff]; - n_flipped += COUNT_FLIP_3[((P & 0x0008142241800000ULL) * 0x0101010101010101ULL) >> 56]; // A4D7H3 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0000101010101010ULL) * 0x0010204081020408ULL) >> 55]; - n_flipped += COUNT_FLIP_4[(P >> 48) & 0xff]; - n_flipped += COUNT_FLIP_4[((P & 0x0010284482010000ULL) * 0x0101010101010101ULL) >> 56]; // A3E7H4 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0000202020202020ULL) * 0x0008102040810204ULL) >> 55]; - n_flipped += COUNT_FLIP_5[(P >> 48) & 0xff]; - n_flipped += COUNT_FLIP_5[((P & 0x0020508804020100ULL) * 0x0101010101010101ULL) >> 56]; // A2F7H5 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0000404040404040ULL) * 0x0004081020408102ULL) >> 55]; - n_flipped += COUNT_FLIP_L[(P >> 47) & 0x7e]; - n_flipped += COUNT_FLIP_L[((P & 0x0000201008040201ULL) * 0x0101010101010101ULL) >> 55]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H7. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H7(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0000808080808080ULL) * 0x0002040810204081ULL) >> 55]; - n_flipped += COUNT_FLIP_L[(P >> 48) & 0x7f]; - n_flipped += COUNT_FLIP_L[((P & 0x0000402010080402ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square A8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_A8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0001010101010101ULL) * 0x0102040810204080ULL) >> 56]; - n_flipped += COUNT_FLIP_R[P >> 57]; - n_flipped += COUNT_FLIP_R[((P & 0x0002040810204080ULL) * 0x0101010101010101ULL) >> 57]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square B8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_B8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0002020202020202ULL) * 0x0081020408102040ULL) >> 56]; - n_flipped += COUNT_FLIP_R[P >> 58]; - n_flipped += COUNT_FLIP_R[((P & 0x0004081020408000ULL) * 0x0101010101010101ULL) >> 58]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square C8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_C8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0004040404040404ULL) * 0x0040810204081020ULL) >> 56]; - n_flipped += COUNT_FLIP_2[P >> 56]; - n_flipped += COUNT_FLIP_2[((P & 0x040A112040800000ULL) * 0x0101010101010101ULL) >> 56]; // A6C8H3 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square D8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_D8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0008080808080808ULL) * 0x0020408102040810ULL) >> 56]; - n_flipped += COUNT_FLIP_3[P >> 56]; - n_flipped += COUNT_FLIP_3[((P & 0x0814224180000000ULL) * 0x0101010101010101ULL) >> 56]; // A5D8H4 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square E8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_E8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0010101010101010ULL) * 0x0010204081020408ULL) >> 56]; - n_flipped += COUNT_FLIP_4[P >> 56]; - n_flipped += COUNT_FLIP_4[((P & 0x1028448201000000ULL) * 0x0101010101010101ULL) >> 56]; // A4E8H5 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square F8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_F8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0020202020202020ULL) * 0x0008102040810204ULL) >> 56]; - n_flipped += COUNT_FLIP_5[P >> 56]; - n_flipped += COUNT_FLIP_5[((P & 0x0050880402010000ULL) * 0x0101010101010101ULL) >> 56]; // A3F8H6 - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square G8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_G8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0040404040404040ULL) * 0x0004081020408102ULL) >> 56]; - n_flipped += COUNT_FLIP_L[(P >> 55) & 0x7e]; - n_flipped += COUNT_FLIP_L[((P & 0x0020100804020100ULL) * 0x0101010101010101ULL) >> 55]; - - return n_flipped; -} - -/** - * Count last flipped discs when playing on square H8. - * - * @param P player's disc pattern. - * @return flipped disc count. - */ -static int count_last_flip_H8(const unsigned long long P) -{ - int n_flipped; - - n_flipped = COUNT_FLIP_L[((P & 0x0080808080808080ULL) * 0x0002040810204081ULL) >> 56]; - n_flipped += COUNT_FLIP_L[(P >> 56) & 0x7f]; - n_flipped += COUNT_FLIP_L[((P & 0x0040201008040201ULL) * 0x0101010101010101ULL) >> 56]; - - return n_flipped; -} - -/** - * Count last flipped discs when plassing. - * - * @param P player's disc pattern (unused). - * @return zero. - */ -static int count_last_flip_pass(const unsigned long long P) -{ - (void) P; // useless code to shut-up compiler warning - return 0; -} - -/** Array of functions to count flipped discs of the last move */ -int (*count_last_flip[])(const unsigned long long) = { - count_last_flip_A1, count_last_flip_B1, count_last_flip_C1, count_last_flip_D1, - count_last_flip_E1, count_last_flip_F1, count_last_flip_G1, count_last_flip_H1, - count_last_flip_A2, count_last_flip_B2, count_last_flip_C2, count_last_flip_D2, - count_last_flip_E2, count_last_flip_F2, count_last_flip_G2, count_last_flip_H2, - count_last_flip_A3, count_last_flip_B3, count_last_flip_C3, count_last_flip_D3, - count_last_flip_E3, count_last_flip_F3, count_last_flip_G3, count_last_flip_H3, - count_last_flip_A4, count_last_flip_B4, count_last_flip_C4, count_last_flip_D4, - count_last_flip_E4, count_last_flip_F4, count_last_flip_G4, count_last_flip_H4, - count_last_flip_A5, count_last_flip_B5, count_last_flip_C5, count_last_flip_D5, - count_last_flip_E5, count_last_flip_F5, count_last_flip_G5, count_last_flip_H5, - count_last_flip_A6, count_last_flip_B6, count_last_flip_C6, count_last_flip_D6, - count_last_flip_E6, count_last_flip_F6, count_last_flip_G6, count_last_flip_H6, - count_last_flip_A7, count_last_flip_B7, count_last_flip_C7, count_last_flip_D7, - count_last_flip_E7, count_last_flip_F7, count_last_flip_G7, count_last_flip_H7, - count_last_flip_A8, count_last_flip_B8, count_last_flip_C8, count_last_flip_D8, - count_last_flip_E8, count_last_flip_F8, count_last_flip_G8, count_last_flip_H8, - count_last_flip_pass, -}; - ->>>>>>> b3f048d (copyright changes) -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) diff --git a/src/count_last_flip_lzcnt.c b/src/count_last_flip_lzcnt.c index 07b8cda..4094684 100644 --- a/src/count_last_flip_lzcnt.c +++ b/src/count_last_flip_lzcnt.c @@ -1,7 +1,3 @@ -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) /** * @file count_last_flip_lzcnt.c * @@ -29,17 +25,7 @@ * */ -<<<<<<< HEAD -<<<<<<< HEAD #include "bit_intrinsics.h" -======= -#ifdef __LZCNT__ -#include -#endif ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -#include "bit_intrinsics.h" ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) /** precomputed count flip array */ static const unsigned char COUNT_FLIP[8][256] = { @@ -125,74 +111,6 @@ static const unsigned char COUNT_FLIP[8][256] = { } }; -<<<<<<< HEAD -<<<<<<< HEAD -#ifdef lzcnt_u64 - -/* bit masks for vertical and diagonal lines for A8..H8 */ -static const unsigned long long mask_9_7[8][2] = { - { 0x0204081020408000, 0x0000000000000000 }, - { 0x0204081020400000, 0x8000000000000000 }, - { 0x0204081020000000, 0x8040000000000000 }, - { 0x0204081000000000, 0x8040200000000000 }, - { 0x0204080000000000, 0x8040201000000000 }, - { 0x0204000000000000, 0x8040201008000000 }, - { 0x0200000000000000, 0x8040201008040000 }, - { 0x0000000000000000, 0x8040201008040200 } -======= -/* bit masks for diagonal lines */ -static const unsigned long long mask_d[2][64] = { - { - 0x0000000000000001ULL, 0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL, - 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, - 0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL, - 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, - 0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL, - 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, - 0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, - 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, - 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, - 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, - 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, - 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL, - 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, - 0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL, - 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, - 0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL, 0x8000000000000000ULL - }, - { - 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, - 0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL, 0x0000000000000080ULL, - 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, - 0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL, - 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, - 0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL, - 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, - 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL, - 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, - 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, - 0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, - 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, - 0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL, - 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, - 0x0100000000000000ULL, 0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL, - 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL - } -}; - -#ifdef __LZCNT__ - -/* bit masks for vertical and diagonal lines for A8..H8 */ -static const unsigned long long mask_7[8] = { - 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, - 0x2040800000000000ULL, 0x4080000000000000ULL, 0x8000000000000000ULL, 0x0000000000000000ULL -}; - -static const unsigned long long mask_9[8] = { - 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0201000000000000ULL, 0x0402010000000000ULL, - 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= #ifdef lzcnt_u64 /* bit masks for vertical and diagonal lines for A8..H8 */ @@ -205,15 +123,10 @@ static const unsigned long long mask_9_7[8][2] = { { 0x0204000000000000, 0x8040201008000000 }, { 0x0200000000000000, 0x8040201008040000 }, { 0x0000000000000000, 0x8040201008040200 } ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) }; #else -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) /* bit masks for vertical and diagonal lines for A1..H1 */ static const unsigned long long mask_9_7[8][2] = { { 0x0000000000000000, 0x4020100804020100 }, @@ -224,28 +137,10 @@ static const unsigned long long mask_9_7[8][2] = { { 0x0000000408102040, 0x0000000000020100 }, { 0x0000020408102040, 0x0000000000000100 }, { 0x0001020408102040, 0x0000000000000000 } -<<<<<<< HEAD -======= -/* bit masks for vertical and diagonal lines for A1..H2 */ -static const unsigned long long mask_7[8] = { - 0x0000000000000000ULL, 0x0000000000000040ULL, 0x0000000000002040ULL, 0x0000000000102040ULL, - 0x0000000008102040ULL, 0x0000000408102040ULL, 0x0000020408102040ULL, 0x0001020408102040ULL -}; - -static const unsigned long long mask_9[16] = { - 0x4020100804020100ULL, 0x0020100804020100ULL, 0x0000100804020100ULL, 0x0000000804020100ULL, - 0x0000000004020100ULL, 0x0000000000020100ULL, 0x0000000000000100ULL, 0x0000000000000000ULL ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) }; #endif -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) /* bit masks for diagonal lines for A5..H6 */ static const unsigned long long mask_d[16][2] = { { 0x0000000102040810, 0x0804020100000000 }, @@ -265,55 +160,18 @@ static const unsigned long long mask_d[16][2] = { { 0x1020408000000000, 0x0080402010080402 }, { 0x2040800000000000, 0x0000804020100804 } }; -<<<<<<< HEAD #ifdef HAS_CPU_64 #define packV(P, x) (((((P) >> (x)) & 0x0101010101010101) * 0x0102040810204080) >> 56) #define packD(PM) (((PM) * 0x0101010101010101) >> 56) -======= -#ifdef __x86_64__ - -#define packV(P, x) (((((P) >> (x)) & 0x0101010101010101ULL) * 0x0102040810204080ULL) >> 56) -#define packD(PM) (((PM) * 0x0101010101010101ULL) >> 56) -#define mask_8(x) (0x0101010101010101ULL << (x)) ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - -#ifdef HAS_CPU_64 - -#define packV(P, x) (((((P) >> (x)) & 0x0101010101010101) * 0x0102040810204080) >> 56) -#define packD(PM) (((PM) * 0x0101010101010101) >> 56) ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) #else #define packV(P, x) (((((((unsigned int)(P)) >> (x)) & 0x01010101u) + (((((unsigned int)((P) >> 32)) >> (x)) & 0x01010101u) << 4)) * 0x01020408u) >> 24) #define packD(PM) (((((unsigned int)(PM)) * 0x01010101u) + (((unsigned int)((PM) >> 32)) * 0x01010101u)) >> 24) -<<<<<<< HEAD -<<<<<<< HEAD - -#endif // HAS_CPU_64 -======= -#define mask_8(x) (((unsigned long long) (0x01010101u << (x)) << 32) | (0x01010101u << (x))) - -static int inline __lzcnt64(unsigned long long x) { - int y; - __asm__ ( - "lzcntl %1, %0\n\t" - "lzcntl %2, %2\n\t" - "leal (%0, %2), %0\n\t" - "cmovnc %2, %0" - : "=&r" (y) : "0" ((unsigned int) x), "r" ((unsigned int) (x >> 32)) ); - return y; -} - -#endif // __x86_64__ ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= #endif // HAS_CPU_64 ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) /** * Count last flipped discs when playing on the last empty. @@ -326,333 +184,6 @@ static int inline __lzcnt64(unsigned long long x) { int last_flip(int pos, unsigned long long P) { unsigned long long P8, P7, P9; -<<<<<<< HEAD -<<<<<<< HEAD - int n_flipped; - int x = pos & 7; - - n_flipped = COUNT_FLIP[x][(unsigned char) (P >> (pos & 0x38))]; - -#ifdef lzcnt_u64 - - if (pos < 0x20) { - P = vertical_mirror(P); - pos ^= 0x38; - } - - if (pos >= 0x30) { - P <<= (64 - pos); - P8 = P & 0x0101010101010101; - P7 = P & mask_9_7[x][0]; - P9 = (P << 8) & mask_9_7[x][1]; - n_flipped += ((lzcnt_u64(P8) & 0x38) + (lzcnt_u64(P7) & 0x38) + (lzcnt_u64(P9) & 0x38)) >> 2; - - return n_flipped; - } - -#else // ls1b - slow - - if (pos & 0x10) { // 0 1 2 3 4 5 6 7 -> 0 1 4 5 4 5 0 1 - P = vertical_mirror(P); - pos ^= 0x38; - } - - if (pos < 0x10) { - P >>= (pos + 1); - P8 = P & 0x0080808080808080; - n_flipped += ((P8 & -P8) * 0x00004080c1014180) >> 60; - P7 = P & mask_9_7[x][0]; - n_flipped += ((P7 & -P7) * 0x0001040c2050c000) >> 60; - P9 = P & mask_9_7[x][1]; - n_flipped += ((P9 & -P9) * 0x000010100c080503) >> 60; - - return n_flipped; - } -#endif - - n_flipped += COUNT_FLIP[pos >> 3][packV(P, x)]; - P7 = P & mask_d[pos - 0x20][0]; - n_flipped += COUNT_FLIP[x][packD(P7)]; - P9 = P & mask_d[pos - 0x20][1]; - n_flipped += COUNT_FLIP[x][packD(P9)]; - - return n_flipped; -} -======= -/** - * @file count_last_flip_lzcnt.c - * - * - * A function is provided to count the number of fipped disc of the last move. - * - * The basic principle is to read into an array a precomputed result. Doing - * this is easy for a single line ; as we can use arrays of the form: - * - COUNT_FLIP[square where we play][8-bits disc pattern]. - * The problem is thus to convert any line of a 64-bits disc pattern into an - * 8-bits disc pattern. A fast way to do this is to select the right line, - * with a bit-mask, to gather the masked-bits into a continuous set by a simple - * multiplication and to right-shift the result to scale it into a number - * between 0 and 255. - * Once we get our 8-bits disc patterns, we directly get the number of - * flipped discs from the precomputed array, and add them from each flipping - * lines. - * For optimization purpose, the value returned is twice the number of flipped - * disc, to facilitate the computation of disc difference. - * - * @date 1998 - 2014 - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.4 - * - */ - -#ifdef __LZCNT__ -#include -#endif - -/** precomputed count flip array */ -static const unsigned char COUNT_FLIP[8][256] = { - { - 0, 0, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 10, 10, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 12, 12, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 10, 10, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 8, 8, 8, 8, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 10, 10, 10, 10, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 8, 8, 8, 8, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - }, - { - 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 8, 10, 8, 8, 8, 10, 8, 8, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - }, - { - 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 6, 10, 8, 8, 6, 6, 6, 6, 6, 10, 8, 8, 6, 6, 6, 6, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - }, - { - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - } -}; - -/* bit masks for diagonal lines */ -static const unsigned long long mask_d[2][64] = { - { - 0x0000000000000001ULL, 0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL, - 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, - 0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL, - 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, - 0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL, - 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, - 0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, - 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, - 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, - 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, - 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, - 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL, - 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, - 0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL, - 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, - 0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL, 0x8000000000000000ULL - }, - { - 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, - 0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL, 0x0000000000000080ULL, - 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, - 0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL, - 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, - 0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL, - 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, - 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL, - 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, - 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, - 0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, - 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, - 0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL, - 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, - 0x0100000000000000ULL, 0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL, - 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL - } -}; - -#ifdef __LZCNT__ - -/* bit masks for vertical and diagonal lines for A8..H8 */ -static const unsigned long long mask_7[8] = { - 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, - 0x2040800000000000ULL, 0x4080000000000000ULL, 0x8000000000000000ULL, 0x0000000000000000ULL -}; - -static const unsigned long long mask_9[8] = { - 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0201000000000000ULL, 0x0402010000000000ULL, - 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL -}; - -#else - -/* bit masks for vertical and diagonal lines for A1..H2 */ -static const unsigned long long mask_7[8] = { - 0x0000000000000000ULL, 0x0000000000000040ULL, 0x0000000000002040ULL, 0x0000000000102040ULL, - 0x0000000008102040ULL, 0x0000000408102040ULL, 0x0000020408102040ULL, 0x0001020408102040ULL -}; - -static const unsigned long long mask_9[16] = { - 0x4020100804020100ULL, 0x0020100804020100ULL, 0x0000100804020100ULL, 0x0000000804020100ULL, - 0x0000000004020100ULL, 0x0000000000020100ULL, 0x0000000000000100ULL, 0x0000000000000000ULL -}; - -#endif - -#ifdef __x86_64__ - -#define packV(P, x) (((((P) >> (x)) & 0x0101010101010101ULL) * 0x0102040810204080ULL) >> 56) -#define packD(PM) (((PM) * 0x0101010101010101ULL) >> 56) -#define mask_8(x) (0x0101010101010101ULL << (x)) - -#else - -#define packV(P, x) (((((((unsigned int)(P)) >> (x)) & 0x01010101u) + (((((unsigned int)((P) >> 32)) >> (x)) & 0x01010101u) << 4)) * 0x01020408u) >> 24) -#define packD(PM) (((((unsigned int)(PM)) * 0x01010101u) + (((unsigned int)((PM) >> 32)) * 0x01010101u)) >> 24) -#define mask_8(x) (((unsigned long long) (0x01010101u << (x)) << 32) | (0x01010101u << (x))) - -static int inline __lzcnt64(unsigned long long x) { - int y; - __asm__ ( - "lzcntl %1, %0\n\t" - "lzcntl %2, %2\n\t" - "leal (%0, %2), %0\n\t" - "cmovnc %2, %0" - : "=&r" (y) : "0" ((unsigned int) x), "r" ((unsigned int) (x >> 32)) ); - return y; -} - -#endif // __x86_64__ - -/** - * Count last flipped discs when playing on the last empty. - * - * @param pos the last empty square. - * @param P player's disc pattern. - * @return flipped disc count. - */ - -int last_flip(int pos, unsigned long long P) -{ - unsigned long long P8, P7, P9; - unsigned int t; - int n_flipped; - int x = pos & 7; - int y = pos & 0x38; - - n_flipped = COUNT_FLIP[x][(unsigned char) (P >> y)]; - - switch (pos & 0x30) { - -#ifdef __LZCNT__ - - case 0: - P = __builtin_bswap64(P); - y ^= 0x38; - // not break; - case 0x30: - P <<= (64 - y); - P8 = P & mask_8(x); - P7 = P & mask_7[x]; - P9 = P & mask_9[x]; - n_flipped += (((int) __lzcnt64(P8) & 0x38) + ((int) __lzcnt64(P7) & 0x38) + ((int) __lzcnt64(P9) & 0x38)) >> 2; - break; - -#else // ls1b - slow - case 0x30: - P = __builtin_bswap64(P); - pos ^= 0x38; - // not break; - case 0: - P >>= (pos + 1); - P8 = P & 0x0080808080808080ULL; - n_flipped += ((P8 & -P8) * 0x00004080c1014180ULL) >> 60; - P7 = P & mask_7[x]; - n_flipped += ((P7 & -P7) * 0x0001040c2050c000ULL) >> 60; - P9 = P & mask_9[x]; - n_flipped += ((P9 & -P9) * 0x000010100c080503ULL) >> 60; - break; -#endif - - default: - y = pos >> 3; - n_flipped += COUNT_FLIP[y][packV(P, x)]; - P7 = P & mask_d[0][pos]; - n_flipped += COUNT_FLIP[x][packD(P7)]; - P9 = P & mask_d[1][pos]; - n_flipped += COUNT_FLIP[x][packD(P9)]; - break; - } - return n_flipped; -} ->>>>>>> f24cc06 (avoid BMI2 for AMD; more lzcnt/tzcnt in count_last_flip_bitscan) -======= - unsigned int t; -======= ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) int n_flipped; int x = pos & 7; @@ -703,4 +234,3 @@ int last_flip(int pos, unsigned long long P) return n_flipped; } ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) diff --git a/src/count_last_flip_neon.c b/src/count_last_flip_neon.c index 4800bc7..45cb89e 100644 --- a/src/count_last_flip_neon.c +++ b/src/count_last_flip_neon.c @@ -17,28 +17,10 @@ * For optimization purpose, the value returned is twice the number of flipped * disc, to facilitate the computation of disc difference. * -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD * @date 1998 - 2023 * @author Richard Delorme * @author Toshihiko Okuhara * @version 4.5 -======= - * @date 1998 - 2020 - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.4 ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= - * @date 1998 - 2022 -======= - * @date 1998 - 2023 ->>>>>>> 4087529 (Revise board0 usage; fix unused flips) - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.5 ->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested)) * */ @@ -128,14 +110,7 @@ const unsigned char COUNT_FLIP[8][256] = { }, }; -<<<<<<< HEAD -<<<<<<< HEAD #ifdef HAS_CPU_64 -======= ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= -#ifdef HAS_CPU_64 ->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested)) /* bit masks for diagonal lines (interleaved) */ const uint64x2_t mask_dvhd[64][2] = { {{ 0x000000000000ff01, 0x0000000000000000 }, { 0x0801040102010101, 0x8001400120011001 }}, @@ -203,10 +178,6 @@ const uint64x2_t mask_dvhd[64][2] = { {{ 0x0000000000000000, 0xff40008000000000 }, { 0x0440024001400040, 0x4040204010400840 }}, {{ 0x0000000000000000, 0xff80000000000000 }, { 0x0880048002800180, 0x8080408020801080 }} }; -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested)) #else /* bit masks for diagonal lines */ const uint64x2_t mask_dvhd[64][2] = { @@ -276,11 +247,6 @@ const uint64x2_t mask_dvhd[64][2] = { {{ 0x8000000000000000, 0xff00000000000000 }, { 0x8080808080808080, 0x8040201008040201 }} }; #endif -<<<<<<< HEAD -======= ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= ->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested)) /** * Count last flipped discs when playing on the last empty. @@ -290,48 +256,6 @@ const uint64x2_t mask_dvhd[64][2] = { * @return flipped disc count. */ -<<<<<<< HEAD -<<<<<<< HEAD -int last_flip(int pos, unsigned long long P) -{ - unsigned int n_flips; - const unsigned char *COUNT_FLIP_X = COUNT_FLIP[pos & 7]; - const unsigned char *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3]; - uint64x2_t PP = vdupq_n_u64(P); - uint64x2_t II; -#ifdef HAS_CPU_64 // vaddvq - unsigned int t; - const uint64x2_t dmask = { 0x0808040402020101, 0x8080404020201010 }; - - PP = vreinterpretq_u64_u8(vzip1q_u8(vreinterpretq_u8_u64(PP), vreinterpretq_u8_u64(PP))); - II = vandq_u64(PP, mask_dvhd[pos][0]); // 2 dirs interleaved - t = vaddvq_u16(vreinterpretq_u16_u64(II)); - n_flips = COUNT_FLIP_X[t >> 8]; - n_flips += COUNT_FLIP_X[t & 0xFF]; - II = vandq_u64(vreinterpretq_u64_u8(vtstq_u8(vreinterpretq_u8_u64(PP), vreinterpretq_u8_u64(mask_dvhd[pos][1]))), dmask); - t = vaddvq_u16(vreinterpretq_u16_u64(II)); - n_flips += COUNT_FLIP_Y[t >> 8]; - n_flips += COUNT_FLIP_Y[t & 0xFF]; - -#else // Neon kindergarten - const uint64x2_t dmask = { 0x1020408001020408, 0x1020408001020408 }; - - II = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vreinterpretq_u8_u64(vandq_u64(PP, mask_dvhd[pos][0]))))); - n_flips = COUNT_FLIP_X[vgetq_lane_u32(vreinterpretq_u32_u64(II), 2)]; - n_flips += COUNT_FLIP_X[vgetq_lane_u32(vreinterpretq_u32_u64(II), 0)]; - II = vreinterpretq_u64_s8(vnegq_s8(vreinterpretq_s8_u8(vtstq_u8(vreinterpretq_u8_u64(PP), vreinterpretq_u8_u64(mask_dvhd[pos][1]))))); - II = vpaddlq_u32(vmulq_u32(vreinterpretq_u32_u64(dmask), vreinterpretq_u32_u64(II))); - n_flips += COUNT_FLIP_Y[vgetq_lane_u8(vreinterpretq_u8_u64(II), 11)]; - n_flips += COUNT_FLIP_Y[vgetq_lane_u8(vreinterpretq_u8_u64(II), 3)]; -#endif -<<<<<<< HEAD -======= -#ifndef HAS_CPU_64 -#define vaddvq_u16(x) vget_lane_u64(vpaddl_u32(vpaddl_u16(vadd_u16(vget_high_u16(x), vget_low_u16(x)))), 0) -#endif - -======= ->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested)) int last_flip(int pos, unsigned long long P) { unsigned int n_flips; @@ -353,9 +277,6 @@ int last_flip(int pos, unsigned long long P) n_flips += COUNT_FLIP_Y[t >> 8]; n_flips += COUNT_FLIP_Y[t & 0xFF]; -<<<<<<< HEAD ->>>>>>> 343493d (More neon/sse optimizations; neon dispatch added for arm32) -======= #else // Neon kindergarten const uint64x2_t dmask = { 0x1020408001020408, 0x1020408001020408 }; @@ -367,10 +288,6 @@ int last_flip(int pos, unsigned long long P) n_flips += COUNT_FLIP_Y[vgetq_lane_u8(vreinterpretq_u8_u64(II), 11)]; n_flips += COUNT_FLIP_Y[vgetq_lane_u8(vreinterpretq_u8_u64(II), 3)]; #endif - ->>>>>>> 81dec96 (Kindergarten last flip for arm32; MSVC arm Windows build (not tested)) -======= ->>>>>>> 4087529 (Revise board0 usage; fix unused flips) return n_flips; } diff --git a/src/count_last_flip_plain.c b/src/count_last_flip_plain.c index 88fbd9a..e13e3aa 100644 --- a/src/count_last_flip_plain.c +++ b/src/count_last_flip_plain.c @@ -1,7 +1,3 @@ -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) /** * @file count_last_flip_plain.c * @@ -163,15 +159,7 @@ const unsigned long long mask_d[2][64] = { #define packV(P, x) (((((((unsigned int)(P)) >> (x)) & 0x01010101u) + (((((unsigned int)((P) >> 32)) >> (x)) & 0x01010101u) << 4)) * 0x01020408u) >> 24) #define packD(PM) (((((unsigned int)(PM)) * 0x01010101u) + (((unsigned int)((PM) >> 32)) * 0x01010101u)) >> 24) -<<<<<<< HEAD -<<<<<<< HEAD #endif // HAS_CPU_64 -======= -#endif // __x86_64__ ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= -#endif // HAS_CPU_64 ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) /** * Count last flipped discs when playing on the last empty. @@ -180,24 +168,10 @@ const unsigned long long mask_d[2][64] = { * @param P player's disc pattern. * @return flipped disc count. */ -<<<<<<< HEAD -<<<<<<< HEAD -int last_flip(int pos, unsigned long long P) -{ - unsigned long long PM; - int n_flipped; -======= -inline int last_flip(int pos, unsigned long long P) -{ - unsigned long long PM; - unsigned char n_flipped; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= int last_flip(int pos, unsigned long long P) { unsigned long long PM; int n_flipped; ->>>>>>> 569c1f8 (More neon optimizations; split bit_intrinsics.h from bit.h) int x = pos & 0x07; int y = pos >> 3; @@ -210,194 +184,3 @@ int last_flip(int pos, unsigned long long P) return n_flipped; } -<<<<<<< HEAD -======= -/** - * @file count_last_flip_plain.c - * - * - * A function is provided to count the number of fipped disc of the last move. - * - * The basic principle is to read into an array a precomputed result. Doing - * this is easy for a single line ; as we can use arrays of the form: - * - COUNT_FLIP[square where we play][8-bits disc pattern]. - * The problem is thus to convert any line of a 64-bits disc pattern into an - * 8-bits disc pattern. A fast way to do this is to select the right line, - * with a bit-mask, to gather the masked-bits into a continuous set by a simple - * multiplication and to right-shift the result to scale it into a number - * between 0 and 255. - * Once we get our 8-bits disc patterns, we directly get the number of - * flipped discs from the precomputed array, and add them from each flipping - * lines. - * For optimization purpose, the value returned is twice the number of flipped - * disc, to facilitate the computation of disc difference. - * - * @date 1998 - 2017 - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.4 - * - */ - -/** precomputed count flip array */ -const unsigned char COUNT_FLIP[8][256] = { - { - 0, 0, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 10, 10, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 12, 12, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 10, 10, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 8, 8, 8, 8, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 10, 10, 10, 10, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 8, 8, 8, 8, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - }, - { - 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 8, 10, 8, 8, 8, 10, 8, 8, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - }, - { - 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 6, 10, 8, 8, 6, 6, 6, 6, 6, 10, 8, 8, 6, 6, 6, 6, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - }, - { - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, -}; - -/* bit masks for diagonal lines */ -const unsigned long long mask_d[2][64] = { - { - 0x0000000000000001ULL, 0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL, - 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, - 0x0000000000000102ULL, 0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL, - 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, - 0x0000000000010204ULL, 0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL, - 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, - 0x0000000001020408ULL, 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, - 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, - 0x0000000102040810ULL, 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, - 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, - 0x0000010204081020ULL, 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, - 0x0408102040800000ULL, 0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL, - 0x0001020408102040ULL, 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, - 0x0810204080000000ULL, 0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL, - 0x0102040810204080ULL, 0x0204081020408000ULL, 0x0408102040800000ULL, 0x0810204080000000ULL, - 0x1020408000000000ULL, 0x2040800000000000ULL, 0x4080000000000000ULL, 0x8000000000000000ULL - }, - { - 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, - 0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL, 0x0000000000000080ULL, - 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, - 0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL, 0x0000000000008040ULL, - 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, - 0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL, 0x0000000000804020ULL, - 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, - 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, 0x0000000080402010ULL, - 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, - 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, 0x0000008040201008ULL, - 0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL, 0x2010080402010000ULL, - 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, 0x0000804020100804ULL, - 0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL, 0x1008040201000000ULL, - 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL, 0x0080402010080402ULL, - 0x0100000000000000ULL, 0x0201000000000000ULL, 0x0402010000000000ULL, 0x0804020100000000ULL, - 0x1008040201000000ULL, 0x2010080402010000ULL, 0x4020100804020100ULL, 0x8040201008040201ULL - } -}; - -#ifdef HAS_CPU_64 - -#define packV(P, x) (((((P) >> (x)) & 0x0101010101010101ULL) * 0x0102040810204080ULL) >> 56) -#define packD(PM) (((PM) * 0x0101010101010101ULL) >> 56) - -#else - -#define packV(P, x) (((((((unsigned int)(P)) >> (x)) & 0x01010101u) + (((((unsigned int)((P) >> 32)) >> (x)) & 0x01010101u) << 4)) * 0x01020408u) >> 24) -#define packD(PM) (((((unsigned int)(PM)) * 0x01010101u) + (((unsigned int)((PM) >> 32)) * 0x01010101u)) >> 24) - -#endif // __x86_64__ - -/** - * Count last flipped discs when playing on the last empty. - * - * @param pos the last empty square. - * @param P player's disc pattern. - * @return flipped disc count. - */ -inline int last_flip(int pos, unsigned long long P) -{ - unsigned long long PM; - unsigned char n_flipped; - int x = pos & 0x07; - int y = pos >> 3; - - n_flipped = COUNT_FLIP[y][packV(P, x)]; - n_flipped += COUNT_FLIP[x][(unsigned char) (P >> (y * 8))]; - PM = P & mask_d[0][pos]; - n_flipped += COUNT_FLIP[x][packD(PM)]; - PM = P & mask_d[1][pos]; - n_flipped += COUNT_FLIP[x][packD(PM)]; - - return n_flipped; -} ->>>>>>> b3f048d (copyright changes) -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) diff --git a/src/count_last_flip_sse.c b/src/count_last_flip_sse.c index 5ebc65c..45fe75d 100644 --- a/src/count_last_flip_sse.c +++ b/src/count_last_flip_sse.c @@ -1,7 +1,3 @@ -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) /** * @file count_last_flip_sse.c * @@ -21,51 +17,18 @@ * For optimization purpose, the value returned is twice the number of flipped * disc, to facilitate the computation of disc difference. * -<<<<<<< HEAD -<<<<<<< HEAD * @date 1998 - 2023 * @author Richard Delorme * @author Toshihiko Okuhara * @version 4.5 -======= - * @date 1998 - 2020 - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.4 ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - * @date 1998 - 2023 - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.5 ->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask) * */ #include "bit.h" -<<<<<<< HEAD -<<<<<<< HEAD -#include - -<<<<<<< HEAD -/** precomputed count flip array */ -const uint8_t COUNT_FLIP[8][256] = { -======= -======= #include ->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask) -#define DUPLO 0x44 - -======= ->>>>>>> 593fff4 (use appropriate _mm_set1) /** precomputed count flip array */ -<<<<<<< HEAD -const unsigned char COUNT_FLIP[8][256] = { ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= const uint8_t COUNT_FLIP[8][256] = { ->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask) { 0, 0, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, @@ -226,51 +189,12 @@ const V4DI mask_dvhd[64] = { int last_flip(int pos, unsigned long long P) { -<<<<<<< HEAD -<<<<<<< HEAD uint_fast8_t n_flips; unsigned int t; const uint8_t *COUNT_FLIP_X = COUNT_FLIP[pos & 7]; const uint8_t *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3]; #ifdef AVXLASTFLIP // no gain __m256i PP = _mm256_set1_epi64x(P); -<<<<<<< HEAD - - n_flips = COUNT_FLIP_X[(P >> (pos & 0x38)) & 0xFF]; - #ifdef __AVX512VL__ - t = _cvtmask32_u32(_mm256_test_epi8_mask(PP, mask_dvhd[pos].v4)); - #else - t = _mm256_movemask_epi8(_mm256_sub_epi8(_mm256_setzero_si256(), _mm256_and_si256(PP, mask_dvhd[pos].v4))); - #endif - n_flips += COUNT_FLIP_Y[t & 0xFF]; - t >>= 16; - - #else - __m128i PP = _mm_set1_epi64x(P); - __m128i II = _mm_sad_epu8(_mm_and_si128(PP, mask_dvhd[pos].v2[0]), _mm_setzero_si128()); - - n_flips = COUNT_FLIP_X[_mm_extract_epi16(II, 4)]; - n_flips += COUNT_FLIP_X[_mm_cvtsi128_si32(II)]; - #ifdef __AVX512VL__ - t = _cvtmask16_u32(_mm_test_epi8_mask(PP, mask_dvhd[pos].v2[1])); - #else - t = _mm_movemask_epi8(_mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128(PP, mask_dvhd[pos].v2[1]))); - #endif - #endif - n_flips += COUNT_FLIP_Y[t >> 8]; - n_flips += COUNT_FLIP_Y[t & 0xFF]; -======= - unsigned char n_flips; -======= - uint_fast8_t n_flips; ->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask) - unsigned int t; - const uint8_t *COUNT_FLIP_X = COUNT_FLIP[pos & 7]; - const uint8_t *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3]; - #ifdef AVXLASTFLIP // no gain - __m256i PP = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(P)); -======= ->>>>>>> 593fff4 (use appropriate _mm_set1) n_flips = COUNT_FLIP_X[(P >> (pos & 0x38)) & 0xFF]; #ifdef __AVX512VL__ @@ -294,242 +218,8 @@ int last_flip(int pos, unsigned long long P) #endif #endif n_flips += COUNT_FLIP_Y[t >> 8]; -<<<<<<< HEAD -<<<<<<< HEAD - n_flips += COUNT_FLIP_Y[(unsigned char) t]; ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) -======= - n_flips += COUNT_FLIP_Y[t & 0xff]; ->>>>>>> 26dad03 (Use player bits only in board_score_1) -======= n_flips += COUNT_FLIP_Y[t & 0xFF]; ->>>>>>> c54de3f (uint_fast8_t to acc last flip; unsigned char cast to 0xFF mask) return n_flips; } -<<<<<<< HEAD -======= -/** - * @file count_last_flip_sse.c - * - * - * A function is provided to count the number of fipped disc of the last move. - * - * The basic principle is to read into an array a precomputed result. Doing - * this is easy for a single line ; as we can use arrays of the form: - * - COUNT_FLIP[square where we play][8-bits disc pattern]. - * The problem is thus to convert any line of a 64-bits disc pattern into an - * 8-bits disc pattern. A fast way to do this is to select the right line, - * with a bit-mask, to gather the masked-bits into a continuous set by the - * SSE PMOVMSKB or PSADBW instruction. - * Once we get our 8-bits disc patterns, we directly get the number of - * flipped discs from the precomputed array, and add them from each flipping - * lines. - * For optimization purpose, the value returned is twice the number of flipped - * disc, to facilitate the computation of disc difference. - * - * @date 1998 - 2020 - * @author Richard Delorme - * @author Toshihiko Okuhara - * @version 4.4 - * - */ - -#include "bit.h" - -/** precomputed count flip array */ -const unsigned char COUNT_FLIP[8][256] = { - { - 0, 0, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 10, 10, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 12, 12, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 10, 10, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - 8, 8, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, 6, 6, 0, 0, 2, 2, 0, 0, 4, 4, 0, 0, 2, 2, 0, 0, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 8, 8, 8, 8, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 10, 10, 10, 10, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 8, 8, 8, 8, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - 6, 6, 6, 6, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, - }, - { - 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 8, 10, 8, 8, 8, 10, 8, 8, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 6, 8, 6, 6, 6, 8, 6, 6, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - 4, 6, 4, 4, 4, 6, 4, 4, 0, 2, 0, 0, 0, 2, 0, 0, 2, 4, 2, 2, 2, 4, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, - }, - { - 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 6, 10, 8, 8, 6, 6, 6, 6, 6, 10, 8, 8, 6, 6, 6, 6, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 4, 8, 6, 6, 4, 4, 4, 4, 4, 8, 6, 6, 4, 4, 4, 4, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - 2, 6, 4, 4, 2, 2, 2, 2, 2, 6, 4, 4, 2, 2, 2, 2, 0, 4, 2, 2, 0, 0, 0, 0, 0, 4, 2, 2, 0, 0, 0, 0, - }, - { - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 4, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 6, 6, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 10, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 12, 10, 10, 8, 8, 8, 8, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, -}; - -/* bit masks for diagonal lines */ -const V4DI mask_dvhd[64] = { - {{ 0x0000000000000001, 0x00000000000000ff, 0x0101010101010101, 0x8040201008040201 }}, - {{ 0x0000000000000102, 0x00000000000000ff, 0x0202020202020202, 0x0080402010080402 }}, - {{ 0x0000000000010204, 0x00000000000000ff, 0x0404040404040404, 0x0000804020100804 }}, - {{ 0x0000000001020408, 0x00000000000000ff, 0x0808080808080808, 0x0000008040201008 }}, - {{ 0x0000000102040810, 0x00000000000000ff, 0x1010101010101010, 0x0000000080402010 }}, - {{ 0x0000010204081020, 0x00000000000000ff, 0x2020202020202020, 0x0000000000804020 }}, - {{ 0x0001020408102040, 0x00000000000000ff, 0x4040404040404040, 0x0000000000008040 }}, - {{ 0x0102040810204080, 0x00000000000000ff, 0x8080808080808080, 0x0000000000000080 }}, - {{ 0x0000000000000102, 0x000000000000ff00, 0x0101010101010101, 0x4020100804020100 }}, - {{ 0x0000000000010204, 0x000000000000ff00, 0x0202020202020202, 0x8040201008040201 }}, - {{ 0x0000000001020408, 0x000000000000ff00, 0x0404040404040404, 0x0080402010080402 }}, - {{ 0x0000000102040810, 0x000000000000ff00, 0x0808080808080808, 0x0000804020100804 }}, - {{ 0x0000010204081020, 0x000000000000ff00, 0x1010101010101010, 0x0000008040201008 }}, - {{ 0x0001020408102040, 0x000000000000ff00, 0x2020202020202020, 0x0000000080402010 }}, - {{ 0x0102040810204080, 0x000000000000ff00, 0x4040404040404040, 0x0000000000804020 }}, - {{ 0x0204081020408000, 0x000000000000ff00, 0x8080808080808080, 0x0000000000008040 }}, - {{ 0x0000000000010204, 0x0000000000ff0000, 0x0101010101010101, 0x2010080402010000 }}, - {{ 0x0000000001020408, 0x0000000000ff0000, 0x0202020202020202, 0x4020100804020100 }}, - {{ 0x0000000102040810, 0x0000000000ff0000, 0x0404040404040404, 0x8040201008040201 }}, - {{ 0x0000010204081020, 0x0000000000ff0000, 0x0808080808080808, 0x0080402010080402 }}, - {{ 0x0001020408102040, 0x0000000000ff0000, 0x1010101010101010, 0x0000804020100804 }}, - {{ 0x0102040810204080, 0x0000000000ff0000, 0x2020202020202020, 0x0000008040201008 }}, - {{ 0x0204081020408000, 0x0000000000ff0000, 0x4040404040404040, 0x0000000080402010 }}, - {{ 0x0408102040800000, 0x0000000000ff0000, 0x8080808080808080, 0x0000000000804020 }}, - {{ 0x0000000001020408, 0x00000000ff000000, 0x0101010101010101, 0x1008040201000000 }}, - {{ 0x0000000102040810, 0x00000000ff000000, 0x0202020202020202, 0x2010080402010000 }}, - {{ 0x0000010204081020, 0x00000000ff000000, 0x0404040404040404, 0x4020100804020100 }}, - {{ 0x0001020408102040, 0x00000000ff000000, 0x0808080808080808, 0x8040201008040201 }}, - {{ 0x0102040810204080, 0x00000000ff000000, 0x1010101010101010, 0x0080402010080402 }}, - {{ 0x0204081020408000, 0x00000000ff000000, 0x2020202020202020, 0x0000804020100804 }}, - {{ 0x0408102040800000, 0x00000000ff000000, 0x4040404040404040, 0x0000008040201008 }}, - {{ 0x0810204080000000, 0x00000000ff000000, 0x8080808080808080, 0x0000000080402010 }}, - {{ 0x0000000102040810, 0x000000ff00000000, 0x0101010101010101, 0x0804020100000000 }}, - {{ 0x0000010204081020, 0x000000ff00000000, 0x0202020202020202, 0x1008040201000000 }}, - {{ 0x0001020408102040, 0x000000ff00000000, 0x0404040404040404, 0x2010080402010000 }}, - {{ 0x0102040810204080, 0x000000ff00000000, 0x0808080808080808, 0x4020100804020100 }}, - {{ 0x0204081020408000, 0x000000ff00000000, 0x1010101010101010, 0x8040201008040201 }}, - {{ 0x0408102040800000, 0x000000ff00000000, 0x2020202020202020, 0x0080402010080402 }}, - {{ 0x0810204080000000, 0x000000ff00000000, 0x4040404040404040, 0x0000804020100804 }}, - {{ 0x1020408000000000, 0x000000ff00000000, 0x8080808080808080, 0x0000008040201008 }}, - {{ 0x0000010204081020, 0x0000ff0000000000, 0x0101010101010101, 0x0402010000000000 }}, - {{ 0x0001020408102040, 0x0000ff0000000000, 0x0202020202020202, 0x0804020100000000 }}, - {{ 0x0102040810204080, 0x0000ff0000000000, 0x0404040404040404, 0x1008040201000000 }}, - {{ 0x0204081020408000, 0x0000ff0000000000, 0x0808080808080808, 0x2010080402010000 }}, - {{ 0x0408102040800000, 0x0000ff0000000000, 0x1010101010101010, 0x4020100804020100 }}, - {{ 0x0810204080000000, 0x0000ff0000000000, 0x2020202020202020, 0x8040201008040201 }}, - {{ 0x1020408000000000, 0x0000ff0000000000, 0x4040404040404040, 0x0080402010080402 }}, - {{ 0x2040800000000000, 0x0000ff0000000000, 0x8080808080808080, 0x0000804020100804 }}, - {{ 0x0001020408102040, 0x00ff000000000000, 0x0101010101010101, 0x0201000000000000 }}, - {{ 0x0102040810204080, 0x00ff000000000000, 0x0202020202020202, 0x0402010000000000 }}, - {{ 0x0204081020408000, 0x00ff000000000000, 0x0404040404040404, 0x0804020100000000 }}, - {{ 0x0408102040800000, 0x00ff000000000000, 0x0808080808080808, 0x1008040201000000 }}, - {{ 0x0810204080000000, 0x00ff000000000000, 0x1010101010101010, 0x2010080402010000 }}, - {{ 0x1020408000000000, 0x00ff000000000000, 0x2020202020202020, 0x4020100804020100 }}, - {{ 0x2040800000000000, 0x00ff000000000000, 0x4040404040404040, 0x8040201008040201 }}, - {{ 0x4080000000000000, 0x00ff000000000000, 0x8080808080808080, 0x0080402010080402 }}, - {{ 0x0102040810204080, 0xff00000000000000, 0x0101010101010101, 0x0100000000000000 }}, - {{ 0x0204081020408000, 0xff00000000000000, 0x0202020202020202, 0x0201000000000000 }}, - {{ 0x0408102040800000, 0xff00000000000000, 0x0404040404040404, 0x0402010000000000 }}, - {{ 0x0810204080000000, 0xff00000000000000, 0x0808080808080808, 0x0804020100000000 }}, - {{ 0x1020408000000000, 0xff00000000000000, 0x1010101010101010, 0x1008040201000000 }}, - {{ 0x2040800000000000, 0xff00000000000000, 0x2020202020202020, 0x2010080402010000 }}, - {{ 0x4080000000000000, 0xff00000000000000, 0x4040404040404040, 0x4020100804020100 }}, - {{ 0x8000000000000000, 0xff00000000000000, 0x8080808080808080, 0x8040201008040201 }} -}; - -/** - * Count last flipped discs when playing on the last empty. - * - * @param pos the last empty square. - * @param P player's disc pattern. - * @return flipped disc count. - */ - -int last_flip(int pos, unsigned long long P) -{ - unsigned char n_flips; - unsigned int t; - const unsigned char *COUNT_FLIP_X = COUNT_FLIP[pos & 7]; - const unsigned char *COUNT_FLIP_Y = COUNT_FLIP[pos >> 3]; -#ifdef AVXLASTFLIP - __m256i MP = _mm256_and_si256(_mm256_broadcastq_epi64(_mm_cvtsi64_si128(P)), mask_dvhd[pos].v4); - - n_flips = COUNT_FLIP_X[(unsigned char) (P >> (pos & 0x38))]; - t = _mm256_movemask_epi8(_mm256_sub_epi8(_mm256_setzero_si256(), MP)); - n_flips += COUNT_FLIP_Y[(unsigned char) t]; - t >>= 16; -#else - __m128i PP, II; - - PP = _mm_cvtsi64_si128(P); - PP = _mm_unpacklo_epi64(PP, PP); - II = _mm_sad_epu8(_mm_and_si128(PP, mask_dvhd[pos].v2[0]), _mm_setzero_si128()); - n_flips = COUNT_FLIP_X[_mm_cvtsi128_si32(II)]; - n_flips += COUNT_FLIP_X[_mm_extract_epi16(II, 4)]; - t = _mm_movemask_epi8(_mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128(PP, mask_dvhd[pos].v2[1]))); -#endif - n_flips += COUNT_FLIP_Y[t >> 8]; - n_flips += COUNT_FLIP_Y[(unsigned char) t]; - - return n_flips; -} -<<<<<<< HEAD ->>>>>>> 1dc032e (Improve visual c compatibility) -======= - ->>>>>>> 6506166 (More SSE optimizations) -======= ->>>>>>> 3e1ed4f (fix cr/lf in repository to lf) diff --git a/src/edax.c b/src/edax.c index 5801328..e25b975 100644 --- a/src/edax.c +++ b/src/edax.c @@ -173,15 +173,7 @@ void help_options(void) " verbose [n] set Edax verbosity (default 1).\n" " noise [n] start displaying Edax search result from this depth\n (default 5).\n" " witdh [n] display edax search results using characters\n (default 80).\n" -<<<<<<< HEAD -<<<<<<< HEAD " hash-table-size [n] set hashtable size (default 22 bits).\n" -======= - " hash-table-size [n] set hashtable size (default 18 bits).\n" ->>>>>>> 1b29848 (fix & optimize 32 bit build; other minor mods) -======= - " hash-table-size [n] set hashtable size (default 22 bits).\n" ->>>>>>> 42dc349 (add sfence to be sure; correct comments) " n-tasks [n] control the number of parallel threads used in searching\n (default 1).\n" " l|level [n] search using limited depth (default 21).\n" " t|game-time